11 #include <sys/types.h>
18 extern int verbose_flag;
20 typedef struct source_t source_t;
28 typedef struct next_export_t {
37 char *name; /* full path name of this executable file */
38 /* ELF-related information: */
43 int shnum; /* number of sections */
45 section_info_t symtab;
46 section_info_t strtab;
47 section_info_t dynamic;
50 section_info_t *relocations;
51 int num_relocations; /* number of relocs (<= relocations_size) */
52 int relocations_size; /* size of array -- NOT number of relocs! */
54 /* satisfied_execs: array containing pointers to the libraries or
55 executables that this executable satisfies symbol references for. */
56 source_t **satisfied_execs;
57 int num_satisfied_execs;
58 int satisfied_execs_size;
60 /* satisfied: array is parallel to symbol table; for each undefined symbol
61 in that array, we maintain a flag stating whether that symbol has been
62 satisfied, and if so, by which library. This applies both to executable
67 /* exports: array is parallel to symbol table; for each global symbol
68 in that array, we maintain a flag stating whether that symbol satisfies
69 a dependency in some other file. num_syms is the length of the exports
70 array, as well as the satisfied array. This applies to libraries only.
72 next_export: this is a bit tricky. We use this field to maintain a
73 linked list of source_t for each global symbol of a shared library.
74 A shared library's global symbol at index N has the property that
75 exports[N] is the head of a linked list (threaded through next_export)
76 of all source_t that this symbol resolves a reference to. For example,
77 if symbol printf has index 1000 in libc.so, and an executable A and
78 library L use printf, then the source_t entry corresponding to libc.so
79 will have exports[1000] be a linked list that contains the nodes for
80 application A and library L.
83 next_export_t *exports;
84 /* num_exported is the number of symbols in this file actually used by
85 somebody else; it's not the size of the exports array. */
87 next_export_t *next_export;
91 int num_syms; /* number of symbols in symbol table. This is the length of
92 both exports[] and satisfied[] arrays. */
94 /* This is an array that contains one element for each library dependency
95 listed in the executable or shared library. */
96 source_t **lib_deps; /* list of library dependencies */
97 int num_lib_deps; /* actual number of library dependencies */
98 int lib_deps_size; /* size of lib_deps array--NOT actual number of deps! */
102 static source_t *sources = NULL;
104 static char * find_file(const char *libname,
105 char **lib_lookup_dirs,
106 int num_lib_lookup_dirs);
108 static inline source_t* find_source(const char *name,
109 char **lib_lookup_dirs,
110 int num_lib_lookup_dirs) {
111 source_t *trav = sources;
112 char *full = find_file(name, lib_lookup_dirs, num_lib_lookup_dirs);
113 FAILIF(full == NULL, "Cannot construct full path for file [%s]!\n", name);
115 if (!strcmp(trav->name, full))
123 static inline void add_to_sources(source_t *src) {
128 static source_t* init_source(char *full_path) {
129 source_t *source = (source_t *)CALLOC(1, sizeof(source_t));
132 source->name = full_path;
135 INFO("Opening %s...\n", full_path);
136 source->elf_fd = open(full_path, O_RDONLY);
137 FAILIF(source->elf_fd < 0, "open(%s): %s (%d)\n",
141 INFO("Calling elf_begin(%s)...\n", full_path);
142 source->elf = elf_begin(source->elf_fd, ELF_C_READ, NULL);
143 FAILIF_LIBELF(source->elf == NULL, elf_begin);
145 /* libelf can recognize COFF and A.OUT formats, but we handle only ELF. */
146 if (elf_kind(source->elf) != ELF_K_ELF) {
147 ERROR("Input file %s is not in ELF format!\n", full_path);
151 /* Make sure this is a shared library or an executable. */
153 INFO("Making sure %s is a shared library or an executable...\n",
155 FAILIF_LIBELF(0 == gelf_getehdr(source->elf, &source->elf_hdr), gelf_getehdr);
156 FAILIF(source->elf_hdr.e_type != ET_DYN &&
157 source->elf_hdr.e_type != ET_EXEC,
158 "%s must be a shared library (elf type is %d, expecting %d).\n",
160 source->elf_hdr.e_type,
164 /* Get the index of the section-header-strings-table section. */
165 FAILIF_LIBELF(elf_getshstrndx (source->elf, &source->shstrndx) < 0,
168 FAILIF_LIBELF(elf_getshnum (source->elf, &source->shnum) < 0, elf_getshnum);
170 /* Find various sections. */
173 GElf_Shdr *shdr, shdr_mem;
174 INFO("Locating %d sections in %s...\n", source->shnum, full_path);
175 for (scnidx = 1; scnidx < source->shnum; scnidx++) {
176 scn = elf_getscn(source->elf, scnidx);
177 FAILIF_LIBELF(NULL == scn, elf_getscn);
178 shdr = gelf_getshdr(scn, &shdr_mem);
179 FAILIF_LIBELF(NULL == shdr, gelf_getshdr);
180 INFO("\tfound section [%s]...\n", elf_strptr(source->elf, source->shstrndx, shdr->sh_name));
181 if (shdr->sh_type == SHT_DYNSYM) {
182 source->symtab.scn = scn;
183 source->symtab.data = elf_getdata(scn, NULL);
184 FAILIF_LIBELF(NULL == source->symtab.data, elf_getdata);
185 memcpy(&source->symtab.shdr, shdr, sizeof(GElf_Shdr));
187 /* The sh_link field of the section header of the symbol table
188 contains the index of the associated strings table. */
189 source->strtab.scn = elf_getscn(source->elf,
190 source->symtab.shdr.sh_link);
191 FAILIF_LIBELF(NULL == source->strtab.scn, elf_getscn);
192 FAILIF_LIBELF(NULL == gelf_getshdr(scn, &source->strtab.shdr),
194 source->strtab.data = elf_getdata(source->strtab.scn, NULL);
195 FAILIF_LIBELF(NULL == source->strtab.data, elf_getdata);
197 else if (shdr->sh_type == SHT_DYNAMIC) {
198 source->dynamic.scn = scn;
199 source->dynamic.data = elf_getdata(scn, NULL);
200 FAILIF_LIBELF(NULL == source->symtab.data, elf_getdata);
201 memcpy(&source->dynamic.shdr, shdr, sizeof(GElf_Shdr));
203 else if (shdr->sh_type == SHT_HASH) {
204 source->hash.scn = scn;
205 source->hash.data = elf_getdata(scn, NULL);
206 FAILIF_LIBELF(NULL == source->hash.data, elf_getdata);
207 memcpy(&source->hash.shdr, shdr, sizeof(GElf_Shdr));
209 else if (shdr->sh_type == SHT_REL || shdr->sh_type == SHT_RELA) {
210 if (source->num_relocations == source->relocations_size) {
211 source->relocations_size += 5;
212 source->relocations =
213 (section_info_t *)REALLOC(source->relocations,
214 source->relocations_size *
215 sizeof(section_info_t));
217 section_info_t *reloc =
218 source->relocations + source->num_relocations;
220 reloc->data = elf_getdata(scn, NULL);
221 FAILIF_LIBELF(NULL == reloc->data, elf_getdata);
222 memcpy(&reloc->shdr, shdr, sizeof(GElf_Shdr));
223 source->num_relocations++;
227 if (source->dynamic.scn == NULL) {
228 INFO("File [%s] does not have a dynamic section!\n", full_path);
232 FAILIF(source->symtab.scn == NULL,
233 "File [%s] does not have a dynamic symbol table!\n",
236 FAILIF(source->hash.scn == NULL,
237 "File [%s] does not have a hash table!\n",
239 FAILIF(source->hash.shdr.sh_link != elf_ndxscn(source->symtab.scn),
240 "Hash points to section %d, not to %d as expected!\n",
241 source->hash.shdr.sh_link,
244 /* Now, find out how many symbols we have and allocate the array of
247 NOTE: We don't count the number of undefined symbols here; we will
248 iterate over the symbol table later, and count them then, when it is
251 size_t symsize = gelf_fsize (source->elf,
253 1, source->elf_hdr.e_version);
256 source->num_syms = source->symtab.data->d_size / symsize;
257 source->satisfied = (source_t **)CALLOC(source->num_syms,
259 source->exports = (source_t **)CALLOC(source->num_syms,
260 sizeof(next_export_t));
262 source->num_exported = 0;
263 source->satisfied_execs = NULL;
264 source->num_satisfied_execs = 0;
265 source->satisfied_execs_size = 0;
267 add_to_sources(source);
271 static void destroy_source(source_t *source) {
272 FREE(source->satisfied_execs);
273 FREE(source->satisfied);
274 FREE(source->exports);
275 FREE(source->next_export);
276 FREE(source->lib_deps); /* list of library dependencies */
277 FAILIF_LIBELF(elf_end(source->elf), elf_end);
278 FAILIF(close(source->elf_fd) < 0, "Could not close file %s: %s (%d)!\n",
279 source->name, strerror(errno), errno);
284 static void print_needed_libs(source_t *source)
287 for (idx = 0; idx < source->num_lib_deps; idx++) {
290 source->lib_deps[idx]->name);
294 static int is_symbol_imported(source_t *source,
298 const char *symname = elf_strptr(source->elf,
299 elf_ndxscn(source->strtab.scn),
302 /* A symbol is imported by an executable or a library if it is undefined
303 and is either global or weak. There is an additional case for
304 executables that we will check below. */
305 if (sym->st_shndx == SHN_UNDEF &&
306 (GELF_ST_BIND(sym->st_info) == STB_GLOBAL ||
307 GELF_ST_BIND(sym->st_info) == STB_WEAK)) {
308 INFO("*** symbol [%s:%s] is imported (UNDEFIEND).\n",
314 #ifdef ARM_SPECIFIC_HACKS
315 /* A symbol is imported by an executable if it is marked as an undefined
316 symbol--this is standard to all ELF formats. Alternatively, according
317 to the ARM specifications, a symbol in a BSS section that is also marked
318 by an R_ARM_COPY relocation is also imported. */
320 if (source->elf_hdr.e_type != ET_EXEC) {
321 INFO("is_symbol_imported(): [%s] is a library, "
322 "no further checks.\n", source->name);
326 /* Is the symbol in the BSS section, and is there a COPY relocation on
328 INFO("*** [%s:%s] checking further to see if symbol is imported.\n",
329 source->name, symname);
330 if (sym->st_shndx < source->shnum) {
331 /* Is it the .bss section? */
332 Elf_Scn *scn = elf_getscn(source->elf, sym->st_shndx);
333 FAILIF_LIBELF(NULL == scn, elf_getscn);
334 GElf_Shdr *shdr, shdr_mem;
335 shdr = gelf_getshdr(scn, &shdr_mem);
336 FAILIF_LIBELF(NULL == shdr, gelf_getshdr);
337 if (!strcmp(".bss", elf_strptr(source->elf,
341 /* Is there an R_ARM_COPY relocation on this symbol? Iterate
342 over the list of relocation sections and scan each section for
343 an entry that matches the symbol. */
345 for (idx = 0; idx < source->num_relocations; idx++) {
346 section_info_t *reloc = source->relocations + idx;
347 /* Does the relocation section refer to the symbol table in
348 which this symbol resides, and does it relocate the .bss
350 if (reloc->shdr.sh_link == elf_ndxscn(source->symtab.scn) &&
351 reloc->shdr.sh_info == sym->st_shndx)
353 /* Go over the relocations and see if any of them matches
355 size_t nrels = reloc->shdr.sh_size / reloc->shdr.sh_entsize;
356 size_t relidx, newidx;
357 if (reloc->shdr.sh_type == SHT_REL) {
358 for (newidx = relidx = 0; relidx < nrels; ++relidx) {
360 FAILIF_LIBELF(gelf_getrel (reloc->data,
364 if (GELF_R_TYPE(rel_mem.r_info) == R_ARM_COPY &&
365 GELF_R_SYM (rel_mem.r_info) == symidx)
367 INFO("*** symbol [%s:%s] is imported "
368 "(DEFINED, REL-COPY-RELOCATED).\n",
373 } /* for each rel entry... */
375 for (newidx = relidx = 0; relidx < nrels; ++relidx) {
377 FAILIF_LIBELF(gelf_getrela (reloc->data,
381 if (GELF_R_TYPE(rel_mem.r_info) == R_ARM_COPY &&
382 GELF_R_SYM (rel_mem.r_info) == symidx)
384 INFO("*** symbol [%s:%s] is imported "
385 "(DEFINED, RELA-COPY-RELOCATED).\n",
390 } /* for each rela entry... */
391 } /* if rel else rela */
396 #endif/*ARM_SPECIFIC_HACKS*/
401 static void resolve(source_t *source) {
402 /* Iterate the symbol table. For each undefined symbol, scan the
403 list of dependencies till we find a global symbol in one of them that
404 satisfies the undefined reference. At this point, we update both the
405 satisfied[] array of the sources entry, as well as the exports array of
406 the dependency where we found the match.
409 GElf_Sym *sym, sym_mem;
411 for (symidx = 0; symidx < source->num_syms; symidx++) {
412 sym = gelf_getsymshndx(source->symtab.data,
417 FAILIF_LIBELF(NULL == sym, gelf_getsymshndx);
418 if (is_symbol_imported(source, sym, symidx))
420 /* This is an undefined symbol. Go over the list of libraries
424 source_t *last_found = NULL;
425 const char *symname = elf_strptr(source->elf,
426 elf_ndxscn(source->strtab.scn),
428 for (libidx = 0; libidx < source->num_lib_deps; libidx++) {
429 source_t *lib = source->lib_deps[libidx];
430 int lib_symidx = hash_lookup(lib->elf,
435 if (STN_UNDEF != lib_symidx)
437 /* We found the symbol--now check to see if it is global
438 or weak. If this is the case, then the symbol satisfies
440 GElf_Sym *lib_sym, lib_sym_mem;
441 lib_sym = gelf_getsymshndx(lib->symtab.data,
446 FAILIF_LIBELF(NULL == lib_sym, gelf_getsymshndx);
448 if(lib_sym->st_shndx != STN_UNDEF &&
449 (GELF_ST_BIND(lib_sym->st_info) == STB_GLOBAL ||
450 GELF_ST_BIND(lib_sym->st_info) == STB_WEAK))
452 /* We found the symbol! Update the satisfied array at this
454 source->satisfied[symidx] = lib;
455 /* Now, link this structure into the linked list
456 corresponding to the found symbol in the library's
458 if (source->num_next_export == source->next_export_size) {
459 source->next_export_size += 30;
460 source->next_export =
461 (source_t **)REALLOC(source->next_export,
462 source->next_export_size *
463 sizeof(struct next_export_t));
465 source->next_export[source->num_next_export] = lib->exports[lib_symidx];
466 lib->exports[lib_symidx].source = source;
467 lib->exports[lib_symidx].next_idx = source->num_next_export;
469 source->num_next_export++;
472 INFO("[%s:%s (index %d)] satisfied by [%s] (index %d)\n",
481 ERROR("ERROR: multiple definitions found for [%s:%s]!\n",
482 source->name, symname);
483 ERROR("\tthis definition [%s]\n", lib->name);
485 ERROR("\tprevious definition [%s]\n", last_found->name);
489 if (!found) found = 1;
494 ERROR("ERROR: could not find match for %s:%s.\n",
498 } /* if we found the symbol... */
499 } /* for each symbol... */
502 static void print_used_symbols(source_t *source) {
504 int name_len = strlen(source->name);
505 static const char ext[] = ".syms";
506 char *filter = (char *)MALLOC(name_len + sizeof(ext));
507 strcpy(filter, source->name);
508 strcpy(filter + name_len, ext);
510 FILE *fp = fopen(filter, "w+");
512 "Can't open %s: %s (%d)\n",
514 strerror(errno), errno);
516 /* Is anybody using the symbols defined in source? */
518 if (source->num_exported > 0) {
519 INFO("[%s] exports %d symbols to %d libraries and executables.\n",
521 source->num_exported,
522 source->num_satisfied_execs);
524 for (symidx = 0; symidx < source->num_syms; symidx++) {
525 if (source->exports[symidx].source != NULL) {
526 GElf_Sym *sym, sym_mem;
527 sym = gelf_getsymshndx(source->symtab.data,
532 FAILIF_LIBELF(NULL == sym, gelf_getsymshndx);
533 fprintf(fp, "%s\n", elf_strptr(source->elf,
534 elf_ndxscn(source->strtab.scn),
539 else if (source->num_satisfied_execs > 0) {
541 /* Is the source listed as a dependency by anyone? If so, then the source exports no symbols
542 to anyone, but someone lists it as a dependency, which is unnecessary, so we print a warning.
545 ERROR("WARNING: [%s] is listed as a dependency in: ", source->name);
547 for (i = 0; i < source->num_satisfied_execs; i++) {
548 ERROR(" [%s],", source->satisfied_execs[i]->name);
550 ERROR(" but none of its symbols are used!.\n");
552 #if 0 /* This is not really an error--a library's symbols may not be used by anyone as specified in the ELF file,
553 but someone may still open a library via dlopen().
556 ERROR("WARNING: None of [%s]'s symbols are used by any library or executable!\n", source->name);
564 static void print_symbol_references(source_t *source) {
566 int name_len = strlen(source->name);
567 static const char ext[] = ".info";
568 char *filter = (char *)MALLOC(name_len + sizeof(ext));
569 strcpy(filter, source->name);
570 strcpy(filter + name_len, ext);
572 FILE *fp = fopen(filter, "w+");
574 "Can't open %s: %s (%d)\n",
576 strerror(errno), errno);
578 if (source->num_exported > 0) {
580 for (symidx = 0; symidx < source->num_syms; symidx++) {
581 if (source->exports[symidx].source != NULL) {
583 GElf_Sym *sym, sym_mem;
584 sym = gelf_getsymshndx(source->symtab.data,
589 FAILIF_LIBELF(NULL == sym, gelf_getsymshndx);
590 symname = elf_strptr(source->elf,
591 elf_ndxscn(source->strtab.scn),
593 fprintf(fp, "%s\n", symname);
594 next_export_t *export = &source->exports[symidx];
595 while (export->source != NULL) {
596 //fprintf(stderr, "%s:%s\n", symname, export->source->name);
597 fprintf(fp, "\t%s\n", export->source->name);
598 export = &export->source->next_export[export->next_idx];
608 static char * find_file(const char *libname,
609 char **lib_lookup_dirs,
610 int num_lib_lookup_dirs) {
611 if (libname[0] == '/') {
612 /* This is an absolute path name--just return it. */
613 INFO("ABSOLUTE PATH: [%s].\n", libname);
614 return strdup(libname);
616 /* First try the working directory. */
618 if ((fd = open(libname, O_RDONLY)) > 0) {
620 INFO("FOUND IN CURRENT DIR: [%s].\n", libname);
621 return strdup(libname);
623 /* Iterate over all library paths. For each path, append the file
624 name and see if there is a file at that place. If that fails,
628 while (num_lib_lookup_dirs--) {
629 size_t lib_len = strlen(*lib_lookup_dirs);
630 /* one extra character for the slash, and another for the
632 name = (char *)MALLOC(lib_len + strlen(libname) + 2);
633 strcpy(name, *lib_lookup_dirs);
635 strcpy(name + lib_len + 1, libname);
636 if ((fd = open(name, O_RDONLY)) > 0) {
638 INFO("FOUND: [%s] in [%s].\n", libname, name);
641 INFO("NOT FOUND: [%s] in [%s].\n", libname, name);
649 static source_t* process_library(const char *libname,
650 char **lib_lookup_dirs,
651 int num_lib_lookup_dirs) {
652 source_t *source = find_source(libname, lib_lookup_dirs, num_lib_lookup_dirs);
653 if (NULL == source) {
654 INFO("Processing [%s].\n", libname);
655 char *full = find_file(libname, lib_lookup_dirs, num_lib_lookup_dirs);
657 "Could not find [%s] in the current directory or in any of "
658 "the search paths!\n", libname);
659 source = init_source(full);
661 GElf_Dyn *dyn, dyn_mem;
664 source->dynamic.shdr.sh_size /
665 source->dynamic.shdr.sh_entsize;
667 for (dynidx = 0; dynidx < numdyn; dynidx++) {
668 dyn = gelf_getdyn (source->dynamic.data,
671 FAILIF_LIBELF(NULL == dyn, gelf_getdyn);
672 if (dyn->d_tag == DT_NEEDED) {
673 /* Process the needed library recursively. */
674 const char *dep_lib =
675 elf_strptr (source->elf,
676 source->dynamic.shdr.sh_link,
678 INFO("[%s] depends on [%s].\n", libname, dep_lib);
679 source_t *dep = process_library(dep_lib,
681 num_lib_lookup_dirs);
683 /* Tell dep that source depends on it. */
684 if (dep->num_satisfied_execs == dep->satisfied_execs_size) {
685 dep->satisfied_execs_size += 10;
686 dep->satisfied_execs =
687 REALLOC(dep->satisfied_execs,
688 dep->satisfied_execs_size *
691 dep->satisfied_execs[dep->num_satisfied_execs++] = source;
693 /* Add the library to the dependency list. */
694 if (source->num_lib_deps == source->lib_deps_size) {
695 source->lib_deps_size += 10;
696 source->lib_deps = REALLOC(source->lib_deps,
697 source->lib_deps_size *
700 source->lib_deps[source->num_lib_deps++] = dep;
702 } /* for each dynamic entry... */
704 } else INFO("[%s] has been processed already.\n", libname);
709 void lsd(char **execs, int num_execs,
710 int list_needed_libs,
712 char **lib_lookup_dirs, int num_lib_lookup_dirs) {
714 source_t *source; /* for general usage */
717 for (input_idx = 0; input_idx < num_execs; input_idx++) {
718 INFO("executable: [%s]\n", execs[input_idx]);
719 /* Here process_library() is actually processing the top-level executable
721 process_library(execs[input_idx], lib_lookup_dirs, num_lib_lookup_dirs);
722 /* if source is NULL, then the respective executable is static */
723 /* Mark the source as an executable */
724 } /* for each input executable... */
726 if (list_needed_libs) {
729 print_needed_libs(source);
730 source = source->next;
734 /* Now, for each entry in the sources array, iterate its symbol table. For
735 each undefined symbol, scan the list of dependencies till we find a
736 global symbol in one of them that satisfies the undefined reference.
737 At this point, we update both the satisfied[] array of the sources entry,
738 as well as the exports array of the dependency where we found the match.
744 source = source->next;
747 /* We are done! Since the end result of our calculations is a set of
748 symbols for each library that other libraries or executables link
749 against, we iterate over the set of libraries one last time, and for
750 each symbol that is marked as satisfying some dependence, we emit
751 a line with the symbol's name to a text file derived from the library's
752 name by appending the suffix .syms to it. */
756 /* If it's a library, print the results. */
757 if (source->elf_hdr.e_type == ET_DYN) {
758 print_used_symbols(source);
760 print_symbol_references(source);
762 source = source->next;
765 /* Free the resources--you can't do it in the loop above because function
766 print_symbol_references() accesses nodes other than the one being
771 source_t *old = source;
772 source = source->next;
773 /* Destroy the evidence. */