| #! /usr/bin/python |
| |
import os
import re
import string
import subprocess
import sys
| |
## Global analysis state, filled in by main().
##
## symbols: hash from symbol name to the list of symbols with that name,
## where each symbol is itself represented by a list laid out as
##   0 symname, 1 address, 2 references, 3 filename, 4 reached,
##   5 referenced-by, 6 backlinked, 7 approx size
symbols = {}
## roots: hash whose keys are the names reachability starts from (exported
## symbols plus the hardcoded vtable roots); every value is 1
roots = {}
| |
def createBacklinks(name, syms):
    """Register `name` as a referencer of every symbol its records refer to.

    name -- the symbol name shared by all records in `syms`.
    syms -- list of symbol records; index 2 of each record is the list of
            symbol names that record references.

    For each referenced name found in the global `symbols` table, `name` is
    appended to the referenced-by list (index 5) of every record stored
    under that name, at most once per record.  Referenced names that are
    missing from the table are silently skipped.
    """
    for sym in syms:
        for ref in sym[2]:
            ## dict.get() replaces the has_key()-then-index pair; has_key()
            ## is deprecated and no longer exists in Python 3
            for target in symbols.get(ref, ()):
                referenced_by = target[5]
                if name not in referenced_by:
                    referenced_by.append(name)
| |
def markSymbol(frm, name):
    """Mark `name`, and everything reachable from it, as reached.

    frm  -- name of the referencing symbol; only used in the diagnostic
            printed for ambiguous references.
    name -- name of the symbol to mark.

    Every record stored under a visited name in the global `symbols` table
    gets its reached flag (index 4) set to 1, and the names in its
    reference list (index 2) are visited in turn.  A name that is absent
    from the table is reported and skipped.
    """
    ## An explicit work stack is used instead of recursion so that a deep
    ## call graph cannot exceed the interpreter's recursion limit.
    pending = [(frm, name)]
    while pending:
        referrer, target = pending.pop()

        syms = symbols.get(target)
        if syms is None:
            ## report the missing name and move on rather than failing with
            ## a KeyError on the table lookup
            print("%s referenced but was not in the objdump" % target)
            continue

        ## print ambiguous references unless they are internal noise like ".L129"
        if len(syms) > 1 and not target.startswith('.'):
            print("Reference to symbol '%s' from '%s' is ambiguous, marking all '%s'" % (target, referrer, target))
            print(syms)

        newly_reached = []
        for s in syms:
            if s[4]:
                continue  ## already marked
            s[4] = 1
            for ref in s[2]:
                newly_reached.append((s[0], ref))

        ## pushed in reverse so references are popped in their listed order
        newly_reached.reverse()
        pending.extend(newly_reached)
| |
def cmpFilename(a, b):
    """Three-way comparison of two entries by filename, then by symbol name.

    a, b -- sequences whose index 1 is a filename and index 0 a symbol name.

    Returns -1, 0 or 1 when `a` orders before, equal to, or after `b`.
    """
    key_a = (a[1], a[0])
    key_b = (b[1], b[0])
    ## bool arithmetic gives the classic -1/0/1 result without relying on
    ## the cmp() builtin, which Python 3 no longer provides
    return (key_a > key_b) - (key_a < key_b)
| |
def sizeAsString(bytes):
    """Format a byte count as a short human-readable string.

    bytes -- the size in bytes (the name shadows the builtin but is kept
             because it is part of the function's signature).

    Returns "<n> bytes" below 1024, a value in K below 1024*1024, and a
    value in M otherwise; K and M values carry one decimal place.
    """
    if bytes < 1024:
        return "%d bytes" % bytes
    ## fixed-point formatting: "%.2g" falls back to exponent notation once
    ## the value has three or more integer digits (150K -> "1.5e+02K")
    if bytes < 1024 * 1024:
        return "%.1fK" % (bytes / 1024.0)
    return "%.1fM" % (bytes / 1024.0 / 1024.0)
| |
def printLost():
    """Print every symbol that was never marked as reached, grouped by file.

    Reads the global `symbols` table.  Names starting with '.' are skipped.
    For each remaining unreached symbol the name, approximate size and
    referencers are printed under its source file ("unknown file" when no
    file was recorded), followed by one summary line per file that has a
    non-zero total and a grand total.
    """
    lost = []
    for (name, syms) in symbols.items():
        s = syms[0] ## we always mark all or none for now
        if not s[4] and name[0] != '.': ## skip .L129 type symbols
            lost.append((name, s[3] or "unknown file", s[5], s[7]))

    ## order by filename, then symbol name
    lost.sort(key=lambda entry: (entry[1], entry[0]))

    file_summaries = []
    total_unused = 0
    total_this_file = 0
    filename = None
    for (name, next_filename, referrers, size) in lost:
        if next_filename != filename:
            ## a new file starts: close out the running total of the old one
            if total_this_file > 0:
                file_summaries.append(" %s may be unused in %s" % (sizeAsString(total_this_file), filename))
            print("%s has these symbols not reachable from exported symbols:" % next_filename)
            filename = next_filename
            total_this_file = 0
        print(" %s %s" % (name, sizeAsString(size)))
        total_unused = total_unused + size
        total_this_file = total_this_file + size
        for trace in referrers:
            print(" referenced from %s" % trace)

    ## the last file never sees a filename change, so its summary has to be
    ## flushed here or it would be missing from the report
    if total_this_file > 0:
        file_summaries.append(" %s may be unused in %s" % (sizeAsString(total_this_file), filename))

    for fs in file_summaries:
        print(fs)
    print("%s total may be unused" % sizeAsString(total_unused))
| |
def _runCommand(argv):
    """Run the program given by the list `argv` and return its standard
    output as a list of lines, each keeping its line ending.

    The command is executed directly rather than through a shell, so the
    object file name is never interpreted by a shell.  Raises OSError if
    the program cannot be started; a non-zero exit status only produces a
    warning on stderr.
    """
    print("Running: %s" % " ".join(argv))
    proc = subprocess.Popen(argv, stdout=subprocess.PIPE,
                            universal_newlines=True)
    ## communicate() reads all output, waits for exit and closes the pipe
    output = proc.communicate()[0]
    if proc.returncode != 0:
        sys.stderr.write("Warning: %s exited with status %d\n"
                         % (argv[0], proc.returncode))
    return output.splitlines(True)


def _scanDisassembly(lines):
    """Fill the global `symbols` table from `objdump -D --demangle -l` output.

    Records each symbol, the names it references, its source file and an
    approximate size (distance to the next symbol header in the dump).
    Raises Exception if one symbol is attributed to two different files.
    """
    ## 0001aa44 <_dbus_message_get_network_data>:
    sym_re = re.compile(r'([0-9a-f]+) <([^>]+)>:')
    ## 1aa49: e8 00 00 00 00 call 1aa4e <_dbus_message_get_network_data+0xa>
    ref_re = re.compile(r' <([^>]+)> *$')
    ## /home/hp/dbus-cvs/dbus/dbus/dbus-message.c:139
    file_re = re.compile(r'^(\/[^:].*):[0-9]+$')
    ## _dbus_message_get_network_data+0xa
    funcname_re = re.compile(r'([^+]+)\+[0-9a-fx]+')

    current_sym = None
    for line in lines:
        match = sym_re.match(line)
        if match:
            ## 0 symname, 1 address, 2 references, 3 filename, 4 reached, 5 referenced-by 6 backlinked 7 approx size
            item = [match.group(2), match.group(1), [], None, 0, [], 0, 0]
            symbols.setdefault(item[0], []).append(item)

            if current_sym:
                ## the gap between two consecutive headers is the extent of
                ## the EARLIER symbol, so it is stored on current_sym
                size = int(item[1], 16) - int(current_sym[1], 16)
                if size < 0:
                    print("Computed negative size %d for %s" % (size, current_sym[0]))
                    size = 0
                current_sym[7] = size

            current_sym = item
            continue

        if not current_sym:
            ## nothing to attach references or file names to yet
            continue

        match = ref_re.search(line)
        if match:
            target = match.group(1)
            match = funcname_re.match(target)
            if match:
                ## dump the "+address"
                target = match.group(1)
            ## skip self-references
            if target != current_sym[0]:
                current_sym[2].append(target)
            continue

        match = file_re.match(line)
        if match:
            source_file = match.group(1)
            if source_file.startswith('/usr/include'):
                ## inlined libc thingy
                pass
            elif current_sym[0].startswith('.debug'):
                ## debug info
                pass
            elif current_sym[3] and current_sym[3] != source_file:
                raise Exception("%s in both %s and %s" % (current_sym[0], current_sym[3], source_file))
            else:
                current_sym[3] = source_file


def _readExports(lines):
    """Add every 'T' symbol listed in `nm -D` output to the global `roots`.

    Raises Exception if the same name is listed twice.
    """
    ## 00005410 T dbus_address_entries_free
    dynsym_re = re.compile(r'T ([^ \n]+)$')
    for line in lines:
        match = dynsym_re.search(line)
        if match:
            name = match.group(1)
            if name in roots:
                raise Exception("symbol %s exported twice?" % name)
            roots[name] = 1


def main():
    """Report symbols of an object file that its exports cannot reach.

    Takes the object file path from sys.argv[1] and exits with status 1
    and a usage message when it is missing.  Runs objdump and nm on the
    file, marks everything reachable from the exported symbols plus a
    hardcoded list of vtable entry points, then prints the unreached rest.
    """
    if len(sys.argv) < 2:
        sys.stderr.write("Usage: %s OBJECT-FILE\n" % sys.argv[0])
        sys.exit(1)
    filename = sys.argv[1]

    ## first we find which functions reference which other functions
    _scanDisassembly(_runCommand(['objdump', '-D', '--demangle', '-l', filename]))

    ## now we need to find the roots (exported symbols)
    _readExports(_runCommand(['nm', '-D', filename]))

    print("%d symbols exported from this object" % len(roots))

    ## these functions are used only indirectly, so we don't
    ## notice they are used. Manually add them as roots...
    vtable_roots = ['unix_finalize',
                    'unix_handle_watch',
                    'unix_disconnect',
                    'unix_connection_set',
                    'unix_do_iteration',
                    'unix_live_messages_changed',
                    'unix_get_unix_fd',
                    'handle_client_data_cookie_sha1_mech',
                    'handle_client_data_external_mech',
                    'handle_server_data_cookie_sha1_mech',
                    'handle_server_data_external_mech',
                    'handle_client_initial_response_cookie_sha1_mech',
                    'handle_client_initial_response_external_mech',
                    'handle_client_shutdown_cookie_sha1_mech',
                    'handle_client_shutdown_external_mech',
                    'handle_server_shutdown_cookie_sha1_mech',
                    'handle_server_shutdown_external_mech'
                    ]

    for vr in vtable_roots:
        if vr in roots:
            raise Exception("%s is already a root" % vr)
        roots[vr] = 1

    for k in roots:
        markSymbol("root", k)

    for (k, v) in symbols.items():
        createBacklinks(k, v)

    print("""

The symbols mentioned below don't appear to be reachable starting from
the dynamic exports of the library. However, this program is pretty
dumb; a limitation that creates false positives is that it can only
trace 'reachable' through hardcoded function calls, if a function is
called only through a vtable, it won't be marked reachable (and
neither will its children in the call graph).

Also, the sizes mentioned are more or less completely bogus.

""")

    print("The following are hardcoded in as vtable roots: %s" % vtable_roots)

    printLost()
| |
## Run the analysis only when this file is executed as a script,
## not when it is imported as a module.
if __name__ == "__main__":
    main()