From ba7eb55ec661e18156d9d390b196b6659d0a4752 Mon Sep 17 00:00:00 2001 From: David Lamparter Date: Wed, 27 Nov 2019 23:19:10 +0100 Subject: [PATCH] tools: symalyzer Signed-off-by: David Lamparter --- .gitignore | 4 + tools/symalyzer.html | 347 +++++++++++++++++++++++++++++++++++++++ tools/symalyzer.py | 383 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 734 insertions(+) create mode 100644 tools/symalyzer.html create mode 100755 tools/symalyzer.py diff --git a/.gitignore b/.gitignore index 6cfe23e921..226dca09d0 100644 --- a/.gitignore +++ b/.gitignore @@ -33,6 +33,10 @@ /Makefile /Makefile.in +/symalyzer_report.html +/jquery-3.4.1.min.js +/jquery-3.4.1.min.js.tmp + ### autoconf/automake subdir stuff .deps diff --git a/tools/symalyzer.html b/tools/symalyzer.html new file mode 100644 index 0000000000..eefeee3b05 --- /dev/null +++ b/tools/symalyzer.html @@ -0,0 +1,347 @@ + + + + Symalyzer report + + + + + + + + + +
+
    +
  • S means the symbol is not used outside its own file. + It could either be completely unused or used locally. It might be appropriate to make it + static.
  • +
  • Z means the symbol is not used outside its own file, + and it's not visible to the outside of the library or daemon (i.e. ELF hidden linkage.) + It could still be completely unused, or used within the library. It might be appropriate to make it + static.
  • +
  • L means the symbol is used from other files in the library, + but not from outside. It might be appropriate to make it DSO_LOCAL.
  • +
  • A means the symbol is used from some other file, most likely a + loadable module. Note this is only flagged for symbols in executable files, not libraries.
  • +
+
    +
  • T are normal functions ("program Text")
  • +
  • + B (BSS),
    + C (Common),
    + D (Data)
    + are various types of writable global variables
  • +
  • R are read-only global variables ("Rodata")
  • +
+
+
+ {%- for subdir, subreport in dirgroups.items()|sort %} +
{{ subdir }}
+ {%- for obj, reports in subreport.items()|sort %} +
{{ obj }}
+ {%- for report in reports|sort %} + {#-
#} +
{{ report.idshort }}
+
{{ report.sym.klass }}
+
{{ report.sym.name }}
+ {% if report.sym.loc %} +
{{ report.sym.loc }}
+ {% else %} +
unknown
+ {% endif %} + {#- #} + {%- endfor %} + {%- endfor %} + {%- endfor %} +
#!/usr/bin/python3
#
# 2019 by David Lamparter, placed in public domain
#
# This tool generates a report of possibly unused symbols in the build. It's
# particularly useful for libfrr to find bitrotting functions that aren't even
# used anywhere anymore.
#
# Note that the tool can't distinguish between "a symbol is completely unused"
# and "a symbol is used only in its file" since file-internal references are
# invisible in nm output. However, the compiler will warn you if a static
# symbol is unused.
#
# This tool is only tested on Linux, it probably needs `nm` from GNU binutils
# (as opposed to BSD `nm`). Could use pyelftools instead but that's a lot of
# extra work.
#
# This is a developer tool, please don't put it in any packages :)

import os
import re
import subprocess
import sys
from collections import namedtuple


class MakeVars(object):
    '''
    makevars['FOO_CFLAGS'] gets you "FOO_CFLAGS" from Makefile

    Values are obtained by running `make` and are cached in self._data, so
    each variable is only queried once.
    '''
    def __init__(self):
        self._data = dict()

    def _store_vars(self, text):
        '''
        record every "NAME=<value>" row of text in the cache.

        The first and last character of each value are dropped (presumably
        the quotes emitted by the shvar-* make rule; that rule lives in the
        Makefile, not in this file). Rows without '=' are ignored so that
        empty output from make does not raise.
        '''
        for row in text.strip().split('\n'):
            if '=' not in row:
                continue
            k, v = row.split('=', 1)
            self._data[k] = v[1:-1]

    def getvars(self, varlist):
        '''
        get a batch list of variables from make. faster than individual calls.

        make is invoked with one "shvar-<name>" goal per variable and
        VARFD=<fd>, where <fd> is the write end of a pipe handed to the child;
        whatever make writes there is parsed by _store_vars().
        '''
        rdfd, wrfd = os.pipe()

        shvars = ['shvar-%s' % s for s in varlist]
        try:
            make = subprocess.Popen(['make', '-s', 'VARFD=%d' % wrfd] + shvars,
                                    pass_fds = [wrfd])
        except Exception:
            # make could not be started; don't leak the read end
            os.close(rdfd)
            raise
        finally:
            # our copy of the write end must go away in any case, otherwise
            # the read below would never see EOF
            os.close(wrfd)

        with os.fdopen(rdfd, 'rb') as rdf:
            data = rdf.read()
        make.wait()

        self._store_vars(data.decode('US-ASCII'))

    def __getitem__(self, k):
        '''
        return the value of make variable k, querying make on a cache miss.
        '''
        if k not in self._data:
            self.getvars([k])
        return self._data[k]

    def get(self, k, defval = None):
        '''
        like __getitem__, but an empty value is replaced by defval.
        '''
        if k not in self._data:
            self.getvars([k])
        return self._data[k] or defval


SymRowBase = namedtuple('SymRow', ['target', 'object', 'name', 'address', 'klass', 'typ', 'size', 'line', 'section', 'loc'])


class SymRow(SymRowBase):
    '''
    wrapper around a line of `nm` output

    Symbols.load() additionally sets a plain instance attribute `visible` on
    rows read from object files; it is not one of the tuple fields.
    '''
    lib_re = re.compile(r'/lib[^/]+\.(so|la)$')

    def is_global(self):
        '''
        True for upper-case nm classes and for the classes u, v and w.
        '''
        return self.klass.isupper() or self.klass in 'uvw'

    def scope(self):
        '''
        return the target for symbols of non-library targets, None for
        symbols whose target looks like a library (".../libfoo.so|.la").
        '''
        if self.lib_re.search(self.target) is None:
            return self.target
        # "global"
        return None

    def is_export(self):
        '''
        FRR-specific list of symbols which are considered "externally used"

        e.g. hooks are by design APIs for external use, same for qobj_t_*
        frr_inet_ntop is here because it's used through an ELF alias to
        "inet_ntop()"
        '''
        if self.name in ('main', 'frr_inet_ntop', '_libfrr_version'):
            return True
        return self.name.startswith(('_hook_', 'qobj_t_'))


class Symbols(dict):
    '''
    dict of all symbols in all libs & executables

    Maps symbol name -> Symbols.Symbol. After evaluate() has run, `report`
    maps object file name -> list of ReportSym instances and `extsyms` holds
    the names of symbols that are referenced but never defined.
    '''

    from_re = re.compile(r'^Symbols from (.*?):$')
    lt_re = re.compile(r'^(.*/)([^/]+)\.l[oa]$')

    def __init__(self):
        super().__init__()

    class ReportSym(object):
        '''
        base class for one finding about one symbol definition (a SymRow).
        '''
        def __init__(self, sym):
            self.sym = sym

        def __repr__(self):
            return '<%-25s %-40s [%s]>' % (self.__class__.__name__ + ':', self.sym.name, self.sym.loc)

        def __lt__(self, other):
            # findings sort by symbol name
            return self.sym.name < other.sym.name

    class ReportSymCouldBeStaticAlreadyLocal(ReportSym):
        idshort = 'Z'
        idlong = 'extrastatic'
        title = "symbol is local to library, but only used in its source file (make static?)"

    class ReportSymCouldBeStatic(ReportSym):
        idshort = 'S'
        idlong = 'static'
        title = "symbol is only used in its source file (make static?)"

    class ReportSymCouldBeLibLocal(ReportSym):
        idshort = 'L'
        idlong = 'liblocal'
        title = "symbol is only used inside of library"

    class ReportSymModuleAPI(ReportSym):
        idshort = 'A'
        idlong = 'api'
        title = "symbol (in executable) is referenced externally from a module"

    class Symbol(object):
        '''
        all definitions and references seen for one symbol name.

        defs maps scope (see SymRow.scope()) -> list of defining rows,
        refs is the list of rows whose section is *UND*.
        '''
        def __init__(self, name):
            super().__init__()
            self.name = name
            self.defs = {}
            self.refs = []

        def process(self, row):
            '''
            file a SymRow as either a reference or a definition.
            '''
            if row.section == '*UND*':
                self.refs.append(row)
            else:
                self.defs.setdefault(row.scope(), []).append(row)

        def evaluate(self, out):
            '''
            generate output report

            invoked after all object files have been read in, so it can look
            at inter-object-file relationships

            out is the owning Symbols instance; findings are appended to
            out.report and undefined names are added to out.extsyms.
            '''
            if not self.defs:
                out.extsyms.add(self.name)
                return

            for scopename, symdefs in self.defs.items():
                common_defs = [symdef for symdef in symdefs if symdef.section == '*COM*']
                proper_defs = [symdef for symdef in symdefs if symdef.section != '*COM*']

                if len(proper_defs) > 1:
                    print(self.name, ' DUPLICATE')
                    print('\tD: %s %s' % (scopename, '\n\t\t'.join([repr(s) for s in symdefs])))
                    for syms in self.refs:
                        print('\tR: %s' % (syms, ))
                    return

                if proper_defs:
                    primary_def = proper_defs[0]
                elif common_defs:
                    # "common" = global variables without initializer;
                    # they can occur in multiple .o files and the linker will
                    # merge them into one variable/storage location.
                    primary_def = common_defs[0]
                else:
                    # undefined symbol, e.g. libc
                    continue

                if scopename is not None and self.refs:
                    # symbol of a non-library target that some other .la
                    # target refers to
                    for ref in self.refs:
                        if ref.target != primary_def.target and ref.target.endswith('.la'):
                            outobj = out.report.setdefault(primary_def.object, [])
                            outobj.append(out.ReportSymModuleAPI(primary_def))
                            break

                if not self.refs:
                    if primary_def.is_export():
                        continue
                    outobj = out.report.setdefault(primary_def.object, [])
                    if primary_def.visible:
                        outobj.append(out.ReportSymCouldBeStatic(primary_def))
                    else:
                        outobj.append(out.ReportSymCouldBeStaticAlreadyLocal(primary_def))
                    continue

                if scopename is None and primary_def.visible:
                    # lib symbol; flag it if every reference comes from the
                    # defining target itself
                    if all(ref.target == primary_def.target for ref in self.refs):
                        outobj = out.report.setdefault(primary_def.object, [])
                        outobj.append(out.ReportSymCouldBeLibLocal(primary_def))

    def evaluate(self):
        '''
        (re)build self.extsyms and self.report from all loaded symbols.
        '''
        self.extsyms = set()
        self.report = {}

        for sym in self.values():
            sym.evaluate(self)

    def _libtool_object(self, fn):
        '''
        map "dir/foo.lo" / "dir/foo.la" to "dir/.libs/foo.o"; other names
        are returned unchanged.
        '''
        m = self.lt_re.match(fn)
        if m is None:
            return fn
        return m.group(1) + '.libs/' + m.group(2) + '.o'

    def _libtool_target(self, fn):
        '''
        map "dir/foo.la" to "dir/.libs/foo.so" and any other target
        "dir/prog" to "dir/.libs/prog".
        '''
        m = self.lt_re.match(fn)
        if m is None:
            # os.path keeps this working for a target without a directory
            return os.path.join(os.path.dirname(fn), '.libs', os.path.basename(fn))
        return m.group(1) + '.libs/' + m.group(2) + '.so'

    def _parse_nm_output(self, target, text):
        '''
        turn `nm -f sysv` output into SymRow objects (generator).

        Each table row is split on '|' into name, value, class, type, size,
        line and section; value and size are parsed as hex (empty -> 0).
        With `nm -l` a tab-separated "file:line" may follow the section; it
        is stored in `loc` with the file made relative to the parent of this
        script's directory. Only rows for which SymRow.is_global() holds are
        yielded; ".group" sections and _GLOBAL_OFFSET_TABLE_ are dropped.
        '''
        filename = None
        path_rel_to = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))

        for line in text.split('\n'):
            if line.strip() == '':
                continue
            m = self.from_re.match(line)
            if m is not None:
                filename = m.group(1)
                continue
            if '|' not in line:
                # column header ("Name Value Class ...") or other non-table
                # text. Testing for the separator rather than for a "Name"
                # prefix keeps symbols whose name starts with "Name".
                continue

            items = [i.strip() for i in line.split('|')]
            loc = None
            if '\t' in items[-1]:
                items[-1], loc = items[-1].split('\t', 1)
                fn, lno = loc.rsplit(':', 1)
                fn = os.path.relpath(fn, path_rel_to)
                loc = '%s:%s' % (fn, lno)

            items[1] = int(items[1] if items[1] != '' else '0', 16)
            items[4] = int(items[4] if items[4] != '' else '0', 16)
            items.append(loc)
            row = SymRow(target, filename, *items)

            if row.section == '.group' or row.name == '_GLOBAL_OFFSET_TABLE_':
                continue
            if not row.is_global():
                continue

            yield row

    def load(self, target, files):
        '''
        read the symbols of one build target into this dict.

        target is the automake target name, files its object list (libtool
        .lo/.la names are translated to the real files under .libs/).
        '''
        # sorted() only makes the nm invocation reproducible
        files = sorted(set(self._libtool_object(fn) for fn in files))

        visible_syms = set()

        # the actual symbol report uses output from the individual object files
        # (e.g. lib/.libs/foo.o), but we also read the linked binary (e.g.
        # lib/.libs/libfrr.so) to determine which symbols are actually visible
        # in the linked result (this covers ELF "hidden"/"internal" linkage)

        libfile = self._libtool_target(target)
        nmlib = subprocess.Popen(['nm', '-l', '-g', '--defined-only', '-f', 'sysv', libfile], stdout = subprocess.PIPE)
        out = nmlib.communicate()[0].decode('US-ASCII')

        for row in self._parse_nm_output(target, out):
            visible_syms.add(row.name)

        if not files:
            # nm without file arguments would fall back to "a.out"
            return

        nm = subprocess.Popen(['nm', '-l', '-f', 'sysv'] + files, stdout = subprocess.PIPE)
        out = nm.communicate()[0].decode('US-ASCII')

        for row in self._parse_nm_output(target, out):
            row.visible = row.name in visible_syms
            sym = self.get(row.name)
            if sym is None:
                sym = self[row.name] = self.Symbol(row.name)
            sym.process(row)


def _fetch_jquery():
    '''
    best-effort download of jquery into the current directory.

    Any failure only results in a message on stderr; the HTML report has
    already been written at this point.
    '''
    name = 'jquery-3.4.1.min.js'
    url = 'https://code.jquery.com/jquery-3.4.1.min.js'
    failed = 'failed -- please download jquery-3.4.1.min.js and put it next to the HTML report\n'

    sys.stderr.write(
        'trying to grab a copy of jquery from %s\nif this fails, please get it manually (the HTML output is done.)\n' % (url))

    try:
        import requests
    except ImportError:
        sys.stderr.write(failed)
        return

    try:
        r = requests.get(url)
    except requests.RequestException:
        sys.stderr.write(failed)
        return
    if r.status_code != 200:
        sys.stderr.write(failed)
        return

    # write the payload as received; go through a .tmp file so that an
    # interrupted run never leaves a truncated file under the final name
    with open(name + '.tmp', 'wb') as fd:
        fd.write(r.content)
    os.rename(name + '.tmp', name)
    sys.stderr.write('done.\n')


def write_html_report(syms):
    '''
    render symalyzer.html (next to this script) to ./symalyzer_report.html.

    syms is an evaluated Symbols instance. Needs jinja2; without it a note
    is printed and nothing is written. If jquery-3.4.1.min.js is not present
    in the current directory, a download is attempted afterwards.
    '''
    try:
        import jinja2
    except ImportError:
        sys.stderr.write('jinja2 could not be imported, not writing HTML report!\n')
        return

    self_path = os.path.dirname(os.path.abspath(__file__))
    jenv = jinja2.Environment(loader=jinja2.FileSystemLoader(self_path))
    template = jenv.get_template('symalyzer.html')

    # group object files by source directory (the libtool ".libs/" component
    # is removed for grouping only; the object key keeps it)
    dirgroups = {}
    for fn, reports in syms.report.items():
        dirname = fn.replace('.libs/', '').rsplit('/', 1)[0]
        dirgroups.setdefault(dirname, {})[fn] = reports

    klasses = {
        'T': 'code / plain old regular function (Text)',
        'D': 'global variable, read-write, with nonzero initializer (Data)',
        'B': 'global variable, read-write, with zero initializer (BSS)',
        'C': 'global variable, read-write, with zero initializer (Common)',
        'R': 'global variable, read-only (Rodata)',
    }

    with open('symalyzer_report.html.tmp', 'w') as fd:
        fd.write(template.render(dirgroups = dirgroups, klasses = klasses))
    os.rename('symalyzer_report.html.tmp', 'symalyzer_report.html')

    if not os.path.exists('jquery-3.4.1.min.js'):
        _fetch_jquery()


def automake_escape(s):
    '''
    turn a target path into the prefix used in this script for its automake
    variables ('.' and '/' become '_').
    '''
    return s.replace('.', '_').replace('/', '_')


def main():
    '''
    analyze the FRR build tree in the current directory, print the findings
    to stdout and write the HTML report.
    '''
    if not (os.path.exists('config.version') and os.path.exists('lib/.libs/libfrr.so')):
        sys.stderr.write('please execute this script in the root directory of an FRR build tree\n')
        sys.stderr.write('./configure && make need to have completed successfully\n')
        sys.exit(1)

    mv = MakeVars()

    amtargets = ['bin_PROGRAMS', 'sbin_PROGRAMS', 'lib_LTLIBRARIES', 'module_LTLIBRARIES']
    targets = []

    mv.getvars(amtargets)
    for amtarget in amtargets:
        targets.extend([item for item in mv[amtarget].strip().split() if item != 'tools/ssd'])

    def static_libs(target):
        # .a archives listed in <target>_LDADD; linker options are skipped
        ldadd = mv['%s_LDADD' % automake_escape(target)].strip().split()
        return [item for item in ldadd
                if not item.startswith('-') and item.endswith('.a')]

    # fetch the variables in batches; the lookups below then hit the cache
    mv.getvars(['%s_LDADD' % automake_escape(t) for t in targets])
    ldobjs = targets[:]
    for t in targets:
        ldobjs.extend(static_libs(t))

    mv.getvars(['%s_OBJECTS' % automake_escape(o) for o in ldobjs])

    syms = Symbols()

    for t in targets:
        objs = mv['%s_OBJECTS' % automake_escape(t)].strip().split()
        for item in static_libs(t):
            objs.extend(mv['%s_OBJECTS' % automake_escape(item)].strip().split())

        sys.stderr.write('processing %s...\n' % t)
        sys.stderr.flush()
        syms.load(t, objs)

    syms.evaluate()

    for obj, reports in sorted(syms.report.items()):
        print('%s:' % obj)
        for report in reports:
            print('\t%r' % report)

    write_html_report(syms)


if __name__ == '__main__':
    main()