git.puffer.fish Git - matthieu/frr.git/commitdiff
python/xrelfo: the ELF xref extractor
author David Lamparter <equinox@diac24.net>
Thu, 30 Apr 2020 19:33:58 +0000 (21:33 +0200)
committer David Lamparter <equinox@diac24.net>
Tue, 23 Feb 2021 15:56:58 +0000 (16:56 +0100)
This creates JSON dumps of all the xref structs littered around FRR.

Signed-off-by: David Lamparter <equinox@diac24.net>
Makefile.am
python/clippy/__init__.py
python/clippy/elf.py [new file with mode: 0644]
python/clippy/uidhash.py [new file with mode: 0644]
python/runtests.py [new file with mode: 0644]
python/test_xrelfo.py [new file with mode: 0644]
python/tiabwarfo.py [new file with mode: 0644]
python/xrefstructs.json [new file with mode: 0644]
python/xrelfo.py [new file with mode: 0644]

index 90c8407010b6c88faf081ae8096ab6bb63d7e480..bb8e97a115f4857bec08dc51414acef53c814e7c 100644 (file)
@@ -187,8 +187,16 @@ EXTRA_DIST += \
        \
        python/clidef.py \
        python/clippy/__init__.py \
+       python/clippy/elf.py \
+       python/clippy/uidhash.py \
        python/makevars.py \
        python/makefile.py \
+       python/tiabwarfo.py \
+       python/xrelfo.py \
+       python/test_xrelfo.py \
+       python/runtests.py \
+       \
+       python/xrefstructs.json \
        \
        redhat/frr.logrotate \
        redhat/frr.pam \
index d6865ff4842e6c0e7738f43b7fd692b87557611e..344a1c91eec29f7c658c557276e3125f98640761 100644 (file)
@@ -21,6 +21,8 @@ import _clippy
 from _clippy import parse, Graph, GraphNode
 
 
+# three dirname() steps up from this file's absolute path
+# (python/clippy/__init__.py -> python/clippy -> python -> source tree top)
+frr_top_src = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
 def graph_iterate(graph):
     """iterator yielding all nodes of a graph
 
diff --git a/python/clippy/elf.py b/python/clippy/elf.py
new file mode 100644 (file)
index 0000000..4ed334f
--- /dev/null
@@ -0,0 +1,574 @@
+# FRR libelf wrapper
+#
+# Copyright (C) 2020  David Lamparter for NetDEF, Inc.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; see the file COPYING; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+'''
+Wrapping layer and additional utility around _clippy.ELFFile.
+
+Essentially, the C bits have the low-level ELF access bits that should be
+fast while this has the bits that string everything together (and would've
+been a PITA to do in C.)
+
+Surprisingly - or maybe through proper engineering - this actually works
+across architecture, word size and even endianness boundaries.  Both the C
+module (through GElf_*) and this code (cf. struct.unpack format mangling
+in ELFDissectStruct) will take appropriate measures to flip and resize
+fields as needed.
+'''
+
+import struct
+from collections import OrderedDict
+from weakref import WeakValueDictionary
+
+from _clippy import ELFFile, ELFAccessError
+
+#
+# data access
+#
+
+class ELFNull(object):
+    '''
+    Stand-in handed out in place of ELFData when a pointer is NULL.
+
+    Exposes the same ``symname`` and ``_dstsect`` attributes as ELFData,
+    both fixed to None.
+    '''
+    def __init__(self):
+        self._dstsect = None
+        self.symname = None
+
+    def __hash__(self):
+        # every NULL pointer hashes the same way None does
+        return hash(None)
+
+    def __repr__(self):
+        return '<ptr: NULL>'
+
+    def get_string(self):
+        '''
+        There is no string behind a NULL pointer; always gives None.
+        '''
+        return None
+
+class ELFUnresolved(object):
+    '''
+    Placeholder for a pointer whose target is an external symbol that could
+    not be resolved; handed out in place of ELFData.
+
+    :param symname: name of the referenced symbol
+    :param addend:  offset added to the symbol, normally zero
+    '''
+    def __init__(self, symname, addend):
+        self.symname = symname
+        self.addend = addend
+        # no containing section is known for an unresolved symbol
+        self._dstsect = None
+
+    def __hash__(self):
+        key = (self.symname, self.addend)
+        return hash(key)
+
+    def __repr__(self):
+        fields = (self.symname, self.addend)
+        return '<unresolved: %s+%d>' % fields
+
+class ELFData(object):
+    '''
+    Actual data somewhere in the ELF file.
+
+    :type dstsect:  ELFSubset
+    :param dstsect: container data area (section or entire file)
+    :param dstoffs: byte offset into dstsect
+    :param dstlen:  byte size of object, or None if unknown, open-ended or string
+    '''
+    def __init__(self, dstsect, dstoffs, dstlen):
+        self._dstsect = dstsect
+        self._dstoffs = dstoffs
+        self._dstlen = dstlen
+        self.symname = None
+
+    def __repr__(self):
+        # an unknown length (None) is printed as -1
+        return '<ptr: %s+0x%05x/%d>' % (self._dstsect.name, self._dstoffs, self._dstlen or -1)
+
+    def __hash__(self):
+        return hash((self._dstsect, self._dstoffs))
+
+    def get_string(self):
+        '''
+        Interpret as C string / null terminated UTF-8 and get the actual text.
+
+        The read uses the ``[offset:str]`` slice form of the container, which
+        extracts up to the terminating null byte.
+
+        :returns: decoded text
+        :raises:  whatever the container read or the UTF-8 decode raises;
+            errors are passed on to the caller instead of being swallowed
+            by a debugger breakpoint.
+        '''
+        return self._dstsect[self._dstoffs:str].decode('UTF-8')
+
+    def get_data(self, reflen):
+        '''
+        Interpret as some structure (and check vs. expected length)
+
+        :param reflen: expected size of the object, compared against actual
+            size (which is only known in rare cases, mostly when directly
+            accessing a symbol since symbols have their destination object
+            size recorded)
+        :returns: ``reflen`` bytes read from the container at this offset
+        :raises ValueError: if the size is known and differs from reflen
+        '''
+        if self._dstlen is not None and self._dstlen != reflen:
+            raise ValueError('symbol size mismatch (got %d, expected %d)' % (self._dstlen, reflen))
+        return self._dstsect[self._dstoffs:self._dstoffs+reflen]
+
+    def offset(self, offs, within_symbol=False):
+        '''
+        Get another ELFData at an offset
+
+        :param offs:          byte offset, can be negative (e.g. in container_of)
+        :param within_symbol: retain length information
+        :returns:             new ELFData in the same container; its length is
+            unknown (None) unless within_symbol is set and this object's
+            length is known, in which case it is reduced by offs
+        '''
+        if self._dstlen is None or not within_symbol:
+            return ELFData(self._dstsect, self._dstoffs + offs, None)
+        else:
+            return ELFData(self._dstsect, self._dstoffs + offs, self._dstlen - offs)
+
+#
+# dissection data items
+#
+
+class ELFDissectData(object):
+    '''
+    Shared behaviour of ELFDissectStruct and ELFDissectUnion
+    '''
+
+    def __len__(self):
+        '''
+        Truth value hook, so that "if struct: ..." is False for items backed
+        by a NULL or unresolved pointer.
+        '''
+        return not isinstance(self._data, (ELFNull, ELFUnresolved))
+
+    def container_of(self, parent, fieldname):
+        '''
+        Treat this struct as a member of a larger struct and get the larger one
+
+        Python ``self.container_of(a, b)`` = C ``container_of(self, a, b)``
+
+        :param parent:    class (not instance) of the larger struct
+        :param fieldname: fieldname that refers back to this
+        :returns:         instance of parent, with fieldname set to this object
+        :raises AttributeError: if parent has no field called fieldname
+        '''
+        if not hasattr(parent, '_efields'):
+            parent._setup_efields()
+
+        # add up the sizes of all fields in front of the wanted one
+        preceding = 0
+        found = False
+        for entry in parent._efields[self.elfclass]:
+            if entry[0] == fieldname:
+                found = True
+                break
+            preceding += struct.calcsize(entry[1])
+
+        if not found:
+            raise AttributeError('%r not found in %r.fields' % (fieldname, parent))
+
+        outer_ptr = self._data.offset(-preceding)
+        return parent(outer_ptr, replace = {fieldname: self})
+
+class ELFDissectStruct(ELFDissectData):
+    '''
+    Decode and provide access to a struct somewhere in the ELF file
+
+    Handles pointers and strings somewhat nicely.  Create a subclass for each
+    struct that is to be accessed, and give a field list in a "fields"
+    class-member.
+
+    :param dataptr: ELFData referring to the data bits to decode.
+    :param parent:  where this was instantiated from; only for reference, has
+        no functional impact.
+    :param replace: substitute data values for specific fields.  Used by
+        `container_of` to replace the inner struct when creating the outer
+        one.
+
+    .. attribute:: fields
+
+       List of tuples describing the struct members.  Items can be:
+       - ``('name', ELFDissectData)`` - directly embed another struct
+       - ``('name', 'I')`` - simple data types; second item for struct.unpack
+       - ``('name', 'I', None)`` - field to ignore
+       - ``('name', 'P', str)`` - pointer to string
+       - ``('name', 'P', ELFDissectData)`` - pointer to another struct
+
+       ``P`` is added as unpack format for pointers (sized appropriately for
+       the ELF file.)
+
+       Refer to tiabwarfo.py for extracting this from ``pahole``.
+
+       TBD: replace tuples with a class.
+
+    .. attribute:: fieldrename
+
+       Dictionary to rename fields, useful if fields comes from tiabwarfo.py.
+    '''
+
+    class Pointer(object):
+        '''
+        Quick wrapper for pointers to further structs
+
+        This is just here to avoid going into infinite loops when loading
+        structs that have pointers to each other (e.g. struct xref <-->
+        struct xrefdata.)  The pointer destination is only instantiated when
+        actually accessed.
+        '''
+        def __init__(self, cls, ptr):
+            self.cls = cls
+            self.ptr = ptr
+
+        def __repr__(self):
+            return '<Pointer:%s %r>' % (self.cls.__name__, self.ptr)
+
+        def __call__(self):
+            # NULL pointers resolve to None rather than an empty struct
+            if isinstance(self.ptr, ELFNull):
+                return None
+            return self.cls(self.ptr)
+
+    def __new__(cls, dataptr, parent = None, replace = None):
+        '''
+        Look up (or create and register) the instance for ``(cls, dataptr)``
+        in the ``_pointers`` cache of the section the data lives in.
+
+        Data pointers without a section (``_dstsect`` is None, as on ELFNull
+        and ELFUnresolved) always get a fresh, uncached instance.
+        '''
+        if dataptr._dstsect is None:
+            return super().__new__(cls)
+
+        # NOTE(review): the cache key contains dataptr itself; ELFData defines
+        # __hash__ but no __eq__, so a hit presumably requires the very same
+        # ELFData object - confirm whether that is intended
+        obj = dataptr._dstsect._pointers.get((cls, dataptr))
+        if obj is not None:
+            return obj
+        obj = super().__new__(cls)
+        dataptr._dstsect._pointers[(cls, dataptr)] = obj
+        return obj
+
+    # struct format characters whose size depends on the ELF class; rewritten
+    # to fixed-size codes by _preproc_structspec
+    replacements = 'lLnN'
+
+    @classmethod
+    def _preproc_structspec(cls, elfclass, spec):
+        '''
+        Turn one field spec into a struct format string for the given ELF class
+
+        :param elfclass: 32 or 64
+        :param spec:     struct format string, or an object with a
+            ``calcsize(elfclass)`` method (embedded struct), which becomes an
+            opaque ``'<size>s'`` byte field
+        :returns:        format string with every character listed in
+            ``replacements`` swapped for i/I (32 bit) or q/Q (otherwise),
+            lowercase mapping to the first and uppercase to the second
+        '''
+        elfbits = elfclass
+
+        if hasattr(spec, 'calcsize'):
+            spec = '%ds' % (spec.calcsize(elfclass),)
+
+        if elfbits == 32:
+            repl = ['i', 'I']
+        else:
+            repl = ['q', 'Q']
+        for c in cls.replacements:
+            spec = spec.replace(c, repl[int(c.isupper())])
+        return spec
+
+    @classmethod
+    def _setup_efields(cls):
+        '''
+        Build the per-ELF-class field tables from ``cls.fields``
+
+        Fills ``cls._efields[elfclass]`` with the field tuples (format string
+        preprocessed, remaining items untouched) and ``cls._esize[elfclass]``
+        with the sum of the individual field sizes, for both 32 and 64.
+        '''
+        cls._efields = {}
+        cls._esize = {}
+        for elfclass in [32, 64]:
+            cls._efields[elfclass] = []
+            size = 0
+            for f in cls.fields:
+                newf = (f[0], cls._preproc_structspec(elfclass, f[1])) + f[2:]
+                cls._efields[elfclass].append(newf)
+                size += struct.calcsize(newf[1])
+            cls._esize[elfclass] = size
+
+    def __init__(self, dataptr, parent = None, replace = None):
+        '''
+        Read and unpack the struct that dataptr refers to
+
+        For ELFNull / ELFUnresolved data pointers nothing is decoded and the
+        field dictionary stays empty.  Otherwise every field ends up in
+        ``self._fdata`` under its name; see the class docstring for the
+        meaning of the parameters.
+        '''
+        if not hasattr(self.__class__, '_efields'):
+            self._setup_efields()
+
+        # _fdata is assigned before anything else since __getattr__ reads it
+        self._fdata = None
+        self._data = dataptr
+        self._parent = parent
+        self.symname = dataptr.symname
+        if isinstance(dataptr, ELFNull) or isinstance(dataptr, ELFUnresolved):
+            self._fdata = {}
+            return
+
+        self._elfsect = dataptr._dstsect
+        self.elfclass = self._elfsect._elffile.elfclass
+        self.offset = dataptr._dstoffs
+
+        pspecl = [f[1] for f in self._efields[self.elfclass]]
+
+        # need to correlate output from struct.unpack with extra metadata
+        # about the particular fields, so note down byte offsets (in locs)
+        # and tuple indices of pointers (in ptrs)
+        pspec = ''
+        locs = {}
+        ptrs = set()
+
+        # NOTE(review): calcsize() below is called without the byte order
+        # prefix that unpack() further down gets; the struct module sizes and
+        # aligns those two modes differently - confirm that field lists
+        # always carry explicit padding so both agree
+        for idx, spec in enumerate(pspecl):
+            if spec == 'P':
+                ptrs.add(idx)
+                spec = self._elfsect.ptrtype
+
+            locs[idx] = struct.calcsize(pspec)
+            pspec = pspec + spec
+
+        self._total_size = struct.calcsize(pspec)
+
+        # swaps the raw unpacked value of pointer fields for the result of
+        # dereferencing the pointer at that field's location
+        def replace_ptrs(v):
+            idx, val = v[0], v[1]
+            if idx not in ptrs:
+                return val
+            return self._elfsect.pointer(self.offset + locs[idx])
+
+        data = dataptr.get_data(struct.calcsize(pspec))
+        unpacked = struct.unpack(self._elfsect.endian + pspec, data)
+        unpacked = list(map(replace_ptrs, enumerate(unpacked)))
+        self._fraw = unpacked
+        self._fdata = OrderedDict()
+        replace = replace or {}
+
+        for i, item in enumerate(unpacked):
+            name = self.fields[i][0]
+            if name is None:
+                continue
+
+            # caller-supplied value wins over anything decoded from the file
+            if name in replace:
+                self._fdata[name] = replace[name]
+                continue
+
+            # embedded struct: decode in place at the field's offset
+            if isinstance(self.fields[i][1], type) and issubclass(self.fields[i][1], ELFDissectData):
+                dataobj = self.fields[i][1](dataptr.offset(locs[i]), self)
+                self._fdata[name] = dataobj
+                continue
+            if len(self.fields[i]) == 3:
+                if self.fields[i][2] == str:
+                    self._fdata[name] = item.get_string()
+                    continue
+                elif self.fields[i][2] is None:
+                    pass
+                elif issubclass(self.fields[i][2], ELFDissectData):
+                    # pointer to struct: wrapped so it is only decoded on access
+                    cls = self.fields[i][2]
+                    dataobj = self.Pointer(cls, item)
+                    self._fdata[name] = dataobj
+                    continue
+
+            self._fdata[name] = item
+
+    def __getattr__(self, attrname):
+        '''
+        Expose decoded fields as attributes
+
+        Pointer wrappers are resolved on first access and the result is
+        stored back, so later accesses get the resolved value directly.
+
+        :raises AttributeError: if attrname is not a decoded field
+        '''
+        if attrname not in self._fdata:
+            raise AttributeError(attrname)
+        if isinstance(self._fdata[attrname], self.Pointer):
+            self._fdata[attrname] = self._fdata[attrname]()
+        return self._fdata[attrname]
+
+    def __repr__(self):
+        # placeholders (NULL / unresolved) print the pointer, not fields
+        if not isinstance(self._data, ELFData):
+            return '<%s: %r>' % (self.__class__.__name__, self._data)
+        return '<%s: %s>' % (self.__class__.__name__,
+                ', '.join(['%s=%r' % t for t in self._fdata.items()]))
+
+    @classmethod
+    def calcsize(cls, elfclass):
+        '''
+        Sum up byte size of this struct
+
+        Wraps struct.calcsize with some extra features.
+
+        :param elfclass: 32 or 64; decides the size used for ``P`` pointers
+        :returns:        struct.calcsize() of all field formats joined together
+        '''
+        if not hasattr(cls, '_efields'):
+            cls._setup_efields()
+
+        pspec = ''.join([f[1] for f in cls._efields[elfclass]])
+
+        ptrtype = 'I' if elfclass == 32 else 'Q'
+        pspec = pspec.replace('P', ptrtype)
+
+        return struct.calcsize(pspec)
+
+class ELFDissectUnion(ELFDissectData):
+    '''
+    Decode multiple structs in the same place.
+
+    Subclasses list the alternatives in a ``members`` class attribute as
+    ``(name, class)`` tuples; every alternative is decoded from the same
+    data pointer and made available as an attribute of that name.
+
+    Not currently used (and hence not tested.)  Worked at some point but not
+    needed anymore and may be borked now.  Remove this comment when using.
+
+    :param dataptr: data pointer shared by all members
+    :param parent:  passed through to each member
+    '''
+    def __init__(self, dataptr, parent = None):
+        # the ELFDissectData helpers (__len__, container_of) read _data, so
+        # keep it alongside the historic _dataptr name
+        self._data = dataptr
+        self._dataptr = dataptr
+        self._parent = parent
+        # instance-level list of decoded members; the (name, class) spec is
+        # read from the class attribute of the same name
+        self.members = []
+        for name, membercls in self.__class__.members:
+            item = membercls(dataptr, parent)
+            self.members.append(item)
+            setattr(self, name, item)
+
+    def __repr__(self):
+        return '<%s: %s>' % (self.__class__.__name__, ', '.join([repr(i) for i in self.members]))
+
+    @classmethod
+    def calcsize(cls, elfclass):
+        '''
+        Size of the union, i.e. the size of its largest member
+        '''
+        return max([member.calcsize(elfclass) for name, member in cls.members])
+
+#
+# wrappers for spans of ELF data
+#
+
+class ELFSubset(object):
+    '''
+    Common abstract base for section-level and file-level access.
+
+    The methods here rely on attributes set up by the subclasses: ``name``,
+    ``_obj``, ``_elffile``, ``ptrtype``, ``endian`` and ``_wrap_data()``.
+    '''
+
+    def __init__(self):
+        super().__init__()
+
+        # instance cache used by ELFDissectStruct.__new__; weak values, so an
+        # entry disappears once nothing else refers to the struct
+        self._pointers = WeakValueDictionary()
+
+    def __hash__(self):
+        return hash(self.name)
+
+    def __getitem__(self, k):
+        '''
+        Read data from slice
+
+        Subscript **must** be a slice; a simple index will not return a byte
+        but rather throw an exception.  Valid slice syntaxes are defined by
+        the C module:
+
+        - `this[123:456]` - extract specific range
+        - `this[123:str]` - extract until null byte.  The slice stop value is
+            the `str` type (or, technically, `unicode`.)
+        '''
+        return self._obj[k]
+
+    def getreloc(self, offset):
+        '''
+        Check for a relocation record at the specified offset.
+        '''
+        return self._obj.getreloc(offset)
+
+    def iter_data(self, scls, slice_ = slice(None)):
+        '''
+        Assume an array of structs present at a particular slice and decode
+
+        :param scls:   ELFDissectData subclass for the struct
+        :param slice_: optional range specification; a missing (or zero)
+            stop means "up to the end", a negative stop counts back from
+            the end.  The slice step is not looked at.
+        :returns:      generator of scls instances, one per array element
+        '''
+        size = scls.calcsize(self._elffile.elfclass)
+
+        offset = slice_.start or 0
+        stop = slice_.stop or self._obj.len
+        if stop < 0:
+            # negative stop is relative to the end, as in a regular slice
+            stop = self._obj.len + stop
+
+        while offset < stop:
+            yield scls(ELFData(self, offset, size))
+            offset += size
+
+    def pointer(self, offset):
+        '''
+        Try to dereference a pointer value
+
+        This checks whether there's a relocation at the given offset and
+        uses that;  otherwise (e.g. in a non-PIE executable where the pointer
+        is already resolved by the linker) the data at the location is used.
+
+        :param offset: byte offset from beginning of section,
+            or virtual address in file
+        :returns:      ELFData wrapping pointed-to object, ELFUnresolved for
+            a relocation against an unresolved symbol, or ELFNull for a
+            zero pointer without destination section
+        '''
+
+        ptrsize = struct.calcsize(self.ptrtype)
+        data = struct.unpack(self.endian + self.ptrtype, self[offset:offset + ptrsize])[0]
+
+        reloc = self.getreloc(offset)
+        dstsect = None
+        if reloc:
+            # section won't be available in whole-file operation
+            dstsect = reloc.getsection(data)
+            addend = reloc.r_addend
+
+            if reloc.relative:
+                # old-style ELF REL instead of RELA, not well-tested
+                addend += data
+
+            if reloc.unresolved and reloc.symvalid:
+                return ELFUnresolved(reloc.symname, addend)
+            elif reloc.symvalid:
+                data = addend + reloc.st_value
+            else:
+                data = addend
+
+        # 0 could technically be a valid pointer for a shared library,
+        # since libraries may use 0 as default virtual start address (it'll
+        # be adjusted on loading)
+        # That said, if the library starts at 0, that's where the ELF header
+        # would be so it's still an invalid pointer.
+        if data == 0 and dstsect is None:
+            return ELFNull()
+
+        # wrap_data is different between file & section
+        return self._wrap_data(data, dstsect)
+
+class ELFDissectSection(ELFSubset):
+    '''
+    Accessor for the contents of one ELF section, e.g. ``.text`` or ``.data``
+
+    :param elfwrap: ELFDissectFile wrapper for the file
+    :param idx:     section index in section header table
+    :param section: section object from C module
+    '''
+
+    def __init__(self, elfwrap, idx, section):
+        super().__init__()
+
+        # the section itself
+        self._section = section
+        self._obj = section
+        self._idx = idx
+        self.name = section.name
+
+        # file-level bits, taken over from the wrapper
+        self._elfwrap = elfwrap
+        self._elffile = elfwrap._elffile
+        self.ptrtype = elfwrap.ptrtype
+        self.endian = elfwrap.endian
+
+    def _wrap_data(self, data, dstsect):
+        '''
+        Make an ELFData for address ``data``, relative to its section
+
+        When no destination section is given, it is looked up from the
+        address through the file object.
+        '''
+        target = dstsect
+        if target is None:
+            target = self._elfwrap._elffile.get_section_addr(data)
+        relative = data - target.sh_addr
+        wrapped = self._elfwrap.get_section(target.idx)
+        return ELFData(wrapped, relative, None)
+
+class ELFDissectFile(ELFSubset):
+    '''
+    Access the contents of an ELF file.
+
+    Note that offsets for array subscript and relocation/pointer access are
+    based on the file's virtual address space and are NOT offsets to the
+    start of the file on disk!
+
+    (Shared libraries frequently have a virtual address space starting at 0,
+    but non-PIE executables have an architecture specific default loading
+    address like 0x400000 on x86.)
+
+    :param filename: ELF file to open
+    '''
+
+    def __init__(self, filename):
+        super().__init__()
+
+        self.name = filename
+        self._elffile = self._obj = ELFFile(filename)
+        # section wrappers handed out so far, keyed by section index
+        self._sections = {}
+
+        # struct format bits matching the file's word size and byte order
+        self.ptrtype = 'I' if self._elffile.elfclass == 32 else 'Q'
+        self.endian = '>' if self._elffile.bigendian else '<'
+
+    @property
+    def _elfwrap(self):
+        # the file is its own wrapper; mirrors ELFDissectSection._elfwrap
+        return self
+
+    def _wrap_data(self, data, dstsect):
+        # whole-file access: the address is used as-is, dstsect is ignored
+        return ELFData(self, data, None)
+
+    def get_section(self, secname):
+        '''
+        Look up section by name or index
+
+        :param secname: section name, or (if an int) section header index
+        :returns:       ELFDissectSection for the section (the same object
+            on repeated lookups), or None if the lookup gave nothing
+        '''
+        if isinstance(secname, int):
+            section = self._elffile.get_section_idx(secname)
+        else:
+            section = self._elffile.get_section(secname)
+
+        if section is None:
+            return None
+
+        sh_idx = section.idx
+
+        if sh_idx not in self._sections:
+            self._sections[sh_idx] = ELFDissectSection(self, sh_idx, section)
+
+        return self._sections[sh_idx]
diff --git a/python/clippy/uidhash.py b/python/clippy/uidhash.py
new file mode 100644 (file)
index 0000000..bf994d3
--- /dev/null
@@ -0,0 +1,71 @@
+# xref unique ID hash calculation
+#
+# Copyright (C) 2020  David Lamparter for NetDEF, Inc.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; see the file COPYING; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+import struct
+from hashlib import sha256
+
+def bititer(data, bits, startbit = True):
+    '''
+    generator handing out the contents of a list of byte values in groups
+    of <bits> bits, least significant bits first
+
+    if startbit is True, the first group gets its top bit forced to 1 and
+    therefore only uses up <bits> - 1 bits of actual data.
+
+    the list passed in is consumed (items are popped off its front) while
+    iterating.  iteration ends as soon as the list is empty, bits still
+    held back at that point are not emitted.
+    '''
+    mask = (1 << bits) - 1
+    pending = 0
+    acc = 0
+
+    if startbit and len(data) > 0:
+        acc = data.pop(0)
+        yield (acc & mask) | (1 << (bits - 1))
+        # one byte loaded, bits - 1 of its bits used up
+        pending = 9 - bits
+        acc >>= bits - 1
+
+    while len(data) > 0:
+        # top up from the input until one whole group is available
+        while pending < bits:
+            acc |= data.pop(0) << pending
+            pending += 8
+        yield acc & mask
+        acc >>= bits
+        pending -= bits
+
+def base32c(data):
+    '''
+    Crockford base32 with extra dashes
+
+    Encodes the leading bits of data as (at most) 10 base32 characters,
+    with a dash put in front of the 6th character.
+
+    :param data: str (each character is used by its code point) or an
+        iterable of integer byte values, e.g. bytes
+    :returns:    encoded text, e.g. ``H7KJB-67TBH``
+    '''
+    chs = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
+    # bititer consumes its input, so always work on a private list
+    if isinstance(data, str):
+        data = [ord(v) for v in data]
+    else:
+        data = list(data)
+
+    pieces = []
+    for i, bits in enumerate(bititer(data, 5)):
+        if i == 10:
+            break
+        if i == 5:
+            pieces.append('-')
+        pieces.append(chs[bits])
+    return ''.join(pieces)
+
+def uidhash(filename, hashstr, hashu32a, hashu32b):
+    '''
+    xref Unique ID hash used in FRRouting
+
+    SHA-256 over the last two path components of filename, hashstr and the
+    two integers (packed as big-endian 32-bit unsigned), run through base32c.
+    '''
+    tail = filename.rsplit('/')[-2:]
+    payload = b''.join([
+        '/'.join(tail).encode('UTF-8'),
+        hashstr.encode('UTF-8'),
+        struct.pack('>II', hashu32a, hashu32b),
+    ])
+    return base32c(sha256(payload).digest())
diff --git a/python/runtests.py b/python/runtests.py
new file mode 100644 (file)
index 0000000..bcf650b
--- /dev/null
@@ -0,0 +1,14 @@
+import pytest
+import sys
+import os
+
+# the tests import _clippy, which is only available inside the clippy
+# interpreter; bail out early with a hint if it is missing
+try:
+    import _clippy
+except ImportError:
+    hint = '''these tests need to be run with the _clippy C extension
+module available.  Try running "clippy runtests.py ...".
+'''
+    sys.stderr.write(hint)
+    raise SystemExit(1)
+
+# run from the directory this script lives in, then hand over to pytest
+# and use its result as exit status
+here = os.path.dirname(os.path.abspath(__file__))
+os.chdir(here)
+sys.exit(pytest.main(sys.argv[1:]))
diff --git a/python/test_xrelfo.py b/python/test_xrelfo.py
new file mode 100644 (file)
index 0000000..3ae24ea
--- /dev/null
@@ -0,0 +1,65 @@
+# some basic tests for xrelfo & the python ELF machinery
+#
+# Copyright (C) 2020  David Lamparter for NetDEF, Inc.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; see the file COPYING; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+import sys
+import os
+import pytest
+from pprint import pprint
+
+# top of the source tree (parent of the directory this file is in); made
+# absolute so it stays valid if __file__ is relative or the working
+# directory changes
+root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+sys.path.append(os.path.join(root, 'python'))
+
+import xrelfo
+from clippy import elf, uidhash
+
+def test_uidhash():
+    '''known-answer check for the xref unique ID hash'''
+    uid = uidhash.uidhash("lib/test_xref.c", "logging call", 3, 0)
+    assert uid == 'H7KJB-67TBH'
+
+def test_xrelfo_other():
+    '''XrefPtr on NULL / unresolved pointers: repr works, fields do not exist'''
+    placeholders = (
+        elf.ELFNull(),
+        elf.ELFUnresolved('somesym', 0),
+    )
+
+    for data in placeholders:
+        dissect = xrelfo.XrefPtr(data)
+        print(repr(dissect))
+
+        with pytest.raises(AttributeError):
+            dissect.xref
+
+def test_xrelfo_obj():
+    '''load xrefs from an object file; the loaded file must reject edf[0:4]'''
+    loader = xrelfo.Xrelfo()
+    objpath = os.path.join(root, 'lib/.libs/zclient.o')
+    edf = loader.load_elf(objpath, 'zclient.lo')
+    first = loader._xrefs[0]
+
+    with pytest.raises(elf.ELFAccessError):
+        edf[0:4]
+
+    pprint(first)
+    pprint(first._data)
+
+def test_xrelfo_bin():
+    '''load xrefs from the shared library; edf[0:4] gives the ELF magic'''
+    loader = xrelfo.Xrelfo()
+    libpath = os.path.join(root, 'lib/.libs/libfrr.so')
+    edf = loader.load_elf(libpath, 'libfrr.la')
+    first = loader._xrefs[0]
+
+    assert edf[0:4] == b'\x7fELF'
+
+    pprint(first)
+    pprint(first._data)
diff --git a/python/tiabwarfo.py b/python/tiabwarfo.py
new file mode 100644 (file)
index 0000000..bddbeef
--- /dev/null
@@ -0,0 +1,195 @@
+# FRR DWARF structure definition extractor
+#
+# Copyright (C) 2020  David Lamparter for NetDEF, Inc.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; see the file COPYING; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+import sys
+import os
+import subprocess
+import re
+import argparse
+import subprocess
+import json
+
+# names of the structs extract() asks pahole for (joined into its -C option)
+structs = ['xref', 'xref_logmsg', 'xref_threadsched', 'xref_install_element', 'xrefdata', 'xrefdata_logmsg', 'cmd_element']
+
+def extract(filename='lib/.libs/libfrr.so'):
+    '''
+    Run "pahole" on an ELF file and turn its output into JSON-ready dicts.
+
+    The result maps each struct name to ``{'fields': [...]}``, where every
+    field is a dict with ``name``, ``type``, ``offset`` and ``size`` (plus
+    ``array`` for arrays), taken from the member lines and their trailing
+    offset/size comment.
+
+    Example pahole output:
+    $ pahole -C xref lib/.libs/libfrr.so
+    struct xref {
+        struct xrefdata *          xrefdata;             /*     0     8 */
+        enum xref_type             type;                 /*     8     4 */
+        int                        line;                 /*    12     4 */
+        const char  *              file;                 /*    16     8 */
+        const char  *              func;                 /*    24     8 */
+
+        /* size: 32, cachelines: 1, members: 5 */
+        /* last cacheline: 32 bytes */
+    };
+
+    :param filename: ELF file (with debug info) to run pahole on
+    :raises ValueError: on a line that is neither blank, comment nor field
+    '''
+    cmd = ['pahole', '-C', ','.join(structs), filename]
+    pahole = subprocess.check_output(cmd).decode('UTF-8')
+
+    struct_re = re.compile(r'^struct ([^ ]+) \{([^\}]+)};', flags=re.M | re.S)
+    field_re = re.compile(r'^\s*(?P<type>[^;\(]+)\s+(?P<name>[^;\[\]]+)(?:\[(?P<array>\d+)\])?;\s*\/\*(?P<comment>.*)\*\/\s*$')
+    comment_re = re.compile(r'^\s*\/\*.*\*\/\s*$')
+
+    out = {}
+
+    for structname, body in struct_re.findall(pahole):
+        fields = out.setdefault(structname, {}).setdefault('fields', [])
+
+        for line in body.strip().splitlines():
+            # blank lines and comment-only lines carry no member
+            if line.strip() == '':
+                continue
+            if comment_re.match(line) is not None:
+                continue
+
+            m = field_re.match(line)
+            if m is None:
+                raise ValueError('cannot process line: %s' % line)
+
+            # trailing comment is "<offset> <size>"
+            offs, size = m.group('comment').strip().split()
+            offs = int(offs)
+            size = int(size)
+            typ_ = m.group('type').strip()
+            member = m.group('name')
+
+            if member.startswith('(*'):
+                # function pointer
+                typ_ = typ_ + ' *'
+                member = member[2:].split(')')[0]
+
+            entry = {
+                'name': member,
+                'type': typ_,
+                'offset': offs,
+                'size': size,
+            }
+            if m.group('array'):
+                entry['array'] = int(m.group('array'))
+
+            fields.append(entry)
+
+    return out
+
+class FieldApplicator(object):
+    '''
+    Fill ELFDissectStruct fields list from pahole/JSON
+
+    Uses the JSON file created by the above code to fill in the struct fields
+    in subclasses of ELFDissectStruct.
+
+    Usage: create with the loaded JSON data, ``add()`` each class (which
+    needs a ``struct`` attribute naming its C struct), then call the
+    instance to set ``fields`` on all of them.
+
+    :param data: dict as produced by extract() / loaded from the JSON file
+    '''
+
+    # only what we really need.  add more as needed.
+    packtypes = {
+        'int': 'i',
+        'uint8_t': 'B',
+        'uint16_t': 'H',
+        'uint32_t': 'I',
+        'char': 's',
+    }
+
+    def __init__(self, data):
+        self.data = data
+        self.classes = []
+        # C struct name -> class, used to link struct-typed fields
+        self.clsmap = {}
+
+    def add(self, cls):
+        '''
+        Register a class to be filled in; ``cls.struct`` is its C struct name
+        '''
+        self.classes.append(cls)
+        self.clsmap[cls.struct] = cls
+
+    def resolve(self, cls):
+        '''
+        Build the field list for one class and store it in ``cls.fields``
+
+        Gaps between the recorded field offsets are filled with ``_pad``
+        byte fields.  Names are passed through ``cls.fieldrename`` if the
+        class has one.
+
+        :raises ValueError: for a field type that cannot be mapped
+        '''
+        out = []
+        offset = 0
+
+        fieldrename = getattr(cls, 'fieldrename', {})
+        def mkname(n):
+            return (fieldrename.get(n, n),)
+
+        for field in self.data[cls.struct]['fields']:
+            typs = field['type'].split()
+            typs = [i for i in typs if i not in ['const']]
+
+            if field['offset'] != offset:
+                assert offset < field['offset']
+                out.append(('_pad', '%ds' % (field['offset'] - offset,)))
+
+            # pretty hacky C types handling, but covers what we need
+
+            ptrlevel = 0
+            while typs[-1] == '*':
+                typs.pop(-1)
+                ptrlevel += 1
+
+            if ptrlevel > 0:
+                # generic pointer unless it is a plain "char *" or a single
+                # pointer to one of the registered structs
+                packtype = ('P', None)
+                if ptrlevel == 1:
+                    if typs[0] == 'char':
+                        packtype = ('P', str)
+                    elif typs[0] == 'struct' and typs[1] in self.clsmap:
+                        packtype = ('P', self.clsmap[typs[1]])
+            elif typs[0] == 'enum':
+                packtype = ('I',)
+            elif typs[0] in self.packtypes:
+                packtype = (self.packtypes[typs[0]],)
+            elif typs[0] == 'struct':
+                if typs[1] in self.clsmap:
+                    packtype = (self.clsmap[typs[1]],)
+                else:
+                    # unregistered struct: opaque bytes of the right size
+                    packtype = ('%ds' % field['size'],)
+            else:
+                raise ValueError('cannot decode field %s in struct %s (%s)' % (
+                        field['name'], cls.struct, field['type']))
+
+            if 'array' in field and typs[0] == 'char':
+                # char arrays become one byte string field
+                packtype = ('%ds' % field['array'],)
+                out.append(mkname(field['name']) + packtype)
+            elif 'array' in field:
+                # other arrays are unrolled into name_0, name_1, ...
+                for i in range(0, field['array']):
+                    out.append(mkname('%s_%d' % (field['name'], i)) + packtype)
+            else:
+                out.append(mkname(field['name']) + packtype)
+
+            offset = field['offset'] + field['size']
+
+        cls.fields = out
+
+    def __call__(self):
+        '''
+        Resolve all classes registered through add()
+        '''
+        for cls in self.classes:
+            self.resolve(cls)
+
+def main():
+    '''
+    Command line entry point: run extract() on the input file and write the
+    result as JSON, going through a ".tmp" file that is renamed into place.
+    '''
+    argp = argparse.ArgumentParser(description = 'FRR DWARF structure extractor')
+    argp.add_argument('-o', dest='output', type=str, help='write JSON output', default='python/xrefstructs.json')
+    argp.add_argument('-i', dest='input',  type=str, help='ELF file to read',  default='lib/.libs/libfrr.so')
+    args = argp.parse_args()
+
+    structdata = extract(args.input)
+
+    tmpname = args.output + '.tmp'
+    with open(tmpname, 'w') as fd:
+        json.dump(structdata, fd, indent=2, sort_keys=True)
+    os.rename(tmpname, args.output)
+
+if __name__ == '__main__':
+    main()
diff --git a/python/xrefstructs.json b/python/xrefstructs.json
new file mode 100644 (file)
index 0000000..537afb8
--- /dev/null
@@ -0,0 +1,190 @@
+{
+  "cmd_element": {
+    "fields": [
+      {
+        "name": "string",
+        "offset": 0,
+        "size": 8,
+        "type": "const char  *"
+      },
+      {
+        "name": "doc",
+        "offset": 8,
+        "size": 8,
+        "type": "const char  *"
+      },
+      {
+        "name": "daemon",
+        "offset": 16,
+        "size": 4,
+        "type": "int"
+      },
+      {
+        "name": "attr",
+        "offset": 20,
+        "size": 1,
+        "type": "uint8_t"
+      },
+      {
+        "name": "func",
+        "offset": 24,
+        "size": 8,
+        "type": "int *"
+      },
+      {
+        "name": "name",
+        "offset": 32,
+        "size": 8,
+        "type": "const char  *"
+      },
+      {
+        "name": "xref",
+        "offset": 40,
+        "size": 32,
+        "type": "struct xref"
+      }
+    ]
+  },
+  "xref": {
+    "fields": [
+      {
+        "name": "xrefdata",
+        "offset": 0,
+        "size": 8,
+        "type": "struct xrefdata *"
+      },
+      {
+        "name": "type",
+        "offset": 8,
+        "size": 4,
+        "type": "enum xref_type"
+      },
+      {
+        "name": "line",
+        "offset": 12,
+        "size": 4,
+        "type": "int"
+      },
+      {
+        "name": "file",
+        "offset": 16,
+        "size": 8,
+        "type": "const char  *"
+      },
+      {
+        "name": "func",
+        "offset": 24,
+        "size": 8,
+        "type": "const char  *"
+      }
+    ]
+  },
+  "xref_install_element": {
+    "fields": [
+      {
+        "name": "xref",
+        "offset": 0,
+        "size": 32,
+        "type": "struct xref"
+      },
+      {
+        "name": "cmd_element",
+        "offset": 32,
+        "size": 8,
+        "type": "const struct cmd_element  *"
+      },
+      {
+        "name": "node_type",
+        "offset": 40,
+        "size": 4,
+        "type": "enum node_type"
+      }
+    ]
+  },
+  "xref_logmsg": {
+    "fields": [
+      {
+        "name": "xref",
+        "offset": 0,
+        "size": 32,
+        "type": "struct xref"
+      },
+      {
+        "name": "fmtstring",
+        "offset": 32,
+        "size": 8,
+        "type": "const char  *"
+      },
+      {
+        "name": "priority",
+        "offset": 40,
+        "size": 4,
+        "type": "uint32_t"
+      },
+      {
+        "name": "ec",
+        "offset": 44,
+        "size": 4,
+        "type": "uint32_t"
+      }
+    ]
+  },
+  "xref_threadsched": {
+    "fields": [
+      {
+        "name": "xref",
+        "offset": 0,
+        "size": 32,
+        "type": "struct xref"
+      },
+      {
+        "name": "funcname",
+        "offset": 32,
+        "size": 8,
+        "type": "const char  *"
+      },
+      {
+        "name": "dest",
+        "offset": 40,
+        "size": 8,
+        "type": "const char  *"
+      },
+      {
+        "name": "thread_type",
+        "offset": 48,
+        "size": 4,
+        "type": "uint32_t"
+      }
+    ]
+  },
+  "xrefdata": {
+    "fields": [
+      {
+        "name": "xref",
+        "offset": 0,
+        "size": 8,
+        "type": "const struct xref  *"
+      },
+      {
+        "array": 16,
+        "name": "uid",
+        "offset": 8,
+        "size": 16,
+        "type": "char"
+      },
+      {
+        "name": "hashstr",
+        "offset": 24,
+        "size": 8,
+        "type": "const char  *"
+      },
+      {
+        "array": 2,
+        "name": "hashu32",
+        "offset": 32,
+        "size": 8,
+        "type": "uint32_t"
+      }
+    ]
+  }
+}
\ No newline at end of file
diff --git a/python/xrelfo.py b/python/xrelfo.py
new file mode 100644 (file)
index 0000000..b726d28
--- /dev/null
@@ -0,0 +1,397 @@
+# FRR ELF xref extractor
+#
+# Copyright (C) 2020  David Lamparter for NetDEF, Inc.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; see the file COPYING; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+import sys
+import os
+import struct
+import re
+import traceback
+import json
+import argparse
+
+from clippy.uidhash import uidhash
+from clippy.elf import *
+from clippy import frr_top_src
+from tiabwarfo import FieldApplicator
+
+# Load the struct layout descriptions shipped with the sources; without them
+# none of the struct wrapper classes below can be given their fields.
+try:
+    fd = open(os.path.join(frr_top_src, 'python', 'xrefstructs.json'), 'r')
+except FileNotFoundError:
+    sys.stderr.write('''
+The "xrefstructs.json" file (created by running tiabwarfo.py with the pahole
+tool available) could not be found.  It should be included with the sources.
+''')
+    sys.exit(1)
+else:
+    with fd:
+        xrefstructs = json.load(fd)
+
+# constants, need to be kept in sync manually...
+# (presumably with the values of "enum xref_type" in the C sources - verify)
+#
+# These are the values compared against Xref.typ; they are used below as keys
+# of Xref.containers to select the class wrapping a given xref.
+
+XREFT_THREADSCHED = 0x100
+XREFT_LOGMSG = 0x200
+XREFT_DEFUN = 0x300
+XREFT_INSTALL_ELEMENT = 0x301
+
+# LOG_*
+# priovals is looked up with (priority & 0x30) in XrefLogmsg.dump(); it is
+# left empty in this file, so that lookup falls back to ' '.
+priovals = {}
+# one-character labels, indexed with (priority & 7) in XrefLogmsg.dump()
+prios = ['0', '1', '2', 'E', 'W', 'N', 'I', 'D']
+
+
+class XrelfoJson(object):
+    """Mixin supplying do-nothing defaults for the per-item hooks.
+
+    Classes deriving from this override whichever hooks they need.
+    """
+
+    def dump(self):
+        """Print a human-readable form of the item; the default prints nothing."""
+        return None
+
+    def check(self, wopt):
+        """Yield warnings for this item; the default yields none.
+
+        wopt is the options object that overriding implementations consult.
+        """
+        yield from ()
+
+    def to_dict(self, refs):
+        """Record the item in the output dict; the default records nothing."""
+        return None
+
+class Xref(ELFDissectStruct, XrelfoJson):
+    """Wrapper for "struct xref", the header embedded in every xref item.
+
+    The struct's "type" field is exposed as "typ".  "containers" maps a typ
+    value to the class of the structure that embeds this xref.
+    """
+    struct = 'xref'
+    fieldrename = {'type': 'typ'}
+    containers = {}
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        # filled in lazily by container()
+        self._container = None
+        if self.xrefdata:
+            # give the xrefdata a link back to this xref
+            self.xrefdata.ref_from(self, self.typ)
+
+    def container(self):
+        """Return the structure embedding this xref, or None for unknown types.
+
+        The result is remembered on the instance once it has been found.
+        """
+        if self._container is None and self.typ in self.containers:
+            outer_cls = self.containers[self.typ]
+            self._container = self.container_of(outer_cls, 'xref')
+        return self._container
+
+    def check(self, *args, **kwargs):
+        """Yield the warnings of the container, if one has been looked up.
+
+        Only a container already found through container() is consulted.
+        """
+        if not self._container:
+            return
+        yield from self._container.check(*args, **kwargs)
+
+
+class Xrefdata(ELFDissectStruct):
+    """Wrapper for "struct xrefdata", which carries the inputs of the UID hash."""
+    struct = 'xrefdata'
+
+    # uid is all zeroes in the data loaded from ELF
+    # (so the raw field is renamed out of the way and "uid" is computed below)
+    fieldrename = {'uid': '_uid'}
+
+    def ref_from(self, xref, typ):
+        """Remember the xref that points at this xrefdata; typ is not used."""
+        self.xref = xref
+
+    @property
+    def uid(self):
+        """UID computed by uidhash(), or None when there is no hash string."""
+        if self.hashstr is not None:
+            return uidhash(self.xref.file, self.hashstr, self.hashu32_0, self.hashu32_1)
+        return None
+
+class XrefPtr(ELFDissectStruct):
+    # One element of the xref pointer array that Xrelfo.load_elf() iterates
+    # over: a single 'P' field named "xref" whose target class is Xref.
+    # NOTE(review): 'P' looks like the pointer pack type (FieldApplicator
+    # emits ('P', cls) for struct pointers) - confirm in clippy.elf.
+    fields = [
+        ('xref', 'P', Xref),
+    ]
+
+class XrefThreadSched(ELFDissectStruct, XrelfoJson):
+    # Wrapper for "struct xref_threadsched"; adds no behavior of its own.
+    struct = 'xref_threadsched'
+# make Xref.container() map XREFT_THREADSCHED xrefs to this class
+Xref.containers[XREFT_THREADSCHED] = XrefThreadSched
+
+class XrefLogmsg(ELFDissectStruct, XrelfoJson):
+    """Wrapper for "struct xref_logmsg", one log message call site."""
+    struct = 'xref_logmsg'
+
+    def _warn_fmt(self, text):
+        """Yield one ((file, line), message) pair describing a problem."""
+        xref = self.xref
+        message = '%s:%d: %s (in %s())\n' % (xref.file, xref.line, text, xref.func)
+        yield ((xref.file, xref.line), message)
+
+    # (pattern, diagnostic) pairs applied to the format string by check().
+    # Each pattern has exactly one capture group, so that re.split() returns
+    # the matched text at the odd indices.
+    regexes = [
+        (re.compile(r'([\n\t]+)'), 'error: log message contains tab or newline'),
+    # disabled check:
+    #    (re.compile(r'^(\s+)'),   'warning: log message starts with whitespace'),
+        (re.compile(r'^((?:warn(?:ing)?|error):\s*)', re.I), 'warning: log message starts with severity'),
+    ]
+
+    def check(self, wopt):
+        """Yield format string warnings when wopt.Wlog_format is set.
+
+        If stderr is a terminal, the offending parts of the format string are
+        highlighted with escape sequences in the quoted excerpt.
+        """
+        if not wopt.Wlog_format:
+            return
+
+        for rex, msg in self.regexes:
+            if rex.search(self.fmtstring) is None:
+                continue
+
+            if sys.stderr.isatty():
+                # odd-numbered pieces are the text matched by the pattern
+                excerpt = ''.join(
+                    '\033[41;37;1m%s\033[m' % repr(piece)[1:-1] if idx % 2 == 1
+                    else repr(piece)[1:-1]
+                    for idx, piece in enumerate(rex.split(self.fmtstring))
+                )
+            else:
+                excerpt = repr(self.fmtstring)[1:-1]
+
+            yield from self._warn_fmt('%s: "%s"' % (msg, excerpt))
+
+    def dump(self):
+        """Print location, priority, UID, error code and format string."""
+        xref = self.xref
+        location = '%s:%d %s()' % (xref.file, xref.line, xref.func)
+        level = prios[self.priority & 7]
+        modifier = priovals.get(self.priority & 0x30, ' ')
+        print('%-60s %s%s %-25s [EC %d] %s' % (
+            location, level, modifier,
+            xref.xrefdata.uid, self.ec, self.fmtstring))
+
+    def to_dict(self, xrelfo):
+        """Append this log message to xrelfo['refs'] under its UID."""
+        jsobj = {attr: getattr(self.xref, attr) for attr in ('file', 'line', 'func')}
+        if self.ec != 0:
+            jsobj['ec'] = self.ec
+        jsobj['fmtstring'] = self.fmtstring
+        jsobj['priority'] = self.priority & 7
+        jsobj['type'] = 'logmsg'
+        jsobj['binary'] = self._elfsect._elfwrap.orig_filename
+
+        # bits 0x10 and 0x20 of the priority are reported as named flags
+        flags = [name
+                 for bit, name in ((0x10, 'errno'), (0x20, 'getaddrinfo'))
+                 if self.priority & bit]
+        if flags:
+            jsobj['flags'] = flags
+
+        xrelfo['refs'].setdefault(self.xref.xrefdata.uid, []).append(jsobj)
+
+Xref.containers[XREFT_LOGMSG] = XrefLogmsg
+
+class CmdElement(ELFDissectStruct, XrelfoJson):
+    """Wrapper for "struct cmd_element", one CLI command definition."""
+    struct = 'cmd_element'
+
+    # names reported for known values of the "attr" field; other values are
+    # passed through as they are, and 0 results in no "attr" entry at all
+    cmd_attrs = { 0: None, 1: 'deprecated', 2: 'hidden'}
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+    def to_dict(self, xrelfo):
+        """Store this command in xrelfo['cli'][name][binary]."""
+        per_binary = xrelfo['cli'].setdefault(self.name, {})
+        jsobj = per_binary.setdefault(self._elfsect._elfwrap.orig_filename, {})
+
+        string, doc = self.string, self.doc
+        attr = self.cmd_attrs.get(self.attr, self.attr)
+
+        jsobj['string'] = string
+        jsobj['doc'] = doc
+        if attr is None:
+            jsobj.pop('attr', None)
+        else:
+            jsobj['attr'] = attr
+
+        jsobj['defun'] = {key: getattr(self.xref, key) for key in ('file', 'line', 'func')}
+
+Xref.containers[XREFT_DEFUN] = CmdElement
+
+class XrefInstallElement(ELFDissectStruct, XrelfoJson):
+    """Wrapper for "struct xref_install_element", one command installation."""
+    struct = 'xref_install_element'
+
+    def to_dict(self, xrelfo):
+        """Append this installation to the 'nodes' list of its command.
+
+        The entry lives at xrelfo['cli'][command name][binary]['nodes'].
+        """
+        per_binary = xrelfo['cli'].setdefault(self.cmd_element.name, {})
+        jsobj = per_binary.setdefault(self._elfsect._elfwrap.orig_filename, {})
+
+        where = {key: getattr(self.xref, key) for key in ('file', 'line', 'func')}
+        jsobj.setdefault('nodes', []).append({
+            'node': self.node_type,
+            'install': where,
+        })
+
+Xref.containers[XREFT_INSTALL_ELEMENT] = XrefInstallElement
+
+# shove in field defs
+#
+# Each struct wrapper class above is handed to the FieldApplicator, which is
+# created from the layouts loaded from xrefstructs.json.  Calling the
+# applicator at the end resolves every added class and sets its "fields".
+# NOTE(review): the order of the add() calls is kept as is; whether it
+# matters depends on FieldApplicator.add() - confirm in tiabwarfo.py.
+fieldapply = FieldApplicator(xrefstructs)
+fieldapply.add(Xref)
+fieldapply.add(Xrefdata)
+fieldapply.add(XrefLogmsg)
+fieldapply.add(XrefThreadSched)
+fieldapply.add(CmdElement)
+fieldapply.add(XrefInstallElement)
+fieldapply()
+
+
+class Xrelfo(dict):
+    """Accumulated xref data; a dict with the two keys 'refs' and 'cli'.
+
+    'refs' maps a UID to a list of reference dicts, 'cli' maps a command name
+    to a dict of per-binary entries.  Both are filled by the to_dict()
+    methods of the xref containers and by load_json().  The Xref objects read
+    from ELF files are additionally kept in self._xrefs for check().
+    """
+
+    def __init__(self):
+        super().__init__({
+            'refs': {},
+            'cli': {},
+        })
+        self._xrefs = []
+
+    @staticmethod
+    def _libtool_target(filename):
+        """Return the path of the file a libtool .la/.lo file refers to.
+
+        The first usable "pic_object" or "library_names" assignment decides:
+        a pic_object is taken relative to the directory of the libtool file,
+        the first library name relative to its ".libs" subdirectory.
+
+        Raises ValueError if the file has no such assignment, or if
+        library_names is empty.
+        """
+        dirname = os.path.dirname(filename)
+
+        with open(filename, 'r') as fd:
+            for line in fd:
+                line = line.strip()
+                if line.startswith('#') or line == '' or '=' not in line:
+                    continue
+
+                var, val = line.split('=', 1)
+                if var not in ['library_names', 'pic_object']:
+                    continue
+                if val.startswith("'") or val.startswith('"'):
+                    # strip the surrounding quotes
+                    val = val[1:-1]
+
+                if var == 'pic_object':
+                    return os.path.join(dirname, val)
+
+                names = val.split()
+                if not names:
+                    # An empty library_names used to surface as a bare
+                    # IndexError.  (Presumably libtool writes it when no
+                    # shared library was built - TODO confirm.)
+                    raise ValueError('could not process libtool file "%s": library_names is empty' % filename)
+                return os.path.join(dirname, '.libs', names[0])
+
+        raise ValueError('could not process libtool file "%s"' % filename)
+
+    def load_file(self, filename):
+        """Load xref data from an ELF file, libtool file, wrapper script or JSON.
+
+        Files ending in .la or .lo are resolved to the object or library they
+        name.  After that the first bytes of the file decide: ELF files go to
+        load_elf(), files starting with "{" go to load_json(), and for files
+        starting with "#!" the file of the same name in the ".libs"
+        subdirectory is tried instead.
+
+        Raises ValueError for unusable libtool files and unknown file types.
+        """
+        orig_filename = filename
+        if filename.endswith('.la') or filename.endswith('.lo'):
+            filename = self._libtool_target(filename)
+
+        while True:
+            with open(filename, 'rb') as fd:
+                hdr = fd.read(4)
+
+            if hdr == b'\x7fELF':
+                self.load_elf(filename, orig_filename)
+                return
+
+            if hdr[:2] == b'#!':
+                # a script; look for the file of the same name in .libs
+                path, name = os.path.split(filename)
+                filename = os.path.join(path, '.libs', name)
+                continue
+
+            if hdr[:1] == b'{':
+                with open(filename, 'r') as fd:
+                    self.load_json(fd)
+                return
+
+            raise ValueError('cannot determine file type for %s' % (filename))
+
+    def load_elf(self, filename, orig_filename):
+        """Read all xrefs from an ELF file and add them to this dict.
+
+        The array of xref pointers is found through the "XREF" note of owner
+        "FRRouting" if the file has one, otherwise through the "xref_array"
+        section.  orig_filename is stored on the ELFDissectFile, where the
+        containers pick it up as the name of the binary.
+
+        Returns the ELFDissectFile.  Raises ValueError if the file has neither
+        the note nor the section.
+        """
+        edf = ELFDissectFile(filename)
+        edf.orig_filename = orig_filename
+
+        note = edf._elffile.find_note('FRRouting', 'XREF')
+        if note is not None:
+            endian = '>' if edf._elffile.bigendian else '<'
+            mem = edf._elffile[note]
+            # The note consists of two words.  The first is counted from the
+            # start of the note, the second from one word further in
+            # (presumably each is relative to its own position - verify).
+            if edf._elffile.elfclass == 64:
+                start, end = struct.unpack(endian + 'QQ', mem)
+                start += note.start
+                end += note.start + 8
+            else:
+                start, end = struct.unpack(endian + 'II', mem)
+                start += note.start
+                end += note.start + 4
+
+            ptrs = edf.iter_data(XrefPtr, slice(start, end))
+
+        else:
+            xrefarray = edf.get_section('xref_array')
+            if xrefarray is None:
+                raise ValueError('file has neither xref note nor xref_array section')
+
+            ptrs = xrefarray.iter_data(XrefPtr)
+
+        for ptr in ptrs:
+            if ptr.xref is None:
+                print('NULL xref')
+                continue
+            self._xrefs.append(ptr.xref)
+
+            # xrefs of a type without a registered container are kept in
+            # self._xrefs but contribute nothing to the dict
+            container = ptr.xref.container()
+            if container is None:
+                continue
+            container.to_dict(self)
+
+        return edf
+
+    def load_json(self, fd):
+        """Merge a JSON dump read from the open file fd into this dict.
+
+        References already present under the same UID are not added a second
+        time; CLI entries are merged per command with dict.update().
+
+        Returns the decoded JSON data.
+        """
+        data = json.load(fd)
+        for uid, items in data['refs'].items():
+            myitems = self['refs'].setdefault(uid, [])
+            for item in items:
+                if item in myitems:
+                    continue
+                myitems.append(item)
+
+        for cmd, items in data['cli'].items():
+            self['cli'].setdefault(cmd, {}).update(items)
+
+        return data
+
+    def check(self, checks):
+        """Yield the warnings of all xrefs loaded from ELF files."""
+        for xref in self._xrefs:
+            yield from xref.check(checks)
+
+def main():
+    """Command-line entry point: parse the arguments and run _main().
+
+    With --profile, _main() is run under cProfile and the statistics are
+    sorted by cumulative time.
+    """
+    parser = argparse.ArgumentParser(description = 'FRR xref ELF extractor')
+    parser.add_argument('-o', dest='output', type=str, help='write JSON output')
+    parser.add_argument('--out-by-file', type=str, help='write by-file JSON output')
+    parser.add_argument('-Wlog-format', action='store_const', const=True)
+    parser.add_argument('--profile', action='store_const', const=True)
+    parser.add_argument('binaries', metavar='BINARY', nargs='+', type=str, help='files to read (ELF files or libtool objects)')
+    opts = parser.parse_args()
+
+    if not opts.profile:
+        _main(opts)
+        return
+
+    import cProfile
+    cProfile.runctx('_main(args)', globals(), {'args': opts}, sort='cumtime')
+
+def _main(args):
+    """Load all input files, print warnings and write the JSON outputs.
+
+    args is the parsed argparse namespace: "binaries" lists the files to
+    load, "output" and "out_by_file" optionally name the JSON files to write.
+
+    A file that fails to load is reported on stderr with a traceback and the
+    remaining files are still processed; in that case the process exits with
+    status 1 after the checks and no output file is written.
+    """
+    def write_json(path, obj):
+        # write to a temporary name first, then rename over the final one
+        with open(path + '.tmp', 'w') as fd:
+            json.dump(obj, fd, indent=2, sort_keys=True)
+        os.rename(path + '.tmp', path)
+
+    errors = 0
+    xrelfo = Xrelfo()
+
+    for fn in args.binaries:
+        try:
+            xrelfo.load_file(fn)
+        except Exception:
+            # Not a bare "except:", which would also swallow
+            # KeyboardInterrupt and SystemExit and move on to the next file.
+            errors += 1
+            sys.stderr.write('while processing %s:\n' % (fn))
+            traceback.print_exc()
+
+    # run the checks once if the namespace has any -W... option at all; the
+    # individual checks look at their own option
+    if any(option.startswith('W') for option in dir(args)):
+        checks = sorted(xrelfo.check(args))
+        sys.stderr.write(''.join([c[-1] for c in checks]))
+
+    refs = xrelfo['refs']
+
+    # highlight UIDs whose references do not all share one format string
+    for uid, locs in refs.items():
+        fmtstrings = {loc['fmtstring'] for loc in locs}
+        if len(fmtstrings) != 1:
+            print('\033[31;1m%s\033[m' % uid)
+
+    # regroup the references by source file, ordered by line number
+    outbyfile = {}
+    for uid, locs in refs.items():
+        for loc in locs:
+            filearray = outbyfile.setdefault(loc['file'], [])
+            loc = dict(loc)
+            del loc['file']
+            filearray.append(loc)
+
+    for fname in outbyfile:
+        outbyfile[fname] = sorted(outbyfile[fname], key=lambda x: x['line'])
+
+    if errors:
+        sys.exit(1)
+
+    if args.output:
+        write_json(args.output, xrelfo)
+
+    if args.out_by_file:
+        write_json(args.out_by_file, outbyfile)
+
+# only run the extractor when executed as a script, not when imported
+if __name__ == '__main__':
+    main()