diff options
Diffstat (limited to 'python')
| -rw-r--r-- | python/clippy/__init__.py | 2 | ||||
| -rw-r--r-- | python/clippy/elf.py | 574 | ||||
| -rw-r--r-- | python/clippy/uidhash.py | 71 | ||||
| -rw-r--r-- | python/runtests.py | 14 | ||||
| -rw-r--r-- | python/test_xrelfo.py | 65 | ||||
| -rw-r--r-- | python/tiabwarfo.py | 195 | ||||
| -rw-r--r-- | python/xrefstructs.json | 190 | ||||
| -rw-r--r-- | python/xrelfo.py | 397 | 
8 files changed, 1508 insertions, 0 deletions
diff --git a/python/clippy/__init__.py b/python/clippy/__init__.py index d6865ff484..344a1c91ee 100644 --- a/python/clippy/__init__.py +++ b/python/clippy/__init__.py @@ -21,6 +21,8 @@ import _clippy  from _clippy import parse, Graph, GraphNode +frr_top_src = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) +  def graph_iterate(graph):      """iterator yielding all nodes of a graph diff --git a/python/clippy/elf.py b/python/clippy/elf.py new file mode 100644 index 0000000000..4ed334f0c4 --- /dev/null +++ b/python/clippy/elf.py @@ -0,0 +1,574 @@ +# FRR libelf wrapper +# +# Copyright (C) 2020  David Lamparter for NetDEF, Inc. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; see the file COPYING; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +''' +Wrapping layer and additional utility around _clippy.ELFFile. + +Essentially, the C bits have the low-level ELF access bits that should be +fast while this has the bits that string everything together (and would've +been a PITA to do in C.) + +Surprisingly - or maybe through proper engineering - this actually works +across architecture, word size and even endianness boundaries.  Both the C +module (through GElf_*) and this code (cf. struct.unpack format mangling +in ELFDissectStruct) will take appropriate measures to flip and resize +fields as needed. 
+''' + +import struct +from collections import OrderedDict +from weakref import WeakValueDictionary + +from _clippy import ELFFile, ELFAccessError + +# +# data access +# + +class ELFNull(object): +    ''' +    NULL pointer, returned instead of ELFData +    ''' +    def __init__(self): +        self.symname = None +        self._dstsect = None + +    def __repr__(self): +        return '<ptr: NULL>' + +    def __hash__(self): +        return hash(None) + +    def get_string(self): +        return None + +class ELFUnresolved(object): +    ''' +    Reference to an unresolved external symbol, returned instead of ELFData + +    :param symname: name of the referenced symbol +    :param addend:  offset added to the symbol, normally zero +    ''' +    def __init__(self, symname, addend): +        self.addend = addend +        self.symname = symname +        self._dstsect = None + +    def __repr__(self): +        return '<unresolved: %s+%d>' % (self.symname, self.addend) + +    def __hash__(self): +        return hash((self.symname, self.addend)) + +class ELFData(object): +    ''' +    Actual data somewhere in the ELF file. + +    :type dstsect:  ELFSubset +    :param dstsect: container data area (section or entire file) +    :param dstoffs: byte offset into dstsect +    :param dstlen:  byte size of object, or None if unknown, open-ended or string +    ''' +    def __init__(self, dstsect, dstoffs, dstlen): +        self._dstsect = dstsect +        self._dstoffs = dstoffs +        self._dstlen = dstlen +        self.symname = None + +    def __repr__(self): +        return '<ptr: %s+0x%05x/%d>' % (self._dstsect.name, self._dstoffs, self._dstlen or -1) + +    def __hash__(self): +        return hash((self._dstsect, self._dstoffs)) + +    def get_string(self): +        ''' +        Interpret as C string / null terminated UTF-8 and get the actual text. 
+        ''' +        try: +            return self._dstsect[self._dstoffs:str].decode('UTF-8') +        except: +            import pdb; pdb.set_trace() + +    def get_data(self, reflen): +        ''' +        Interpret as some structure (and check vs. expected length) + +        :param reflen: expected size of the object, compared against actual +            size (which is only known in rare cases, mostly when directly +            accessing a symbol since symbols have their destination object +            size recorded) +        ''' +        if self._dstlen is not None and self._dstlen != reflen: +            raise ValueError('symbol size mismatch (got %d, expected %d)' % (self._dstlen, reflen)) +        return self._dstsect[self._dstoffs:self._dstoffs+reflen] + +    def offset(self, offs, within_symbol=False): +        ''' +        Get another ELFData at an offset + +        :param offs:          byte offset, can be negative (e.g. in container_of) +        :param within_symbol: retain length information +        ''' +        if self._dstlen is None or not within_symbol: +            return ELFData(self._dstsect, self._dstoffs + offs, None) +        else: +            return ELFData(self._dstsect, self._dstoffs + offs, self._dstlen - offs) + +# +# dissection data items +# + +class ELFDissectData(object): +    ''' +    Common bits for ELFDissectStruct and ELFDissectUnion +    ''' + +    def __len__(self): +        ''' +        Used for boolean evaluation, e.g. "if struct: ..." 
+        ''' +        return not (isinstance(self._data, ELFNull) or isinstance(self._data, ELFUnresolved)) + +    def container_of(self, parent, fieldname): +        ''' +        Assume this struct is embedded in a larger struct and get at the larger + +        Python ``self.container_of(a, b)`` = C ``container_of(self, a, b)`` + +        :param parent:    class (not instance) of the larger struct +        :param fieldname: fieldname that refers back to this +        :returns:         instance of parent, with fieldname set to this object +        ''' +        offset = 0 +        if not hasattr(parent, '_efields'): +            parent._setup_efields() + +        for field in parent._efields[self.elfclass]: +            if field[0] == fieldname: +                break +            offset += struct.calcsize(field[1]) +        else: +            raise AttributeError('%r not found in %r.fields' % (fieldname, parent)) + +        return parent(self._data.offset(-offset), replace = {fieldname: self}) + +class ELFDissectStruct(ELFDissectData): +    ''' +    Decode and provide access to a struct somewhere in the ELF file + +    Handles pointers and strings somewhat nicely.  Create a subclass for each +    struct that is to be accessed, and give a field list in a "fields" +    class-member. + +    :param dataptr: ELFData referring to the data bits to decode. +    :param parent:  where this was instantiated from; only for reference, has +        no functional impact. +    :param replace: substitute data values for specific fields.  Used by +        `container_of` to replace the inner struct when creating the outer +        one. + +    .. attribute:: fields + +       List of tuples describing the struct members.  
Items can be: +       - ``('name', ELFDissectData)`` - directly embed another struct +       - ``('name', 'I')`` - simple data types; second item for struct.unpack +       - ``('name', 'I', None)`` - field to ignore +       - ``('name', 'P', str)`` - pointer to string +       - ``('name', 'P', ELFDissectData)`` - pointer to another struct + +       ``P`` is added as unpack format for pointers (sized appropriately for +       the ELF file.) + +       Refer to tiabwarfo.py for extracting this from ``pahole``. + +       TBD: replace tuples with a class. + +    .. attribute:: fieldrename + +       Dictionary to rename fields, useful if fields comes from tiabwarfo.py. +    ''' + +    class Pointer(object): +        ''' +        Quick wrapper for pointers to further structs + +        This is just here to avoid going into infinite loops when loading +        structs that have pointers to each other (e.g. struct xref <--> +        struct xrefdata.)  The pointer destination is only instantiated when +        actually accessed. 
+        ''' +        def __init__(self, cls, ptr): +            self.cls = cls +            self.ptr = ptr + +        def __repr__(self): +            return '<Pointer:%s %r>' % (self.cls.__name__, self.ptr) + +        def __call__(self): +            if isinstance(self.ptr, ELFNull): +                return None +            return self.cls(self.ptr) + +    def __new__(cls, dataptr, parent = None, replace = None): +        if dataptr._dstsect is None: +            return super().__new__(cls) + +        obj = dataptr._dstsect._pointers.get((cls, dataptr)) +        if obj is not None: +            return obj +        obj = super().__new__(cls) +        dataptr._dstsect._pointers[(cls, dataptr)] = obj +        return obj + +    replacements = 'lLnN' + +    @classmethod +    def _preproc_structspec(cls, elfclass, spec): +        elfbits = elfclass + +        if hasattr(spec, 'calcsize'): +            spec = '%ds' % (spec.calcsize(elfclass),) + +        if elfbits == 32: +            repl = ['i', 'I'] +        else: +            repl = ['q', 'Q'] +        for c in cls.replacements: +            spec = spec.replace(c, repl[int(c.isupper())]) +        return spec + +    @classmethod +    def _setup_efields(cls): +        cls._efields = {} +        cls._esize = {} +        for elfclass in [32, 64]: +            cls._efields[elfclass] = [] +            size = 0 +            for f in cls.fields: +                newf = (f[0], cls._preproc_structspec(elfclass, f[1])) + f[2:] +                cls._efields[elfclass].append(newf) +                size += struct.calcsize(newf[1]) +            cls._esize[elfclass] = size + +    def __init__(self, dataptr, parent = None, replace = None): +        if not hasattr(self.__class__, '_efields'): +            self._setup_efields() + +        self._fdata = None +        self._data = dataptr +        self._parent = parent +        self.symname = dataptr.symname +        if isinstance(dataptr, ELFNull) or isinstance(dataptr, 
ELFUnresolved): +            self._fdata = {} +            return + +        self._elfsect = dataptr._dstsect +        self.elfclass = self._elfsect._elffile.elfclass +        self.offset = dataptr._dstoffs + +        pspecl = [f[1] for f in self._efields[self.elfclass]] + +        # need to correlate output from struct.unpack with extra metadata +        # about the particular fields, so note down byte offsets (in locs) +        # and tuple indices of pointers (in ptrs) +        pspec = '' +        locs = {} +        ptrs = set() + +        for idx, spec in enumerate(pspecl): +            if spec == 'P': +                ptrs.add(idx) +                spec = self._elfsect.ptrtype + +            locs[idx] = struct.calcsize(pspec) +            pspec = pspec + spec + +        self._total_size = struct.calcsize(pspec) + +        def replace_ptrs(v): +            idx, val = v[0], v[1] +            if idx not in ptrs: +                return val +            return self._elfsect.pointer(self.offset + locs[idx]) + +        data = dataptr.get_data(struct.calcsize(pspec)) +        unpacked = struct.unpack(self._elfsect.endian + pspec, data) +        unpacked = list(map(replace_ptrs, enumerate(unpacked))) +        self._fraw = unpacked +        self._fdata = OrderedDict() +        replace = replace or {} + +        for i, item in enumerate(unpacked): +            name = self.fields[i][0] +            if name is None: +                continue + +            if name in replace: +                self._fdata[name] = replace[name] +                continue + +            if isinstance(self.fields[i][1], type) and issubclass(self.fields[i][1], ELFDissectData): +                dataobj = self.fields[i][1](dataptr.offset(locs[i]), self) +                self._fdata[name] = dataobj +                continue +            if len(self.fields[i]) == 3: +                if self.fields[i][2] == str: +                    self._fdata[name] = item.get_string() +                    continue 
+                elif self.fields[i][2] is None: +                    pass +                elif issubclass(self.fields[i][2], ELFDissectData): +                    cls = self.fields[i][2] +                    dataobj = self.Pointer(cls, item) +                    self._fdata[name] = dataobj +                    continue + +            self._fdata[name] = item + +    def __getattr__(self, attrname): +        if attrname not in self._fdata: +            raise AttributeError(attrname) +        if isinstance(self._fdata[attrname], self.Pointer): +            self._fdata[attrname] = self._fdata[attrname]() +        return self._fdata[attrname] + +    def __repr__(self): +        if not isinstance(self._data, ELFData): +            return '<%s: %r>' % (self.__class__.__name__, self._data) +        return '<%s: %s>' % (self.__class__.__name__, +                ', '.join(['%s=%r' % t for t in self._fdata.items()])) + +    @classmethod +    def calcsize(cls, elfclass): +        ''' +        Sum up byte size of this struct + +        Wraps struct.calcsize with some extra features. +        ''' +        if not hasattr(cls, '_efields'): +            cls._setup_efields() + +        pspec = ''.join([f[1] for f in cls._efields[elfclass]]) + +        ptrtype = 'I' if elfclass == 32 else 'Q' +        pspec = pspec.replace('P', ptrtype) + +        return struct.calcsize(pspec) + +class ELFDissectUnion(ELFDissectData): +    ''' +    Decode multiple structs in the same place. + +    Not currently used (and hence not tested.)  Worked at some point but not +    needed anymore and may be borked now.  Remove this comment when using. 
+    ''' +    def __init__(self, dataptr, parent = None): +        self._dataptr = dataptr +        self._parent = parent +        self.members = [] +        for name, membercls in self.__class__.members: +            item = membercls(dataptr, parent) +            self.members.append(item) +            setattr(self, name, item) + +    def __repr__(self): +        return '<%s: %s>' % (self.__class__.__name__, ', '.join([repr(i) for i in self.members])) + +    @classmethod +    def calcsize(cls, elfclass): +        return max([member.calcsize(elfclass) for name, member in cls.members]) + +# +# wrappers for spans of ELF data +# + +class ELFSubset(object): +    ''' +    Common abstract base for section-level and file-level access. +    ''' + +    def __init__(self): +        super().__init__() + +        self._pointers = WeakValueDictionary() + +    def __hash__(self): +        return hash(self.name) + +    def __getitem__(self, k): +        ''' +        Read data from slice + +        Subscript **must** be a slice; a simple index will not return a byte +        but rather throw an exception.  Valid slice syntaxes are defined by +        the C module: + +        - `this[123:456]` - extract specific range +        - `this[123:str]` - extract until null byte.  The slice stop value is +            the `str` type (or, technically, `unicode`.) +        ''' +        return self._obj[k] + +    def getreloc(self, offset): +        ''' +        Check for a relocation record at the specified offset. 
+        ''' +        return self._obj.getreloc(offset) + +    def iter_data(self, scls, slice_ = slice(None)): +        ''' +        Assume an array of structs present at a particular slice and decode + +        :param scls:   ELFDissectData subclass for the struct +        :param slice_: optional range specification +        ''' +        size = scls.calcsize(self._elffile.elfclass) + +        offset = slice_.start or 0 +        stop = slice_.stop or self._obj.len +        if stop < 0: +            stop = self._obj.len - stop + +        while offset < stop: +            yield scls(ELFData(self, offset, size)) +            offset += size + +    def pointer(self, offset): +        ''' +        Try to dereference a pointer value + +        This checks whether there's a relocation at the given offset and +        uses that;  otherwise (e.g. in a non-PIE executable where the pointer +        is already resolved by the linker) the data at the location is used. + +        :param offset: byte offset from beginning of section, +            or virtual address in file +        :returns:      ELFData wrapping pointed-to object +        ''' + +        ptrsize = struct.calcsize(self.ptrtype) +        data = struct.unpack(self.endian + self.ptrtype, self[offset:offset + ptrsize])[0] + +        reloc = self.getreloc(offset) +        dstsect = None +        if reloc: +            # section won't be available in whole-file operation +            dstsect = reloc.getsection(data) +            addend = reloc.r_addend + +            if reloc.relative: +                # old-style ELF REL instead of RELA, not well-tested +                addend += data + +            if reloc.unresolved and reloc.symvalid: +                return ELFUnresolved(reloc.symname, addend) +            elif reloc.symvalid: +                data = addend + reloc.st_value +            else: +                data = addend + +        # 0 could technically be a valid pointer for a shared library, +        # since 
libraries may use 0 as default virtual start address (it'll +        # be adjusted on loading) +        # That said, if the library starts at 0, that's where the ELF header +        # would be so it's still an invalid pointer. +        if data == 0 and dstsect == None: +            return ELFNull() + +        # wrap_data is different between file & section +        return self._wrap_data(data, dstsect) + +class ELFDissectSection(ELFSubset): +    ''' +    Access the contents of an ELF section like ``.text`` or ``.data`` + +    :param elfwrap: ELFDissectFile wrapper for the file +    :param idx:     section index in section header table +    :param section: section object from C module +    ''' + +    def __init__(self, elfwrap, idx, section): +        super().__init__() + +        self._elfwrap = elfwrap +        self._elffile = elfwrap._elffile +        self._idx = idx +        self._section = self._obj = section +        self.name = section.name +        self.ptrtype = elfwrap.ptrtype +        self.endian = elfwrap.endian + +    def _wrap_data(self, data, dstsect): +        if dstsect is None: +            dstsect = self._elfwrap._elffile.get_section_addr(data) +        offs = data - dstsect.sh_addr +        dstsect = self._elfwrap.get_section(dstsect.idx) +        return ELFData(dstsect, offs, None) + +class ELFDissectFile(ELFSubset): +    ''' +    Access the contents of an ELF file. + +    Note that offsets for array subscript and relocation/pointer access are +    based on the file's virtual address space and are NOT offsets to the +    start of the file on disk! + +    (Shared libraries frequently have a virtual address space starting at 0, +    but non-PIE executables have an architecture specific default loading +    address like 0x400000 on x86. 
+ +    :param filename: ELF file to open +    ''' + +    def __init__(self, filename): +        super().__init__() + +        self.name = filename +        self._elffile = self._obj = ELFFile(filename) +        self._sections = {} + +        self.ptrtype = 'I' if self._elffile.elfclass == 32 else 'Q' +        self.endian = '>' if self._elffile.bigendian else '<' + +    @property +    def _elfwrap(self): +        return self + +    def _wrap_data(self, data, dstsect): +        return ELFData(self, data, None) + +    def get_section(self, secname): +        ''' +        Look up section by name or index +        ''' +        if isinstance(secname, int): +            sh_idx = secname +            section = self._elffile.get_section_idx(secname) +        else: +            section = self._elffile.get_section(secname) + +        if section is None: +            return None + +        sh_idx = section.idx + +        if sh_idx not in self._sections: +            self._sections[sh_idx] = ELFDissectSection(self, sh_idx, section) + +        return self._sections[sh_idx] diff --git a/python/clippy/uidhash.py b/python/clippy/uidhash.py new file mode 100644 index 0000000000..bf994d389e --- /dev/null +++ b/python/clippy/uidhash.py @@ -0,0 +1,71 @@ +# xref unique ID hash calculation +# +# Copyright (C) 2020  David Lamparter for NetDEF, Inc. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for +# more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; see the file COPYING; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +import struct +from hashlib import sha256 + +def bititer(data, bits, startbit = True): +    ''' +    just iterate the individual bits out from a bytes object + +    if startbit is True, an '1' bit is inserted at the very beginning +    goes <bits> at a time, starts at LSB. +    ''' +    bitavail, v = 0, 0 +    if startbit and len(data) > 0: +        v = data.pop(0) +        yield (v & ((1 << bits) - 1)) | (1 << (bits - 1)) +        bitavail = 9 - bits +        v >>= bits - 1 + +    while len(data) > 0: +        while bitavail < bits: +            v |= data.pop(0) << bitavail +            bitavail += 8 +        yield v & ((1 << bits) - 1) +        bitavail -= bits +        v >>= bits + +def base32c(data): +    ''' +    Crockford base32 with extra dashes +    ''' +    chs = "0123456789ABCDEFGHJKMNPQRSTVWXYZ" +    o = '' +    if type(data) == str: +        data = [ord(v) for v in data] +    else: +        data = list(data) +    for i, bits in enumerate(bititer(data, 5)): +        if i == 5: +            o = o + '-' +        elif i == 10: +            break +        o = o + chs[bits] +    return o + +def uidhash(filename, hashstr, hashu32a, hashu32b): +    ''' +    xref Unique ID hash used in FRRouting +    ''' +    filename = '/'.join(filename.rsplit('/')[-2:]) + +    hdata = filename.encode('UTF-8') + hashstr.encode('UTF-8') +    hdata += struct.pack('>II', hashu32a, hashu32b) +    i = sha256(hdata).digest() +    return base32c(i) diff --git a/python/runtests.py b/python/runtests.py new file mode 100644 index 0000000000..bcf650b329 --- /dev/null +++ b/python/runtests.py @@ -0,0 +1,14 @@ +import pytest +import sys +import os + +try: +    import _clippy +except ImportError: +    sys.stderr.write('''these tests need to be run with the 
_clippy C extension +module available.  Try running "clippy runtests.py ...". +''') +    sys.exit(1) + +os.chdir(os.path.dirname(os.path.abspath(__file__))) +raise SystemExit(pytest.main(sys.argv[1:])) diff --git a/python/test_xrelfo.py b/python/test_xrelfo.py new file mode 100644 index 0000000000..3ae24ea7b3 --- /dev/null +++ b/python/test_xrelfo.py @@ -0,0 +1,65 @@ +# some basic tests for xrelfo & the python ELF machinery +# +# Copyright (C) 2020  David Lamparter for NetDEF, Inc. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; see the file COPYING; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +import sys +import os +import pytest +from pprint import pprint + +root = os.path.dirname(os.path.dirname(__file__)) +sys.path.append(os.path.join(root, 'python')) + +import xrelfo +from clippy import elf, uidhash + +def test_uidhash(): +    assert uidhash.uidhash("lib/test_xref.c", "logging call", 3, 0) \ +            == 'H7KJB-67TBH' + +def test_xrelfo_other(): +    for data in [ +            elf.ELFNull(), +            elf.ELFUnresolved('somesym', 0), +        ]: + +        dissect = xrelfo.XrefPtr(data) +        print(repr(dissect)) + +        with pytest.raises(AttributeError): +            dissect.xref + +def test_xrelfo_obj(): +    xrelfo_ = xrelfo.Xrelfo() +    edf = xrelfo_.load_elf(os.path.join(root, 'lib/.libs/zclient.o'), 'zclient.lo') +    xrefs = 
xrelfo_._xrefs + +    with pytest.raises(elf.ELFAccessError): +        edf[0:4] + +    pprint(xrefs[0]) +    pprint(xrefs[0]._data) + +def test_xrelfo_bin(): +    xrelfo_ = xrelfo.Xrelfo() +    edf = xrelfo_.load_elf(os.path.join(root, 'lib/.libs/libfrr.so'), 'libfrr.la') +    xrefs = xrelfo_._xrefs + +    assert edf[0:4] == b'\x7fELF' + +    pprint(xrefs[0]) +    pprint(xrefs[0]._data) diff --git a/python/tiabwarfo.py b/python/tiabwarfo.py new file mode 100644 index 0000000000..bddbeef268 --- /dev/null +++ b/python/tiabwarfo.py @@ -0,0 +1,195 @@ +# FRR DWARF structure definition extractor +# +# Copyright (C) 2020  David Lamparter for NetDEF, Inc. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; see the file COPYING; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +import sys +import os +import subprocess +import re +import argparse +import subprocess +import json + +structs = ['xref', 'xref_logmsg', 'xref_threadsched', 'xref_install_element', 'xrefdata', 'xrefdata_logmsg', 'cmd_element'] + +def extract(filename='lib/.libs/libfrr.so'): +    ''' +    Convert output from "pahole" to JSON. 
+ +    Example pahole output: +    $ pahole -C xref lib/.libs/libfrr.so +    struct xref { +        struct xrefdata *          xrefdata;             /*     0     8 */ +        enum xref_type             type;                 /*     8     4 */ +        int                        line;                 /*    12     4 */ +        const char  *              file;                 /*    16     8 */ +        const char  *              func;                 /*    24     8 */ + +        /* size: 32, cachelines: 1, members: 5 */ +        /* last cacheline: 32 bytes */ +    }; +    ''' +    pahole = subprocess.check_output(['pahole', '-C', ','.join(structs), filename]).decode('UTF-8') + +    struct_re = re.compile(r'^struct ([^ ]+) \{([^\}]+)};', flags=re.M | re.S) +    field_re = re.compile(r'^\s*(?P<type>[^;\(]+)\s+(?P<name>[^;\[\]]+)(?:\[(?P<array>\d+)\])?;\s*\/\*(?P<comment>.*)\*\/\s*$') +    comment_re = re.compile(r'^\s*\/\*.*\*\/\s*$') + +    pastructs = struct_re.findall(pahole) +    out = {} + +    for name, data in pastructs: +        this = out.setdefault(name, {}) +        fields = this.setdefault('fields', []) + +        lines = data.strip().splitlines() + +        for line in lines: +            if line.strip() == '': +                continue +            m = comment_re.match(line) +            if m is not None: +                continue + +            m = field_re.match(line) +            if m is not None: +                offs, size = m.group('comment').strip().split() +                offs = int(offs) +                size = int(size) +                typ_ = m.group('type').strip() +                name = m.group('name') + +                if name.startswith('(*'): +                    # function pointer +                    typ_ = typ_ + ' *' +                    name = name[2:].split(')')[0] + +                data = { +                    'name': name, +                    'type': typ_, +                    'offset': offs, +                    'size': 
size, +                } +                if m.group('array'): +                    data['array'] = int(m.group('array')) + +                fields.append(data) +                continue + +            raise ValueError('cannot process line: %s' % line) + +    return out + +class FieldApplicator(object): +    ''' +    Fill ELFDissectStruct fields list from pahole/JSON + +    Uses the JSON file created by the above code to fill in the struct fields +    in subclasses of ELFDissectStruct. +    ''' + +    # only what we really need.  add more as needed. +    packtypes = { +        'int': 'i', +        'uint8_t': 'B', +        'uint16_t': 'H', +        'uint32_t': 'I', +        'char': 's', +    } + +    def __init__(self, data): +        self.data = data +        self.classes = [] +        self.clsmap = {} + +    def add(self, cls): +        self.classes.append(cls) +        self.clsmap[cls.struct] = cls + +    def resolve(self, cls): +        out = [] +        offset = 0 + +        fieldrename = getattr(cls, 'fieldrename', {}) +        def mkname(n): +            return (fieldrename.get(n, n),) + +        for field in self.data[cls.struct]['fields']: +            typs = field['type'].split() +            typs = [i for i in typs if i not in ['const']] + +            if field['offset'] != offset: +                assert offset < field['offset'] +                out.append(('_pad', '%ds' % (field['offset'] - offset,))) + +            # pretty hacky C types handling, but covers what we need + +            ptrlevel = 0 +            while typs[-1] == '*': +                typs.pop(-1) +                ptrlevel += 1 + +            if ptrlevel > 0: +                packtype = ('P', None) +                if ptrlevel == 1: +                    if typs[0] == 'char': +                        packtype = ('P', str) +                    elif typs[0] == 'struct' and typs[1] in self.clsmap: +                        packtype = ('P', self.clsmap[typs[1]]) +            elif typs[0] == 
'enum': +                packtype = ('I',) +            elif typs[0] in self.packtypes: +                packtype = (self.packtypes[typs[0]],) +            elif typs[0] == 'struct': +                if typs[1] in self.clsmap: +                    packtype = (self.clsmap[typs[1]],) +                else: +                    packtype = ('%ds' % field['size'],) +            else: +                raise ValueError('cannot decode field %s in struct %s (%s)' % ( +                        cls.struct, field['name'], field['type'])) + +            if 'array' in field and typs[0] == 'char': +                packtype = ('%ds' % field['array'],) +                out.append(mkname(field['name']) + packtype) +            elif 'array' in field: +                for i in range(0, field['array']): +                    out.append(mkname('%s_%d' % (field['name'], i)) + packtype) +            else: +                out.append(mkname(field['name']) + packtype) + +            offset = field['offset'] + field['size'] + +        cls.fields = out + +    def __call__(self): +        for cls in self.classes: +            self.resolve(cls) + +def main(): +    argp = argparse.ArgumentParser(description = 'FRR DWARF structure extractor') +    argp.add_argument('-o', dest='output', type=str, help='write JSON output', default='python/xrefstructs.json') +    argp.add_argument('-i', dest='input',  type=str, help='ELF file to read',  default='lib/.libs/libfrr.so') +    args = argp.parse_args() + +    out = extract(args.input) +    with open(args.output + '.tmp', 'w') as fd: +        json.dump(out, fd, indent=2, sort_keys=True) +    os.rename(args.output + '.tmp', args.output) + +if __name__ == '__main__': +    main() diff --git a/python/xrefstructs.json b/python/xrefstructs.json new file mode 100644 index 0000000000..537afb87e6 --- /dev/null +++ b/python/xrefstructs.json @@ -0,0 +1,190 @@ +{ +  "cmd_element": { +    "fields": [ +      { +        "name": "string", +        "offset": 0, +        
"size": 8, +        "type": "const char  *" +      }, +      { +        "name": "doc", +        "offset": 8, +        "size": 8, +        "type": "const char  *" +      }, +      { +        "name": "daemon", +        "offset": 16, +        "size": 4, +        "type": "int" +      }, +      { +        "name": "attr", +        "offset": 20, +        "size": 1, +        "type": "uint8_t" +      }, +      { +        "name": "func", +        "offset": 24, +        "size": 8, +        "type": "int *" +      }, +      { +        "name": "name", +        "offset": 32, +        "size": 8, +        "type": "const char  *" +      }, +      { +        "name": "xref", +        "offset": 40, +        "size": 32, +        "type": "struct xref" +      } +    ] +  }, +  "xref": { +    "fields": [ +      { +        "name": "xrefdata", +        "offset": 0, +        "size": 8, +        "type": "struct xrefdata *" +      }, +      { +        "name": "type", +        "offset": 8, +        "size": 4, +        "type": "enum xref_type" +      }, +      { +        "name": "line", +        "offset": 12, +        "size": 4, +        "type": "int" +      }, +      { +        "name": "file", +        "offset": 16, +        "size": 8, +        "type": "const char  *" +      }, +      { +        "name": "func", +        "offset": 24, +        "size": 8, +        "type": "const char  *" +      } +    ] +  }, +  "xref_install_element": { +    "fields": [ +      { +        "name": "xref", +        "offset": 0, +        "size": 32, +        "type": "struct xref" +      }, +      { +        "name": "cmd_element", +        "offset": 32, +        "size": 8, +        "type": "const struct cmd_element  *" +      }, +      { +        "name": "node_type", +        "offset": 40, +        "size": 4, +        "type": "enum node_type" +      } +    ] +  }, +  "xref_logmsg": { +    "fields": [ +      { +        "name": "xref", +        "offset": 0, +        "size": 32, +        "type": "struct xref" +      }, + 
     { +        "name": "fmtstring", +        "offset": 32, +        "size": 8, +        "type": "const char  *" +      }, +      { +        "name": "priority", +        "offset": 40, +        "size": 4, +        "type": "uint32_t" +      }, +      { +        "name": "ec", +        "offset": 44, +        "size": 4, +        "type": "uint32_t" +      } +    ] +  }, +  "xref_threadsched": { +    "fields": [ +      { +        "name": "xref", +        "offset": 0, +        "size": 32, +        "type": "struct xref" +      }, +      { +        "name": "funcname", +        "offset": 32, +        "size": 8, +        "type": "const char  *" +      }, +      { +        "name": "dest", +        "offset": 40, +        "size": 8, +        "type": "const char  *" +      }, +      { +        "name": "thread_type", +        "offset": 48, +        "size": 4, +        "type": "uint32_t" +      } +    ] +  }, +  "xrefdata": { +    "fields": [ +      { +        "name": "xref", +        "offset": 0, +        "size": 8, +        "type": "const struct xref  *" +      }, +      { +        "array": 16, +        "name": "uid", +        "offset": 8, +        "size": 16, +        "type": "char" +      }, +      { +        "name": "hashstr", +        "offset": 24, +        "size": 8, +        "type": "const char  *" +      }, +      { +        "array": 2, +        "name": "hashu32", +        "offset": 32, +        "size": 8, +        "type": "uint32_t" +      } +    ] +  } +}
\ No newline at end of file diff --git a/python/xrelfo.py b/python/xrelfo.py new file mode 100644 index 0000000000..b726d2895d --- /dev/null +++ b/python/xrelfo.py @@ -0,0 +1,397 @@ +# FRR ELF xref extractor +# +# Copyright (C) 2020  David Lamparter for NetDEF, Inc. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; see the file COPYING; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +import sys +import os +import struct +import re +import traceback +import json +import argparse + +from clippy.uidhash import uidhash +from clippy.elf import * +from clippy import frr_top_src +from tiabwarfo import FieldApplicator + +try: +    with open(os.path.join(frr_top_src, 'python', 'xrefstructs.json'), 'r') as fd: +        xrefstructs = json.load(fd) +except FileNotFoundError: +    sys.stderr.write(''' +The "xrefstructs.json" file (created by running tiabwarfo.py with the pahole +tool available) could not be found.  It should be included with the sources. +''') +    sys.exit(1) + +# constants, need to be kept in sync manually... 
# xref type codes as stored in the "type" field of struct xref; each code is
# mapped to the Python class describing its containing struct through
# Xref.containers further down.
XREFT_THREADSCHED = 0x100
XREFT_LOGMSG = 0x200
XREFT_DEFUN = 0x300
XREFT_INSTALL_ELEMENT = 0x301

# LOG_*
# priovals is looked up with (priority & 0x30) when dumping a log message; it
# is empty here, so the lookup always falls back to a blank.
priovals = {}
# one-character labels, indexed by (priority & 7)
prios = ['0', '1', '2', 'E', 'W', 'N', 'I', 'D']


def _xref_location(xref):
    '''
    Return the source location of an xref as a plain dict.

    The result has the keys 'file', 'line' and 'func' (in that order), taken
    from the attributes of the same name on xref.
    '''
    return {name: getattr(xref, name) for name in ('file', 'line', 'func')}


def _write_json(path, obj):
    '''
    Write obj as JSON to path by way of a temporary file.

    The data is written to path + '.tmp' first and renamed over path
    afterwards, so path never holds a partially written file.
    '''
    tmp = path + '.tmp'
    with open(tmp, 'w') as fd:
        json.dump(obj, fd, indent=2, sort_keys=True)
    os.rename(tmp, path)


class XrelfoJson(object):
    '''
    Default (no-op) reporting hooks for xref container classes.

    Subclasses override whichever of these they have something to say for.
    '''

    def dump(self):
        '''Print a human readable summary; does nothing by default.'''
        pass

    def check(self, wopt):
        '''
        Yield warnings for this item; yields nothing by default.

        wopt is the object carrying the enabled warning options (the parsed
        command line arguments when called from _main).
        '''
        yield from []

    def to_dict(self, refs):
        '''Add this item to the JSON output dict; does nothing by default.'''
        pass


class Xref(ELFDissectStruct, XrelfoJson):
    '''
    struct xref as found in the ELF file.

    The C field "type" is exposed as "typ" (see fieldrename).
    '''
    struct = 'xref'
    fieldrename = {'type': 'typ'}
    # xref type code -> class of the struct embedding this xref
    containers = {}

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self._container = None
        # let the xrefdata know which xref points at it; it needs that to
        # compute the uid
        if self.xrefdata:
            self.xrefdata.ref_from(self, self.typ)

    def container(self):
        '''
        Return the struct this xref is embedded in, or None.

        The container class is picked from Xref.containers by type code;
        None is returned for type codes without a registered class.  The
        result is cached once it has been resolved.
        '''
        if self._container is None:
            if self.typ in self.containers:
                self._container = self.container_of(self.containers[self.typ], 'xref')
        return self._container

    def check(self, *args, **kwargs):
        '''
        Yield the warnings of the container, if one was resolved.

        This reads the cached container only, i.e. container() must have
        been called before for anything to be checked.
        '''
        if self._container:
            yield from self._container.check(*args, **kwargs)


class Xrefdata(ELFDissectStruct):
    '''
    struct xrefdata as found in the ELF file.
    '''
    struct = 'xrefdata'

    # uid is all zeroes in the data loaded from ELF
    fieldrename = {'uid': '_uid'}

    def ref_from(self, xref, typ):
        '''Remember the xref referencing this xrefdata (typ is not used.)'''
        self.xref = xref

    @property
    def uid(self):
        '''
        Unique ID computed from file name, hash string and hash values.

        None if there is no hash string.
        '''
        if self.hashstr is None:
            return None
        return uidhash(self.xref.file, self.hashstr, self.hashu32_0, self.hashu32_1)


class XrefPtr(ELFDissectStruct):
    '''
    A single pointer to a struct xref, as stored in the xref array.
    '''
    fields = [
        ('xref', 'P', Xref),
    ]


class XrefThreadSched(ELFDissectStruct, XrelfoJson):
    '''
    struct xref_threadsched; no output is generated for these.
    '''
    struct = 'xref_threadsched'

Xref.containers[XREFT_THREADSCHED] = XrefThreadSched


class XrefLogmsg(ELFDissectStruct, XrelfoJson):
    '''
    struct xref_logmsg, i.e. a log message call site.
    '''
    struct = 'xref_logmsg'

    def _warn_fmt(self, text):
        '''
        Yield one warning as a ((file, line), message) tuple.

        The leading (file, line) tuple is what the warnings get sorted by.
        '''
        yield ((self.xref.file, self.xref.line), '%s:%d: %s (in %s())\n' % (self.xref.file, self.xref.line, text, self.xref.func))

    # (pattern, message) pairs applied to the format string.  Each pattern
    # has exactly one capturing group so that rex.split() returns the
    # offending text at the odd indices.
    regexes = [
        (re.compile(r'([\n\t]+)'), 'error: log message contains tab or newline'),
    #    (re.compile(r'^(\s+)'),   'warning: log message starts with whitespace'),
        (re.compile(r'^((?:warn(?:ing)?|error):\s*)', re.I), 'warning: log message starts with severity'),
    ]

    def check(self, wopt):
        '''
        Yield format string warnings if wopt.Wlog_format is set.

        When stderr is a terminal, the offending parts of the format string
        are highlighted with ANSI colors.
        '''
        if not wopt.Wlog_format:
            return

        for rex, msg in self.regexes:
            if not rex.search(self.fmtstring):
                continue

            if sys.stderr.isatty():
                out = []
                for i, text in enumerate(rex.split(self.fmtstring)):
                    # repr() to make control characters visible, minus the
                    # quotes it puts around the string
                    escaped = repr(text)[1:-1]
                    if (i % 2) == 1:
                        out.append('\033[41;37;1m%s\033[m' % escaped)
                    else:
                        out.append(escaped)

                excerpt = ''.join(out)
            else:
                excerpt = repr(self.fmtstring)[1:-1]

            yield from self._warn_fmt('%s: "%s"' % (msg, excerpt))

    def dump(self):
        '''Print location, priority, uid, error code and format string.'''
        print('%-60s %s%s %-25s [EC %d] %s' % (
            '%s:%d %s()' % (self.xref.file, self.xref.line, self.xref.func),
            prios[self.priority & 7],
            priovals.get(self.priority & 0x30, ' '),
            self.xref.xrefdata.uid, self.ec, self.fmtstring))

    def to_dict(self, xrelfo):
        '''
        Append this log message to xrelfo['refs'], keyed by its uid.

        The low 3 bits of the priority are stored as 'priority'; bits 0x10
        and 0x20 are reported as the 'errno' and 'getaddrinfo' flags.  'ec'
        is only included if it is nonzero.
        '''
        jsobj = _xref_location(self.xref)
        if self.ec != 0:
            jsobj['ec'] = self.ec
        jsobj['fmtstring'] = self.fmtstring
        jsobj['priority'] = self.priority & 7
        jsobj['type'] = 'logmsg'
        jsobj['binary'] = self._elfsect._elfwrap.orig_filename

        if self.priority & 0x10:
            jsobj.setdefault('flags', []).append('errno')
        if self.priority & 0x20:
            jsobj.setdefault('flags', []).append('getaddrinfo')

        xrelfo['refs'].setdefault(self.xref.xrefdata.uid, []).append(jsobj)

Xref.containers[XREFT_LOGMSG] = XrefLogmsg


class CmdElement(ELFDissectStruct, XrelfoJson):
    '''
    struct cmd_element, i.e. a CLI command definition.
    '''
    struct = 'cmd_element'

    # names for known values of the attr field; other values are passed
    # through to the output unchanged
    cmd_attrs = { 0: None, 1: 'deprecated', 2: 'hidden'}

    def to_dict(self, xrelfo):
        '''
        Record this command in xrelfo['cli'][<name>][<binary>].

        Sets 'string', 'doc' and 'defun' (location of the definition), plus
        'attr' unless it maps to None.
        '''
        jsobj = xrelfo['cli'].setdefault(self.name, {}).setdefault(self._elfsect._elfwrap.orig_filename, {})

        jsobj.update({
            'string': self.string,
            'doc': self.doc,
            'attr': self.cmd_attrs.get(self.attr, self.attr),
        })
        if jsobj['attr'] is None:
            del jsobj['attr']

        jsobj['defun'] = _xref_location(self.xref)

Xref.containers[XREFT_DEFUN] = CmdElement


class XrefInstallElement(ELFDissectStruct, XrelfoJson):
    '''
    struct xref_install_element, i.e. a command being installed in a node.
    '''
    struct = 'xref_install_element'

    def to_dict(self, xrelfo):
        '''
        Append node type and install location to the 'nodes' list in
        xrelfo['cli'][<command name>][<binary>].
        '''
        jsobj = xrelfo['cli'].setdefault(self.cmd_element.name, {}).setdefault(self._elfsect._elfwrap.orig_filename, {})
        nodes = jsobj.setdefault('nodes', [])

        nodes.append({
            'node': self.node_type,
            'install': _xref_location(self.xref),
        })

Xref.containers[XREFT_INSTALL_ELEMENT] = XrefInstallElement

# shove in field defs
fieldapply = FieldApplicator(xrefstructs)
fieldapply.add(Xref)
fieldapply.add(Xrefdata)
fieldapply.add(XrefLogmsg)
fieldapply.add(XrefThreadSched)
fieldapply.add(CmdElement)
fieldapply.add(XrefInstallElement)
fieldapply()


class Xrelfo(dict):
    '''
    Collected xref data, in the shape that gets written out as JSON.

    'refs' maps uid -> list of log message dicts, 'cli' maps command name ->
    binary -> command info.  The Xref objects loaded from ELF files are
    additionally kept in _xrefs for running checks on them.
    '''

    def __init__(self):
        super().__init__({
            'refs': {},
            'cli': {},
        })
        self._xrefs = []

    @staticmethod
    def _libtool_target(filename):
        '''
        Find the actual binary a libtool .la/.lo file refers to.

        Uses the first "pic_object" or "library_names" assignment in the
        file; for library_names, the first name listed is looked for in the
        .libs directory next to the libtool file.

        Raises ValueError if neither variable is found or library_names is
        empty.
        '''
        dirname = os.path.dirname(filename)

        with open(filename, 'r') as fd:
            for line in fd:
                line = line.strip()
                if line.startswith('#') or '=' not in line:
                    continue

                var, val = line.split('=', 1)
                if var not in ['library_names', 'pic_object']:
                    continue
                if val.startswith("'") or val.startswith('"'):
                    val = val[1:-1]

                if var == 'pic_object':
                    return os.path.join(dirname, val)

                names = val.split()
                if not names:
                    # nothing listed in library_names, nothing we could load
                    break
                return os.path.join(dirname, '.libs', names[0])

        raise ValueError('could not process libtool file "%s"' % filename)

    def load_file(self, filename):
        '''
        Load xref data from filename, whatever kind of file it is.

        Handles libtool .la/.lo files, ELF files, scripts starting with "#!"
        (the file of the same name in the .libs subdirectory is used
        instead) and JSON files.

        Raises ValueError if the file type cannot be determined or a libtool
        file cannot be processed.
        '''
        orig_filename = filename
        if filename.endswith('.la') or filename.endswith('.lo'):
            filename = self._libtool_target(filename)

        while True:
            with open(filename, 'rb') as fd:
                hdr = fd.read(4)

            if hdr == b'\x7fELF':
                self.load_elf(filename, orig_filename)
                return

            if hdr[:2] == b'#!':
                path, name = os.path.split(filename)
                filename = os.path.join(path, '.libs', name)
                continue

            if hdr[:1] == b'{':
                with open(filename, 'r') as fd:
                    self.load_json(fd)
                return

            raise ValueError('cannot determine file type for %s' % (filename))

    def load_elf(self, filename, orig_filename):
        '''
        Load all xrefs from an ELF file and add them to the output.

        The xref pointer array is located through the "FRRouting" / "XREF"
        ELF note if there is one, and through the xref_array section
        otherwise.  orig_filename is what gets recorded as the binary name.

        Returns the ELFDissectFile; raises ValueError if the file has
        neither the note nor the section.
        '''
        edf = ELFDissectFile(filename)
        edf.orig_filename = orig_filename

        note = edf._elffile.find_note('FRRouting', 'XREF')
        if note is not None:
            endian = '>' if edf._elffile.bigendian else '<'
            mem = edf._elffile[note]
            # the note holds 2 words in the file's word size
            if edf._elffile.elfclass == 64:
                fmt, wordsize = 'QQ', 8
            else:
                fmt, wordsize = 'II', 4

            start, end = struct.unpack(endian + fmt, mem)
            # presumably both are relative to the position of their own
            # word, hence the extra word size on the second one
            start += note.start
            end += note.start + wordsize

            ptrs = edf.iter_data(XrefPtr, slice(start, end))

        else:
            xrefarray = edf.get_section('xref_array')
            if xrefarray is None:
                raise ValueError('file has neither xref note nor xref_array section')

            ptrs = xrefarray.iter_data(XrefPtr)

        for ptr in ptrs:
            if ptr.xref is None:
                print('NULL xref')
                continue
            self._xrefs.append(ptr.xref)

            container = ptr.xref.container()
            if container is None:
                continue
            container.to_dict(self)

        return edf

    def load_json(self, fd):
        '''
        Merge previously written JSON output into this object.

        Entries in 'refs' are appended unless an equal entry exists already;
        entries in 'cli' are merged per command.  Returns the loaded data.
        '''
        data = json.load(fd)
        for uid, items in data['refs'].items():
            myitems = self['refs'].setdefault(uid, [])
            for item in items:
                if item in myitems:
                    continue
                myitems.append(item)

        for cmd, items in data['cli'].items():
            self['cli'].setdefault(cmd, {}).update(items)

        return data

    def check(self, checks):
        '''Yield the warnings of all xrefs loaded from ELF files.'''
        for xref in self._xrefs:
            yield from xref.check(checks)


def main():
    '''Parse the command line and run _main, under cProfile if requested.'''
    argp = argparse.ArgumentParser(description = 'FRR xref ELF extractor')
    argp.add_argument('-o', dest='output', type=str, help='write JSON output')
    argp.add_argument('--out-by-file',     type=str, help='write by-file JSON output')
    argp.add_argument('-Wlog-format',      action='store_const', const=True)
    argp.add_argument('--profile',         action='store_const', const=True)
    argp.add_argument('binaries', metavar='BINARY', nargs='+', type=str, help='files to read (ELF files or libtool objects)')
    args = argp.parse_args()

    if args.profile:
        import cProfile
        cProfile.runctx('_main(args)', globals(), {'args': args}, sort='cumtime')
    else:
        _main(args)


def _main(args):
    '''
    Load all input files, print warnings and write the JSON outputs.

    Files failing to load get a traceback printed and make the program exit
    with status 1 (after warnings are printed, but without any output being
    written.)
    '''
    errors = 0
    xrelfo = Xrelfo()

    for fn in args.binaries:
        try:
            xrelfo.load_file(fn)
        except Exception:
            # keep going with the remaining files, but don't eat
            # KeyboardInterrupt / SystemExit
            errors += 1
            sys.stderr.write('while processing %s:\n' % (fn))
            traceback.print_exc()

    # only bother walking all xrefs if some -W option was actually given
    if any(getattr(args, option) for option in dir(args) if option.startswith('W')):
        checks = sorted(xrelfo.check(args))
        sys.stderr.write(''.join([c[-1] for c in checks]))

    refs = xrelfo['refs']

    # the same uid showing up with different format strings is a collision
    for uid, locs in refs.items():
        if len({loc['fmtstring'] for loc in locs}) != 1:
            print('\033[31;1m%s\033[m' % uid)

    # same data as in refs, but grouped by source file & sorted by line
    outbyfile = {}
    for locs in refs.values():
        for loc in locs:
            loc = dict(loc)
            outbyfile.setdefault(loc.pop('file'), []).append(loc)

    for filearray in outbyfile.values():
        filearray.sort(key=lambda x: x['line'])

    if errors:
        sys.exit(1)

    if args.output:
        _write_json(args.output, xrelfo)

    if args.out_by_file:
        _write_json(args.out_by_file, outbyfile)

if __name__ == '__main__':
    main()
