diff options
| author | Russ White <russ@riw.us> | 2021-03-09 07:58:43 -0500 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2021-03-09 07:58:43 -0500 |
| commit | da0a277faeb50082e92fe2c962cbf69ab3b153c1 (patch) | |
| tree | ffb9bb3cdc52672e73c73a61fff3c876499f21cc | |
| parent | 6adedc6fe3773fe1abd4ef62797cfb2d336348be (diff) | |
| parent | 3cb0eab36fe5b6536b261e704942ff03fc855f13 (diff) | |
Merge pull request #6807 from opensourcerouting/xref-extract
xrefs extraction tool
32 files changed, 2933 insertions, 16 deletions
diff --git a/.gitignore b/.gitignore index fbbb04b60c..97349769ad 100644 --- a/.gitignore +++ b/.gitignore @@ -59,6 +59,7 @@ *.cg.json *.cg.dot *.cg.svg +*.xref ### gcov outputs diff --git a/Makefile.am b/Makefile.am index 90c8407010..bb8e97a115 100644 --- a/Makefile.am +++ b/Makefile.am @@ -187,8 +187,16 @@ EXTRA_DIST += \ \ python/clidef.py \ python/clippy/__init__.py \ + python/clippy/elf.py \ + python/clippy/uidhash.py \ python/makevars.py \ python/makefile.py \ + python/tiabwarfo.py \ + python/xrelfo.py \ + python/test_xrelfo.py \ + python/runtests.py \ + \ + python/xrefstructs.json \ \ redhat/frr.logrotate \ redhat/frr.pam \ diff --git a/configure.ac b/configure.ac index f3d1f38986..139fca7c42 100755 --- a/configure.ac +++ b/configure.ac @@ -794,6 +794,20 @@ fi # AS_IF([test "$host" = "$build"], [ + AC_CHECK_HEADER([gelf.h], [], [ + AC_MSG_ERROR([libelf headers are required for building clippy. (Host only when cross-compiling.)]) + ]) + AC_CHECK_LIB([elf], [elf_memory], [], [ + AC_MSG_ERROR([libelf is required for building clippy. 
(Host only when cross-compiling.)]) + ]) + + AC_CHECK_LIB([elf], [elf_getdata_rawchunk], [ + AC_DEFINE([HAVE_ELF_GETDATA_RAWCHUNK], [1], [Have elf_getdata_rawchunk()]) + ]) + AC_CHECK_LIB([elf], [gelf_getnote], [ + AC_DEFINE([HAVE_GELF_GETNOTE], [1], [Have gelf_getnote()]) + ]) + FRR_PYTHON_DEV ], [ FRR_PYTHON diff --git a/debian/control b/debian/control index b9e96b55d0..7a08cbbdb0 100644 --- a/debian/control +++ b/debian/control @@ -13,6 +13,7 @@ Build-Depends: bison, install-info, libc-ares-dev, libcap-dev, + libelf-dev, libjson-c-dev | libjson0-dev, libpam0g-dev | libpam-dev, libpcre3-dev, diff --git a/doc/developer/building-frr-for-archlinux.rst b/doc/developer/building-frr-for-archlinux.rst index f62add5963..e589a9f724 100644 --- a/doc/developer/building-frr-for-archlinux.rst +++ b/doc/developer/building-frr-for-archlinux.rst @@ -11,7 +11,7 @@ Installing Dependencies git autoconf automake libtool make cmake pcre readline texinfo \ pkg-config pam json-c bison flex python-pytest \ c-ares python systemd python2-ipaddress python-sphinx \ - systemd-libs net-snmp perl libcap + systemd-libs net-snmp perl libcap libelf .. 
include:: building-libyang.rst diff --git a/doc/developer/building-frr-for-centos6.rst b/doc/developer/building-frr-for-centos6.rst index b730a5ee32..5d3be492de 100644 --- a/doc/developer/building-frr-for-centos6.rst +++ b/doc/developer/building-frr-for-centos6.rst @@ -45,7 +45,8 @@ Add packages: sudo yum install git autoconf automake libtool make \ readline-devel texinfo net-snmp-devel groff pkgconfig \ - json-c-devel pam-devel flex epel-release c-ares-devel libcap-devel + json-c-devel pam-devel flex epel-release c-ares-devel libcap-devel \ + elfutils-libelf-devel Install newer version of bison (CentOS 6 package source is too old) from CentOS 7: diff --git a/doc/developer/building-frr-for-centos7.rst b/doc/developer/building-frr-for-centos7.rst index eb97150d67..8d0aea943c 100644 --- a/doc/developer/building-frr-for-centos7.rst +++ b/doc/developer/building-frr-for-centos7.rst @@ -21,7 +21,8 @@ Add packages: sudo yum install git autoconf automake libtool make \ readline-devel texinfo net-snmp-devel groff pkgconfig \ json-c-devel pam-devel bison flex pytest c-ares-devel \ - python-devel systemd-devel python-sphinx libcap-devel + python-devel systemd-devel python-sphinx libcap-devel \ + elfutils-libelf-devel .. include:: building-libyang.rst diff --git a/doc/developer/building-frr-for-centos8.rst b/doc/developer/building-frr-for-centos8.rst index 75beb53378..77fe489358 100644 --- a/doc/developer/building-frr-for-centos8.rst +++ b/doc/developer/building-frr-for-centos8.rst @@ -14,7 +14,8 @@ Add packages: sudo dnf install --enablerepo=PowerTools git autoconf pcre-devel \ automake libtool make readline-devel texinfo net-snmp-devel pkgconfig \ groff pkgconfig json-c-devel pam-devel bison flex python2-pytest \ - c-ares-devel python2-devel systemd-devel libcap-devel + c-ares-devel python2-devel systemd-devel libcap-devel \ + elfutils-libelf-devel .. 
include:: building-libyang.rst diff --git a/doc/developer/building-frr-for-debian8.rst b/doc/developer/building-frr-for-debian8.rst index c12bf46f8d..51dd07c42a 100644 --- a/doc/developer/building-frr-for-debian8.rst +++ b/doc/developer/building-frr-for-debian8.rst @@ -18,7 +18,7 @@ Add packages: sudo apt-get install git autoconf automake libtool make \ libreadline-dev texinfo libjson-c-dev pkg-config bison flex python3-pip \ libc-ares-dev python3-dev python3-sphinx build-essential libsystemd-dev \ - libsnmp-dev libcap-dev + libsnmp-dev libcap-dev libelf-dev Install newer pytest (>3.0) from pip diff --git a/doc/developer/building-frr-for-debian9.rst b/doc/developer/building-frr-for-debian9.rst index f976b9f49a..919b010314 100644 --- a/doc/developer/building-frr-for-debian9.rst +++ b/doc/developer/building-frr-for-debian9.rst @@ -11,7 +11,7 @@ Add packages: sudo apt-get install git autoconf automake libtool make \ libreadline-dev texinfo libjson-c-dev pkg-config bison flex \ libc-ares-dev python3-dev python3-pytest python3-sphinx build-essential \ - libsnmp-dev libsystemd-dev libcap-dev + libsnmp-dev libsystemd-dev libcap-dev libelf-dev .. include:: building-libyang.rst diff --git a/doc/developer/building-frr-for-fedora.rst b/doc/developer/building-frr-for-fedora.rst index 4ab59490fd..5fecd8a826 100644 --- a/doc/developer/building-frr-for-fedora.rst +++ b/doc/developer/building-frr-for-fedora.rst @@ -14,7 +14,8 @@ Installing Dependencies sudo dnf install git autoconf automake libtool make \ readline-devel texinfo net-snmp-devel groff pkgconfig json-c-devel \ pam-devel python3-pytest bison flex c-ares-devel python3-devel \ - python3-sphinx perl-core patch systemd-devel libcap-devel + python3-sphinx perl-core patch systemd-devel libcap-devel \ + elfutils-libelf-devel .. 
include:: building-libyang.rst diff --git a/doc/developer/building-frr-for-opensuse.rst b/doc/developer/building-frr-for-opensuse.rst index 5ed714a67e..4e886e9c25 100644 --- a/doc/developer/building-frr-for-opensuse.rst +++ b/doc/developer/building-frr-for-opensuse.rst @@ -13,7 +13,8 @@ Installing Dependencies zypper in git autoconf automake libtool make \ readline-devel texinfo net-snmp-devel groff pkgconfig libjson-c-devel\ pam-devel python3-pytest bison flex c-ares-devel python3-devel\ - python3-Sphinx perl patch systemd-devel libcap-devel libyang-devel + python3-Sphinx perl patch systemd-devel libcap-devel libyang-devel \ + libelf-devel Building & Installing FRR ------------------------- diff --git a/doc/developer/building-frr-for-ubuntu1404.rst b/doc/developer/building-frr-for-ubuntu1404.rst index cc54415266..2711e92b6f 100644 --- a/doc/developer/building-frr-for-ubuntu1404.rst +++ b/doc/developer/building-frr-for-ubuntu1404.rst @@ -14,7 +14,7 @@ Installing Dependencies git autoconf automake libtool make libreadline-dev texinfo \ pkg-config libpam0g-dev libjson-c-dev bison flex python3-pytest \ libc-ares-dev python3-dev python3-sphinx install-info build-essential \ - libsnmp-dev perl libcap-dev + libsnmp-dev perl libcap-dev libelf-dev .. include:: building-libyang.rst diff --git a/doc/developer/building-frr-for-ubuntu1604.rst b/doc/developer/building-frr-for-ubuntu1604.rst index 63c6f8648c..2cb9536f9b 100644 --- a/doc/developer/building-frr-for-ubuntu1604.rst +++ b/doc/developer/building-frr-for-ubuntu1604.rst @@ -14,7 +14,8 @@ Installing Dependencies git autoconf automake libtool make libreadline-dev texinfo \ pkg-config libpam0g-dev libjson-c-dev bison flex python3-pytest \ libc-ares-dev python3-dev libsystemd-dev python-ipaddress python3-sphinx \ - install-info build-essential libsystemd-dev libsnmp-dev perl libcap-dev + install-info build-essential libsystemd-dev libsnmp-dev perl libcap-dev \ + libelf-dev .. 
include:: building-libyang.rst diff --git a/doc/developer/building-frr-for-ubuntu1804.rst b/doc/developer/building-frr-for-ubuntu1804.rst index 9d85957d88..eb3991c139 100644 --- a/doc/developer/building-frr-for-ubuntu1804.rst +++ b/doc/developer/building-frr-for-ubuntu1804.rst @@ -14,7 +14,8 @@ Installing Dependencies git autoconf automake libtool make libreadline-dev texinfo \ pkg-config libpam0g-dev libjson-c-dev bison flex python3-pytest \ libc-ares-dev python3-dev libsystemd-dev python-ipaddress python3-sphinx \ - install-info build-essential libsystemd-dev libsnmp-dev perl libcap-dev + install-info build-essential libsystemd-dev libsnmp-dev perl libcap-dev \ + libelf-dev .. include:: building-libyang.rst diff --git a/doc/developer/building-frr-for-ubuntu2004.rst b/doc/developer/building-frr-for-ubuntu2004.rst index ef5d8da551..ffc05a6841 100644 --- a/doc/developer/building-frr-for-ubuntu2004.rst +++ b/doc/developer/building-frr-for-ubuntu2004.rst @@ -15,7 +15,7 @@ Installing Dependencies pkg-config libpam0g-dev libjson-c-dev bison flex python3-pytest \ libc-ares-dev python3-dev libsystemd-dev python-ipaddress python3-sphinx \ install-info build-essential libsystemd-dev libsnmp-dev perl \ - libcap-dev python2 + libcap-dev python2 libelf-dev Note that Ubuntu 20 no longer installs python 2.x, so it must be installed explicitly. Ensure that your system has a symlink named diff --git a/doc/developer/xrefs.rst b/doc/developer/xrefs.rst index 6a0794d41b..e8e07dfe1d 100644 --- a/doc/developer/xrefs.rst +++ b/doc/developer/xrefs.rst @@ -20,8 +20,6 @@ To verify xrefs have been included in a binary or dynamic library, run ``readelf -n binary``. For individual object files, it's ``readelf -S object.o | grep xref_array`` instead. -An extraction tool will be added in a future commit. - Structure and contents ---------------------- @@ -168,3 +166,50 @@ entry point. for C++ code when compiled by GCC. 
A workaround is present for runtime functionality, but to extract the xrefs from a C++ source file, it needs to be built with clang (or a future fixed version of GCC) instead. + +Extraction tool +--------------- + +The FRR source contains a matching tool to extract xref data from compiled ELF +binaries in ``python/xrelfo.py``. This tool uses CPython extensions +implemented in ``clippy`` and must therefore be executed with that. + +``xrelfo.py`` processes input from one or more ELF files (.o, .so, executable), +libtool objects (.lo, .la, executable wrapper script) or JSON (output from +``xrelfo.py``) and generates an output JSON file. During a standard FRR build, +it is invoked on all binaries and libraries and the result is combined into +``frr.json``. + +ELF files from any operating system, CPU architecture and endianness can be +processed on any host. Any issues with this are bugs in ``xrelfo.py`` +(or clippy's ELF code). + +``xrelfo.py`` also performs some sanity checking, particularly on log +messages. The following options are available: + +.. option:: -o OUTPUT + + Filename to write JSON output to. As a convention, a ``.xref`` filename + extension is used. + +.. option:: -Wlog-format + + Performs extra checks on log message format strings, particularly checks + for ``\t`` and ``\n`` characters (which should not be used in log messages). + +.. option:: -Wlog-args + + Generates cleanup hints for format string arguments where + :c:func:`printfrr()` extensions could be used, e.g. replacing ``inet_ntoa`` + with ``%pI4``. + +.. option:: --profile + + Runs the Python profiler to identify hotspots in the ``xrelfo.py`` code. + +``xrelfo.py`` uses information about C structure definitions saved in +``python/xrefstructs.json``. This file is included with the FRR sources and +only needs to be regenerated when some of the ``struct xref_*`` definitions +are changed (which should be almost never).
The file is written by +``python/tiabwarfo.py``, which uses ``pahole`` to extract the necessary data +from DWARF information. diff --git a/lib/clippy.h b/lib/clippy.h index be4db6e638..95af274106 100644 --- a/lib/clippy.h +++ b/lib/clippy.h @@ -20,6 +20,7 @@ #ifndef _FRR_CLIPPY_H #define _FRR_CLIPPY_H +#include <stdbool.h> #include <Python.h> #ifdef __cplusplus @@ -28,6 +29,7 @@ extern "C" { extern PyObject *clippy_parse(PyObject *self, PyObject *args); extern PyMODINIT_FUNC command_py_init(void); +extern bool elf_py_init(PyObject *pymod); #ifdef __cplusplus } diff --git a/lib/command_graph.h b/lib/command_graph.h index 09824460e6..86715410ce 100644 --- a/lib/command_graph.h +++ b/lib/command_graph.h @@ -99,7 +99,7 @@ struct cmd_element { const char *string; /* Command specification by string. */ const char *doc; /* Documentation of this command. */ int daemon; /* Daemon to which this command belong. */ - uint8_t attr; /* Command attributes */ + uint32_t attr; /* Command attributes */ /* handler function for command */ int (*func)(const struct cmd_element *, struct vty *, int, diff --git a/lib/command_py.c b/lib/command_py.c index 4ec116df33..7f19008fbf 100644 --- a/lib/command_py.c +++ b/lib/command_py.c @@ -345,5 +345,7 @@ PyMODINIT_FUNC command_py_init(void) PyModule_AddObject(pymod, "GraphNode", (PyObject *)&typeobj_graph_node); Py_INCREF(&typeobj_graph); PyModule_AddObject(pymod, "Graph", (PyObject *)&typeobj_graph); + if (!elf_py_init(pymod)) + initret(NULL); initret(pymod); } diff --git a/lib/elf_py.c b/lib/elf_py.c new file mode 100644 index 0000000000..0d8ad76e1c --- /dev/null +++ b/lib/elf_py.c @@ -0,0 +1,1301 @@ +/* + * fast ELF file accessor + * Copyright (C) 2018-2020 David Lamparter for NetDEF, Inc. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* Note: this wrapper is intended to be used as build-time helper. While + * it should be generally correct and proper, there may be the occasional + * memory leak or SEGV for things that haven't been well-tested. + * _ + * / \ This code is NOT SUITABLE FOR UNTRUSTED ELF FILES. It's used + * / ! \ in FRR to read files created by its own build. Don't take it out + * /_____\ of FRR and use it to parse random ELF files you found somewhere. + * + * If you're working with this code (or even reading it), you really need to + * read a bunch of the ELF specs. There's no way around it, things in here + * just represent pieces of ELF pretty much 1:1. Also, readelf & objdump are + * your friends. + * + * Required reading: + * https://refspecs.linuxfoundation.org/elf/elf.pdf + * https://refspecs.linuxfoundation.org/elf/x86_64-SysV-psABI.pdf + * Recommended reading: + * https://github.com/ARM-software/abi-aa/releases/download/2020Q4/aaelf64.pdf + * + * The core ELF spec is *not* enough, you should read at least one of the + * processor specific (psABI) docs. They define what & how relocations work. 
+ * Luckily we don't need to care about the processor specifics since this only + * does data relocations, but without looking at the psABI, some things aren't + * quite clear. + */ + +/* the API of this module roughly follows a very small subset of the one + * provided by the python elfutils package, which unfortunately is painfully + * slow. + */ + +#define PY_SSIZE_T_CLEAN + +#include <Python.h> +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "structmember.h" +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> + +#if defined(__sun__) && (__SIZEOF_POINTER__ == 4) +/* Solaris libelf bails otherwise ... */ +#undef _FILE_OFFSET_BITS +#define _FILE_OFFSET_BITS 32 +#endif + +#include <elf.h> +#include <libelf.h> +#include <gelf.h> + +#include "typesafe.h" +#include "jhash.h" +#include "clippy.h" + +static bool debug; + +#define debugf(...) \ + do { \ + if (debug) \ + fprintf(stderr, __VA_ARGS__); \ + } while (0) + +/* Exceptions */ +static PyObject *ELFFormatError; +static PyObject *ELFAccessError; + +/* most objects can only be created as return values from one of the methods */ +static PyObject *refuse_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ + PyErr_SetString(PyExc_ValueError, + "cannot create instances of this type"); + return NULL; +} + +struct elfreloc; +struct elfsect; + +PREDECL_HASH(elfrelocs) + +/* ELFFile and ELFSection intentionally share some behaviour, particularly + * subscript[123:456] access to file data. This is because relocatables + * (.o files) do things section-based, but linked executables/libraries do + * things file-based. Having the two behave similar allows simplifying the + * Python code. 
+ */ + +/* class ELFFile: + * + * overall entry point, instantiated by reading in an ELF file + */ +struct elffile { + PyObject_HEAD + + char *filename; + char *mmap, *mmend; + size_t len; + Elf *elf; + + /* note from here on there are several instances of + * + * GElf_Something *x, _x; + * + * this is a pattern used by libelf's generic ELF routines; the _x + * field is used to create a copy of the ELF structure from the file + * with 32/64bit and endianness adjusted. + */ + + GElf_Ehdr *ehdr, _ehdr; + Elf_Scn *symtab; + size_t nsym, symstridx; + Elf_Data *symdata; + + PyObject **sects; + size_t n_sect; + + struct elfrelocs_head dynrelocs; + + int elfclass; + bool bigendian; + bool has_symbols; +}; + +/* class ELFSection: + * + * note that executables and shared libraries can have their section headers + * removed, though in practice this is only used as an obfuscation technique. + */ +struct elfsect { + PyObject_HEAD + + const char *name; + struct elffile *ef; + + GElf_Shdr _shdr, *shdr; + Elf_Scn *scn; + unsigned long idx, len; + + struct elfrelocs_head relocs; +}; + +/* class ELFReloc: + * + * note: relocations in object files (.o) are section-based while relocations + * in executables and shared libraries are file-based. + * + * Whenever accessing something that is a pointer in the ELF file, the Python + * code needs to check for a relocation; if the pointer is pointing to some + * unresolved symbol the file will generally contain 0 bytes. The relocation + * will tell what the pointer is actually pointing to. + * + * This represents both static (.o file) and dynamic (.so/exec) relocations. 
+ */ +struct elfreloc { + PyObject_HEAD + + struct elfrelocs_item elfrelocs_item; + + struct elfsect *es; + struct elffile *ef; + + /* there's also old-fashioned GElf_Rel; we're converting that to + * GElf_Rela in elfsect_add_relocations() + */ + GElf_Rela _rela, *rela; + GElf_Sym _sym, *sym; + size_t symidx; + const char *symname; + + /* documented below in python docstrings */ + bool symvalid, unresolved, relative; + unsigned long long st_value; +}; + +static int elfreloc_cmp(const struct elfreloc *a, const struct elfreloc *b); +static uint32_t elfreloc_hash(const struct elfreloc *reloc); + +DECLARE_HASH(elfrelocs, struct elfreloc, elfrelocs_item, + elfreloc_cmp, elfreloc_hash) + +static Elf_Scn *elf_find_addr(struct elffile *ef, uint64_t addr, size_t *idx); +static PyObject *elffile_secbyidx(struct elffile *w, Elf_Scn *scn, size_t idx); +static PyObject *elfreloc_getsection(PyObject *self, PyObject *args); +static PyObject *elfreloc_getaddend(PyObject *obj, void *closure); + +/* --- end of declarations -------------------------------------------------- */ + +/* + * class ELFReloc: + */ + +static const char elfreloc_doc[] = + "Represents an ELF relocation record\n" + "\n" + "(struct elfreloc * in elf_py.c)"; + +#define member(name, type, doc) \ + { \ + (char *)#name, type, offsetof(struct elfreloc, name), READONLY,\ + (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \ + } +static PyMemberDef members_elfreloc[] = { + member(symname, T_STRING, + "Name of symbol this relocation refers to.\n" + "\n" + "Will frequently be `None` in executables and shared libraries." + ), + member(symvalid, T_BOOL, + "Target symbol has a valid type, i.e. 
not STT_NOTYPE"), + member(unresolved, T_BOOL, + "Target symbol refers to an existing section"), + member(relative, T_BOOL, + "Relocation is a REL (not RELA) record and thus relative."), + member(st_value, T_ULONGLONG, + "Target symbol's value, if known\n\n" + "Will be zero for unresolved/external symbols."), + {} +}; +#undef member + +static PyGetSetDef getset_elfreloc[] = { + { .name = (char *)"r_addend", .get = elfreloc_getaddend, .doc = + (char *)"Relocation addend value"}, + {} +}; + +static PyMethodDef methods_elfreloc[] = { + {"getsection", elfreloc_getsection, METH_VARARGS, + "Find relocation target's ELF section\n\n" + "Args: address of relocatee (TODO: fix/remove?)\n" + "Returns: ELFSection or None\n\n" + "Not possible if section headers have been stripped."}, + {} +}; + +static int elfreloc_cmp(const struct elfreloc *a, const struct elfreloc *b) +{ + if (a->rela->r_offset < b->rela->r_offset) + return -1; + if (a->rela->r_offset > b->rela->r_offset) + return 1; + return 0; +} + +static uint32_t elfreloc_hash(const struct elfreloc *reloc) +{ + return jhash(&reloc->rela->r_offset, sizeof(reloc->rela->r_offset), + 0xc9a2b7f4); +} + +static struct elfreloc *elfrelocs_get(struct elfrelocs_head *head, + GElf_Addr offset) +{ + struct elfreloc dummy; + + dummy.rela = &dummy._rela; + dummy.rela->r_offset = offset; + return elfrelocs_find(head, &dummy); +} + +static PyObject *elfreloc_getsection(PyObject *self, PyObject *args) +{ + struct elfreloc *w = (struct elfreloc *)self; + long data; + + if (!PyArg_ParseTuple(args, "k", &data)) + return NULL; + + if (!w->es) + Py_RETURN_NONE; + + if (w->symidx == 0) { + size_t idx = 0; + Elf_Scn *scn; + + data = (w->relative ? 
data : 0) + w->rela->r_addend; + scn = elf_find_addr(w->es->ef, data, &idx); + if (!scn) + Py_RETURN_NONE; + return elffile_secbyidx(w->es->ef, scn, idx); + } + return elffile_secbyidx(w->es->ef, NULL, w->sym->st_shndx); +} + +static PyObject *elfreloc_getaddend(PyObject *obj, void *closure) +{ + struct elfreloc *w = (struct elfreloc *)obj; + + return Py_BuildValue("K", (unsigned long long)w->rela->r_addend); +} + +static PyObject *elfreloc_repr(PyObject *arg) +{ + struct elfreloc *w = (struct elfreloc *)arg; + + return PyUnicode_FromFormat("<ELFReloc @%lu %s+%lu>", + (unsigned long)w->rela->r_offset, + (w->symname && w->symname[0]) ? w->symname + : "[0]", + (unsigned long)w->rela->r_addend); +} + +static void elfreloc_free(void *arg) +{ + struct elfreloc *w = arg; + + (void)w; +} + +static PyTypeObject typeobj_elfreloc = { + PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFReloc", + .tp_basicsize = sizeof(struct elfreloc), + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = elfreloc_doc, + .tp_new = refuse_new, + .tp_free = elfreloc_free, + .tp_repr = elfreloc_repr, + .tp_members = members_elfreloc, + .tp_methods = methods_elfreloc, + .tp_getset = getset_elfreloc, +}; + +/* + * class ELFSection: + */ + +static const char elfsect_doc[] = + "Represents an ELF section\n" + "\n" + "To access section contents, use subscript notation, e.g.\n" + " section[123:456]\n" + "To read null terminated C strings, replace the end with str:\n" + " section[123:str]\n\n" + "(struct elfsect * in elf_py.c)"; + +static PyObject *elfsect_getaddr(PyObject *self, void *closure); + +#define member(name, type, doc) \ + { \ + (char *)#name, type, offsetof(struct elfsect, name), READONLY, \ + (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \ + } +static PyMemberDef members_elfsect[] = { + member(name, T_STRING, + "Section name, e.g. 
\".text\""), + member(idx, T_ULONG, + "Section index in file"), + member(len, T_ULONG, + "Section length in bytes"), + {}, +}; +#undef member + +static PyGetSetDef getset_elfsect[] = { + { .name = (char *)"sh_addr", .get = elfsect_getaddr, .doc = + (char *)"Section virtual address (mapped program view)"}, + {} +}; + +static PyObject *elfsect_getaddr(PyObject *self, void *closure) +{ + struct elfsect *w = (struct elfsect *)self; + + return Py_BuildValue("K", (unsigned long long)w->shdr->sh_addr); +} + + +static PyObject *elfsect_getreloc(PyObject *self, PyObject *args) +{ + struct elfsect *w = (struct elfsect *)self; + struct elfreloc *relw; + unsigned long offs; + PyObject *ret; + + if (!PyArg_ParseTuple(args, "k", &offs)) + return NULL; + + relw = elfrelocs_get(&w->relocs, offs + w->shdr->sh_addr); + if (!relw) + Py_RETURN_NONE; + + ret = (PyObject *)relw; + Py_INCREF(ret); + return ret; +} + +static PyMethodDef methods_elfsect[] = { + {"getreloc", elfsect_getreloc, METH_VARARGS, + "Check for / get relocation at offset into section\n\n" + "Args: byte offset into section to check\n" + "Returns: ELFReloc or None"}, + {} +}; + +static PyObject *elfsect_subscript(PyObject *self, PyObject *key) +{ + Py_ssize_t start, stop, step, sllen; + struct elfsect *w = (struct elfsect *)self; + PySliceObject *slice; + unsigned long offs, len = ~0UL; + + if (!PySlice_Check(key)) { + PyErr_SetString(PyExc_IndexError, + "ELFSection subscript must be slice"); + return NULL; + } + slice = (PySliceObject *)key; + if (PyLong_Check(slice->stop)) { + if (PySlice_GetIndicesEx(key, w->shdr->sh_size, + &start, &stop, &step, &sllen)) + return NULL; + + if (step != 1) { + PyErr_SetString(PyExc_IndexError, + "ELFSection subscript slice step must be 1"); + return NULL; + } + if ((GElf_Xword)stop > w->shdr->sh_size) { + PyErr_Format(ELFAccessError, + "access (%lu) beyond end of section %lu/%s (%lu)", + stop, w->idx, w->name, w->shdr->sh_size); + return NULL; + } + + offs = start; + len = sllen; + 
} else { + if (slice->stop != (void *)&PyUnicode_Type + || !PyLong_Check(slice->start)) { + PyErr_SetString(PyExc_IndexError, "invalid slice"); + return NULL; + } + + offs = PyLong_AsUnsignedLongLong(slice->start); + len = ~0UL; + } + + offs += w->shdr->sh_offset; + if (offs > w->ef->len) { + PyErr_Format(ELFAccessError, + "access (%lu) beyond end of file (%lu)", + offs, w->ef->len); + return NULL; + } + if (len == ~0UL) + len = strnlen(w->ef->mmap + offs, w->ef->len - offs); + + Py_ssize_t pylen = len; + +#if PY_MAJOR_VERSION >= 3 + return Py_BuildValue("y#", w->ef->mmap + offs, pylen); +#else + return Py_BuildValue("s#", w->ef->mmap + offs, pylen); +#endif +} + +static PyMappingMethods mp_elfsect = { + .mp_subscript = elfsect_subscript, +}; + +static void elfsect_free(void *arg) +{ + struct elfsect *w = arg; + + (void)w; +} + +static PyObject *elfsect_repr(PyObject *arg) +{ + struct elfsect *w = (struct elfsect *)arg; + + return PyUnicode_FromFormat("<ELFSection %s>", w->name); +} + +static PyTypeObject typeobj_elfsect = { + PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFSection", + .tp_basicsize = sizeof(struct elfsect), + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = elfsect_doc, + .tp_new = refuse_new, + .tp_free = elfsect_free, + .tp_repr = elfsect_repr, + .tp_as_mapping = &mp_elfsect, + .tp_members = members_elfsect, + .tp_methods = methods_elfsect, + .tp_getset = getset_elfsect, +}; + +static void elfsect_add_relocations(struct elfsect *w, Elf_Scn *rel, + GElf_Shdr *relhdr) +{ + size_t i, entries; + Elf_Scn *symtab = elf_getscn(w->ef->elf, relhdr->sh_link); + GElf_Shdr _symhdr, *symhdr = gelf_getshdr(symtab, &_symhdr); + Elf_Data *symdata = elf_getdata(symtab, NULL); + Elf_Data *reldata = elf_getdata(rel, NULL); + + entries = relhdr->sh_size / relhdr->sh_entsize; + for (i = 0; i < entries; i++) { + struct elfreloc *relw; + size_t symidx; + GElf_Rela *rela; + GElf_Sym *sym; + + relw = (struct elfreloc *)typeobj_elfreloc.tp_alloc( + &typeobj_elfreloc, 0); 
+ relw->es = w; + + if (relhdr->sh_type == SHT_REL) { + GElf_Rel _rel, *rel; + + rel = gelf_getrel(reldata, i, &_rel); + relw->rela = &relw->_rela; + relw->rela->r_offset = rel->r_offset; + relw->rela->r_info = rel->r_info; + relw->rela->r_addend = 0; + relw->relative = true; + } else + relw->rela = gelf_getrela(reldata, i, &relw->_rela); + + rela = relw->rela; + if (rela->r_offset < w->shdr->sh_addr + || rela->r_offset >= w->shdr->sh_addr + w->shdr->sh_size) + continue; + + symidx = relw->symidx = GELF_R_SYM(rela->r_info); + sym = relw->sym = gelf_getsym(symdata, symidx, &relw->_sym); + if (sym) { + relw->symname = elf_strptr(w->ef->elf, symhdr->sh_link, + sym->st_name); + relw->symvalid = GELF_ST_TYPE(sym->st_info) + != STT_NOTYPE; + relw->unresolved = sym->st_shndx == SHN_UNDEF; + relw->st_value = sym->st_value; + } else { + relw->symname = NULL; + relw->symvalid = false; + relw->unresolved = false; + relw->st_value = 0; + } + + debugf("reloc @ %016llx sym %5llu %016llx %s\n", + (long long)rela->r_offset, (unsigned long long)symidx, + (long long)rela->r_addend, relw->symname); + + elfrelocs_add(&w->relocs, relw); + } +} + +/* + * bindings & loading code between ELFFile and ELFSection + */ + +static PyObject *elfsect_wrap(struct elffile *ef, Elf_Scn *scn, size_t idx, + const char *name) +{ + struct elfsect *w; + size_t i; + + w = (struct elfsect *)typeobj_elfsect.tp_alloc(&typeobj_elfsect, 0); + if (!w) + return NULL; + + w->name = name; + w->ef = ef; + w->scn = scn; + w->shdr = gelf_getshdr(scn, &w->_shdr); + w->len = w->shdr->sh_size; + w->idx = idx; + elfrelocs_init(&w->relocs); + + for (i = 0; i < ef->ehdr->e_shnum; i++) { + Elf_Scn *scn = elf_getscn(ef->elf, i); + GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr); + + if (shdr->sh_type != SHT_RELA && shdr->sh_type != SHT_REL) + continue; + if (shdr->sh_info && shdr->sh_info != idx) + continue; + elfsect_add_relocations(w, scn, shdr); + } + + return (PyObject *)w; +} + +static Elf_Scn 
*elf_find_section(struct elffile *ef, const char *name, + size_t *idx) +{ + size_t i; + const char *secname; + + for (i = 0; i < ef->ehdr->e_shnum; i++) { + Elf_Scn *scn = elf_getscn(ef->elf, i); + GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr); + + secname = elf_strptr(ef->elf, ef->ehdr->e_shstrndx, + shdr->sh_name); + if (strcmp(secname, name)) + continue; + if (idx) + *idx = i; + return scn; + } + return NULL; +} + +static Elf_Scn *elf_find_addr(struct elffile *ef, uint64_t addr, size_t *idx) +{ + size_t i; + + for (i = 0; i < ef->ehdr->e_shnum; i++) { + Elf_Scn *scn = elf_getscn(ef->elf, i); + GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr); + + if (addr < shdr->sh_addr || + addr >= shdr->sh_addr + shdr->sh_size) + continue; + + if (idx) + *idx = i; + return scn; + } + return NULL; +} + +/* + * class ELFFile: + */ + +static const char elffile_doc[] = + "Represents an ELF file\n" + "\n" + "Args: filename to load\n" + "\n" + "To access raw file contents, use subscript notation, e.g.\n" + " file[123:456]\n" + "To read null terminated C strings, replace the end with str:\n" + " file[123:str]\n\n" + "(struct elffile * in elf_py.c)"; + + +#define member(name, type, doc) \ + { \ + (char *)#name, type, offsetof(struct elffile, name), READONLY, \ + (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \ + } +static PyMemberDef members_elffile[] = { + member(filename, T_STRING, + "Original file name as given when opening"), + member(elfclass, T_INT, + "ELF class (architecture bit size)\n\n" + "Either 32 or 64, straight integer."), + member(bigendian, T_BOOL, + "ELF file is big-endian\n\n" + "All internal ELF structures are automatically converted."), + member(has_symbols, T_BOOL, + "A symbol section is present\n\n" + "Note: only refers to .symtab/SHT_SYMTAB section, not DT_SYMTAB" + ), + {}, +}; +#undef member + +static PyObject *elffile_secbyidx(struct elffile *w, Elf_Scn *scn, size_t idx) +{ + const char *name; + PyObject *ret; + + if (!scn) + scn = 
elf_getscn(w->elf, idx); + if (!scn || idx >= w->n_sect) + Py_RETURN_NONE; + + if (!w->sects[idx]) { + GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr); + + name = elf_strptr(w->elf, w->ehdr->e_shstrndx, shdr->sh_name); + w->sects[idx] = elfsect_wrap(w, scn, idx, name); + } + + ret = w->sects[idx]; + Py_INCREF(ret); + return ret; +} + +static PyObject *elffile_get_section(PyObject *self, PyObject *args) +{ + const char *name; + struct elffile *w = (struct elffile *)self; + Elf_Scn *scn; + size_t idx = 0; + + if (!PyArg_ParseTuple(args, "s", &name)) + return NULL; + + scn = elf_find_section(w, name, &idx); + return elffile_secbyidx(w, scn, idx); +} + +static PyObject *elffile_get_section_addr(PyObject *self, PyObject *args) +{ + unsigned long long addr; + struct elffile *w = (struct elffile *)self; + Elf_Scn *scn; + size_t idx = 0; + + if (!PyArg_ParseTuple(args, "K", &addr)) + return NULL; + + scn = elf_find_addr(w, addr, &idx); + return elffile_secbyidx(w, scn, idx); +} + +static PyObject *elffile_get_section_idx(PyObject *self, PyObject *args) +{ + unsigned long long idx; + struct elffile *w = (struct elffile *)self; + + if (!PyArg_ParseTuple(args, "K", &idx)) + return NULL; + + return elffile_secbyidx(w, NULL, idx); +} + +static PyObject *elffile_get_symbol(PyObject *self, PyObject *args) +{ + const char *name, *symname; + struct elffile *w = (struct elffile *)self; + GElf_Sym _sym, *sym; + size_t i; + + if (!PyArg_ParseTuple(args, "s", &name)) + return NULL; + + for (i = 0; i < w->nsym; i++) { + sym = gelf_getsym(w->symdata, i, &_sym); + if (sym->st_name == 0) + continue; + symname = elf_strptr(w->elf, w->symstridx, sym->st_name); + if (strcmp(symname, name)) + continue; + + PyObject *pysect; + Elf_Scn *scn = elf_getscn(w->elf, sym->st_shndx); + + if (scn) + pysect = elffile_secbyidx(w, scn, sym->st_shndx); + else { + pysect = Py_None; + Py_INCREF(pysect); + } + return Py_BuildValue("sKN", symname, + (unsigned long long)sym->st_value, pysect); + } + 
Py_RETURN_NONE; +} + +static PyObject *elffile_getreloc(PyObject *self, PyObject *args) +{ + struct elffile *w = (struct elffile *)self; + struct elfreloc *relw; + unsigned long offs; + PyObject *ret; + + if (!PyArg_ParseTuple(args, "k", &offs)) + return NULL; + + relw = elfrelocs_get(&w->dynrelocs, offs); + if (!relw) + Py_RETURN_NONE; + + ret = (PyObject *)relw; + Py_INCREF(ret); + return ret; +} + +static PyObject *elffile_find_note(PyObject *self, PyObject *args) +{ +#if defined(HAVE_GELF_GETNOTE) && defined(HAVE_ELF_GETDATA_RAWCHUNK) + const char *owner; + const uint8_t *ids; + GElf_Word id; + struct elffile *w = (struct elffile *)self; + size_t i; + + if (!PyArg_ParseTuple(args, "ss", &owner, &ids)) + return NULL; + + if (strlen((char *)ids) != 4) { + PyErr_SetString(PyExc_ValueError, + "ELF note ID must be exactly 4-byte string"); + return NULL; + } + if (w->bigendian) + id = (ids[0] << 24) | (ids[1] << 16) | (ids[2] << 8) | ids[3]; + else + id = (ids[3] << 24) | (ids[2] << 16) | (ids[1] << 8) | ids[0]; + + for (i = 0; i < w->ehdr->e_phnum; i++) { + GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr); + Elf_Data *notedata; + size_t offset; + + if (phdr->p_type != PT_NOTE) + continue; + + notedata = elf_getdata_rawchunk(w->elf, phdr->p_offset, + phdr->p_filesz, ELF_T_NHDR); + + GElf_Nhdr nhdr[1]; + size_t nameoffs, dataoffs; + + offset = 0; + while ((offset = gelf_getnote(notedata, offset, nhdr, + &nameoffs, &dataoffs))) { + if (phdr->p_offset + nameoffs >= w->len) + continue; + + const char *name = w->mmap + phdr->p_offset + nameoffs; + + if (strcmp(name, owner)) + continue; + if (id != nhdr->n_type) + continue; + + PyObject *s, *e; + + s = PyLong_FromUnsignedLongLong( + phdr->p_vaddr + dataoffs); + e = PyLong_FromUnsignedLongLong( + phdr->p_vaddr + dataoffs + nhdr->n_descsz); + return PySlice_New(s, e, NULL); + } + } +#endif + Py_RETURN_NONE; +} + +static bool elffile_virt2file(struct elffile *w, GElf_Addr virt, + GElf_Addr *offs) +{ + *offs = 0; + + 
for (size_t i = 0; i < w->ehdr->e_phnum; i++) { + GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr); + + if (phdr->p_type != PT_LOAD) + continue; + + if (virt < phdr->p_vaddr + || virt >= phdr->p_vaddr + phdr->p_memsz) + continue; + + if (virt >= phdr->p_vaddr + phdr->p_filesz) + return false; + + *offs = virt - phdr->p_vaddr + phdr->p_offset; + return true; + } + + return false; +} + +static PyObject *elffile_subscript(PyObject *self, PyObject *key) +{ + Py_ssize_t start, stop, step; + PySliceObject *slice; + struct elffile *w = (struct elffile *)self; + bool str = false; + + if (!PySlice_Check(key)) { + PyErr_SetString(PyExc_IndexError, + "ELFFile subscript must be slice"); + return NULL; + } + slice = (PySliceObject *)key; + stop = -1; + step = 1; + if (PyLong_Check(slice->stop)) { + start = PyLong_AsSsize_t(slice->start); + if (PyErr_Occurred()) + return NULL; + if (slice->stop != Py_None) { + stop = PyLong_AsSsize_t(slice->stop); + if (PyErr_Occurred()) + return NULL; + } + if (slice->step != Py_None) { + step = PyLong_AsSsize_t(slice->step); + if (PyErr_Occurred()) + return NULL; + } + } else { + if (slice->stop != (void *)&PyUnicode_Type + || !PyLong_Check(slice->start)) { + PyErr_SetString(PyExc_IndexError, "invalid slice"); + return NULL; + } + + str = true; + start = PyLong_AsUnsignedLongLong(slice->start); + } + if (step != 1) { + PyErr_SetString(PyExc_IndexError, + "ELFFile subscript slice step must be 1"); + return NULL; + } + + GElf_Addr xstart = start, xstop = stop; + + for (size_t i = 0; i < w->ehdr->e_phnum; i++) { + GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr); + + if (phdr->p_type != PT_LOAD) + continue; + + if (xstart < phdr->p_vaddr + || xstart >= phdr->p_vaddr + phdr->p_memsz) + continue; + if (!str && (xstop < phdr->p_vaddr + || xstop > phdr->p_vaddr + phdr->p_memsz)) { + PyErr_Format(ELFAccessError, + "access (%llu) beyond end of program header (%llu)", + (long long)xstop, + (long long)(phdr->p_vaddr + + phdr->p_memsz)); + 
return NULL; + } + + xstart = xstart - phdr->p_vaddr + phdr->p_offset; + + if (str) + xstop = strlen(w->mmap + xstart); + else + xstop = xstop - phdr->p_vaddr + phdr->p_offset; + + Py_ssize_t pylen = xstop - xstart; + +#if PY_MAJOR_VERSION >= 3 + return Py_BuildValue("y#", w->mmap + xstart, pylen); +#else + return Py_BuildValue("s#", w->mmap + xstart, pylen); +#endif + }; + + return PyErr_Format(ELFAccessError, + "virtual address (%llu) not found in program headers", + (long long)start); +} + +static PyMethodDef methods_elffile[] = { + {"find_note", elffile_find_note, METH_VARARGS, + "find specific note entry"}, + {"getreloc", elffile_getreloc, METH_VARARGS, + "find relocation"}, + {"get_symbol", elffile_get_symbol, METH_VARARGS, + "find symbol by name"}, + {"get_section", elffile_get_section, METH_VARARGS, + "find section by name"}, + {"get_section_addr", elffile_get_section_addr, METH_VARARGS, + "find section by address"}, + {"get_section_idx", elffile_get_section_idx, METH_VARARGS, + "find section by index"}, + {} +}; + +static PyObject *elffile_load(PyTypeObject *type, PyObject *args, + PyObject *kwds); + +static void elffile_free(void *arg) +{ + struct elffile *w = arg; + + elf_end(w->elf); + munmap(w->mmap, w->len); + free(w->filename); +} + +static PyMappingMethods mp_elffile = { + .mp_subscript = elffile_subscript, +}; + +static PyTypeObject typeobj_elffile = { + PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFFile", + .tp_basicsize = sizeof(struct elffile), + .tp_flags = Py_TPFLAGS_DEFAULT, + .tp_doc = elffile_doc, + .tp_new = elffile_load, + .tp_free = elffile_free, + .tp_as_mapping = &mp_elffile, + .tp_members = members_elffile, + .tp_methods = methods_elffile, +}; + +static char *elfdata_strptr(Elf_Data *data, size_t offset) +{ + char *p; + + if (offset >= data->d_size) + return NULL; + + p = (char *)data->d_buf + offset; + if (strnlen(p, data->d_size - offset) >= data->d_size - offset) + return NULL; + + return p; +} + +static void 
elffile_add_dynreloc(struct elffile *w, Elf_Data *reldata, + size_t entries, Elf_Data *symdata, + Elf_Data *strdata) +{ + size_t i; + + for (i = 0; i < entries; i++) { + struct elfreloc *relw; + size_t symidx; + GElf_Rela *rela; + GElf_Sym *sym; + + relw = (struct elfreloc *)typeobj_elfreloc.tp_alloc( + &typeobj_elfreloc, 0); + relw->ef = w; + + rela = relw->rela = gelf_getrela(reldata, i, &relw->_rela); + symidx = relw->symidx = GELF_R_SYM(rela->r_info); + sym = relw->sym = gelf_getsym(symdata, symidx, &relw->_sym); + if (sym) { + relw->symname = elfdata_strptr(strdata, sym->st_name); + relw->symvalid = GELF_ST_TYPE(sym->st_info) + != STT_NOTYPE; + relw->unresolved = sym->st_shndx == SHN_UNDEF; + relw->st_value = sym->st_value; + } else { + relw->symname = NULL; + relw->symvalid = false; + relw->unresolved = false; + relw->st_value = 0; + } + + debugf("dynreloc @ %016llx sym %5llu %016llx %s\n", + (long long)rela->r_offset, (unsigned long long)symidx, + (long long)rela->r_addend, relw->symname); + + elfrelocs_add(&w->dynrelocs, relw); + } + +} + +/* primary (only, really) entry point to anything in this module */ +static PyObject *elffile_load(PyTypeObject *type, PyObject *args, + PyObject *kwds) +{ + const char *filename; + static const char * const kwnames[] = {"filename", NULL}; + struct elffile *w; + struct stat st; + int fd, err; + + w = (struct elffile *)typeobj_elffile.tp_alloc(&typeobj_elffile, 0); + if (!w) + return NULL; + + if (!PyArg_ParseTupleAndKeywords(args, kwds, "s", (char **)kwnames, + &filename)) + return NULL; + + w->filename = strdup(filename); + fd = open(filename, O_RDONLY | O_NOCTTY); + if (fd < 0 || fstat(fd, &st)) { + PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename); + close(fd); + goto out; + } + w->len = st.st_size; + w->mmap = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); + if (!w->mmap) { + PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename); + close(fd); + goto out; + } + close(fd); + w->mmend = w->mmap + 
st.st_size; + + if (w->len < EI_NIDENT || memcmp(w->mmap, ELFMAG, SELFMAG)) { + PyErr_SetString(ELFFormatError, "invalid ELF signature"); + goto out; + } + + switch (w->mmap[EI_CLASS]) { + case ELFCLASS32: + w->elfclass = 32; + break; + case ELFCLASS64: + w->elfclass = 64; + break; + default: + PyErr_SetString(ELFFormatError, "invalid ELF class"); + goto out; + } + switch (w->mmap[EI_DATA]) { + case ELFDATA2LSB: + w->bigendian = false; + break; + case ELFDATA2MSB: + w->bigendian = true; + break; + default: + PyErr_SetString(ELFFormatError, "invalid ELF byte order"); + goto out; + } + + w->elf = elf_memory(w->mmap, w->len); + if (!w->elf) + goto out_elferr; + w->ehdr = gelf_getehdr(w->elf, &w->_ehdr); + if (!w->ehdr) + goto out_elferr; + + for (size_t i = 0; i < w->ehdr->e_shnum; i++) { + Elf_Scn *scn = elf_getscn(w->elf, i); + GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr); + + if (shdr->sh_type == SHT_SYMTAB) { + w->symtab = scn; + w->nsym = shdr->sh_size / shdr->sh_entsize; + w->symdata = elf_getdata(scn, NULL); + w->symstridx = shdr->sh_link; + break; + } + } + w->has_symbols = w->symtab && w->symstridx; + elfrelocs_init(&w->dynrelocs); + +#ifdef HAVE_ELF_GETDATA_RAWCHUNK + for (size_t i = 0; i < w->ehdr->e_phnum; i++) { + GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr); + + if (phdr->p_type != PT_DYNAMIC) + continue; + + Elf_Data *dyndata = elf_getdata_rawchunk(w->elf, + phdr->p_offset, phdr->p_filesz, ELF_T_DYN); + + GElf_Addr dynrela = 0, symtab = 0, strtab = 0; + size_t dynrelasz = 0, dynrelaent = 0, strsz = 0; + GElf_Dyn _dyn, *dyn; + + for (size_t j = 0;; j++) { + dyn = gelf_getdyn(dyndata, j, &_dyn); + + if (dyn->d_tag == DT_NULL) + break; + + switch (dyn->d_tag) { + case DT_SYMTAB: + symtab = dyn->d_un.d_ptr; + break; + + case DT_STRTAB: + strtab = dyn->d_un.d_ptr; + break; + case DT_STRSZ: + strsz = dyn->d_un.d_val; + break; + + case DT_RELA: + dynrela = dyn->d_un.d_ptr; + break; + case DT_RELASZ: + dynrelasz = dyn->d_un.d_val; + break; + 
case DT_RELAENT: + dynrelaent = dyn->d_un.d_val; + break; + + case DT_RELSZ: + if (dyn->d_un.d_val) + fprintf(stderr, + "WARNING: ignoring non-empty DT_REL!\n"); + break; + } + } + + GElf_Addr offset; + Elf_Data *symdata = NULL, *strdata = NULL, *reladata = NULL; + + if (elffile_virt2file(w, symtab, &offset)) + symdata = elf_getdata_rawchunk(w->elf, offset, + w->len - offset, + ELF_T_SYM); + if (elffile_virt2file(w, strtab, &offset)) + strdata = elf_getdata_rawchunk(w->elf, offset, + strsz, ELF_T_BYTE); + + if (!dynrela || !dynrelasz || !dynrelaent) + continue; + + if (!elffile_virt2file(w, dynrela, &offset)) + continue; + + debugf("dynrela @%llx/%llx+%llx\n", (long long)dynrela, + (long long)offset, (long long)dynrelasz); + + reladata = elf_getdata_rawchunk(w->elf, offset, dynrelasz, + ELF_T_RELA); + elffile_add_dynreloc(w, reladata, dynrelasz / dynrelaent, + symdata, strdata); + } +#endif + + w->sects = calloc(sizeof(PyObject *), w->ehdr->e_shnum); + w->n_sect = w->ehdr->e_shnum; + + return (PyObject *)w; + +out_elferr: + err = elf_errno(); + + PyErr_Format(ELFFormatError, "libelf error %d: %s", + err, elf_errmsg(err)); +out: + if (w->elf) + elf_end(w->elf); + free(w->filename); + return NULL; +} + +static PyObject *elfpy_debug(PyObject *self, PyObject *args) +{ + int arg; + + if (!PyArg_ParseTuple(args, "p", &arg)) + return NULL; + + debug = arg; + + Py_RETURN_NONE; +} + +static PyMethodDef methods_elfpy[] = { + {"elfpy_debug", elfpy_debug, METH_VARARGS, "switch debuging on/off"}, + {} +}; + +bool elf_py_init(PyObject *pymod) +{ + if (PyType_Ready(&typeobj_elffile) < 0) + return false; + if (PyType_Ready(&typeobj_elfsect) < 0) + return false; + if (PyType_Ready(&typeobj_elfreloc) < 0) + return false; + if (elf_version(EV_CURRENT) == EV_NONE) + return false; + +#if PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 5 + PyModule_AddFunctions(pymod, methods_elfpy); +#else + (void)methods_elfpy; +#endif + + ELFFormatError = PyErr_NewException("_clippy.ELFFormatError", + 
PyExc_ValueError, NULL); + PyModule_AddObject(pymod, "ELFFormatError", ELFFormatError); + ELFAccessError = PyErr_NewException("_clippy.ELFAccessError", + PyExc_IndexError, NULL); + PyModule_AddObject(pymod, "ELFAccessError", ELFAccessError); + + Py_INCREF(&typeobj_elffile); + PyModule_AddObject(pymod, "ELFFile", (PyObject *)&typeobj_elffile); + Py_INCREF(&typeobj_elfsect); + PyModule_AddObject(pymod, "ELFSection", (PyObject *)&typeobj_elfsect); + Py_INCREF(&typeobj_elfreloc); + PyModule_AddObject(pymod, "ELFReloc", (PyObject *)&typeobj_elfreloc); + return true; +} diff --git a/lib/subdir.am b/lib/subdir.am index d5ffa08546..38d1a3f773 100644 --- a/lib/subdir.am +++ b/lib/subdir.am @@ -410,7 +410,7 @@ lib_grammar_sandbox_LDADD = \ lib_clippy_CPPFLAGS = $(AM_CPPFLAGS) -D_GNU_SOURCE -DBUILDING_CLIPPY lib_clippy_CFLAGS = $(PYTHON_CFLAGS) -lib_clippy_LDADD = $(PYTHON_LIBS) $(UST_LIBS) +lib_clippy_LDADD = $(PYTHON_LIBS) $(UST_LIBS) -lelf lib_clippy_LDFLAGS = -export-dynamic lib_clippy_SOURCES = \ lib/jhash.c \ @@ -420,9 +420,11 @@ lib_clippy_SOURCES = \ lib/command_parse.y \ lib/command_py.c \ lib/defun_lex.l \ + lib/elf_py.c \ lib/graph.c \ lib/libfrr_trace.c \ lib/memory.c \ + lib/typesafe.c \ lib/vector.c \ # end @@ -439,6 +441,32 @@ SUFFIXES += _clippy.c .c_clippy.c: $(AM_V_CLIPPY) $(CLIPPY) $(top_srcdir)/python/clidef.py -o $@ $< +# xrelfo, the ELF xref extractor + +AM_V_XRELFO = $(am__v_XRELFO_$(V)) +am__v_XRELFO_ = $(am__v_XRELFO_$(AM_DEFAULT_VERBOSITY)) +am__v_XRELFO_0 = @echo " XRELFO " $@; +am__v_XRELFO_1 = + +if DEV_BUILD +XRELFO_FLAGS = -Wlog-format -Wlog-args +else +XRELFO_FLAGS = +endif + +SUFFIXES += .xref +%.xref: % $(CLIPPY) + $(AM_V_XRELFO) $(CLIPPY) $(top_srcdir)/python/xrelfo.py $(XRELFO_FLAGS) -o $@ $< + +# dependencies added in python/makefile.py +frr.xref: + $(AM_V_XRELFO) $(CLIPPY) $(top_srcdir)/python/xrelfo.py -o $@ $^ +all-am: frr.xref + +clean-xref: + -rm -rf $(xrefs) frr.xref +clean-local: clean-xref + ## automake's "ylwrap" is a great piece 
of GNU software... not. .l.c: $(AM_V_LEX)$(am__skiplex) $(LEXCOMPILE) $< diff --git a/lib/zlog.h b/lib/zlog.h index 3e86aa1345..4fdb47bb95 100644 --- a/lib/zlog.h +++ b/lib/zlog.h @@ -44,6 +44,7 @@ struct xref_logmsg { const char *fmtstring; uint32_t priority; uint32_t ec; + const char *args; }; struct xrefdata_logmsg { @@ -97,6 +98,7 @@ static inline void zlog_ref(const struct xref_logmsg *xref, .xref = XREF_INIT(XREFT_LOGMSG, &_xrefdata, __func__), \ .fmtstring = (msg), \ .priority = (prio), \ + .args = (#__VA_ARGS__), \ }; \ XREF_LINK(_xref.xref); \ zlog_ref(&_xref, (msg), ##__VA_ARGS__); \ @@ -122,6 +124,7 @@ static inline void zlog_ref(const struct xref_logmsg *xref, .fmtstring = (msg), \ .priority = (prio), \ .ec = (ec_), \ + .args = (#__VA_ARGS__), \ }; \ XREF_LINK(_xref.xref); \ zlog_ref(&_xref, "[EC %u] " msg, ec_, ##__VA_ARGS__); \ diff --git a/python/clippy/__init__.py b/python/clippy/__init__.py index d6865ff484..344a1c91ee 100644 --- a/python/clippy/__init__.py +++ b/python/clippy/__init__.py @@ -21,6 +21,8 @@ import _clippy from _clippy import parse, Graph, GraphNode +frr_top_src = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + def graph_iterate(graph): """iterator yielding all nodes of a graph diff --git a/python/clippy/elf.py b/python/clippy/elf.py new file mode 100644 index 0000000000..4ed334f0c4 --- /dev/null +++ b/python/clippy/elf.py @@ -0,0 +1,574 @@ +# FRR libelf wrapper +# +# Copyright (C) 2020 David Lamparter for NetDEF, Inc. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; see the file COPYING; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +''' +Wrapping layer and additional utility around _clippy.ELFFile. + +Essentially, the C bits have the low-level ELF access bits that should be +fast while this has the bits that string everything together (and would've +been a PITA to do in C.) + +Surprisingly - or maybe through proper engineering - this actually works +across architecture, word size and even endianness boundaries. Both the C +module (through GElf_*) and this code (cf. struct.unpack format mangling +in ELFDissectStruct) will take appropriate measures to flip and resize +fields as needed. +''' + +import struct +from collections import OrderedDict +from weakref import WeakValueDictionary + +from _clippy import ELFFile, ELFAccessError + +# +# data access +# + +class ELFNull(object): + ''' + NULL pointer, returned instead of ELFData + ''' + def __init__(self): + self.symname = None + self._dstsect = None + + def __repr__(self): + return '<ptr: NULL>' + + def __hash__(self): + return hash(None) + + def get_string(self): + return None + +class ELFUnresolved(object): + ''' + Reference to an unresolved external symbol, returned instead of ELFData + + :param symname: name of the referenced symbol + :param addend: offset added to the symbol, normally zero + ''' + def __init__(self, symname, addend): + self.addend = addend + self.symname = symname + self._dstsect = None + + def __repr__(self): + return '<unresolved: %s+%d>' % (self.symname, self.addend) + + def __hash__(self): + return hash((self.symname, self.addend)) + +class ELFData(object): + ''' + Actual data somewhere in the ELF file. 
+ + :type dstsect: ELFSubset + :param dstsect: container data area (section or entire file) + :param dstoffs: byte offset into dstsect + :param dstlen: byte size of object, or None if unknown, open-ended or string + ''' + def __init__(self, dstsect, dstoffs, dstlen): + self._dstsect = dstsect + self._dstoffs = dstoffs + self._dstlen = dstlen + self.symname = None + + def __repr__(self): + return '<ptr: %s+0x%05x/%d>' % (self._dstsect.name, self._dstoffs, self._dstlen or -1) + + def __hash__(self): + return hash((self._dstsect, self._dstoffs)) + + def get_string(self): + ''' + Interpret as C string / null terminated UTF-8 and get the actual text. + ''' + try: + return self._dstsect[self._dstoffs:str].decode('UTF-8') + except: + import pdb; pdb.set_trace() + + def get_data(self, reflen): + ''' + Interpret as some structure (and check vs. expected length) + + :param reflen: expected size of the object, compared against actual + size (which is only known in rare cases, mostly when directly + accessing a symbol since symbols have their destination object + size recorded) + ''' + if self._dstlen is not None and self._dstlen != reflen: + raise ValueError('symbol size mismatch (got %d, expected %d)' % (self._dstlen, reflen)) + return self._dstsect[self._dstoffs:self._dstoffs+reflen] + + def offset(self, offs, within_symbol=False): + ''' + Get another ELFData at an offset + + :param offs: byte offset, can be negative (e.g. in container_of) + :param within_symbol: retain length information + ''' + if self._dstlen is None or not within_symbol: + return ELFData(self._dstsect, self._dstoffs + offs, None) + else: + return ELFData(self._dstsect, self._dstoffs + offs, self._dstlen - offs) + +# +# dissection data items +# + +class ELFDissectData(object): + ''' + Common bits for ELFDissectStruct and ELFDissectUnion + ''' + + def __len__(self): + ''' + Used for boolean evaluation, e.g. "if struct: ..." 
+ ''' + return not (isinstance(self._data, ELFNull) or isinstance(self._data, ELFUnresolved)) + + def container_of(self, parent, fieldname): + ''' + Assume this struct is embedded in a larger struct and get at the larger + + Python ``self.container_of(a, b)`` = C ``container_of(self, a, b)`` + + :param parent: class (not instance) of the larger struct + :param fieldname: fieldname that refers back to this + :returns: instance of parent, with fieldname set to this object + ''' + offset = 0 + if not hasattr(parent, '_efields'): + parent._setup_efields() + + for field in parent._efields[self.elfclass]: + if field[0] == fieldname: + break + offset += struct.calcsize(field[1]) + else: + raise AttributeError('%r not found in %r.fields' % (fieldname, parent)) + + return parent(self._data.offset(-offset), replace = {fieldname: self}) + +class ELFDissectStruct(ELFDissectData): + ''' + Decode and provide access to a struct somewhere in the ELF file + + Handles pointers and strings somewhat nicely. Create a subclass for each + struct that is to be accessed, and give a field list in a "fields" + class-member. + + :param dataptr: ELFData referring to the data bits to decode. + :param parent: where this was instantiated from; only for reference, has + no functional impact. + :param replace: substitute data values for specific fields. Used by + `container_of` to replace the inner struct when creating the outer + one. + + .. attribute:: fields + + List of tuples describing the struct members. Items can be: + - ``('name', ELFDissectData)`` - directly embed another struct + - ``('name', 'I')`` - simple data types; second item for struct.unpack + - ``('name', 'I', None)`` - field to ignore + - ``('name', 'P', str)`` - pointer to string + - ``('name', 'P', ELFDissectData)`` - pointer to another struct + + ``P`` is added as unpack format for pointers (sized appropriately for + the ELF file.) + + Refer to tiabwarfo.py for extracting this from ``pahole``. 
+ + TBD: replace tuples with a class. + + .. attribute:: fieldrename + + Dictionary to rename fields, useful if fields comes from tiabwarfo.py. + ''' + + class Pointer(object): + ''' + Quick wrapper for pointers to further structs + + This is just here to avoid going into infinite loops when loading + structs that have pointers to each other (e.g. struct xref <--> + struct xrefdata.) The pointer destination is only instantiated when + actually accessed. + ''' + def __init__(self, cls, ptr): + self.cls = cls + self.ptr = ptr + + def __repr__(self): + return '<Pointer:%s %r>' % (self.cls.__name__, self.ptr) + + def __call__(self): + if isinstance(self.ptr, ELFNull): + return None + return self.cls(self.ptr) + + def __new__(cls, dataptr, parent = None, replace = None): + if dataptr._dstsect is None: + return super().__new__(cls) + + obj = dataptr._dstsect._pointers.get((cls, dataptr)) + if obj is not None: + return obj + obj = super().__new__(cls) + dataptr._dstsect._pointers[(cls, dataptr)] = obj + return obj + + replacements = 'lLnN' + + @classmethod + def _preproc_structspec(cls, elfclass, spec): + elfbits = elfclass + + if hasattr(spec, 'calcsize'): + spec = '%ds' % (spec.calcsize(elfclass),) + + if elfbits == 32: + repl = ['i', 'I'] + else: + repl = ['q', 'Q'] + for c in cls.replacements: + spec = spec.replace(c, repl[int(c.isupper())]) + return spec + + @classmethod + def _setup_efields(cls): + cls._efields = {} + cls._esize = {} + for elfclass in [32, 64]: + cls._efields[elfclass] = [] + size = 0 + for f in cls.fields: + newf = (f[0], cls._preproc_structspec(elfclass, f[1])) + f[2:] + cls._efields[elfclass].append(newf) + size += struct.calcsize(newf[1]) + cls._esize[elfclass] = size + + def __init__(self, dataptr, parent = None, replace = None): + if not hasattr(self.__class__, '_efields'): + self._setup_efields() + + self._fdata = None + self._data = dataptr + self._parent = parent + self.symname = dataptr.symname + if isinstance(dataptr, ELFNull) or 
isinstance(dataptr, ELFUnresolved): + self._fdata = {} + return + + self._elfsect = dataptr._dstsect + self.elfclass = self._elfsect._elffile.elfclass + self.offset = dataptr._dstoffs + + pspecl = [f[1] for f in self._efields[self.elfclass]] + + # need to correlate output from struct.unpack with extra metadata + # about the particular fields, so note down byte offsets (in locs) + # and tuple indices of pointers (in ptrs) + pspec = '' + locs = {} + ptrs = set() + + for idx, spec in enumerate(pspecl): + if spec == 'P': + ptrs.add(idx) + spec = self._elfsect.ptrtype + + locs[idx] = struct.calcsize(pspec) + pspec = pspec + spec + + self._total_size = struct.calcsize(pspec) + + def replace_ptrs(v): + idx, val = v[0], v[1] + if idx not in ptrs: + return val + return self._elfsect.pointer(self.offset + locs[idx]) + + data = dataptr.get_data(struct.calcsize(pspec)) + unpacked = struct.unpack(self._elfsect.endian + pspec, data) + unpacked = list(map(replace_ptrs, enumerate(unpacked))) + self._fraw = unpacked + self._fdata = OrderedDict() + replace = replace or {} + + for i, item in enumerate(unpacked): + name = self.fields[i][0] + if name is None: + continue + + if name in replace: + self._fdata[name] = replace[name] + continue + + if isinstance(self.fields[i][1], type) and issubclass(self.fields[i][1], ELFDissectData): + dataobj = self.fields[i][1](dataptr.offset(locs[i]), self) + self._fdata[name] = dataobj + continue + if len(self.fields[i]) == 3: + if self.fields[i][2] == str: + self._fdata[name] = item.get_string() + continue + elif self.fields[i][2] is None: + pass + elif issubclass(self.fields[i][2], ELFDissectData): + cls = self.fields[i][2] + dataobj = self.Pointer(cls, item) + self._fdata[name] = dataobj + continue + + self._fdata[name] = item + + def __getattr__(self, attrname): + if attrname not in self._fdata: + raise AttributeError(attrname) + if isinstance(self._fdata[attrname], self.Pointer): + self._fdata[attrname] = self._fdata[attrname]() + return 
self._fdata[attrname] + + def __repr__(self): + if not isinstance(self._data, ELFData): + return '<%s: %r>' % (self.__class__.__name__, self._data) + return '<%s: %s>' % (self.__class__.__name__, + ', '.join(['%s=%r' % t for t in self._fdata.items()])) + + @classmethod + def calcsize(cls, elfclass): + ''' + Sum up byte size of this struct + + Wraps struct.calcsize with some extra features. + ''' + if not hasattr(cls, '_efields'): + cls._setup_efields() + + pspec = ''.join([f[1] for f in cls._efields[elfclass]]) + + ptrtype = 'I' if elfclass == 32 else 'Q' + pspec = pspec.replace('P', ptrtype) + + return struct.calcsize(pspec) + +class ELFDissectUnion(ELFDissectData): + ''' + Decode multiple structs in the same place. + + Not currently used (and hence not tested.) Worked at some point but not + needed anymore and may be borked now. Remove this comment when using. + ''' + def __init__(self, dataptr, parent = None): + self._dataptr = dataptr + self._parent = parent + self.members = [] + for name, membercls in self.__class__.members: + item = membercls(dataptr, parent) + self.members.append(item) + setattr(self, name, item) + + def __repr__(self): + return '<%s: %s>' % (self.__class__.__name__, ', '.join([repr(i) for i in self.members])) + + @classmethod + def calcsize(cls, elfclass): + return max([member.calcsize(elfclass) for name, member in cls.members]) + +# +# wrappers for spans of ELF data +# + +class ELFSubset(object): + ''' + Common abstract base for section-level and file-level access. + ''' + + def __init__(self): + super().__init__() + + self._pointers = WeakValueDictionary() + + def __hash__(self): + return hash(self.name) + + def __getitem__(self, k): + ''' + Read data from slice + + Subscript **must** be a slice; a simple index will not return a byte + but rather throw an exception. Valid slice syntaxes are defined by + the C module: + + - `this[123:456]` - extract specific range + - `this[123:str]` - extract until null byte. 
The slice stop value is + the `str` type (or, technically, `unicode`.) + ''' + return self._obj[k] + + def getreloc(self, offset): + ''' + Check for a relocation record at the specified offset. + ''' + return self._obj.getreloc(offset) + + def iter_data(self, scls, slice_ = slice(None)): + ''' + Assume an array of structs present at a particular slice and decode + + :param scls: ELFDissectData subclass for the struct + :param slice_: optional range specification + ''' + size = scls.calcsize(self._elffile.elfclass) + + offset = slice_.start or 0 + stop = slice_.stop or self._obj.len + if stop < 0: + stop = self._obj.len - stop + + while offset < stop: + yield scls(ELFData(self, offset, size)) + offset += size + + def pointer(self, offset): + ''' + Try to dereference a pointer value + + This checks whether there's a relocation at the given offset and + uses that; otherwise (e.g. in a non-PIE executable where the pointer + is already resolved by the linker) the data at the location is used. + + :param offset: byte offset from beginning of section, + or virtual address in file + :returns: ELFData wrapping pointed-to object + ''' + + ptrsize = struct.calcsize(self.ptrtype) + data = struct.unpack(self.endian + self.ptrtype, self[offset:offset + ptrsize])[0] + + reloc = self.getreloc(offset) + dstsect = None + if reloc: + # section won't be available in whole-file operation + dstsect = reloc.getsection(data) + addend = reloc.r_addend + + if reloc.relative: + # old-style ELF REL instead of RELA, not well-tested + addend += data + + if reloc.unresolved and reloc.symvalid: + return ELFUnresolved(reloc.symname, addend) + elif reloc.symvalid: + data = addend + reloc.st_value + else: + data = addend + + # 0 could technically be a valid pointer for a shared library, + # since libraries may use 0 as default virtual start address (it'll + # be adjusted on loading) + # That said, if the library starts at 0, that's where the ELF header + # would be so it's still an invalid pointer. 
class ELFDissectSection(ELFSubset):
    '''
    Access the contents of an ELF section like ``.text`` or ``.data``

    :param elfwrap: ELFDissectFile wrapper for the file
    :param idx: section index in section header table
    :param section: section object from C module
    '''

    def __init__(self, elfwrap, idx, section):
        super().__init__()

        self._elfwrap = elfwrap
        self._elffile = elfwrap._elffile
        self._idx = idx
        self._section = self._obj = section
        self.name = section.name
        # pointer format & byte order are per-file, copy them from the wrapper
        self.ptrtype = elfwrap.ptrtype
        self.endian = elfwrap.endian

    def _wrap_data(self, data, dstsect):
        '''
        Wrap an address as ELFData relative to the section containing it

        :param data: address value to wrap
        :param dstsect: section the address points into, or None to look
            it up by address
        '''
        if dstsect is None:
            # NOTE(review): if no section covers this address the lookup
            # presumably returns None and the next line fails - confirm
            dstsect = self._elfwrap._elffile.get_section_addr(data)
        offs = data - dstsect.sh_addr
        dstsect = self._elfwrap.get_section(dstsect.idx)
        return ELFData(dstsect, offs, None)

class ELFDissectFile(ELFSubset):
    '''
    Access the contents of an ELF file.

    Note that offsets for array subscript and relocation/pointer access are
    based on the file's virtual address space and are NOT offsets to the
    start of the file on disk!

    (Shared libraries frequently have a virtual address space starting at 0,
    but non-PIE executables have an architecture specific default loading
    address like 0x400000 on x86.)

    :param filename: ELF file to open
    '''

    def __init__(self, filename):
        super().__init__()

        self.name = filename
        self._elffile = self._obj = ELFFile(filename)
        # cache of ELFDissectSection wrappers, keyed by section index
        self._sections = {}

        # struct format character for one pointer, and byte order prefix
        self.ptrtype = 'I' if self._elffile.elfclass == 32 else 'Q'
        self.endian = '>' if self._elffile.bigendian else '<'

    @property
    def _elfwrap(self):
        # the file is its own wrapper; sections point back here instead
        return self

    def _wrap_data(self, data, dstsect):
        # whole-file access is addressed directly, dstsect is not needed
        return ELFData(self, data, None)

    def get_section(self, secname):
        '''
        Look up section by name or index

        :param secname: section name, or (if an int) section index
        :returns: ELFDissectSection wrapper (created once per index and
            cached), or None if the lookup did not produce a section
        '''
        by_index = isinstance(secname, int)
        if by_index:
            section = self._elffile.get_section_idx(secname)
        else:
            section = self._elffile.get_section(secname)

        # applies to both lookup kinds so a failed index lookup gives None
        # just like a failed name lookup, rather than an AttributeError from
        # the ELFDissectSection constructor
        # NOTE(review): get_section_idx may raise instead of returning None
        # for a bad index - the check is harmless either way
        if section is None:
            return None

        sh_idx = secname if by_index else section.idx

        if sh_idx not in self._sections:
            self._sections[sh_idx] = ELFDissectSection(self, sh_idx, section)

        return self._sections[sh_idx]
import struct
from hashlib import sha256

def bititer(data, bits, startbit = True):
    '''
    just iterate the individual bits out from a bytes object

    if startbit is True, an '1' bit is inserted at the very beginning:
    the first value yielded is the low <bits>-1 bits of the first byte
    with the top bit of the group forced to 1.

    goes <bits> at a time, starts at LSB.  Iteration stops as soon as all
    input bytes have been pulled in; leftover bits that do not fill
    another group are dropped.

    :param data: bytes, or any iterable of integers; it is copied and the
        caller's object is left untouched
    :param bits: number of bits per yielded value
    :param startbit: whether to force the start bit into the first value
    '''
    # private copy + read position instead of pop(0) on the caller's list:
    # no side effect on the argument, works for bytes/tuples, and O(n)
    items = list(data)
    count = len(items)
    pos = 0
    mask = (1 << bits) - 1

    bitavail, v = 0, 0
    if startbit and count > 0:
        v = items[0]
        pos = 1
        yield (v & mask) | (1 << (bits - 1))
        # only <bits>-1 bits of the first byte were actually consumed
        bitavail = 9 - bits
        v >>= bits - 1

    while pos < count:
        while bitavail < bits:
            v |= items[pos] << bitavail
            pos += 1
            bitavail += 8
        yield v & mask
        bitavail -= bits
        v >>= bits

def base32c(data):
    '''
    Crockford base32 with extra dashes

    Encodes at most 10 symbols from the start of the input, formatted as
    two groups of 5 separated by a dash; shorter input gives shorter
    output.

    :param data: str (each character is used by its code point) or
        bytes / iterable of integers
    '''
    chs = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"
    if isinstance(data, str):
        data = [ord(v) for v in data]

    symbols = []
    for i, bits in enumerate(bititer(data, 5)):
        if i == 5:
            symbols.append('-')
        elif i == 10:
            break
        symbols.append(chs[bits])
    return ''.join(symbols)

def uidhash(filename, hashstr, hashu32a, hashu32b):
    '''
    xref Unique ID hash used in FRRouting

    SHA-256 over the last two path components of the filename, the hash
    string and the two 32-bit values (big endian), rendered with base32c().

    :param filename: source file path; only the last directory and the
        file name itself enter the hash
    :param hashstr: string to hash
    :param hashu32a: first 32-bit unsigned value
    :param hashu32b: second 32-bit unsigned value
    :returns: ID string in ``XXXXX-XXXXX`` form
    '''
    filename = '/'.join(filename.rsplit('/')[-2:])

    hdata = filename.encode('UTF-8') + hashstr.encode('UTF-8')
    hdata += struct.pack('>II', hashu32a, hashu32b)
    i = sha256(hdata).digest()
    return base32c(i)
basepath = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) @@ -125,6 +129,14 @@ for clippy_file in clippy_scan: out_lines.append(clippydep.substitute(clippybase=clippy_file[:-2])) out_lines.append("") +out_lines.append("xrefs = %s" % (" ".join(["%s.xref" % target for target in xref_targets]))) +out_lines.append("frr.xref: $(xrefs)") +out_lines.append("") + +#frr.xref: $(bin_PROGRAMS) $(sbin_PROGRAMS) $(lib_LTLIBRARIES) $(module_LTLIBRARIES) +# $(AM_V_XRELFO) $(CLIPPY) $(top_srcdir)/python/xrelfo.py -o $@ $^ + +out_lines.append("") out_lines.extend(bcdeps) out_lines.append("") bc_targets = [] diff --git a/python/runtests.py b/python/runtests.py new file mode 100644 index 0000000000..bcf650b329 --- /dev/null +++ b/python/runtests.py @@ -0,0 +1,14 @@ +import pytest +import sys +import os + +try: + import _clippy +except ImportError: + sys.stderr.write('''these tests need to be run with the _clippy C extension +module available. Try running "clippy runtests.py ...". +''') + sys.exit(1) + +os.chdir(os.path.dirname(os.path.abspath(__file__))) +raise SystemExit(pytest.main(sys.argv[1:])) diff --git a/python/test_xrelfo.py b/python/test_xrelfo.py new file mode 100644 index 0000000000..3ae24ea7b3 --- /dev/null +++ b/python/test_xrelfo.py @@ -0,0 +1,65 @@ +# some basic tests for xrelfo & the python ELF machinery +# +# Copyright (C) 2020 David Lamparter for NetDEF, Inc. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. 
+# +# You should have received a copy of the GNU General Public License along +# with this program; see the file COPYING; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +import sys +import os +import pytest +from pprint import pprint + +root = os.path.dirname(os.path.dirname(__file__)) +sys.path.append(os.path.join(root, 'python')) + +import xrelfo +from clippy import elf, uidhash + +def test_uidhash(): + assert uidhash.uidhash("lib/test_xref.c", "logging call", 3, 0) \ + == 'H7KJB-67TBH' + +def test_xrelfo_other(): + for data in [ + elf.ELFNull(), + elf.ELFUnresolved('somesym', 0), + ]: + + dissect = xrelfo.XrefPtr(data) + print(repr(dissect)) + + with pytest.raises(AttributeError): + dissect.xref + +def test_xrelfo_obj(): + xrelfo_ = xrelfo.Xrelfo() + edf = xrelfo_.load_elf(os.path.join(root, 'lib/.libs/zclient.o'), 'zclient.lo') + xrefs = xrelfo_._xrefs + + with pytest.raises(elf.ELFAccessError): + edf[0:4] + + pprint(xrefs[0]) + pprint(xrefs[0]._data) + +def test_xrelfo_bin(): + xrelfo_ = xrelfo.Xrelfo() + edf = xrelfo_.load_elf(os.path.join(root, 'lib/.libs/libfrr.so'), 'libfrr.la') + xrefs = xrelfo_._xrefs + + assert edf[0:4] == b'\x7fELF' + + pprint(xrefs[0]) + pprint(xrefs[0]._data) diff --git a/python/tiabwarfo.py b/python/tiabwarfo.py new file mode 100644 index 0000000000..265173e314 --- /dev/null +++ b/python/tiabwarfo.py @@ -0,0 +1,203 @@ +# FRR DWARF structure definition extractor +# +# Copyright (C) 2020 David Lamparter for NetDEF, Inc. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; see the file COPYING; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +import sys +import os +import subprocess +import re +import argparse +import subprocess +import json + +structs = ['xref', 'xref_logmsg', 'xref_threadsched', 'xref_install_element', 'xrefdata', 'xrefdata_logmsg', 'cmd_element'] + +def extract(filename='lib/.libs/libfrr.so'): + ''' + Convert output from "pahole" to JSON. + + Example pahole output: + $ pahole -C xref lib/.libs/libfrr.so + struct xref { + struct xrefdata * xrefdata; /* 0 8 */ + enum xref_type type; /* 8 4 */ + int line; /* 12 4 */ + const char * file; /* 16 8 */ + const char * func; /* 24 8 */ + + /* size: 32, cachelines: 1, members: 5 */ + /* last cacheline: 32 bytes */ + }; + ''' + pahole = subprocess.check_output(['pahole', '-C', ','.join(structs), filename]).decode('UTF-8') + + struct_re = re.compile(r'^struct ([^ ]+) \{([^\}]+)};', flags=re.M | re.S) + field_re = re.compile(r'^\s*(?P<type>[^;\(]+)\s+(?P<name>[^;\[\]]+)(?:\[(?P<array>\d+)\])?;\s*\/\*(?P<comment>.*)\*\/\s*$') + comment_re = re.compile(r'^\s*\/\*.*\*\/\s*$') + + pastructs = struct_re.findall(pahole) + out = {} + + for sname, data in pastructs: + this = out.setdefault(sname, {}) + fields = this.setdefault('fields', []) + + lines = data.strip().splitlines() + + next_offs = 0 + + for line in lines: + if line.strip() == '': + continue + m = comment_re.match(line) + if m is not None: + continue + + m = field_re.match(line) + if m is not None: + offs, size = m.group('comment').strip().split() + offs = int(offs) + size = int(size) + typ_ = m.group('type').strip() + name = m.group('name') + + if name.startswith('(*'): + # function pointer + typ_ = typ_ + ' *' + name = name[2:].split(')')[0] + + data = { + 'name': name, + 'type': typ_, + # 'offset': offs, 
class FieldApplicator(object):
    '''
    Fill ELFDissectStruct fields list from pahole/JSON

    Takes the per-struct field descriptions (the ``{'structname':
    {'fields': [{'name': ..., 'type': ..., 'array': ...}]}}`` layout found
    in xrefstructs.json) and sets a ``fields`` list on each registered
    class.

    :param data: dict of struct descriptions as loaded from the JSON file
    '''

    # only what we really need. add more as needed.
    packtypes = {
        'int': 'i',
        'uint8_t': 'B',
        'uint16_t': 'H',
        'uint32_t': 'I',
        'char': 's',
    }

    def __init__(self, data):
        self.data = data
        self.classes = []
        self.clsmap = {}

    def add(self, cls):
        '''
        Register a class; ``cls.struct`` names the C struct it represents
        '''
        self.classes.append(cls)
        self.clsmap[cls.struct] = cls

    def resolve(self, cls):
        '''
        Build ``cls.fields`` from the description of ``cls.struct``

        Each entry is a tuple starting with the field name (after applying
        the optional ``cls.fieldrename`` mapping), followed by either a
        struct format string, a registered class for an embedded struct,
        or ``'P'`` plus the pointed-to type (``str``, a registered class,
        or None) for pointers.  Non-char arrays are expanded into one
        entry per element, named ``<name>_<index>``.

        :raises ValueError: for a field type that cannot be handled
        '''
        out = []

        fieldrename = getattr(cls, 'fieldrename', {})
        def mkname(n):
            return (fieldrename.get(n, n),)

        # field offsets / padding are deliberately not emitted here, that
        # would break reuse of xrefstructs.json across 32bit & 64bit
        # platforms

        for field in self.data[cls.struct]['fields']:
            # pretty hacky C types handling, but covers what we need
            typs = [i for i in field['type'].split() if i != 'const']

            ptrlevel = 0
            while typs and typs[-1] == '*':
                typs.pop()
                ptrlevel += 1

            if not typs:
                # nothing left but qualifiers/stars
                raise ValueError('cannot decode field %s in struct %s (%s)' % (
                        field['name'], cls.struct, field['type']))

            if ptrlevel > 0:
                packtype = ('P', None)
                if ptrlevel == 1:
                    if typs[0] == 'char':
                        packtype = ('P', str)
                    elif typs[0] == 'struct' and typs[1] in self.clsmap:
                        packtype = ('P', self.clsmap[typs[1]])
            elif typs[0] == 'enum':
                packtype = ('I',)
            elif typs[0] in self.packtypes:
                packtype = (self.packtypes[typs[0]],)
            elif typs[0] == 'struct':
                if typs[1] in self.clsmap:
                    packtype = (self.clsmap[typs[1]],)
                else:
                    raise ValueError('embedded struct %s not in extracted data' % (typs[1],))
            else:
                # argument order matches the message: field first, then struct
                raise ValueError('cannot decode field %s in struct %s (%s)' % (
                        field['name'], cls.struct, field['type']))

            if 'array' in field and typs[0] == 'char' and ptrlevel == 0:
                # char array = fixed-size byte string;  an array of
                # "char *" is an array of pointers and goes the generic way
                packtype = ('%ds' % field['array'],)
                out.append(mkname(field['name']) + packtype)
            elif 'array' in field:
                for i in range(0, field['array']):
                    out.append(mkname('%s_%d' % (field['name'], i)) + packtype)
            else:
                out.append(mkname(field['name']) + packtype)

        cls.fields = out

    def __call__(self):
        '''
        Resolve all registered classes, in registration order
        '''
        for cls in self.classes:
            self.resolve(cls)
+ "xref_install_element": { + "fields": [ + { + "name": "xref", + "type": "struct xref" + }, + { + "name": "cmd_element", + "type": "const struct cmd_element *" + }, + { + "name": "node_type", + "type": "enum node_type" + } + ] + }, + "xref_logmsg": { + "fields": [ + { + "name": "xref", + "type": "struct xref" + }, + { + "name": "fmtstring", + "type": "const char *" + }, + { + "name": "priority", + "type": "uint32_t" + }, + { + "name": "ec", + "type": "uint32_t" + }, + { + "name": "args", + "type": "const char *" + } + ] + }, + "xref_threadsched": { + "fields": [ + { + "name": "xref", + "type": "struct xref" + }, + { + "name": "funcname", + "type": "const char *" + }, + { + "name": "dest", + "type": "const char *" + }, + { + "name": "thread_type", + "type": "uint32_t" + } + ] + }, + "xrefdata": { + "fields": [ + { + "name": "xref", + "type": "const struct xref *" + }, + { + "array": 16, + "name": "uid", + "type": "char" + }, + { + "name": "hashstr", + "type": "const char *" + }, + { + "array": 2, + "name": "hashu32", + "type": "uint32_t" + } + ] + } +}
\ No newline at end of file diff --git a/python/xrelfo.py b/python/xrelfo.py new file mode 100644 index 0000000000..0ecd008579 --- /dev/null +++ b/python/xrelfo.py @@ -0,0 +1,424 @@ +# FRR ELF xref extractor +# +# Copyright (C) 2020 David Lamparter for NetDEF, Inc. +# +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +# more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; see the file COPYING; if not, write to the Free Software +# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +import sys +import os +import struct +import re +import traceback +import json +import argparse + +from clippy.uidhash import uidhash +from clippy.elf import * +from clippy import frr_top_src +from tiabwarfo import FieldApplicator + +try: + with open(os.path.join(frr_top_src, 'python', 'xrefstructs.json'), 'r') as fd: + xrefstructs = json.load(fd) +except FileNotFoundError: + sys.stderr.write(''' +The "xrefstructs.json" file (created by running tiabwarfo.py with the pahole +tool available) could not be found. It should be included with the sources. +''') + sys.exit(1) + +# constants, need to be kept in sync manually... 
+ +XREFT_THREADSCHED = 0x100 +XREFT_LOGMSG = 0x200 +XREFT_DEFUN = 0x300 +XREFT_INSTALL_ELEMENT = 0x301 + +# LOG_* +priovals = {} +prios = ['0', '1', '2', 'E', 'W', 'N', 'I', 'D'] + + +class XrelfoJson(object): + def dump(self): + pass + + def check(self, wopt): + yield from [] + + def to_dict(self, refs): + pass + +class Xref(ELFDissectStruct, XrelfoJson): + struct = 'xref' + fieldrename = {'type': 'typ'} + containers = {} + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + self._container = None + if self.xrefdata: + self.xrefdata.ref_from(self, self.typ) + + def container(self): + if self._container is None: + if self.typ in self.containers: + self._container = self.container_of(self.containers[self.typ], 'xref') + return self._container + + def check(self, *args, **kwargs): + if self._container: + yield from self._container.check(*args, **kwargs) + + +class Xrefdata(ELFDissectStruct): + struct = 'xrefdata' + + # uid is all zeroes in the data loaded from ELF + fieldrename = {'uid': '_uid'} + + def ref_from(self, xref, typ): + self.xref = xref + + @property + def uid(self): + if self.hashstr is None: + return None + return uidhash(self.xref.file, self.hashstr, self.hashu32_0, self.hashu32_1) + +class XrefPtr(ELFDissectStruct): + fields = [ + ('xref', 'P', Xref), + ] + +class XrefThreadSched(ELFDissectStruct, XrelfoJson): + struct = 'xref_threadsched' +Xref.containers[XREFT_THREADSCHED] = XrefThreadSched + +class XrefLogmsg(ELFDissectStruct, XrelfoJson): + struct = 'xref_logmsg' + + def _warn_fmt(self, text): + lines = text.split('\n') + yield ((self.xref.file, self.xref.line), '%s:%d: %s (in %s())%s\n' % (self.xref.file, self.xref.line, lines[0], self.xref.func, ''.join(['\n' + l for l in lines[1:]]))) + + fmt_regexes = [ + (re.compile(r'([\n\t]+)'), 'error: log message contains tab or newline'), + # (re.compile(r'^(\s+)'), 'warning: log message starts with whitespace'), + (re.compile(r'^((?:warn(?:ing)?|error):\s*)', re.I), 'warning: 
log message starts with severity'), + ] + arg_regexes = [ + # the (?<![\?:] ) avoids warning for x ? inet_ntop(...) : "(bla)" + (re.compile(r'((?<![\?:] )inet_ntop\s*\(\s*(?:[AP]F_INET|2)\s*,)'), 'cleanup: replace inet_ntop(AF_INET, ...) with %pI4', lambda s: True), + (re.compile(r'((?<![\?:] )inet_ntop\s*\(\s*(?:[AP]F_INET6|10)\s*,)'), 'cleanup: replace inet_ntop(AF_INET6, ...) with %pI6', lambda s: True), + (re.compile(r'((?<![\?:] )inet_ntoa)'), 'cleanup: replace inet_ntoa(...) with %pI4', lambda s: True), + (re.compile(r'((?<![\?:] )ipaddr2str)'), 'cleanup: replace ipaddr2str(...) with %pIA', lambda s: True), + (re.compile(r'((?<![\?:] )prefix2str)'), 'cleanup: replace prefix2str(...) with %pFX', lambda s: True), + (re.compile(r'((?<![\?:] )prefix_mac2str)'), 'cleanup: replace prefix_mac2str(...) with %pEA', lambda s: True), + (re.compile(r'((?<![\?:] )sockunion2str)'), 'cleanup: replace sockunion2str(...) with %pSU', lambda s: True), + + # (re.compile(r'^(\s*__(?:func|FUNCTION|PRETTY_FUNCTION)__\s*)'), 'error: debug message starts with __func__', lambda s: (s.priority & 7 == 7) ), + ] + + def check(self, wopt): + def fmt_msg(rex, itext): + if sys.stderr.isatty(): + items = rex.split(itext) + out = [] + for i, text in enumerate(items): + if (i % 2) == 1: + out.append('\033[41;37;1m%s\033[m' % repr(text)[1:-1]) + else: + out.append(repr(text)[1:-1]) + + excerpt = ''.join(out) + else: + excerpt = repr(itext)[1:-1] + return excerpt + + if wopt.Wlog_format: + for rex, msg in self.fmt_regexes: + if not rex.search(self.fmtstring): + continue + + excerpt = fmt_msg(rex, self.fmtstring) + yield from self._warn_fmt('%s: "%s"' % (msg, excerpt)) + + if wopt.Wlog_args: + for rex, msg, cond in self.arg_regexes: + if not cond(self): + continue + if not rex.search(self.args): + continue + + excerpt = fmt_msg(rex, self.args) + yield from self._warn_fmt('%s:\n\t"%s",\n\t%s' % (msg, repr(self.fmtstring)[1:-1], excerpt)) + + def dump(self): + print('%-60s %s%s %-25s [EC %d] %s' % 
( + '%s:%d %s()' % (self.xref.file, self.xref.line, self.xref.func), + prios[self.priority & 7], + priovals.get(self.priority & 0x30, ' '), + self.xref.xrefdata.uid, self.ec, self.fmtstring)) + + def to_dict(self, xrelfo): + jsobj = dict([(i, getattr(self.xref, i)) for i in ['file', 'line', 'func']]) + if self.ec != 0: + jsobj['ec'] = self.ec + jsobj['fmtstring'] = self.fmtstring + jsobj['args'] = self.args + jsobj['priority'] = self.priority & 7 + jsobj['type'] = 'logmsg' + jsobj['binary'] = self._elfsect._elfwrap.orig_filename + + if self.priority & 0x10: + jsobj.setdefault('flags', []).append('errno') + if self.priority & 0x20: + jsobj.setdefault('flags', []).append('getaddrinfo') + + xrelfo['refs'].setdefault(self.xref.xrefdata.uid, []).append(jsobj) + +Xref.containers[XREFT_LOGMSG] = XrefLogmsg + +class CmdElement(ELFDissectStruct, XrelfoJson): + struct = 'cmd_element' + + cmd_attrs = { 0: None, 1: 'deprecated', 2: 'hidden'} + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def to_dict(self, xrelfo): + jsobj = xrelfo['cli'].setdefault(self.name, {}).setdefault(self._elfsect._elfwrap.orig_filename, {}) + + jsobj.update({ + 'string': self.string, + 'doc': self.doc, + 'attr': self.cmd_attrs.get(self.attr, self.attr), + }) + if jsobj['attr'] is None: + del jsobj['attr'] + + jsobj['defun'] = dict([(i, getattr(self.xref, i)) for i in ['file', 'line', 'func']]) + +Xref.containers[XREFT_DEFUN] = CmdElement + +class XrefInstallElement(ELFDissectStruct, XrelfoJson): + struct = 'xref_install_element' + + def to_dict(self, xrelfo): + jsobj = xrelfo['cli'].setdefault(self.cmd_element.name, {}).setdefault(self._elfsect._elfwrap.orig_filename, {}) + nodes = jsobj.setdefault('nodes', []) + + nodes.append({ + 'node': self.node_type, + 'install': dict([(i, getattr(self.xref, i)) for i in ['file', 'line', 'func']]), + }) + +Xref.containers[XREFT_INSTALL_ELEMENT] = XrefInstallElement + +# shove in field defs +fieldapply = 
class Xrelfo(dict):
    '''
    Collected xref data from one or more input files

    The dict itself is the JSON output document with the two top-level
    keys ``refs`` (unique ID => list of reference entries) and ``cli``
    (command name => per-binary data).  The Xref objects loaded from ELF
    files are additionally kept in ``_xrefs`` for running checks.
    '''

    def __init__(self):
        super().__init__({
            'refs': {},
            'cli': {},
        })
        self._xrefs = []

    def load_file(self, filename):
        '''
        Load xrefs from a file, figuring out what kind of file it is

        libtool ``.la`` / ``.lo`` files are resolved to the library or
        object they describe, script wrappers (``#!``) to the file of the
        same name in ``.libs``.  The result must be either an ELF file or
        a JSON file.

        :param filename: file to load
        :raises ValueError: if the file cannot be resolved or identified
        '''
        orig_filename = filename
        if filename.endswith('.la') or filename.endswith('.lo'):
            basedir = os.path.dirname(filename)
            with open(filename, 'r') as fd:
                for line in fd:
                    line = line.strip()
                    if line.startswith('#') or line == '' or '=' not in line:
                        continue

                    var, val = line.split('=', 1)
                    if var not in ['library_names', 'pic_object']:
                        continue
                    if val.startswith("'") or val.startswith('"'):
                        val = val[1:-1]

                    if var == 'pic_object':
                        filename = os.path.join(basedir, val)
                        break

                    names = val.split()
                    if not names:
                        # empty library_names has nothing to load;  keep
                        # looking so the proper error below is raised
                        # instead of an IndexError
                        continue
                    filename = os.path.join(basedir, '.libs', names[0])
                    break
                else:
                    raise ValueError('could not process libtool file "%s"' % orig_filename)

        while True:
            with open(filename, 'rb') as fd:
                hdr = fd.read(4)

            if hdr == b'\x7fELF':
                self.load_elf(filename, orig_filename)
                return

            if hdr[:2] == b'#!':
                # wrapper script, retry with the file in .libs
                path, name = os.path.split(filename)
                filename = os.path.join(path, '.libs', name)
                continue

            if hdr[:1] == b'{':
                with open(filename, 'r') as fd:
                    self.load_json(fd)
                return

            raise ValueError('cannot determine file type for %s' % (filename))

    def load_elf(self, filename, orig_filename):
        '''
        Load xrefs from an ELF file

        The xref pointer array is located through the ``FRRouting`` /
        ``XREF`` ELF note if there is one, otherwise through the
        ``xref_array`` section.

        :param filename: ELF file to read
        :param orig_filename: name the file was originally given as; it is
            stored on the returned object as ``orig_filename``
        :returns: the ELFDissectFile for the file
        :raises ValueError: if neither note nor section exist
        '''
        edf = ELFDissectFile(filename)
        edf.orig_filename = orig_filename

        note = edf._elffile.find_note('FRRouting', 'XREF')
        if note is not None:
            endian = '>' if edf._elffile.bigendian else '<'
            mem = edf._elffile[note]
            if edf._elffile.elfclass == 64:
                fmt, ptrsize = 'QQ', 8
            else:
                fmt, ptrsize = 'II', 4

            # two pointer-sized values;  the first is adjusted by the
            # note's start, the second by the start plus one pointer size
            # (presumably each is relative to its own location in the note)
            start, end = struct.unpack(endian + fmt, mem)
            start += note.start
            end += note.start + ptrsize

            ptrs = edf.iter_data(XrefPtr, slice(start, end))

        else:
            xrefarray = edf.get_section('xref_array')
            if xrefarray is None:
                raise ValueError('file has neither xref note nor xref_array section')

            ptrs = xrefarray.iter_data(XrefPtr)

        for ptr in ptrs:
            if ptr.xref is None:
                print('NULL xref')
                continue
            self._xrefs.append(ptr.xref)

            container = ptr.xref.container()
            if container is None:
                continue
            container.to_dict(self)

        return edf

    def load_json(self, fd):
        '''
        Merge previously written JSON output into this object

        Reference entries already present under the same ID are not added
        a second time; ``cli`` entries are merged per command with
        ``dict.update``.

        :param fd: file object to read the JSON document from
        :returns: the loaded JSON document
        '''
        data = json.load(fd)
        for uid, items in data['refs'].items():
            myitems = self['refs'].setdefault(uid, [])
            for item in items:
                if item in myitems:
                    continue
                myitems.append(item)

        for cmd, items in data['cli'].items():
            self['cli'].setdefault(cmd, {}).update(items)

        return data

    def check(self, checks):
        '''
        Run checks on all xrefs loaded from ELF files

        :param checks: options object passed through to each xref's check()
        '''
        for xref in self._xrefs:
            yield from xref.check(checks)

def main():
    '''
    Command line entry point:  parse arguments, then run _main()
    (under cProfile if --profile is given)
    '''
    argp = argparse.ArgumentParser(description = 'FRR xref ELF extractor')
    argp.add_argument('-o', dest='output', type=str, help='write JSON output')
    argp.add_argument('--out-by-file', type=str, help='write by-file JSON output')
    argp.add_argument('-Wlog-format', action='store_const', const=True)
    argp.add_argument('-Wlog-args', action='store_const', const=True)
    argp.add_argument('--profile', action='store_const', const=True)
    argp.add_argument('binaries', metavar='BINARY', nargs='+', type=str, help='files to read (ELF files or libtool objects)')
    args = argp.parse_args()

    if args.profile:
        import cProfile
        cProfile.runctx('_main(args)', globals(), {'args': args}, sort='cumtime')
    else:
        _main(args)

def _main(args):
    '''
    Load all input files, print check results and write the JSON outputs

    Input files that fail to load are reported on stderr with a traceback;
    processing continues with the remaining files, but the process exits
    with status 1 before any output file is written.

    :param args: parsed command line arguments from main()
    '''
    errors = 0
    xrelfo = Xrelfo()

    for fn in args.binaries:
        try:
            xrelfo.load_file(fn)
        except Exception:
            # not a bare "except:" - Ctrl-C / SystemExit must not be
            # counted as a broken input file and carried on from
            errors += 1
            sys.stderr.write('while processing %s:\n' % (fn))
            traceback.print_exc()

    if any(option.startswith('W') for option in dir(args)):
        checks = sorted(xrelfo.check(args))
        sys.stderr.write(''.join(c[-1] for c in checks))

    refs = xrelfo['refs']

    # flag unique IDs whose entries don't all share one format string
    for k, v in refs.items():
        strs = set(i['fmtstring'] for i in v)
        if len(strs) != 1:
            print('\033[31;1m%s\033[m' % k)

    out = xrelfo
    outbyfile = {}
    for locs in refs.values():
        for loc in locs:
            # copy, the same dict is also part of the main output
            entry = dict(loc)
            outbyfile.setdefault(entry.pop('file'), []).append(entry)

    for entries in outbyfile.values():
        entries.sort(key=lambda x: x['line'])

    if errors:
        sys.exit(1)

    # write to a temporary name and rename, so an existing output file is
    # never left half-written
    if args.output:
        with open(args.output + '.tmp', 'w') as fd:
            json.dump(out, fd, indent=2, sort_keys=True)
        os.rename(args.output + '.tmp', args.output)

    if args.out_by_file:
        with open(args.out_by_file + '.tmp', 'w') as fd:
            json.dump(outbyfile, fd, indent=2, sort_keys=True)
        os.rename(args.out_by_file + '.tmp', args.out_by_file)

if __name__ == '__main__':
    main()
