summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRuss White <russ@riw.us>2021-03-09 07:58:43 -0500
committerGitHub <noreply@github.com>2021-03-09 07:58:43 -0500
commitda0a277faeb50082e92fe2c962cbf69ab3b153c1 (patch)
treeffb9bb3cdc52672e73c73a61fff3c876499f21cc
parent6adedc6fe3773fe1abd4ef62797cfb2d336348be (diff)
parent3cb0eab36fe5b6536b261e704942ff03fc855f13 (diff)
Merge pull request #6807 from opensourcerouting/xref-extract
xrefs extraction tool
-rw-r--r--.gitignore1
-rw-r--r--Makefile.am8
-rwxr-xr-xconfigure.ac14
-rw-r--r--debian/control1
-rw-r--r--doc/developer/building-frr-for-archlinux.rst2
-rw-r--r--doc/developer/building-frr-for-centos6.rst3
-rw-r--r--doc/developer/building-frr-for-centos7.rst3
-rw-r--r--doc/developer/building-frr-for-centos8.rst3
-rw-r--r--doc/developer/building-frr-for-debian8.rst2
-rw-r--r--doc/developer/building-frr-for-debian9.rst2
-rw-r--r--doc/developer/building-frr-for-fedora.rst3
-rw-r--r--doc/developer/building-frr-for-opensuse.rst3
-rw-r--r--doc/developer/building-frr-for-ubuntu1404.rst2
-rw-r--r--doc/developer/building-frr-for-ubuntu1604.rst3
-rw-r--r--doc/developer/building-frr-for-ubuntu1804.rst3
-rw-r--r--doc/developer/building-frr-for-ubuntu2004.rst2
-rw-r--r--doc/developer/xrefs.rst49
-rw-r--r--lib/clippy.h2
-rw-r--r--lib/command_graph.h2
-rw-r--r--lib/command_py.c2
-rw-r--r--lib/elf_py.c1301
-rw-r--r--lib/subdir.am30
-rw-r--r--lib/zlog.h3
-rw-r--r--python/clippy/__init__.py2
-rw-r--r--python/clippy/elf.py574
-rw-r--r--python/clippy/uidhash.py71
-rw-r--r--python/makefile.py12
-rw-r--r--python/runtests.py14
-rw-r--r--python/test_xrelfo.py65
-rw-r--r--python/tiabwarfo.py203
-rw-r--r--python/xrefstructs.json140
-rw-r--r--python/xrelfo.py424
32 files changed, 2933 insertions, 16 deletions
diff --git a/.gitignore b/.gitignore
index fbbb04b60c..97349769ad 100644
--- a/.gitignore
+++ b/.gitignore
@@ -59,6 +59,7 @@
*.cg.json
*.cg.dot
*.cg.svg
+*.xref
### gcov outputs
diff --git a/Makefile.am b/Makefile.am
index 90c8407010..bb8e97a115 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -187,8 +187,16 @@ EXTRA_DIST += \
\
python/clidef.py \
python/clippy/__init__.py \
+ python/clippy/elf.py \
+ python/clippy/uidhash.py \
python/makevars.py \
python/makefile.py \
+ python/tiabwarfo.py \
+ python/xrelfo.py \
+ python/test_xrelfo.py \
+ python/runtests.py \
+ \
+ python/xrefstructs.json \
\
redhat/frr.logrotate \
redhat/frr.pam \
diff --git a/configure.ac b/configure.ac
index f3d1f38986..139fca7c42 100755
--- a/configure.ac
+++ b/configure.ac
@@ -794,6 +794,20 @@ fi
#
AS_IF([test "$host" = "$build"], [
+ AC_CHECK_HEADER([gelf.h], [], [
+ AC_MSG_ERROR([libelf headers are required for building clippy. (Host only when cross-compiling.)])
+ ])
+ AC_CHECK_LIB([elf], [elf_memory], [], [
+ AC_MSG_ERROR([libelf is required for building clippy. (Host only when cross-compiling.)])
+ ])
+
+ AC_CHECK_LIB([elf], [elf_getdata_rawchunk], [
+ AC_DEFINE([HAVE_ELF_GETDATA_RAWCHUNK], [1], [Have elf_getdata_rawchunk()])
+ ])
+ AC_CHECK_LIB([elf], [gelf_getnote], [
+ AC_DEFINE([HAVE_GELF_GETNOTE], [1], [Have gelf_getnote()])
+ ])
+
FRR_PYTHON_DEV
], [
FRR_PYTHON
diff --git a/debian/control b/debian/control
index b9e96b55d0..7a08cbbdb0 100644
--- a/debian/control
+++ b/debian/control
@@ -13,6 +13,7 @@ Build-Depends: bison,
install-info,
libc-ares-dev,
libcap-dev,
+ libelf-dev,
libjson-c-dev | libjson0-dev,
libpam0g-dev | libpam-dev,
libpcre3-dev,
diff --git a/doc/developer/building-frr-for-archlinux.rst b/doc/developer/building-frr-for-archlinux.rst
index f62add5963..e589a9f724 100644
--- a/doc/developer/building-frr-for-archlinux.rst
+++ b/doc/developer/building-frr-for-archlinux.rst
@@ -11,7 +11,7 @@ Installing Dependencies
git autoconf automake libtool make cmake pcre readline texinfo \
pkg-config pam json-c bison flex python-pytest \
c-ares python systemd python2-ipaddress python-sphinx \
- systemd-libs net-snmp perl libcap
+ systemd-libs net-snmp perl libcap libelf
.. include:: building-libyang.rst
diff --git a/doc/developer/building-frr-for-centos6.rst b/doc/developer/building-frr-for-centos6.rst
index b730a5ee32..5d3be492de 100644
--- a/doc/developer/building-frr-for-centos6.rst
+++ b/doc/developer/building-frr-for-centos6.rst
@@ -45,7 +45,8 @@ Add packages:
sudo yum install git autoconf automake libtool make \
readline-devel texinfo net-snmp-devel groff pkgconfig \
- json-c-devel pam-devel flex epel-release c-ares-devel libcap-devel
+ json-c-devel pam-devel flex epel-release c-ares-devel libcap-devel \
+ elfutils-libelf-devel
Install newer version of bison (CentOS 6 package source is too old) from CentOS
7:
diff --git a/doc/developer/building-frr-for-centos7.rst b/doc/developer/building-frr-for-centos7.rst
index eb97150d67..8d0aea943c 100644
--- a/doc/developer/building-frr-for-centos7.rst
+++ b/doc/developer/building-frr-for-centos7.rst
@@ -21,7 +21,8 @@ Add packages:
sudo yum install git autoconf automake libtool make \
readline-devel texinfo net-snmp-devel groff pkgconfig \
json-c-devel pam-devel bison flex pytest c-ares-devel \
- python-devel systemd-devel python-sphinx libcap-devel
+ python-devel systemd-devel python-sphinx libcap-devel \
+ elfutils-libelf-devel
.. include:: building-libyang.rst
diff --git a/doc/developer/building-frr-for-centos8.rst b/doc/developer/building-frr-for-centos8.rst
index 75beb53378..77fe489358 100644
--- a/doc/developer/building-frr-for-centos8.rst
+++ b/doc/developer/building-frr-for-centos8.rst
@@ -14,7 +14,8 @@ Add packages:
sudo dnf install --enablerepo=PowerTools git autoconf pcre-devel \
automake libtool make readline-devel texinfo net-snmp-devel pkgconfig \
groff pkgconfig json-c-devel pam-devel bison flex python2-pytest \
- c-ares-devel python2-devel systemd-devel libcap-devel
+ c-ares-devel python2-devel systemd-devel libcap-devel \
+ elfutils-libelf-devel
.. include:: building-libyang.rst
diff --git a/doc/developer/building-frr-for-debian8.rst b/doc/developer/building-frr-for-debian8.rst
index c12bf46f8d..51dd07c42a 100644
--- a/doc/developer/building-frr-for-debian8.rst
+++ b/doc/developer/building-frr-for-debian8.rst
@@ -18,7 +18,7 @@ Add packages:
sudo apt-get install git autoconf automake libtool make \
libreadline-dev texinfo libjson-c-dev pkg-config bison flex python3-pip \
libc-ares-dev python3-dev python3-sphinx build-essential libsystemd-dev \
- libsnmp-dev libcap-dev
+ libsnmp-dev libcap-dev libelf-dev
Install newer pytest (>3.0) from pip
diff --git a/doc/developer/building-frr-for-debian9.rst b/doc/developer/building-frr-for-debian9.rst
index f976b9f49a..919b010314 100644
--- a/doc/developer/building-frr-for-debian9.rst
+++ b/doc/developer/building-frr-for-debian9.rst
@@ -11,7 +11,7 @@ Add packages:
sudo apt-get install git autoconf automake libtool make \
libreadline-dev texinfo libjson-c-dev pkg-config bison flex \
libc-ares-dev python3-dev python3-pytest python3-sphinx build-essential \
- libsnmp-dev libsystemd-dev libcap-dev
+ libsnmp-dev libsystemd-dev libcap-dev libelf-dev
.. include:: building-libyang.rst
diff --git a/doc/developer/building-frr-for-fedora.rst b/doc/developer/building-frr-for-fedora.rst
index 4ab59490fd..5fecd8a826 100644
--- a/doc/developer/building-frr-for-fedora.rst
+++ b/doc/developer/building-frr-for-fedora.rst
@@ -14,7 +14,8 @@ Installing Dependencies
sudo dnf install git autoconf automake libtool make \
readline-devel texinfo net-snmp-devel groff pkgconfig json-c-devel \
pam-devel python3-pytest bison flex c-ares-devel python3-devel \
- python3-sphinx perl-core patch systemd-devel libcap-devel
+ python3-sphinx perl-core patch systemd-devel libcap-devel \
+ elfutils-libelf-devel
.. include:: building-libyang.rst
diff --git a/doc/developer/building-frr-for-opensuse.rst b/doc/developer/building-frr-for-opensuse.rst
index 5ed714a67e..4e886e9c25 100644
--- a/doc/developer/building-frr-for-opensuse.rst
+++ b/doc/developer/building-frr-for-opensuse.rst
@@ -13,7 +13,8 @@ Installing Dependencies
zypper in git autoconf automake libtool make \
readline-devel texinfo net-snmp-devel groff pkgconfig libjson-c-devel\
pam-devel python3-pytest bison flex c-ares-devel python3-devel\
- python3-Sphinx perl patch systemd-devel libcap-devel libyang-devel
+ python3-Sphinx perl patch systemd-devel libcap-devel libyang-devel \
+ libelf-devel
Building & Installing FRR
-------------------------
diff --git a/doc/developer/building-frr-for-ubuntu1404.rst b/doc/developer/building-frr-for-ubuntu1404.rst
index cc54415266..2711e92b6f 100644
--- a/doc/developer/building-frr-for-ubuntu1404.rst
+++ b/doc/developer/building-frr-for-ubuntu1404.rst
@@ -14,7 +14,7 @@ Installing Dependencies
git autoconf automake libtool make libreadline-dev texinfo \
pkg-config libpam0g-dev libjson-c-dev bison flex python3-pytest \
libc-ares-dev python3-dev python3-sphinx install-info build-essential \
- libsnmp-dev perl libcap-dev
+ libsnmp-dev perl libcap-dev libelf-dev
.. include:: building-libyang.rst
diff --git a/doc/developer/building-frr-for-ubuntu1604.rst b/doc/developer/building-frr-for-ubuntu1604.rst
index 63c6f8648c..2cb9536f9b 100644
--- a/doc/developer/building-frr-for-ubuntu1604.rst
+++ b/doc/developer/building-frr-for-ubuntu1604.rst
@@ -14,7 +14,8 @@ Installing Dependencies
git autoconf automake libtool make libreadline-dev texinfo \
pkg-config libpam0g-dev libjson-c-dev bison flex python3-pytest \
libc-ares-dev python3-dev libsystemd-dev python-ipaddress python3-sphinx \
- install-info build-essential libsystemd-dev libsnmp-dev perl libcap-dev
+ install-info build-essential libsystemd-dev libsnmp-dev perl libcap-dev \
+ libelf-dev
.. include:: building-libyang.rst
diff --git a/doc/developer/building-frr-for-ubuntu1804.rst b/doc/developer/building-frr-for-ubuntu1804.rst
index 9d85957d88..eb3991c139 100644
--- a/doc/developer/building-frr-for-ubuntu1804.rst
+++ b/doc/developer/building-frr-for-ubuntu1804.rst
@@ -14,7 +14,8 @@ Installing Dependencies
git autoconf automake libtool make libreadline-dev texinfo \
pkg-config libpam0g-dev libjson-c-dev bison flex python3-pytest \
libc-ares-dev python3-dev libsystemd-dev python-ipaddress python3-sphinx \
- install-info build-essential libsystemd-dev libsnmp-dev perl libcap-dev
+ install-info build-essential libsystemd-dev libsnmp-dev perl libcap-dev \
+ libelf-dev
.. include:: building-libyang.rst
diff --git a/doc/developer/building-frr-for-ubuntu2004.rst b/doc/developer/building-frr-for-ubuntu2004.rst
index ef5d8da551..ffc05a6841 100644
--- a/doc/developer/building-frr-for-ubuntu2004.rst
+++ b/doc/developer/building-frr-for-ubuntu2004.rst
@@ -15,7 +15,7 @@ Installing Dependencies
pkg-config libpam0g-dev libjson-c-dev bison flex python3-pytest \
libc-ares-dev python3-dev libsystemd-dev python-ipaddress python3-sphinx \
install-info build-essential libsystemd-dev libsnmp-dev perl \
- libcap-dev python2
+ libcap-dev python2 libelf-dev
Note that Ubuntu 20 no longer installs python 2.x, so it must be
installed explicitly. Ensure that your system has a symlink named
diff --git a/doc/developer/xrefs.rst b/doc/developer/xrefs.rst
index 6a0794d41b..e8e07dfe1d 100644
--- a/doc/developer/xrefs.rst
+++ b/doc/developer/xrefs.rst
@@ -20,8 +20,6 @@ To verify xrefs have been included in a binary or dynamic library, run
``readelf -n binary``. For individual object files, it's
``readelf -S object.o | grep xref_array`` instead.
-An extraction tool will be added in a future commit.
-
Structure and contents
----------------------
@@ -168,3 +166,50 @@ entry point.
for C++ code when compiled by GCC. A workaround is present for runtime
functionality, but to extract the xrefs from a C++ source file, it needs
to be built with clang (or a future fixed version of GCC) instead.
+
+Extraction tool
+---------------
+
+The FRR source contains a matching tool to extract xref data from compiled ELF
+binaries in ``python/xrelfo.py``. This tool uses CPython extensions
+implemented in ``clippy`` and must therefore be executed with that.
+
+``xrelfo.py`` processes input from one or more ELF files (.o, .so, executable),
+libtool object (.lo, .la, executable wrapper script) or JSON (output from
+``xrelfo.py``) and generates an output JSON file. During standard FRR build,
+it is invoked on all binaries and libraries and the result is combined into
+``frr.json``.
+
+ELF files from any operating system, CPU architecture and endianness can be
+processed on any host. Any issues with this are bugs in ``xrelfo.py``
+(or clippy's ELF code.)
+
+``xrelfo.py`` also performs some sanity checking, particularly on log
+messages. The following options are available:
+
+.. option:: -o OUTPUT
+
+ Filename to write JSON output to. As a convention, a ``.xref`` filename
+ extension is used.
+
+.. option:: -Wlog-format
+
+ Performs extra checks on log message format strings, particularly checks
+ for ``\t`` and ``\n`` characters (which should not be used in log messages).
+
+.. option:: -Wlog-args
+
+ Generates cleanup hints for format string arguments where
+ :c:func:`printfrr()` extensions could be used, e.g. replacing ``inet_ntoa``
+ with ``%pI4``.
+
+.. option:: --profile
+
+ Runs the Python profiler to identify hotspots in the ``xrelfo.py`` code.
+
+``xrelfo.py`` uses information about C structure definitions saved in
+``python/xrefstructs.json``. This file is included with the FRR sources and
+only needs to be regenerated when some of the ``struct xref_*`` definitions
+are changed (which should be almost never). The file is written by
+``python/tiabwarfo.py``, which uses ``pahole`` to extract the necessary data
+from DWARF information.
diff --git a/lib/clippy.h b/lib/clippy.h
index be4db6e638..95af274106 100644
--- a/lib/clippy.h
+++ b/lib/clippy.h
@@ -20,6 +20,7 @@
#ifndef _FRR_CLIPPY_H
#define _FRR_CLIPPY_H
+#include <stdbool.h>
#include <Python.h>
#ifdef __cplusplus
@@ -28,6 +29,7 @@ extern "C" {
extern PyObject *clippy_parse(PyObject *self, PyObject *args);
extern PyMODINIT_FUNC command_py_init(void);
+extern bool elf_py_init(PyObject *pymod);
#ifdef __cplusplus
}
diff --git a/lib/command_graph.h b/lib/command_graph.h
index 09824460e6..86715410ce 100644
--- a/lib/command_graph.h
+++ b/lib/command_graph.h
@@ -99,7 +99,7 @@ struct cmd_element {
const char *string; /* Command specification by string. */
const char *doc; /* Documentation of this command. */
int daemon; /* Daemon to which this command belong. */
- uint8_t attr; /* Command attributes */
+ uint32_t attr; /* Command attributes */
/* handler function for command */
int (*func)(const struct cmd_element *, struct vty *, int,
diff --git a/lib/command_py.c b/lib/command_py.c
index 4ec116df33..7f19008fbf 100644
--- a/lib/command_py.c
+++ b/lib/command_py.c
@@ -345,5 +345,7 @@ PyMODINIT_FUNC command_py_init(void)
PyModule_AddObject(pymod, "GraphNode", (PyObject *)&typeobj_graph_node);
Py_INCREF(&typeobj_graph);
PyModule_AddObject(pymod, "Graph", (PyObject *)&typeobj_graph);
+ if (!elf_py_init(pymod))
+ initret(NULL);
initret(pymod);
}
diff --git a/lib/elf_py.c b/lib/elf_py.c
new file mode 100644
index 0000000000..0d8ad76e1c
--- /dev/null
+++ b/lib/elf_py.c
@@ -0,0 +1,1301 @@
+/*
+ * fast ELF file accessor
+ * Copyright (C) 2018-2020 David Lamparter for NetDEF, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; see the file COPYING; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+/* Note: this wrapper is intended to be used as build-time helper. While
+ * it should be generally correct and proper, there may be the occasional
+ * memory leak or SEGV for things that haven't been well-tested.
+ * _
+ * / \ This code is NOT SUITABLE FOR UNTRUSTED ELF FILES. It's used
+ * / ! \ in FRR to read files created by its own build. Don't take it out
+ * /_____\ of FRR and use it to parse random ELF files you found somewhere.
+ *
+ * If you're working with this code (or even reading it), you really need to
+ * read a bunch of the ELF specs. There's no way around it, things in here
+ * just represent pieces of ELF pretty much 1:1. Also, readelf & objdump are
+ * your friends.
+ *
+ * Required reading:
+ * https://refspecs.linuxfoundation.org/elf/elf.pdf
+ * https://refspecs.linuxfoundation.org/elf/x86_64-SysV-psABI.pdf
+ * Recommended reading:
+ * https://github.com/ARM-software/abi-aa/releases/download/2020Q4/aaelf64.pdf
+ *
+ * The core ELF spec is *not* enough, you should read at least one of the
+ * processor specific (psABI) docs. They define what & how relocations work.
+ * Luckily we don't need to care about the processor specifics since this only
+ * does data relocations, but without looking at the psABI, some things aren't
+ * quite clear.
+ */
+
+/* the API of this module roughly follows a very small subset of the one
+ * provided by the python elfutils package, which unfortunately is painfully
+ * slow.
+ */
+
+#define PY_SSIZE_T_CLEAN
+
+#include <Python.h>
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+#include "structmember.h"
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+
+#if defined(__sun__) && (__SIZEOF_POINTER__ == 4)
+/* Solaris libelf bails otherwise ... */
+#undef _FILE_OFFSET_BITS
+#define _FILE_OFFSET_BITS 32
+#endif
+
+#include <elf.h>
+#include <libelf.h>
+#include <gelf.h>
+
+#include "typesafe.h"
+#include "jhash.h"
+#include "clippy.h"
+
+static bool debug;
+
+/* printf-style diagnostic output to stderr; only prints while the file-local
+ * "debug" flag is true.
+ */
+#define debugf(...) \
+ do { \
+ if (debug) \
+ fprintf(stderr, __VA_ARGS__); \
+ } while (0)
+
+/* Exceptions */
+static PyObject *ELFFormatError;
+static PyObject *ELFAccessError;
+
+/* most objects can only be created as return values from one of the methods */
+static PyObject *refuse_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
+{
+	/* tp_new hook for types that are only handed out by other methods of
+	 * this module: the arguments are ignored and the call always fails
+	 * with ValueError.
+	 */
+	(void)type;
+	(void)args;
+	(void)kwds;
+
+	PyErr_SetString(PyExc_ValueError,
+			"cannot create instances of this type");
+	return NULL;
+}
+
+struct elfreloc;
+struct elfsect;
+
+PREDECL_HASH(elfrelocs)
+
+/* ELFFile and ELFSection intentionally share some behaviour, particularly
+ * subscript[123:456] access to file data. This is because relocatables
+ * (.o files) do things section-based, but linked executables/libraries do
+ * things file-based. Having the two behave similarly allows simplifying the
+ * Python code.
+ */
+
+/* class ELFFile:
+ *
+ * overall entry point, instantiated by reading in an ELF file
+ */
+struct elffile {
+ PyObject_HEAD
+
+ /* exposed to Python as the read-only "filename" member */
+ char *filename;
+ /* file contents; the subscript accessors read from (mmap + offset) and
+ * bounds-check against len. (presumably an mmap() of the input file,
+ * with mmend one past its end - TODO confirm in the loading code)
+ */
+ char *mmap, *mmend;
+ size_t len;
+ /* libelf handle used for all elf_*() / gelf_*() calls */
+ Elf *elf;
+
+ /* note from here on there are several instances of
+ *
+ * GElf_Something *x, _x;
+ *
+ * this is a pattern used by libelf's generic ELF routines; the _x
+ * field is used to create a copy of the ELF structure from the file
+ * with 32/64bit and endianness adjusted.
+ */
+
+ GElf_Ehdr *ehdr, _ehdr;
+ Elf_Scn *symtab;
+ size_t nsym, symstridx;
+ Elf_Data *symdata;
+
+ /* cache of ELFSection wrapper objects indexed by section index;
+ * entries are created on first use in elffile_secbyidx(), which treats
+ * n_sect as the number of valid slots
+ */
+ PyObject **sects;
+ size_t n_sect;
+
+ struct elfrelocs_head dynrelocs;
+
+ /* the following three are exposed to Python as read-only members */
+ int elfclass;
+ bool bigendian;
+ bool has_symbols;
+};
+
+/* class ELFSection:
+ *
+ * note that executables and shared libraries can have their section headers
+ * removed, though in practice this is only used as an obfuscation technique.
+ */
+struct elfsect {
+ PyObject_HEAD
+
+ /* section name as passed to elfsect_wrap(); exposed to Python as "name" */
+ const char *name;
+ /* file this section belongs to */
+ struct elffile *ef;
+
+ /* shdr is the result of gelf_getshdr(scn, &_shdr), see elfsect_wrap() */
+ GElf_Shdr _shdr, *shdr;
+ Elf_Scn *scn;
+ /* idx: section index in the file; len: copy of shdr->sh_size */
+ unsigned long idx, len;
+
+ /* relocations applying to this section, hashed by r_offset; filled in
+ * by elfsect_add_relocations()
+ */
+ struct elfrelocs_head relocs;
+};
+
+/* class ELFReloc:
+ *
+ * note: relocations in object files (.o) are section-based while relocations
+ * in executables and shared libraries are file-based.
+ *
+ * Whenever accessing something that is a pointer in the ELF file, the Python
+ * code needs to check for a relocation; if the pointer is pointing to some
+ * unresolved symbol the file will generally contain 0 bytes. The relocation
+ * will tell what the pointer is actually pointing to.
+ *
+ * This represents both static (.o file) and dynamic (.so/exec) relocations.
+ */
+struct elfreloc {
+ PyObject_HEAD
+
+ /* hash linkage; items are keyed on rela->r_offset (see elfreloc_cmp()
+ * and elfreloc_hash())
+ */
+ struct elfrelocs_item elfrelocs_item;
+
+ /* es is the section this relocation was loaded for; it is set in
+ * elfsect_add_relocations(). elfreloc_getsection() returns None when
+ * es is NULL.
+ */
+ struct elfsect *es;
+ struct elffile *ef;
+
+ /* there's also old-fashioned GElf_Rel; we're converting that to
+ * GElf_Rela in elfsect_add_relocations()
+ */
+ GElf_Rela _rela, *rela;
+ /* sym is NULL if gelf_getsym() failed for symidx */
+ GElf_Sym _sym, *sym;
+ /* GELF_R_SYM() of rela->r_info */
+ size_t symidx;
+ const char *symname;
+
+ /* documented below in python docstrings */
+ bool symvalid, unresolved, relative;
+ unsigned long long st_value;
+};
+
+static int elfreloc_cmp(const struct elfreloc *a, const struct elfreloc *b);
+static uint32_t elfreloc_hash(const struct elfreloc *reloc);
+
+DECLARE_HASH(elfrelocs, struct elfreloc, elfrelocs_item,
+ elfreloc_cmp, elfreloc_hash)
+
+static Elf_Scn *elf_find_addr(struct elffile *ef, uint64_t addr, size_t *idx);
+static PyObject *elffile_secbyidx(struct elffile *w, Elf_Scn *scn, size_t idx);
+static PyObject *elfreloc_getsection(PyObject *self, PyObject *args);
+static PyObject *elfreloc_getaddend(PyObject *obj, void *closure);
+
+/* --- end of declarations -------------------------------------------------- */
+
+/*
+ * class ELFReloc:
+ */
+
+static const char elfreloc_doc[] =
+ "Represents an ELF relocation record\n"
+ "\n"
+ "(struct elfreloc * in elf_py.c)";
+
+/* Build one PyMemberDef entry exposing a struct elfreloc field read-only;
+ * the field name and type constant are appended to the docstring.
+ */
+#define member(name, type, doc) \
+	{ \
+		(char *)#name, type, offsetof(struct elfreloc, name), READONLY,\
+		(char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \
+	}
+/* read-only attributes of ELFReloc objects */
+static PyMemberDef members_elfreloc[] = {
+	member(symname, T_STRING,
+		"Name of symbol this relocation refers to.\n"
+		"\n"
+		"Will frequently be `None` in executables and shared libraries."
+		),
+	member(symvalid, T_BOOL,
+		"Target symbol has a valid type, i.e. not STT_NOTYPE"),
+	/* set from (st_shndx == SHN_UNDEF) in elfsect_add_relocations(), so
+	 * "true" means the symbol is NOT defined in any section of this file
+	 */
+	member(unresolved, T_BOOL,
+		"Target symbol is undefined (SHN_UNDEF), i.e. external"),
+	member(relative, T_BOOL,
+		"Relocation is a REL (not RELA) record and thus relative."),
+	member(st_value, T_ULONGLONG,
+		"Target symbol's value, if known\n\n"
+		"Will be zero for unresolved/external symbols."),
+	{}
+};
+#undef member
+
+/* computed (getter-only) attributes of ELFReloc objects */
+static PyGetSetDef getset_elfreloc[] = {
+ { .name = (char *)"r_addend", .get = elfreloc_getaddend, .doc =
+ (char *)"Relocation addend value"},
+ {}
+};
+
+/* methods of ELFReloc objects */
+static PyMethodDef methods_elfreloc[] = {
+ {"getsection", elfreloc_getsection, METH_VARARGS,
+ "Find relocation target's ELF section\n\n"
+ "Args: address of relocatee (TODO: fix/remove?)\n"
+ "Returns: ELFSection or None\n\n"
+ "Not possible if section headers have been stripped."},
+ {}
+};
+
+/* Hash-table comparison callback: orders relocations by r_offset.
+ * Returns -1, 0 or 1.
+ */
+static int elfreloc_cmp(const struct elfreloc *a, const struct elfreloc *b)
+{
+	GElf_Addr lhs = a->rela->r_offset;
+	GElf_Addr rhs = b->rela->r_offset;
+
+	return (lhs > rhs) - (lhs < rhs);
+}
+
+/* Hash-table hash callback: hashes the raw bytes of r_offset with a fixed
+ * seed.
+ */
+static uint32_t elfreloc_hash(const struct elfreloc *reloc)
+{
+	const GElf_Addr *offset = &reloc->rela->r_offset;
+
+	return jhash(offset, sizeof(*offset), 0xc9a2b7f4);
+}
+
+/* Look up the relocation stored for "offset" in "head".
+ *
+ * Returns the matching item, or whatever elfrelocs_find() returns when
+ * there is none.
+ */
+static struct elfreloc *elfrelocs_get(struct elfrelocs_head *head,
+				      GElf_Addr offset)
+{
+	/* search key; only the fields read by elfreloc_cmp()/elfreloc_hash()
+	 * are filled in, the rest stays uninitialized
+	 */
+	struct elfreloc key;
+
+	key._rela.r_offset = offset;
+	key.rela = &key._rela;
+	return elfrelocs_find(head, &key);
+}
+
+/* ELFReloc.getsection(data): find the section the relocation points into.
+ *
+ * Args (Python): one integer; for REL-style ("relative") relocations
+ *   without a symbol it is added to the addend to form the target address.
+ * Returns: new reference to an ELFSection, or None if the relocation has no
+ *   owning section, no symbol record, or no section matches; NULL with an
+ *   exception set on argument errors.
+ */
+static PyObject *elfreloc_getsection(PyObject *self, PyObject *args)
+{
+	struct elfreloc *w = (struct elfreloc *)self;
+	/* the "k" format unit stores an unsigned long */
+	unsigned long data;
+
+	if (!PyArg_ParseTuple(args, "k", &data))
+		return NULL;
+
+	if (!w->es)
+		Py_RETURN_NONE;
+
+	if (w->symidx == 0) {
+		/* no symbol: locate the section by target address */
+		uint64_t addr = (uint64_t)w->rela->r_addend;
+		size_t idx = 0;
+		Elf_Scn *scn;
+
+		if (w->relative)
+			addr += data;
+		scn = elf_find_addr(w->es->ef, addr, &idx);
+		if (!scn)
+			Py_RETURN_NONE;
+		return elffile_secbyidx(w->es->ef, scn, idx);
+	}
+
+	/* gelf_getsym() may have failed when the relocation was loaded */
+	if (!w->sym)
+		Py_RETURN_NONE;
+	return elffile_secbyidx(w->es->ef, NULL, w->sym->st_shndx);
+}
+
+/* Getter for ELFReloc.r_addend.
+ *
+ * The addend is converted to unsigned long long before being turned into a
+ * Python int.
+ */
+static PyObject *elfreloc_getaddend(PyObject *obj, void *closure)
+{
+	struct elfreloc *reloc = (struct elfreloc *)obj;
+	unsigned long long addend = (unsigned long long)reloc->rela->r_addend;
+
+	return PyLong_FromUnsignedLongLong(addend);
+}
+
+/* repr() for ELFReloc: "<ELFReloc @offset symbol+addend>", with "[0]" in
+ * place of a missing or empty symbol name.
+ */
+static PyObject *elfreloc_repr(PyObject *arg)
+{
+	struct elfreloc *reloc = (struct elfreloc *)arg;
+	unsigned long offset = (unsigned long)reloc->rela->r_offset;
+	unsigned long addend = (unsigned long)reloc->rela->r_addend;
+	const char *sym = "[0]";
+
+	if (reloc->symname && reloc->symname[0])
+		sym = reloc->symname;
+
+	return PyUnicode_FromFormat("<ELFReloc @%lu %s+%lu>", offset, sym,
+				    addend);
+}
+
+/* tp_free for ELFReloc: does nothing, the object's memory is not released
+ * here.
+ */
+static void elfreloc_free(void *arg)
+{
+	(void)arg;
+}
+
+/* Python type "_clippy.ELFReloc". tp_new is refuse_new, so Python code
+ * cannot instantiate it; objects are created with tp_alloc in
+ * elfsect_add_relocations().
+ */
+static PyTypeObject typeobj_elfreloc = {
+ PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFReloc",
+ .tp_basicsize = sizeof(struct elfreloc),
+ .tp_flags = Py_TPFLAGS_DEFAULT,
+ .tp_doc = elfreloc_doc,
+ .tp_new = refuse_new,
+ .tp_free = elfreloc_free,
+ .tp_repr = elfreloc_repr,
+ .tp_members = members_elfreloc,
+ .tp_methods = methods_elfreloc,
+ .tp_getset = getset_elfreloc,
+};
+
+/*
+ * class ELFSection:
+ */
+
+static const char elfsect_doc[] =
+ "Represents an ELF section\n"
+ "\n"
+ "To access section contents, use subscript notation, e.g.\n"
+ " section[123:456]\n"
+ "To read null terminated C strings, replace the end with str:\n"
+ " section[123:str]\n\n"
+ "(struct elfsect * in elf_py.c)";
+
+static PyObject *elfsect_getaddr(PyObject *self, void *closure);
+
+/* Build one PyMemberDef entry exposing a struct elfsect field read-only;
+ * the field name and type constant are appended to the docstring.
+ */
+#define member(name, type, doc) \
+ { \
+ (char *)#name, type, offsetof(struct elfsect, name), READONLY, \
+ (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \
+ }
+/* read-only attributes of ELFSection objects */
+static PyMemberDef members_elfsect[] = {
+ member(name, T_STRING,
+ "Section name, e.g. \".text\""),
+ member(idx, T_ULONG,
+ "Section index in file"),
+ member(len, T_ULONG,
+ "Section length in bytes"),
+ {},
+};
+#undef member
+
+/* computed (getter-only) attributes of ELFSection objects */
+static PyGetSetDef getset_elfsect[] = {
+ { .name = (char *)"sh_addr", .get = elfsect_getaddr, .doc =
+ (char *)"Section virtual address (mapped program view)"},
+ {}
+};
+
+/* Getter for ELFSection.sh_addr: the section header's sh_addr as a Python
+ * int.
+ */
+static PyObject *elfsect_getaddr(PyObject *self, void *closure)
+{
+	struct elfsect *sect = (struct elfsect *)self;
+	unsigned long long addr = (unsigned long long)sect->shdr->sh_addr;
+
+	return PyLong_FromUnsignedLongLong(addr);
+}
+
+
+/* ELFSection.getreloc(offset): look up the relocation at a byte offset
+ * into the section.
+ *
+ * Relocations are keyed by r_offset, so the section's sh_addr is added to
+ * the offset before the lookup.
+ *
+ * Returns: new reference to the ELFReloc, or None if there is none; NULL
+ *   with an exception set on argument errors.
+ */
+static PyObject *elfsect_getreloc(PyObject *self, PyObject *args)
+{
+	struct elfsect *sect = (struct elfsect *)self;
+	struct elfreloc *hit;
+	unsigned long offs;
+
+	if (!PyArg_ParseTuple(args, "k", &offs))
+		return NULL;
+
+	hit = elfrelocs_get(&sect->relocs, offs + sect->shdr->sh_addr);
+	if (hit) {
+		Py_INCREF((PyObject *)hit);
+		return (PyObject *)hit;
+	}
+
+	Py_RETURN_NONE;
+}
+
+/* methods of ELFSection objects */
+static PyMethodDef methods_elfsect[] = {
+ {"getreloc", elfsect_getreloc, METH_VARARGS,
+ "Check for / get relocation at offset into section\n\n"
+ "Args: byte offset into section to check\n"
+ "Returns: ELFReloc or None"},
+ {}
+};
+
+/* ELFSection.__getitem__: read raw bytes from a section.
+ *
+ * Accepted keys:
+ *   section[a:b]    bytes a..b relative to the start of the section
+ *   section[a:str]  NUL-terminated string starting at offset a (the
+ *                   terminator is not included)
+ *
+ * Returns a bytes object (str on Python 2), or NULL with IndexError /
+ * ELFAccessError / an integer conversion error set.
+ */
+static PyObject *elfsect_subscript(PyObject *self, PyObject *key)
+{
+	Py_ssize_t start, stop, step, sllen;
+	struct elfsect *w = (struct elfsect *)self;
+	PySliceObject *slice;
+	unsigned long offs, len = ~0UL;
+
+	if (!PySlice_Check(key)) {
+		PyErr_SetString(PyExc_IndexError,
+				"ELFSection subscript must be slice");
+		return NULL;
+	}
+	slice = (PySliceObject *)key;
+	if (PyLong_Check(slice->stop)) {
+		if (PySlice_GetIndicesEx(key, w->shdr->sh_size,
+					 &start, &stop, &step, &sllen))
+			return NULL;
+
+		if (step != 1) {
+			PyErr_SetString(PyExc_IndexError,
+				"ELFSection subscript slice step must be 1");
+			return NULL;
+		}
+		/* NB: PySlice_GetIndicesEx() already clips stop to the
+		 * section size, this is only a safety net
+		 */
+		if ((GElf_Xword)stop > w->shdr->sh_size) {
+			PyErr_Format(ELFAccessError,
+				"access (%zd) beyond end of section %lu/%s (%llu)",
+				stop, w->idx, w->name,
+				(unsigned long long)w->shdr->sh_size);
+			return NULL;
+		}
+
+		offs = start;
+		len = sllen;
+	} else {
+		unsigned long long ustart;
+
+		if (slice->stop != (void *)&PyUnicode_Type
+		    || !PyLong_Check(slice->start)) {
+			PyErr_SetString(PyExc_IndexError, "invalid slice");
+			return NULL;
+		}
+
+		/* fails (e.g. for negative values) with an exception set */
+		ustart = PyLong_AsUnsignedLongLong(slice->start);
+		if (ustart == (unsigned long long)-1 && PyErr_Occurred())
+			return NULL;
+
+		offs = ustart;
+		len = ~0UL;
+	}
+
+	/* from here on, offs is relative to the start of the file */
+	offs += w->shdr->sh_offset;
+	if (offs > w->ef->len) {
+		PyErr_Format(ELFAccessError,
+			     "access (%lu) beyond end of file (%zu)",
+			     offs, w->ef->len);
+		return NULL;
+	}
+	if (len == ~0UL)
+		len = strnlen(w->ef->mmap + offs, w->ef->len - offs);
+	else if (len > w->ef->len - offs) {
+		/* sh_size may extend past the file contents, e.g. for
+		 * sections that occupy no space in the file
+		 */
+		PyErr_Format(ELFAccessError,
+			     "access (%lu) beyond end of file (%zu)",
+			     offs + len, w->ef->len);
+		return NULL;
+	}
+
+	Py_ssize_t pylen = len;
+
+#if PY_MAJOR_VERSION >= 3
+	return Py_BuildValue("y#", w->ef->mmap + offs, pylen);
+#else
+	return Py_BuildValue("s#", w->ef->mmap + offs, pylen);
+#endif
+}
+
+/* mapping protocol for ELFSection: only subscript (read) access */
+static PyMappingMethods mp_elfsect = {
+ .mp_subscript = elfsect_subscript,
+};
+
+/* tp_free for ELFSection: does nothing, the object's memory is not released
+ * here.
+ */
+static void elfsect_free(void *arg)
+{
+	(void)arg;
+}
+
+/* repr() for ELFSection: "<ELFSection name>" */
+static PyObject *elfsect_repr(PyObject *arg)
+{
+	const struct elfsect *sect = (const struct elfsect *)arg;
+	const char *secname = sect->name;
+
+	return PyUnicode_FromFormat("<ELFSection %s>", secname);
+}
+
+/* Python type "_clippy.ELFSection". tp_new is refuse_new, so Python code
+ * cannot instantiate it; objects are created with tp_alloc in
+ * elfsect_wrap().
+ */
+static PyTypeObject typeobj_elfsect = {
+ PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFSection",
+ .tp_basicsize = sizeof(struct elfsect),
+ .tp_flags = Py_TPFLAGS_DEFAULT,
+ .tp_doc = elfsect_doc,
+ .tp_new = refuse_new,
+ .tp_free = elfsect_free,
+ .tp_repr = elfsect_repr,
+ .tp_as_mapping = &mp_elfsect,
+ .tp_members = members_elfsect,
+ .tp_methods = methods_elfsect,
+ .tp_getset = getset_elfsect,
+};
+
+/* Load the entries of one relocation section into w->relocs.
+ *
+ * w       section wrapper that receives the relocations
+ * rel     SHT_REL or SHT_RELA section to read
+ * relhdr  section header of "rel"
+ *
+ * Only entries whose r_offset lies within [sh_addr, sh_addr + sh_size) of
+ * w are kept. SHT_REL entries are converted to RELA form with a zero addend
+ * and flagged as "relative".
+ *
+ * If allocating an ELFReloc object fails, loading stops early and the
+ * Python exception set by tp_alloc is left pending.
+ */
+static void elfsect_add_relocations(struct elfsect *w, Elf_Scn *rel,
+				    GElf_Shdr *relhdr)
+{
+	size_t i, entries;
+	Elf_Scn *symtab = elf_getscn(w->ef->elf, relhdr->sh_link);
+	GElf_Shdr _symhdr, *symhdr = gelf_getshdr(symtab, &_symhdr);
+	Elf_Data *symdata = elf_getdata(symtab, NULL);
+	Elf_Data *reldata = elf_getdata(rel, NULL);
+
+	/* nothing readable, or entry size unusable as divisor */
+	if (!reldata || !relhdr->sh_entsize)
+		return;
+
+	entries = relhdr->sh_size / relhdr->sh_entsize;
+	for (i = 0; i < entries; i++) {
+		struct elfreloc *relw;
+		GElf_Rela rela;
+		GElf_Sym *sym;
+		bool relative = false;
+
+		if (relhdr->sh_type == SHT_REL) {
+			GElf_Rel _relent, *relent;
+
+			relent = gelf_getrel(reldata, i, &_relent);
+			if (!relent)
+				continue;
+			rela.r_offset = relent->r_offset;
+			rela.r_info = relent->r_info;
+			rela.r_addend = 0;
+			relative = true;
+		} else if (!gelf_getrela(reldata, i, &rela))
+			continue;
+
+		/* filter before allocating, so that entries belonging to
+		 * other sections don't leave an unreferenced object behind
+		 */
+		if (rela.r_offset < w->shdr->sh_addr
+		    || rela.r_offset >= w->shdr->sh_addr + w->shdr->sh_size)
+			continue;
+
+		relw = (struct elfreloc *)typeobj_elfreloc.tp_alloc(
+			&typeobj_elfreloc, 0);
+		if (!relw)
+			return;
+
+		relw->es = w;
+		relw->_rela = rela;
+		relw->rela = &relw->_rela;
+		relw->relative = relative;
+
+		relw->symidx = GELF_R_SYM(rela.r_info);
+		sym = relw->sym = gelf_getsym(symdata, relw->symidx,
+					      &relw->_sym);
+		if (sym) {
+			relw->symname = symhdr
+				? elf_strptr(w->ef->elf, symhdr->sh_link,
+					     sym->st_name)
+				: NULL;
+			relw->symvalid = GELF_ST_TYPE(sym->st_info)
+				!= STT_NOTYPE;
+			relw->unresolved = sym->st_shndx == SHN_UNDEF;
+			relw->st_value = sym->st_value;
+		} else {
+			relw->symname = NULL;
+			relw->symvalid = false;
+			relw->unresolved = false;
+			relw->st_value = 0;
+		}
+
+		/* symname can be NULL, which must not be passed to %s */
+		debugf("reloc @ %016llx sym %5llu %016llx %s\n",
+		       (unsigned long long)rela.r_offset,
+		       (unsigned long long)relw->symidx,
+		       (unsigned long long)rela.r_addend,
+		       relw->symname ? relw->symname : "(null)");
+
+		elfrelocs_add(&w->relocs, relw);
+	}
+}
+
+/*
+ * bindings & loading code between ELFFile and ELFSection
+ */
+
+/* Create the ELFSection Python object for one section and load all
+ * relocations applying to it.
+ *
+ * ef    file the section belongs to
+ * scn   libelf section handle
+ * idx   index of the section in the file
+ * name  section name; the pointer is stored, not copied
+ *
+ * Returns a new object, or NULL with an exception set if allocation fails
+ * or the section header cannot be read.
+ */
+static PyObject *elfsect_wrap(struct elffile *ef, Elf_Scn *scn, size_t idx,
+			      const char *name)
+{
+	struct elfsect *w;
+	size_t i;
+
+	w = (struct elfsect *)typeobj_elfsect.tp_alloc(&typeobj_elfsect, 0);
+	if (!w)
+		return NULL;
+
+	w->name = name;
+	w->ef = ef;
+	w->scn = scn;
+	w->idx = idx;
+	elfrelocs_init(&w->relocs);
+
+	w->shdr = gelf_getshdr(scn, &w->_shdr);
+	if (!w->shdr) {
+		PyErr_Format(ELFFormatError,
+			     "cannot read header of section %zu", idx);
+		Py_DECREF((PyObject *)w);
+		return NULL;
+	}
+	w->len = w->shdr->sh_size;
+
+	/* scan all sections for relocation tables; sh_info == 0 tables are
+	 * not tied to one section and are filtered by address instead
+	 */
+	for (i = 0; i < ef->ehdr->e_shnum; i++) {
+		Elf_Scn *relscn = elf_getscn(ef->elf, i);
+		GElf_Shdr _relhdr, *relhdr = gelf_getshdr(relscn, &_relhdr);
+
+		if (!relhdr)
+			continue;
+		if (relhdr->sh_type != SHT_RELA && relhdr->sh_type != SHT_REL)
+			continue;
+		if (relhdr->sh_info && relhdr->sh_info != idx)
+			continue;
+		elfsect_add_relocations(w, relscn, relhdr);
+	}
+
+	return (PyObject *)w;
+}
+
+/* Find a section by name.
+ *
+ * ef    file to search
+ * name  exact section name to look for
+ * idx   if not NULL, receives the index of the section found
+ *
+ * Returns the first matching section, or NULL if there is none. Sections
+ * whose header or name cannot be read are skipped.
+ */
+static Elf_Scn *elf_find_section(struct elffile *ef, const char *name,
+				 size_t *idx)
+{
+	size_t i;
+	const char *secname;
+
+	for (i = 0; i < ef->ehdr->e_shnum; i++) {
+		Elf_Scn *scn = elf_getscn(ef->elf, i);
+		GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr);
+
+		if (!shdr)
+			continue;
+
+		secname = elf_strptr(ef->elf, ef->ehdr->e_shstrndx,
+				     shdr->sh_name);
+		if (!secname || strcmp(secname, name))
+			continue;
+		if (idx)
+			*idx = i;
+		return scn;
+	}
+	return NULL;
+}
+
+/*
+ * Find the section that contains a virtual address.
+ *
+ * ef:   file to search
+ * addr: virtual address to look up
+ * idx:  optional output, set to the section index on success
+ *
+ * Returns the first section whose [sh_addr, sh_addr + sh_size) range covers
+ * addr, or NULL if no section does.  Sections whose header cannot be read
+ * are skipped.
+ */
+static Elf_Scn *elf_find_addr(struct elffile *ef, uint64_t addr, size_t *idx)
+{
+	size_t i;
+
+	for (i = 0; i < ef->ehdr->e_shnum; i++) {
+		Elf_Scn *scn = elf_getscn(ef->elf, i);
+		GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr);
+
+		/* gelf_getshdr() returns NULL on broken input */
+		if (!shdr)
+			continue;
+
+		if (addr < shdr->sh_addr ||
+		    addr >= shdr->sh_addr + shdr->sh_size)
+			continue;
+
+		if (idx)
+			*idx = i;
+		return scn;
+	}
+	return NULL;
+}
+
+/*
+ * class ELFFile:
+ */
+
+/* Docstring of the Python ELFFile type (used as tp_doc of typeobj_elffile). */
+static const char elffile_doc[] =
+ "Represents an ELF file\n"
+ "\n"
+ "Args: filename to load\n"
+ "\n"
+ "To access raw file contents, use subscript notation, e.g.\n"
+ " file[123:456]\n"
+ "To read null terminated C strings, replace the end with str:\n"
+ " file[123:str]\n\n"
+ "(struct elffile * in elf_py.c)";
+
+
+/*
+ * Helper to build one PyMemberDef entry for a field of struct elffile.
+ *
+ * The field is exposed read-only under its C name; the docstring gets the
+ * field name and member type appended so it can be traced back to this file.
+ * The macro is only valid for the table below and is #undef'd right after.
+ */
+#define member(name, type, doc) \
+ { \
+ (char *)#name, type, offsetof(struct elffile, name), READONLY, \
+ (char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)" \
+ }
+/* Read-only attributes of ELFFile objects; terminated by an empty entry. */
+static PyMemberDef members_elffile[] = {
+ member(filename, T_STRING,
+ "Original file name as given when opening"),
+ member(elfclass, T_INT,
+ "ELF class (architecture bit size)\n\n"
+ "Either 32 or 64, straight integer."),
+ member(bigendian, T_BOOL,
+ "ELF file is big-endian\n\n"
+ "All internal ELF structures are automatically converted."),
+ member(has_symbols, T_BOOL,
+ "A symbol section is present\n\n"
+ "Note: only refers to .symtab/SHT_SYMTAB section, not DT_SYMTAB"
+ ),
+ {},
+};
+#undef member
+
+/*
+ * Return the (cached) Python ELFSection object for a section.
+ *
+ * w:   file wrapper
+ * scn: libelf handle of the section, or NULL to look it up from idx
+ * idx: section index; also the slot used in the w->sects cache
+ *
+ * The wrapper is created on first use and kept in w->sects[idx]; later calls
+ * hand out new references to the same object.
+ *
+ * Returns a new reference.  Py_None is returned if the section does not
+ * exist or idx is outside the cache; NULL (with an exception set) is
+ * returned if the wrapper could not be created.
+ */
+static PyObject *elffile_secbyidx(struct elffile *w, Elf_Scn *scn, size_t idx)
+{
+	const char *name;
+	PyObject *ret;
+
+	if (!scn)
+		scn = elf_getscn(w->elf, idx);
+	if (!scn || idx >= w->n_sect)
+		Py_RETURN_NONE;
+
+	if (!w->sects[idx]) {
+		GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr);
+
+		if (!shdr) {
+			PyErr_Format(ELFFormatError,
+				     "cannot read header of section %zu: %s",
+				     idx, elf_errmsg(-1));
+			return NULL;
+		}
+
+		name = elf_strptr(w->elf, w->ehdr->e_shstrndx, shdr->sh_name);
+		w->sects[idx] = elfsect_wrap(w, scn, idx, name);
+		/* wrapping failed; the exception is already set and there is
+		 * no object to take a reference on
+		 */
+		if (!w->sects[idx])
+			return NULL;
+	}
+
+	ret = w->sects[idx];
+	Py_INCREF(ret);
+	return ret;
+}
+
+/*
+ * ELFFile.get_section(name): look up a section by its name.
+ *
+ * Takes one string argument.  The result is whatever elffile_secbyidx()
+ * produces for the section found by elf_find_section().
+ */
+static PyObject *elffile_get_section(PyObject *self, PyObject *args)
+{
+	struct elffile *ef = (struct elffile *)self;
+	const char *secname;
+	size_t secidx = 0;
+	Elf_Scn *found;
+
+	if (!PyArg_ParseTuple(args, "s", &secname))
+		return NULL;
+
+	/* secidx is filled in by the lookup, so the two calls have to stay
+	 * separate statements
+	 */
+	found = elf_find_section(ef, secname, &secidx);
+
+	return elffile_secbyidx(ef, found, secidx);
+}
+
+/*
+ * ELFFile.get_section_addr(addr): look up the section covering an address.
+ *
+ * Takes one unsigned integer argument (parsed with "K").  The result is
+ * whatever elffile_secbyidx() produces for the section found by
+ * elf_find_addr().
+ */
+static PyObject *elffile_get_section_addr(PyObject *self, PyObject *args)
+{
+	struct elffile *ef = (struct elffile *)self;
+	unsigned long long vaddr;
+	size_t secidx = 0;
+	Elf_Scn *found;
+
+	if (!PyArg_ParseTuple(args, "K", &vaddr))
+		return NULL;
+
+	/* secidx is filled in by the lookup, so the two calls have to stay
+	 * separate statements
+	 */
+	found = elf_find_addr(ef, vaddr, &secidx);
+
+	return elffile_secbyidx(ef, found, secidx);
+}
+
+/*
+ * ELFFile.get_section_idx(idx): look up a section by its index.
+ *
+ * Takes one unsigned integer argument (parsed with "K") and lets
+ * elffile_secbyidx() resolve the section handle from it.
+ */
+static PyObject *elffile_get_section_idx(PyObject *self, PyObject *args)
+{
+	struct elffile *ef = (struct elffile *)self;
+	unsigned long long secidx;
+
+	if (PyArg_ParseTuple(args, "K", &secidx))
+		return elffile_secbyidx(ef, NULL, secidx);
+
+	return NULL;
+}
+
+/*
+ * ELFFile.get_symbol(name): find a symbol by name.
+ *
+ * Scans the symbol table recorded in w->symdata (w->nsym entries) and
+ * compares names taken from the string table w->symstridx.
+ *
+ * Returns a tuple (name, value, section) for the first match, where section
+ * is the ELFSection object the symbol lives in, or None if st_shndx does not
+ * refer to a section libelf can return.  Returns None if no symbol matches,
+ * NULL with an exception set on error.  Entries that cannot be read, or
+ * whose name cannot be resolved, are skipped.
+ */
+static PyObject *elffile_get_symbol(PyObject *self, PyObject *args)
+{
+	const char *name, *symname;
+	struct elffile *w = (struct elffile *)self;
+	GElf_Sym _sym, *sym;
+	size_t i;
+
+	if (!PyArg_ParseTuple(args, "s", &name))
+		return NULL;
+
+	for (i = 0; i < w->nsym; i++) {
+		sym = gelf_getsym(w->symdata, i, &_sym);
+		/* NULL: entry unreadable; st_name 0: unnamed symbol */
+		if (!sym || sym->st_name == 0)
+			continue;
+		symname = elf_strptr(w->elf, w->symstridx, sym->st_name);
+		/* elf_strptr() returns NULL for an invalid string offset */
+		if (!symname || strcmp(symname, name))
+			continue;
+
+		PyObject *pysect;
+		Elf_Scn *scn = elf_getscn(w->elf, sym->st_shndx);
+
+		if (scn) {
+			pysect = elffile_secbyidx(w, scn, sym->st_shndx);
+			if (!pysect)
+				return NULL;
+		} else {
+			pysect = Py_None;
+			Py_INCREF(pysect);
+		}
+		/* "N" hands our reference on pysect over to the tuple */
+		return Py_BuildValue("sKN", symname,
+				     (unsigned long long)sym->st_value, pysect);
+	}
+	Py_RETURN_NONE;
+}
+
+/*
+ * ELFFile.getreloc(offset): look up a dynamic relocation.
+ *
+ * Takes one unsigned integer argument (parsed with "k") and searches
+ * w->dynrelocs for it.  Returns a new reference to the relocation object, or
+ * None if elfrelocs_get() finds nothing.
+ */
+static PyObject *elffile_getreloc(PyObject *self, PyObject *args)
+{
+	struct elffile *ef = (struct elffile *)self;
+	unsigned long where;
+	PyObject *found;
+
+	if (!PyArg_ParseTuple(args, "k", &where))
+		return NULL;
+
+	found = (PyObject *)elfrelocs_get(&ef->dynrelocs, where);
+	if (found) {
+		Py_INCREF(found);
+		return found;
+	}
+
+	Py_RETURN_NONE;
+}
+
+/*
+ * ELFFile.find_note(owner, id): locate the payload of an ELF note.
+ *
+ * owner: note owner name to match
+ * id:    note type, given as a string of exactly 4 bytes; it is assembled
+ *        into a 32-bit value in the file's byte order
+ *
+ * All PT_NOTE program headers are scanned.  For the first note whose name
+ * and type match, a slice object covering the note's descriptor in virtual
+ * address space (p_vaddr based) is returned.  Returns None if nothing
+ * matches, or if libelf lacks gelf_getnote()/elf_getdata_rawchunk().
+ */
+static PyObject *elffile_find_note(PyObject *self, PyObject *args)
+{
+#if defined(HAVE_GELF_GETNOTE) && defined(HAVE_ELF_GETDATA_RAWCHUNK)
+	const char *owner;
+	const uint8_t *ids;
+	GElf_Word id;
+	struct elffile *w = (struct elffile *)self;
+	size_t i;
+
+	if (!PyArg_ParseTuple(args, "ss", &owner, &ids))
+		return NULL;
+
+	if (strlen((char *)ids) != 4) {
+		PyErr_SetString(PyExc_ValueError,
+				"ELF note ID must be exactly 4-byte string");
+		return NULL;
+	}
+	/* widen before shifting: a uint8_t promotes to (signed) int, and
+	 * shifting a value >= 0x80 left by 24 would overflow it
+	 */
+	if (w->bigendian)
+		id = ((GElf_Word)ids[0] << 24) | ((GElf_Word)ids[1] << 16)
+		     | ((GElf_Word)ids[2] << 8) | (GElf_Word)ids[3];
+	else
+		id = ((GElf_Word)ids[3] << 24) | ((GElf_Word)ids[2] << 16)
+		     | ((GElf_Word)ids[1] << 8) | (GElf_Word)ids[0];
+
+	for (i = 0; i < w->ehdr->e_phnum; i++) {
+		GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr);
+		Elf_Data *notedata;
+		size_t offset;
+
+		if (!phdr || phdr->p_type != PT_NOTE)
+			continue;
+
+		notedata = elf_getdata_rawchunk(w->elf, phdr->p_offset,
+						phdr->p_filesz, ELF_T_NHDR);
+		/* segment cannot be read as notes: nothing to search */
+		if (!notedata)
+			continue;
+
+		GElf_Nhdr nhdr[1];
+		size_t nameoffs, dataoffs;
+
+		offset = 0;
+		while ((offset = gelf_getnote(notedata, offset, nhdr,
+					      &nameoffs, &dataoffs))) {
+			if (phdr->p_offset + nameoffs >= w->len)
+				continue;
+
+			const char *name = w->mmap + phdr->p_offset + nameoffs;
+
+			if (strcmp(name, owner))
+				continue;
+			if (id != nhdr->n_type)
+				continue;
+
+			PyObject *s, *e, *ret = NULL;
+
+			s = PyLong_FromUnsignedLongLong(
+				phdr->p_vaddr + dataoffs);
+			e = PyLong_FromUnsignedLongLong(
+				phdr->p_vaddr + dataoffs + nhdr->n_descsz);
+			if (s && e)
+				ret = PySlice_New(s, e, NULL);
+			/* PySlice_New() takes its own references, so ours
+			 * have to be dropped to avoid leaking both integers
+			 */
+			Py_XDECREF(s);
+			Py_XDECREF(e);
+			return ret;
+		}
+	}
+#endif
+	Py_RETURN_NONE;
+}
+
+/*
+ * Translate a virtual address into an offset in the file.
+ *
+ * The first PT_LOAD program header whose memory range covers "virt" decides
+ * the result: if the address also lies within the part backed by file data
+ * (p_filesz), *offs receives the file offset and true is returned; if it is
+ * only covered by p_memsz, false is returned.  false is also returned when
+ * no PT_LOAD header covers the address.  *offs is zero whenever false is
+ * returned.
+ */
+static bool elffile_virt2file(struct elffile *w, GElf_Addr virt,
+			      GElf_Addr *offs)
+{
+	size_t n;
+
+	*offs = 0;
+
+	for (n = 0; n < w->ehdr->e_phnum; n++) {
+		GElf_Phdr buf;
+		GElf_Phdr *seg = gelf_getphdr(w->elf, n, &buf);
+
+		if (seg->p_type == PT_LOAD && virt >= seg->p_vaddr
+		    && virt < seg->p_vaddr + seg->p_memsz) {
+			/* inside the segment, but past what the file holds */
+			if (virt >= seg->p_vaddr + seg->p_filesz)
+				return false;
+
+			*offs = virt - seg->p_vaddr + seg->p_offset;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+/*
+ * ELFFile[start:stop] / ELFFile[start:str]: read raw data from the file.
+ *
+ * The key must be a slice; start is a virtual address.  Two forms exist:
+ *   - [start:stop] with integer stop returns the bytes in that virtual
+ *     address range (step, if given, must be 1);
+ *   - [start:str] (stop is the str type itself) returns the bytes from
+ *     start up to, not including, the next NUL byte.
+ *
+ * The address is resolved through the PT_LOAD program header that covers
+ * start.  Raises IndexError for malformed keys and ELFAccessError if the
+ * range is not covered by a program header or not backed by file contents.
+ *
+ * Returns a new bytes object, or NULL with an exception set.
+ */
+static PyObject *elffile_subscript(PyObject *self, PyObject *key)
+{
+	Py_ssize_t start, stop, step;
+	PySliceObject *slice;
+	struct elffile *w = (struct elffile *)self;
+	bool str = false;
+
+	if (!PySlice_Check(key)) {
+		PyErr_SetString(PyExc_IndexError,
+				"ELFFile subscript must be slice");
+		return NULL;
+	}
+	slice = (PySliceObject *)key;
+	stop = -1;
+	step = 1;
+	if (PyLong_Check(slice->stop)) {
+		start = PyLong_AsSsize_t(slice->start);
+		if (PyErr_Occurred())
+			return NULL;
+		stop = PyLong_AsSsize_t(slice->stop);
+		if (PyErr_Occurred())
+			return NULL;
+		if (slice->step != Py_None) {
+			step = PyLong_AsSsize_t(slice->step);
+			if (PyErr_Occurred())
+				return NULL;
+		}
+	} else {
+		if (slice->stop != (void *)&PyUnicode_Type
+		    || !PyLong_Check(slice->start)) {
+			PyErr_SetString(PyExc_IndexError, "invalid slice");
+			return NULL;
+		}
+
+		str = true;
+		start = PyLong_AsUnsignedLongLong(slice->start);
+		if (PyErr_Occurred())
+			return NULL;
+	}
+	if (step != 1) {
+		PyErr_SetString(PyExc_IndexError,
+				"ELFFile subscript slice step must be 1");
+		return NULL;
+	}
+
+	GElf_Addr xstart = start, xstop = stop;
+
+	for (size_t i = 0; i < w->ehdr->e_phnum; i++) {
+		GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr);
+
+		if (!phdr || phdr->p_type != PT_LOAD)
+			continue;
+
+		if (xstart < phdr->p_vaddr
+		    || xstart >= phdr->p_vaddr + phdr->p_memsz)
+			continue;
+		if (!str && (xstop < phdr->p_vaddr
+			     || xstop > phdr->p_vaddr + phdr->p_memsz)) {
+			PyErr_Format(ELFAccessError,
+				     "access (%llu) beyond end of program header (%llu)",
+				     (unsigned long long)xstop,
+				     (unsigned long long)(phdr->p_vaddr +
+							  phdr->p_memsz));
+			return NULL;
+		}
+
+		/* from here on xstart/xstop are offsets into the mapping */
+		xstart = xstart - phdr->p_vaddr + phdr->p_offset;
+		if (xstart > w->len) {
+			PyErr_Format(ELFAccessError,
+				     "virtual address (%llu) is not backed by file contents",
+				     (unsigned long long)start);
+			return NULL;
+		}
+
+		if (str) {
+			/* never scan past the end of the mapping, and turn
+			 * the string length into an end offset so that the
+			 * length computed below is correct
+			 */
+			size_t avail = w->len - xstart;
+			size_t slen = strnlen(w->mmap + xstart, avail);
+
+			if (slen >= avail) {
+				PyErr_Format(ELFAccessError,
+					     "unterminated string at virtual address (%llu)",
+					     (unsigned long long)start);
+				return NULL;
+			}
+			xstop = xstart + slen;
+		} else {
+			xstop = xstop - phdr->p_vaddr + phdr->p_offset;
+			if (xstop < xstart || xstop > w->len) {
+				PyErr_Format(ELFAccessError,
+					     "invalid access range (%llu:%llu)",
+					     (unsigned long long)start,
+					     (unsigned long long)stop);
+				return NULL;
+			}
+		}
+
+		/* explicit Py_ssize_t length; does not depend on whether
+		 * PY_SSIZE_T_CLEAN is in effect, unlike the "#" formats
+		 */
+		return PyBytes_FromStringAndSize(w->mmap + xstart,
+						 (Py_ssize_t)(xstop - xstart));
+	}
+
+	return PyErr_Format(ELFAccessError,
+			    "virtual address (%llu) not found in program headers",
+			    (unsigned long long)start);
+}
+
+/*
+ * Methods of ELFFile objects.  All of them take positional arguments only
+ * (METH_VARARGS); the table is terminated by an empty entry.
+ */
+static PyMethodDef methods_elffile[] = {
+ {"find_note", elffile_find_note, METH_VARARGS,
+ "find specific note entry"},
+ {"getreloc", elffile_getreloc, METH_VARARGS,
+ "find relocation"},
+ {"get_symbol", elffile_get_symbol, METH_VARARGS,
+ "find symbol by name"},
+ {"get_section", elffile_get_section, METH_VARARGS,
+ "find section by name"},
+ {"get_section_addr", elffile_get_section_addr, METH_VARARGS,
+ "find section by address"},
+ {"get_section_idx", elffile_get_section_idx, METH_VARARGS,
+ "find section by index"},
+ {}
+};
+
+static PyObject *elffile_load(PyTypeObject *type, PyObject *args,
+ PyObject *kwds);
+
+/*
+ * Release the resources held by an ELFFile object: the libelf handle, the
+ * memory mapping of the file and the copied file name.
+ *
+ * The libelf handle is closed before the mapping it was created from is
+ * removed.
+ *
+ * NOTE(review): this is installed as tp_free, yet it does not release the
+ * object's own memory, nor w->sects or the cached section objects - looks
+ * like a leak, confirm against the intended ownership rules before changing.
+ */
+static void elffile_free(void *arg)
+{
+ struct elffile *w = arg;
+
+ elf_end(w->elf);
+ munmap(w->mmap, w->len);
+ free(w->filename);
+}
+
+/* Mapping protocol for ELFFile: only reading via file[...] is supported. */
+static PyMappingMethods mp_elffile = {
+ .mp_subscript = elffile_subscript,
+};
+
+/*
+ * Python type object for _clippy.ELFFile.
+ *
+ * Instances are created through elffile_load() (tp_new) and expose the
+ * members, methods and subscript operation defined above.
+ */
+static PyTypeObject typeobj_elffile = {
+ PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFFile",
+ .tp_basicsize = sizeof(struct elffile),
+ .tp_flags = Py_TPFLAGS_DEFAULT,
+ .tp_doc = elffile_doc,
+ .tp_new = elffile_load,
+ .tp_free = elffile_free,
+ .tp_as_mapping = &mp_elffile,
+ .tp_members = members_elffile,
+ .tp_methods = methods_elffile,
+};
+
+/*
+ * Get a pointer to a NUL-terminated string inside a raw data chunk.
+ *
+ * data:   chunk holding a string table; may be NULL
+ * offset: byte offset of the string within the chunk
+ *
+ * Returns a pointer into the chunk, or NULL if there is no chunk, the offset
+ * is outside of it, or the string is not terminated within the chunk.
+ */
+static char *elfdata_strptr(Elf_Data *data, size_t offset)
+{
+	char *p;
+
+	/* callers pass NULL when the string table could not be located */
+	if (!data || !data->d_buf)
+		return NULL;
+	if (offset >= data->d_size)
+		return NULL;
+
+	p = (char *)data->d_buf + offset;
+	/* no NUL byte before the end of the chunk: not a usable string */
+	if (strnlen(p, data->d_size - offset) >= data->d_size - offset)
+		return NULL;
+
+	return p;
+}
+
+/*
+ * Create ELFReloc objects for a block of dynamic RELA relocations and add
+ * them to w->dynrelocs.
+ *
+ * w:       file the relocations belong to
+ * reldata: raw chunk holding the relocation entries
+ * entries: number of entries in reldata
+ * symdata: raw chunk holding the dynamic symbol table (may be NULL)
+ * strdata: raw chunk holding the dynamic string table (may be NULL)
+ *
+ * For each entry the referenced symbol is looked up; if it cannot be read
+ * the relocation is recorded without symbol information.  Entries that
+ * cannot be read at all are skipped.  If an object cannot be allocated,
+ * processing stops with the Python exception left set.
+ */
+static void elffile_add_dynreloc(struct elffile *w, Elf_Data *reldata,
+				 size_t entries, Elf_Data *symdata,
+				 Elf_Data *strdata)
+{
+	size_t i;
+
+	for (i = 0; i < entries; i++) {
+		struct elfreloc *relw;
+		size_t symidx;
+		GElf_Rela *rela;
+		GElf_Sym *sym;
+
+		relw = (struct elfreloc *)typeobj_elfreloc.tp_alloc(
+			&typeobj_elfreloc, 0);
+		if (!relw)
+			return;
+		relw->ef = w;
+
+		rela = relw->rela = gelf_getrela(reldata, i, &relw->_rela);
+		if (!rela) {
+			/* entry lies outside of the chunk */
+			Py_DECREF(relw);
+			continue;
+		}
+		symidx = relw->symidx = GELF_R_SYM(rela->r_info);
+		sym = relw->sym = gelf_getsym(symdata, symidx, &relw->_sym);
+		if (sym) {
+			relw->symname = strdata ? elfdata_strptr(strdata,
+								 sym->st_name)
+						: NULL;
+			relw->symvalid = GELF_ST_TYPE(sym->st_info)
+					 != STT_NOTYPE;
+			relw->unresolved = sym->st_shndx == SHN_UNDEF;
+			relw->st_value = sym->st_value;
+		} else {
+			relw->symname = NULL;
+			relw->symvalid = false;
+			relw->unresolved = false;
+			relw->st_value = 0;
+		}
+
+		/* a NULL argument for %s is not portable */
+		debugf("dynreloc @ %016llx sym %5llu %016llx %s\n",
+		       (long long)rela->r_offset, (unsigned long long)symidx,
+		       (long long)rela->r_addend,
+		       relw->symname ? relw->symname : "(null)");
+
+		elfrelocs_add(&w->dynrelocs, relw);
+	}
+}
+
+/* primary (only, really) entry point to anything in this module */
+/*
+ * ELFFile(filename): open and index an ELF file.
+ *
+ * primary (only, really) entry point to anything in this module
+ *
+ * The file is mapped read-only and handed to libelf.  The following is set
+ * up on the new object:
+ *   - elfclass / bigendian from the ELF identification bytes
+ *   - symbol table information from the first SHT_SYMTAB section, if any
+ *   - dynamic relocations (DT_RELA) of every PT_DYNAMIC segment, if libelf
+ *     provides elf_getdata_rawchunk()
+ *   - an empty cache for section wrapper objects
+ *
+ * Raises OSError/IOError if the file cannot be opened or mapped, and
+ * ELFFormatError if it is not a usable ELF file.
+ *
+ * Returns a new reference, or NULL with an exception set.
+ */
+static PyObject *elffile_load(PyTypeObject *type, PyObject *args,
+			      PyObject *kwds)
+{
+	const char *filename;
+	static const char * const kwnames[] = {"filename", NULL};
+	struct elffile *w;
+	struct stat st;
+	void *map;
+	int fd, err;
+
+	/* parse first so there is nothing to clean up on bad arguments */
+	if (!PyArg_ParseTupleAndKeywords(args, kwds, "s", (char **)kwnames,
+					 &filename))
+		return NULL;
+
+	w = (struct elffile *)typeobj_elffile.tp_alloc(&typeobj_elffile, 0);
+	if (!w)
+		return NULL;
+
+	w->filename = strdup(filename);
+	if (!w->filename) {
+		PyErr_NoMemory();
+		goto out;
+	}
+
+	fd = open(filename, O_RDONLY | O_NOCTTY);
+	if (fd < 0) {
+		PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename);
+		goto out;
+	}
+	if (fstat(fd, &st)) {
+		PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename);
+		close(fd);
+		goto out;
+	}
+	/* too short to hold even the identification bytes; checked before
+	 * mapping because a zero-length mmap() fails
+	 */
+	if (st.st_size < EI_NIDENT) {
+		PyErr_SetString(ELFFormatError, "invalid ELF signature");
+		close(fd);
+		goto out;
+	}
+
+	map = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0);
+	if (map == MAP_FAILED) {
+		/* mmap() reports failure with MAP_FAILED, not NULL */
+		PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename);
+		close(fd);
+		goto out;
+	}
+	close(fd);
+	w->mmap = map;
+	w->len = st.st_size;
+	w->mmend = w->mmap + st.st_size;
+
+	if (memcmp(w->mmap, ELFMAG, SELFMAG)) {
+		PyErr_SetString(ELFFormatError, "invalid ELF signature");
+		goto out;
+	}
+
+	switch (w->mmap[EI_CLASS]) {
+	case ELFCLASS32:
+		w->elfclass = 32;
+		break;
+	case ELFCLASS64:
+		w->elfclass = 64;
+		break;
+	default:
+		PyErr_SetString(ELFFormatError, "invalid ELF class");
+		goto out;
+	}
+	switch (w->mmap[EI_DATA]) {
+	case ELFDATA2LSB:
+		w->bigendian = false;
+		break;
+	case ELFDATA2MSB:
+		w->bigendian = true;
+		break;
+	default:
+		PyErr_SetString(ELFFormatError, "invalid ELF byte order");
+		goto out;
+	}
+
+	w->elf = elf_memory(w->mmap, w->len);
+	if (!w->elf)
+		goto out_elferr;
+	w->ehdr = gelf_getehdr(w->elf, &w->_ehdr);
+	if (!w->ehdr)
+		goto out_elferr;
+
+	/* remember the first symbol table section */
+	for (size_t i = 0; i < w->ehdr->e_shnum; i++) {
+		Elf_Scn *scn = elf_getscn(w->elf, i);
+		GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr);
+
+		if (!shdr)
+			continue;
+
+		if (shdr->sh_type == SHT_SYMTAB) {
+			w->symtab = scn;
+			w->symdata = elf_getdata(scn, NULL);
+			/* no entries if the data is unreadable or the entry
+			 * size is bogus (would divide by zero)
+			 */
+			if (w->symdata && shdr->sh_entsize)
+				w->nsym = shdr->sh_size / shdr->sh_entsize;
+			else
+				w->nsym = 0;
+			w->symstridx = shdr->sh_link;
+			break;
+		}
+	}
+	w->has_symbols = w->symtab && w->symstridx;
+	elfrelocs_init(&w->dynrelocs);
+
+#ifdef HAVE_ELF_GETDATA_RAWCHUNK
+	for (size_t i = 0; i < w->ehdr->e_phnum; i++) {
+		GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr);
+
+		if (!phdr || phdr->p_type != PT_DYNAMIC)
+			continue;
+
+		Elf_Data *dyndata = elf_getdata_rawchunk(w->elf,
+			phdr->p_offset, phdr->p_filesz, ELF_T_DYN);
+
+		if (!dyndata)
+			continue;
+
+		GElf_Addr dynrela = 0, symtab = 0, strtab = 0;
+		size_t dynrelasz = 0, dynrelaent = 0, strsz = 0;
+		GElf_Dyn _dyn, *dyn;
+
+		for (size_t j = 0;; j++) {
+			dyn = gelf_getdyn(dyndata, j, &_dyn);
+
+			/* NULL: ran off the end without seeing DT_NULL */
+			if (!dyn || dyn->d_tag == DT_NULL)
+				break;
+
+			switch (dyn->d_tag) {
+			case DT_SYMTAB:
+				symtab = dyn->d_un.d_ptr;
+				break;
+
+			case DT_STRTAB:
+				strtab = dyn->d_un.d_ptr;
+				break;
+			case DT_STRSZ:
+				strsz = dyn->d_un.d_val;
+				break;
+
+			case DT_RELA:
+				dynrela = dyn->d_un.d_ptr;
+				break;
+			case DT_RELASZ:
+				dynrelasz = dyn->d_un.d_val;
+				break;
+			case DT_RELAENT:
+				dynrelaent = dyn->d_un.d_val;
+				break;
+
+			case DT_RELSZ:
+				if (dyn->d_un.d_val)
+					fprintf(stderr,
+						"WARNING: ignoring non-empty DT_REL!\n");
+				break;
+			}
+		}
+
+		GElf_Addr offset;
+		Elf_Data *symdata = NULL, *strdata = NULL, *reladata = NULL;
+
+		/* the dynamic section does not record the size of the symbol
+		 * table, so everything up to the end of the file is offered
+		 */
+		if (elffile_virt2file(w, symtab, &offset))
+			symdata = elf_getdata_rawchunk(w->elf, offset,
+						       w->len - offset,
+						       ELF_T_SYM);
+		if (elffile_virt2file(w, strtab, &offset))
+			strdata = elf_getdata_rawchunk(w->elf, offset,
+						       strsz, ELF_T_BYTE);
+
+		if (!dynrela || !dynrelasz || !dynrelaent)
+			continue;
+
+		if (!elffile_virt2file(w, dynrela, &offset))
+			continue;
+
+		debugf("dynrela @%llx/%llx+%llx\n", (long long)dynrela,
+		       (long long)offset, (long long)dynrelasz);
+
+		reladata = elf_getdata_rawchunk(w->elf, offset, dynrelasz,
+						ELF_T_RELA);
+		if (!reladata)
+			continue;
+
+		elffile_add_dynreloc(w, reladata, dynrelasz / dynrelaent,
+				     symdata, strdata);
+		/* do not hand out an object with an exception pending */
+		if (PyErr_Occurred())
+			goto out;
+	}
+#endif
+
+	w->sects = calloc(w->ehdr->e_shnum, sizeof(PyObject *));
+	if (!w->sects && w->ehdr->e_shnum) {
+		PyErr_NoMemory();
+		goto out;
+	}
+	w->n_sect = w->ehdr->e_shnum;
+
+	return (PyObject *)w;
+
+out_elferr:
+	err = elf_errno();
+
+	PyErr_Format(ELFFormatError, "libelf error %d: %s",
+		     err, elf_errmsg(err));
+out:
+	/* release everything acquired so far and clear the fields, so that
+	 * dropping the object below cannot release anything twice
+	 */
+	if (w->elf) {
+		elf_end(w->elf);
+		w->elf = NULL;
+	}
+	if (w->mmap) {
+		munmap(w->mmap, w->len);
+		w->mmap = NULL;
+	}
+	w->len = 0;
+	free(w->filename);
+	w->filename = NULL;
+	Py_DECREF(w);
+	return NULL;
+}
+
+/*
+ * elfpy_debug(flag): enable or disable this module's debug output.
+ *
+ * Takes one argument that is evaluated for truth ("p") and stored in the
+ * file-level "debug" flag.  Returns None.
+ */
+static PyObject *elfpy_debug(PyObject *self, PyObject *args)
+{
+	int enable;
+
+	if (PyArg_ParseTuple(args, "p", &enable)) {
+		debug = enable;
+		Py_RETURN_NONE;
+	}
+
+	return NULL;
+}
+
+/*
+ * Module-level functions provided by this file; added to the module in
+ * elf_py_init().  The table is terminated by an empty entry.
+ */
+static PyMethodDef methods_elfpy[] = {
+ {"elfpy_debug", elfpy_debug, METH_VARARGS, "switch debuging on/off"},
+ {}
+};
+
+/*
+ * Register everything this file provides with the given Python module.
+ *
+ * Readies the ELFFile, ELFSection and ELFReloc types, selects the libelf
+ * API version, and adds to pymod: the module-level functions (Python 3.5
+ * and later only), the ELFFormatError and ELFAccessError exceptions, and
+ * the three types.
+ *
+ * Returns true on success, false if any step fails.
+ */
+bool elf_py_init(PyObject *pymod)
+{
+	if (PyType_Ready(&typeobj_elffile) < 0)
+		return false;
+	if (PyType_Ready(&typeobj_elfsect) < 0)
+		return false;
+	if (PyType_Ready(&typeobj_elfreloc) < 0)
+		return false;
+	if (elf_version(EV_CURRENT) == EV_NONE)
+		return false;
+
+	/* PyModule_AddFunctions() was added in Python 3.5; comparing the
+	 * combined version number also works for future major versions
+	 */
+#if PY_VERSION_HEX >= 0x03050000
+	if (PyModule_AddFunctions(pymod, methods_elfpy) < 0)
+		return false;
+#else
+	(void)methods_elfpy;
+#endif
+
+	ELFFormatError = PyErr_NewException("_clippy.ELFFormatError",
+					    PyExc_ValueError, NULL);
+	if (!ELFFormatError
+	    || PyModule_AddObject(pymod, "ELFFormatError", ELFFormatError) < 0)
+		return false;
+	ELFAccessError = PyErr_NewException("_clippy.ELFAccessError",
+					    PyExc_IndexError, NULL);
+	if (!ELFAccessError
+	    || PyModule_AddObject(pymod, "ELFAccessError", ELFAccessError) < 0)
+		return false;
+
+	/* PyModule_AddObject() takes over a reference on success, so each
+	 * static type object gets one extra reference first
+	 */
+	Py_INCREF(&typeobj_elffile);
+	if (PyModule_AddObject(pymod, "ELFFile",
+			       (PyObject *)&typeobj_elffile) < 0)
+		return false;
+	Py_INCREF(&typeobj_elfsect);
+	if (PyModule_AddObject(pymod, "ELFSection",
+			       (PyObject *)&typeobj_elfsect) < 0)
+		return false;
+	Py_INCREF(&typeobj_elfreloc);
+	if (PyModule_AddObject(pymod, "ELFReloc",
+			       (PyObject *)&typeobj_elfreloc) < 0)
+		return false;
+	return true;
+}
diff --git a/lib/subdir.am b/lib/subdir.am
index d5ffa08546..38d1a3f773 100644
--- a/lib/subdir.am
+++ b/lib/subdir.am
@@ -410,7 +410,7 @@ lib_grammar_sandbox_LDADD = \
lib_clippy_CPPFLAGS = $(AM_CPPFLAGS) -D_GNU_SOURCE -DBUILDING_CLIPPY
lib_clippy_CFLAGS = $(PYTHON_CFLAGS)
-lib_clippy_LDADD = $(PYTHON_LIBS) $(UST_LIBS)
+lib_clippy_LDADD = $(PYTHON_LIBS) $(UST_LIBS) -lelf
lib_clippy_LDFLAGS = -export-dynamic
lib_clippy_SOURCES = \
lib/jhash.c \
@@ -420,9 +420,11 @@ lib_clippy_SOURCES = \
lib/command_parse.y \
lib/command_py.c \
lib/defun_lex.l \
+ lib/elf_py.c \
lib/graph.c \
lib/libfrr_trace.c \
lib/memory.c \
+ lib/typesafe.c \
lib/vector.c \
# end
@@ -439,6 +441,32 @@ SUFFIXES += _clippy.c
.c_clippy.c:
$(AM_V_CLIPPY) $(CLIPPY) $(top_srcdir)/python/clidef.py -o $@ $<
+# xrelfo, the ELF xref extractor
+
+AM_V_XRELFO = $(am__v_XRELFO_$(V))
+am__v_XRELFO_ = $(am__v_XRELFO_$(AM_DEFAULT_VERBOSITY))
+am__v_XRELFO_0 = @echo " XRELFO " $@;
+am__v_XRELFO_1 =
+
+if DEV_BUILD
+XRELFO_FLAGS = -Wlog-format -Wlog-args
+else
+XRELFO_FLAGS =
+endif
+
+SUFFIXES += .xref
+%.xref: % $(CLIPPY)
+ $(AM_V_XRELFO) $(CLIPPY) $(top_srcdir)/python/xrelfo.py $(XRELFO_FLAGS) -o $@ $<
+
+# dependencies added in python/makefile.py
+frr.xref:
+ $(AM_V_XRELFO) $(CLIPPY) $(top_srcdir)/python/xrelfo.py -o $@ $^
+all-am: frr.xref
+
+clean-xref:
+ -rm -rf $(xrefs) frr.xref
+clean-local: clean-xref
+
## automake's "ylwrap" is a great piece of GNU software... not.
.l.c:
$(AM_V_LEX)$(am__skiplex) $(LEXCOMPILE) $<
diff --git a/lib/zlog.h b/lib/zlog.h
index 3e86aa1345..4fdb47bb95 100644
--- a/lib/zlog.h
+++ b/lib/zlog.h
@@ -44,6 +44,7 @@ struct xref_logmsg {
const char *fmtstring;
uint32_t priority;
uint32_t ec;
+ const char *args;
};
struct xrefdata_logmsg {
@@ -97,6 +98,7 @@ static inline void zlog_ref(const struct xref_logmsg *xref,
.xref = XREF_INIT(XREFT_LOGMSG, &_xrefdata, __func__), \
.fmtstring = (msg), \
.priority = (prio), \
+ .args = (#__VA_ARGS__), \
}; \
XREF_LINK(_xref.xref); \
zlog_ref(&_xref, (msg), ##__VA_ARGS__); \
@@ -122,6 +124,7 @@ static inline void zlog_ref(const struct xref_logmsg *xref,
.fmtstring = (msg), \
.priority = (prio), \
.ec = (ec_), \
+ .args = (#__VA_ARGS__), \
}; \
XREF_LINK(_xref.xref); \
zlog_ref(&_xref, "[EC %u] " msg, ec_, ##__VA_ARGS__); \
diff --git a/python/clippy/__init__.py b/python/clippy/__init__.py
index d6865ff484..344a1c91ee 100644
--- a/python/clippy/__init__.py
+++ b/python/clippy/__init__.py
@@ -21,6 +21,8 @@ import _clippy
from _clippy import parse, Graph, GraphNode
+frr_top_src = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
def graph_iterate(graph):
"""iterator yielding all nodes of a graph
diff --git a/python/clippy/elf.py b/python/clippy/elf.py
new file mode 100644
index 0000000000..4ed334f0c4
--- /dev/null
+++ b/python/clippy/elf.py
@@ -0,0 +1,574 @@
+# FRR libelf wrapper
+#
+# Copyright (C) 2020 David Lamparter for NetDEF, Inc.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; see the file COPYING; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+'''
+Wrapping layer and additional utility around _clippy.ELFFile.
+
+Essentially, the C bits have the low-level ELF access bits that should be
+fast while this has the bits that string everything together (and would've
+been a PITA to do in C.)
+
+Surprisingly - or maybe through proper engineering - this actually works
+across architecture, word size and even endianness boundaries. Both the C
+module (through GElf_*) and this code (cf. struct.unpack format mangling
+in ELFDissectStruct) will take appropriate measures to flip and resize
+fields as needed.
+'''
+
+import struct
+from collections import OrderedDict
+from weakref import WeakValueDictionary
+
+from _clippy import ELFFile, ELFAccessError
+
+#
+# data access
+#
+
+class ELFNull(object):
+ '''
+ NULL pointer, returned instead of ELFData
+ '''
+ def __init__(self):
+ self.symname = None
+ self._dstsect = None
+
+ def __repr__(self):
+ return '<ptr: NULL>'
+
+ def __hash__(self):
+ return hash(None)
+
+ def get_string(self):
+ return None
+
+class ELFUnresolved(object):
+    '''
+    Pointer to an external symbol that is not resolved in this file; handed
+    out in place of an ELFData.
+
+    :param symname: name of the referenced symbol
+    :param addend: offset added to the symbol, normally zero
+    '''
+    def __init__(self, symname, addend):
+        self._dstsect = None
+        self.symname = symname
+        self.addend = addend
+
+    def __hash__(self):
+        key = (self.symname, self.addend)
+        return hash(key)
+
+    def __repr__(self):
+        parts = (self.symname, self.addend)
+        return '<unresolved: %s+%d>' % parts
+
+class ELFData(object):
+ '''
+ Actual data somewhere in the ELF file.
+
+ :type dstsect: ELFSubset
+ :param dstsect: container data area (section or entire file)
+ :param dstoffs: byte offset into dstsect
+ :param dstlen: byte size of object, or None if unknown, open-ended or string
+ '''
+ def __init__(self, dstsect, dstoffs, dstlen):
+ self._dstsect = dstsect
+ self._dstoffs = dstoffs
+ self._dstlen = dstlen
+ self.symname = None
+
+ def __repr__(self):
+ return '<ptr: %s+0x%05x/%d>' % (self._dstsect.name, self._dstoffs, self._dstlen or -1)
+
+ def __hash__(self):
+ return hash((self._dstsect, self._dstoffs))
+
+ def get_string(self):
+ '''
+ Interpret as C string / null terminated UTF-8 and get the actual text.
+ '''
+ try:
+ return self._dstsect[self._dstoffs:str].decode('UTF-8')
+ except:
+ import pdb; pdb.set_trace()
+
+ def get_data(self, reflen):
+ '''
+ Interpret as some structure (and check vs. expected length)
+
+ :param reflen: expected size of the object, compared against actual
+ size (which is only known in rare cases, mostly when directly
+ accessing a symbol since symbols have their destination object
+ size recorded)
+ '''
+ if self._dstlen is not None and self._dstlen != reflen:
+ raise ValueError('symbol size mismatch (got %d, expected %d)' % (self._dstlen, reflen))
+ return self._dstsect[self._dstoffs:self._dstoffs+reflen]
+
+ def offset(self, offs, within_symbol=False):
+ '''
+ Get another ELFData at an offset
+
+ :param offs: byte offset, can be negative (e.g. in container_of)
+ :param within_symbol: retain length information
+ '''
+ if self._dstlen is None or not within_symbol:
+ return ELFData(self._dstsect, self._dstoffs + offs, None)
+ else:
+ return ELFData(self._dstsect, self._dstoffs + offs, self._dstlen - offs)
+
+#
+# dissection data items
+#
+
+class ELFDissectData(object):
+ '''
+ Common bits for ELFDissectStruct and ELFDissectUnion
+ '''
+
+ def __len__(self):
+ '''
+ Used for boolean evaluation, e.g. "if struct: ..."
+ '''
+ return not (isinstance(self._data, ELFNull) or isinstance(self._data, ELFUnresolved))
+
+ def container_of(self, parent, fieldname):
+ '''
+ Assume this struct is embedded in a larger struct and get at the larger
+
+ Python ``self.container_of(a, b)`` = C ``container_of(self, a, b)``
+
+ :param parent: class (not instance) of the larger struct
+ :param fieldname: fieldname that refers back to this
+ :returns: instance of parent, with fieldname set to this object
+ '''
+ offset = 0
+ if not hasattr(parent, '_efields'):
+ parent._setup_efields()
+
+ for field in parent._efields[self.elfclass]:
+ if field[0] == fieldname:
+ break
+ offset += struct.calcsize(field[1])
+ else:
+ raise AttributeError('%r not found in %r.fields' % (fieldname, parent))
+
+ return parent(self._data.offset(-offset), replace = {fieldname: self})
+
+class ELFDissectStruct(ELFDissectData):
+ '''
+ Decode and provide access to a struct somewhere in the ELF file
+
+ Handles pointers and strings somewhat nicely. Create a subclass for each
+ struct that is to be accessed, and give a field list in a "fields"
+ class-member.
+
+ :param dataptr: ELFData referring to the data bits to decode.
+ :param parent: where this was instantiated from; only for reference, has
+ no functional impact.
+ :param replace: substitute data values for specific fields. Used by
+ `container_of` to replace the inner struct when creating the outer
+ one.
+
+ .. attribute:: fields
+
+ List of tuples describing the struct members. Items can be:
+ - ``('name', ELFDissectData)`` - directly embed another struct
+ - ``('name', 'I')`` - simple data types; second item for struct.unpack
+ - ``('name', 'I', None)`` - field to ignore
+ - ``('name', 'P', str)`` - pointer to string
+ - ``('name', 'P', ELFDissectData)`` - pointer to another struct
+
+ ``P`` is added as unpack format for pointers (sized appropriately for
+ the ELF file.)
+
+ Refer to tiabwarfo.py for extracting this from ``pahole``.
+
+ TBD: replace tuples with a class.
+
+ .. attribute:: fieldrename
+
+ Dictionary to rename fields, useful if fields comes from tiabwarfo.py.
+ '''
+
+ class Pointer(object):
+ '''
+ Quick wrapper for pointers to further structs
+
+ This is just here to avoid going into infinite loops when loading
+ structs that have pointers to each other (e.g. struct xref <-->
+ struct xrefdata.) The pointer destination is only instantiated when
+ actually accessed.
+ '''
+ def __init__(self, cls, ptr):
+ self.cls = cls
+ self.ptr = ptr
+
+ def __repr__(self):
+ return '<Pointer:%s %r>' % (self.cls.__name__, self.ptr)
+
+ def __call__(self):
+ # dereference: None for NULL, otherwise decode the target struct
+ if isinstance(self.ptr, ELFNull):
+ return None
+ return self.cls(self.ptr)
+
+ def __new__(cls, dataptr, parent = None, replace = None):
+ # Objects are cached per (class, pointer) in the target section's
+ # _pointers mapping, so decoding the same location twice yields the
+ # same object. Pointers without a section (_dstsect is None) are
+ # never cached.
+ if dataptr._dstsect is None:
+ return super().__new__(cls)
+
+ obj = dataptr._dstsect._pointers.get((cls, dataptr))
+ if obj is not None:
+ return obj
+ obj = super().__new__(cls)
+ dataptr._dstsect._pointers[(cls, dataptr)] = obj
+ return obj
+
+ # struct format characters that _preproc_structspec rewrites to a fixed
+ # size depending on the ELF class
+ replacements = 'lLnN'
+
+ @classmethod
+ def _preproc_structspec(cls, elfclass, spec):
+ '''
+ Turn one entry of "fields" into a struct format string for elfclass
+
+ Anything with a calcsize attribute (an embedded struct class) becomes
+ an opaque byte string of that size; the characters in "replacements"
+ become 32-bit or 64-bit integers, signed for lowercase and unsigned
+ for uppercase.
+ '''
+ elfbits = elfclass
+
+ if hasattr(spec, 'calcsize'):
+ spec = '%ds' % (spec.calcsize(elfclass),)
+
+ if elfbits == 32:
+ repl = ['i', 'I']
+ else:
+ repl = ['q', 'Q']
+ for c in cls.replacements:
+ spec = spec.replace(c, repl[int(c.isupper())])
+ return spec
+
+ @classmethod
+ def _setup_efields(cls):
+ '''
+ Precompute the per-ELF-class field lists (_efields) and sizes (_esize)
+
+ Called on first use; both are stored on the class for ELF classes 32
+ and 64.
+ '''
+ cls._efields = {}
+ cls._esize = {}
+ for elfclass in [32, 64]:
+ cls._efields[elfclass] = []
+ size = 0
+ for f in cls.fields:
+ newf = (f[0], cls._preproc_structspec(elfclass, f[1])) + f[2:]
+ cls._efields[elfclass].append(newf)
+ size += struct.calcsize(newf[1])
+ cls._esize[elfclass] = size
+
+ def __init__(self, dataptr, parent = None, replace = None):
+ if not hasattr(self.__class__, '_efields'):
+ self._setup_efields()
+
+ self._fdata = None
+ self._data = dataptr
+ self._parent = parent
+ self.symname = dataptr.symname
+ # nothing to decode behind a NULL or unresolved pointer
+ if isinstance(dataptr, ELFNull) or isinstance(dataptr, ELFUnresolved):
+ self._fdata = {}
+ return
+
+ self._elfsect = dataptr._dstsect
+ self.elfclass = self._elfsect._elffile.elfclass
+ self.offset = dataptr._dstoffs
+
+ pspecl = [f[1] for f in self._efields[self.elfclass]]
+
+ # need to correlate output from struct.unpack with extra metadata
+ # about the particular fields, so note down byte offsets (in locs)
+ # and tuple indices of pointers (in ptrs)
+ pspec = ''
+ locs = {}
+ ptrs = set()
+
+ for idx, spec in enumerate(pspecl):
+ if spec == 'P':
+ ptrs.add(idx)
+ spec = self._elfsect.ptrtype
+
+ locs[idx] = struct.calcsize(pspec)
+ pspec = pspec + spec
+
+ self._total_size = struct.calcsize(pspec)
+
+ # pointer fields are resolved through the section (which takes
+ # relocations into account) rather than taken from the raw value
+ def replace_ptrs(v):
+ idx, val = v[0], v[1]
+ if idx not in ptrs:
+ return val
+ return self._elfsect.pointer(self.offset + locs[idx])
+
+ data = dataptr.get_data(struct.calcsize(pspec))
+ unpacked = struct.unpack(self._elfsect.endian + pspec, data)
+ unpacked = list(map(replace_ptrs, enumerate(unpacked)))
+ self._fraw = unpacked
+ self._fdata = OrderedDict()
+ replace = replace or {}
+
+ for i, item in enumerate(unpacked):
+ name = self.fields[i][0]
+ if name is None:
+ continue
+
+ if name in replace:
+ self._fdata[name] = replace[name]
+ continue
+
+ # embedded struct: decode it in place at the field's offset
+ if isinstance(self.fields[i][1], type) and issubclass(self.fields[i][1], ELFDissectData):
+ dataobj = self.fields[i][1](dataptr.offset(locs[i]), self)
+ self._fdata[name] = dataobj
+ continue
+ if len(self.fields[i]) == 3:
+ if self.fields[i][2] == str:
+ self._fdata[name] = item.get_string()
+ continue
+ elif self.fields[i][2] is None:
+ pass
+ elif issubclass(self.fields[i][2], ELFDissectData):
+ # pointer to another struct: decoded lazily on first access
+ cls = self.fields[i][2]
+ dataobj = self.Pointer(cls, item)
+ self._fdata[name] = dataobj
+ continue
+
+ self._fdata[name] = item
+
+ def __getattr__(self, attrname):
+ # Struct fields are reachable as attributes. A Pointer is replaced
+ # by its dereferenced target the first time it is accessed.
+ if attrname not in self._fdata:
+ raise AttributeError(attrname)
+ if isinstance(self._fdata[attrname], self.Pointer):
+ self._fdata[attrname] = self._fdata[attrname]()
+ return self._fdata[attrname]
+
+ def __repr__(self):
+ if not isinstance(self._data, ELFData):
+ return '<%s: %r>' % (self.__class__.__name__, self._data)
+ return '<%s: %s>' % (self.__class__.__name__,
+ ', '.join(['%s=%r' % t for t in self._fdata.items()]))
+
+ @classmethod
+ def calcsize(cls, elfclass):
+ '''
+ Sum up byte size of this struct
+
+ Wraps struct.calcsize with some extra features.
+ '''
+ if not hasattr(cls, '_efields'):
+ cls._setup_efields()
+
+ pspec = ''.join([f[1] for f in cls._efields[elfclass]])
+
+ ptrtype = 'I' if elfclass == 32 else 'Q'
+ pspec = pspec.replace('P', ptrtype)
+
+ return struct.calcsize(pspec)
+
+class ELFDissectUnion(ELFDissectData):
+ '''
+ Decode multiple structs in the same place.
+
+ Not currently used (and hence not tested.) Worked at some point but not
+ needed anymore and may be borked now. Remove this comment when using.
+ '''
+ def __init__(self, dataptr, parent = None):
+ self._dataptr = dataptr
+ self._parent = parent
+ self.members = []
+ for name, membercls in self.__class__.members:
+ item = membercls(dataptr, parent)
+ self.members.append(item)
+ setattr(self, name, item)
+
+ def __repr__(self):
+ return '<%s: %s>' % (self.__class__.__name__, ', '.join([repr(i) for i in self.members]))
+
+ @classmethod
+ def calcsize(cls, elfclass):
+ return max([member.calcsize(elfclass) for name, member in cls.members])
+
+#
+# wrappers for spans of ELF data
+#
+
+class ELFSubset(object):
+ '''
+ Common abstract base for section-level and file-level access.
+ '''
+
+ def __init__(self):
+ super().__init__()
+
+ self._pointers = WeakValueDictionary()
+
+ def __hash__(self):
+ return hash(self.name)
+
+ def __getitem__(self, k):
+ '''
+ Read data from slice
+
+ Subscript **must** be a slice; a simple index will not return a byte
+ but rather throw an exception. Valid slice syntaxes are defined by
+ the C module:
+
+ - `this[123:456]` - extract specific range
+ - `this[123:str]` - extract until null byte. The slice stop value is
+ the `str` type (or, technically, `unicode`.)
+ '''
+ return self._obj[k]
+
+ def getreloc(self, offset):
+ '''
+ Check for a relocation record at the specified offset.
+ '''
+ return self._obj.getreloc(offset)
+
+ def iter_data(self, scls, slice_ = slice(None)):
+ '''
+ Assume an array of structs present at a particular slice and decode
+
+ :param scls: ELFDissectData subclass for the struct
+ :param slice_: optional range specification
+ '''
+ size = scls.calcsize(self._elffile.elfclass)
+
+ offset = slice_.start or 0
+ stop = slice_.stop or self._obj.len
+ if stop < 0:
+ stop = self._obj.len - stop
+
+ while offset < stop:
+ yield scls(ELFData(self, offset, size))
+ offset += size
+
+ def pointer(self, offset):
+ '''
+ Try to dereference a pointer value
+
+ This checks whether there's a relocation at the given offset and
+ uses that; otherwise (e.g. in a non-PIE executable where the pointer
+ is already resolved by the linker) the data at the location is used.
+
+ :param offset: byte offset from beginning of section,
+ or virtual address in file
+ :returns: ELFData wrapping pointed-to object
+ '''
+
+ ptrsize = struct.calcsize(self.ptrtype)
+ data = struct.unpack(self.endian + self.ptrtype, self[offset:offset + ptrsize])[0]
+
+ reloc = self.getreloc(offset)
+ dstsect = None
+ if reloc:
+ # section won't be available in whole-file operation
+ dstsect = reloc.getsection(data)
+ addend = reloc.r_addend
+
+ if reloc.relative:
+ # old-style ELF REL instead of RELA, not well-tested
+ addend += data
+
+ if reloc.unresolved and reloc.symvalid:
+ return ELFUnresolved(reloc.symname, addend)
+ elif reloc.symvalid:
+ data = addend + reloc.st_value
+ else:
+ data = addend
+
+ # 0 could technically be a valid pointer for a shared library,
+ # since libraries may use 0 as default virtual start address (it'll
+ # be adjusted on loading)
+ # That said, if the library starts at 0, that's where the ELF header
+ # would be so it's still an invalid pointer.
+ if data == 0 and dstsect == None:
+ return ELFNull()
+
+ # wrap_data is different between file & section
+ return self._wrap_data(data, dstsect)
+
+class ELFDissectSection(ELFSubset):
+ '''
+ Access the contents of an ELF section like ``.text`` or ``.data``
+
+ :param elfwrap: ELFDissectFile wrapper for the file
+ :param idx: section index in section header table
+ :param section: section object from C module
+ '''
+
+ def __init__(self, elfwrap, idx, section):
+ super().__init__()
+
+ self._elfwrap = elfwrap
+ self._elffile = elfwrap._elffile
+ self._idx = idx
+ self._section = self._obj = section
+ self.name = section.name
+ self.ptrtype = elfwrap.ptrtype
+ self.endian = elfwrap.endian
+
+ def _wrap_data(self, data, dstsect):
+ if dstsect is None:
+ dstsect = self._elfwrap._elffile.get_section_addr(data)
+ offs = data - dstsect.sh_addr
+ dstsect = self._elfwrap.get_section(dstsect.idx)
+ return ELFData(dstsect, offs, None)
+
+class ELFDissectFile(ELFSubset):
+ '''
+ Access the contents of an ELF file.
+
+ Note that offsets for array subscript and relocation/pointer access are
+ based on the file's virtual address space and are NOT offsets to the
+ start of the file on disk!
+
+ (Shared libraries frequently have a virtual address space starting at 0,
+ but non-PIE executables have an architecture specific default loading
+ address like 0x400000 on x86.
+
+ :param filename: ELF file to open
+ '''
+
+ def __init__(self, filename):
+ super().__init__()
+
+ self.name = filename
+ self._elffile = self._obj = ELFFile(filename)
+ self._sections = {}
+
+ self.ptrtype = 'I' if self._elffile.elfclass == 32 else 'Q'
+ self.endian = '>' if self._elffile.bigendian else '<'
+
+ @property
+ def _elfwrap(self):
+ return self
+
+ def _wrap_data(self, data, dstsect):
+ return ELFData(self, data, None)
+
+ def get_section(self, secname):
+ '''
+ Look up section by name or index
+ '''
+ if isinstance(secname, int):
+ sh_idx = secname
+ section = self._elffile.get_section_idx(secname)
+ else:
+ section = self._elffile.get_section(secname)
+
+ if section is None:
+ return None
+
+ sh_idx = section.idx
+
+ if sh_idx not in self._sections:
+ self._sections[sh_idx] = ELFDissectSection(self, sh_idx, section)
+
+ return self._sections[sh_idx]
diff --git a/python/clippy/uidhash.py b/python/clippy/uidhash.py
new file mode 100644
index 0000000000..bf994d389e
--- /dev/null
+++ b/python/clippy/uidhash.py
@@ -0,0 +1,71 @@
+# xref unique ID hash calculation
+#
+# Copyright (C) 2020 David Lamparter for NetDEF, Inc.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; see the file COPYING; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+import struct
+from hashlib import sha256
+
def bititer(data, bits, startbit = True):
    '''
    Yield the contents of ``data`` as a stream of ``bits``-wide integers.

    ``data`` has to be a mutable sequence of byte values supporting
    ``pop(0)``; it is consumed (left empty) by iterating.  Bits are taken
    starting at the LSB of each byte.

    If startbit is True and ``data`` is not empty, the first value yielded
    has its top bit (bit ``bits - 1``) forced to '1' and only ``bits - 1``
    input bits are consumed for it.

    Bits still buffered once ``data`` has run empty are not yielded.
    '''
    mask = (1 << bits) - 1
    acc = 0         # buffered input bits, next output at the LSB
    pending = 0     # number of valid bits in acc

    if startbit and data:
        acc = data.pop(0)
        yield (acc & mask) | (1 << (bits - 1))
        # the start bit took one output position, so only bits - 1 input
        # bits are used up here
        acc >>= bits - 1
        pending = 9 - bits

    while data:
        while pending < bits:
            acc |= data.pop(0) << pending
            pending += 8
        yield acc & mask
        acc >>= bits
        pending -= bits
+
def base32c(data):
    '''
    Crockford base32 with extra dashes

    Encodes the beginning of ``data`` as up to 10 characters from the
    Crockford base32 alphabet, with a dash inserted after the fifth
    character ("XXXXX-XXXXX").  Input beyond what is needed for these 10
    characters is ignored.

    :param data: str (each character is encoded by its code point) or an
        iterable of byte values, e.g. bytes
    :return: encoded string
    '''
    chs = "0123456789ABCDEFGHJKMNPQRSTVWXYZ"

    # bititer() consumes its input, so always hand it a private list
    if isinstance(data, str):
        data = [ord(v) for v in data]
    else:
        data = list(data)

    out = []
    for i, bits in enumerate(bititer(data, 5)):
        if i == 10:
            break
        if i == 5:
            out.append('-')
        out.append(chs[bits])
    return ''.join(out)
+
def uidhash(filename, hashstr, hashu32a, hashu32b):
    '''
    xref Unique ID hash used in FRRouting

    :param filename: source file name; only its last two "/"-separated
        components take part in the hash
    :param hashstr: string that is hashed along with the file name
    :param hashu32a: first unsigned 32-bit value to hash
    :param hashu32b: second unsigned 32-bit value to hash
    :return: base32c() encoding of the SHA-256 digest over all of the above
    '''
    shortname = '/'.join(filename.split('/')[-2:])

    digest = sha256()
    digest.update(shortname.encode('UTF-8'))
    digest.update(hashstr.encode('UTF-8'))
    # the two integers are appended as big endian 32-bit values
    digest.update(struct.pack('>II', hashu32a, hashu32b))
    return base32c(digest.digest())
diff --git a/python/makefile.py b/python/makefile.py
index 10c73df72d..44658013b3 100644
--- a/python/makefile.py
+++ b/python/makefile.py
@@ -31,6 +31,10 @@ clippy_scan = mv["clippy_scan"].strip().split()
for clippy_file in clippy_scan:
assert clippy_file.endswith(".c")
# every program and libtool library listed in these automake variables gets
# an xref target
xref_targets = []
for varname in ("bin_PROGRAMS", "sbin_PROGRAMS", "lib_LTLIBRARIES", "module_LTLIBRARIES"):
    xref_targets += mv[varname].strip().split()
+
# check for files using clippy but not listed in clippy_scan
if args.dev_build:
basepath = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
@@ -125,6 +129,14 @@ for clippy_file in clippy_scan:
out_lines.append(clippydep.substitute(clippybase=clippy_file[:-2]))
out_lines.append("")
# "xrefs" lists one <target>.xref file per target; frr.xref depends on all
# of them
out_lines.extend([
    "xrefs = %s" % " ".join("%s.xref" % target for target in xref_targets),
    "frr.xref: $(xrefs)",
    "",
])

#frr.xref: $(bin_PROGRAMS) $(sbin_PROGRAMS) $(lib_LTLIBRARIES) $(module_LTLIBRARIES)
# $(AM_V_XRELFO) $(CLIPPY) $(top_srcdir)/python/xrelfo.py -o $@ $^

out_lines.append("")
out_lines.extend(bcdeps)
out_lines.append("")
bc_targets = []
diff --git a/python/runtests.py b/python/runtests.py
new file mode 100644
index 0000000000..bcf650b329
--- /dev/null
+++ b/python/runtests.py
@@ -0,0 +1,14 @@
+import pytest
+import sys
+import os
+
# stop right away, with a hint, if the C extension module cannot be imported
try:
    import _clippy
except ImportError:
    sys.stderr.write(
        'these tests need to be run with the _clippy C extension\n'
        'module available. Try running "clippy runtests.py ...".\n'
    )
    sys.exit(1)

# run pytest from the directory this script is located in, forwarding all
# command line arguments; pytest's result becomes the exit status
os.chdir(os.path.dirname(os.path.abspath(__file__)))
sys.exit(pytest.main(sys.argv[1:]))
diff --git a/python/test_xrelfo.py b/python/test_xrelfo.py
new file mode 100644
index 0000000000..3ae24ea7b3
--- /dev/null
+++ b/python/test_xrelfo.py
@@ -0,0 +1,65 @@
+# some basic tests for xrelfo & the python ELF machinery
+#
+# Copyright (C) 2020 David Lamparter for NetDEF, Inc.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; see the file COPYING; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+import sys
+import os
+import pytest
+from pprint import pprint
+
# Top of the source tree = parent of the directory containing this file.
# __file__ is made absolute first; with a bare relative file name the two
# dirname() calls would otherwise collapse to '' and the ELF files used by
# the tests below would be looked up relative to the current directory.
root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.join(root, 'python'))
+
+import xrelfo
+from clippy import elf, uidhash
+
def test_uidhash():
    '''
    Check the unique ID hash against a known value.
    '''
    expected = 'H7KJB-67TBH'
    actual = uidhash.uidhash("lib/test_xref.c", "logging call", 3, 0)
    assert actual == expected
+
def test_xrelfo_other():
    '''
    An XrefPtr built on a null or unresolved reference can be printed, but
    accessing its "xref" field raises AttributeError.
    '''
    samples = (
        elf.ELFNull(),
        elf.ELFUnresolved('somesym', 0),
    )

    for data in samples:
        dissect = xrelfo.XrefPtr(data)
        print(repr(dissect))

        with pytest.raises(AttributeError):
            dissect.xref
+
def test_xrelfo_obj():
    '''
    Load xrefs from an object file; subscripting the file by address is
    expected to fail with ELFAccessError there.
    '''
    xrelfo_ = xrelfo.Xrelfo()
    objpath = os.path.join(root, 'lib/.libs/zclient.o')
    edf = xrelfo_.load_elf(objpath, 'zclient.lo')
    xrefs = xrelfo_._xrefs

    with pytest.raises(elf.ELFAccessError):
        edf[0:4]

    first = xrefs[0]
    pprint(first)
    pprint(first._data)
+
def test_xrelfo_bin():
    '''
    Load xrefs from the shared library; the first 4 bytes of its address
    space are expected to be the ELF magic.
    '''
    xrelfo_ = xrelfo.Xrelfo()
    libpath = os.path.join(root, 'lib/.libs/libfrr.so')
    edf = xrelfo_.load_elf(libpath, 'libfrr.la')
    xrefs = xrelfo_._xrefs

    assert edf[0:4] == b'\x7fELF'

    first = xrefs[0]
    pprint(first)
    pprint(first._data)
diff --git a/python/tiabwarfo.py b/python/tiabwarfo.py
new file mode 100644
index 0000000000..265173e314
--- /dev/null
+++ b/python/tiabwarfo.py
@@ -0,0 +1,203 @@
+# FRR DWARF structure definition extractor
+#
+# Copyright (C) 2020 David Lamparter for NetDEF, Inc.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; see the file COPYING; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+import sys
+import os
+import subprocess
+import re
+import argparse
+import subprocess
+import json
+
# names of the structs passed to "pahole -C"
structs = [
    'xref',
    'xref_logmsg',
    'xref_threadsched',
    'xref_install_element',
    'xrefdata',
    'xrefdata_logmsg',
    'cmd_element',
]
+
def parse_pahole(pahole):
    '''
    Convert text printed by "pahole" into a dict of struct definitions.

    Example pahole output:
    $ pahole -C xref lib/.libs/libfrr.so
    struct xref {
        struct xrefdata * xrefdata; /* 0 8 */
        enum xref_type type; /* 8 4 */
        int line; /* 12 4 */
        const char * file; /* 16 8 */
        const char * func; /* 24 8 */

        /* size: 32, cachelines: 1, members: 5 */
        /* last cacheline: 32 bytes */
    };

    :param pahole: pahole output as str
    :return: ``{structname: {'fields': [field, ...]}}`` where each field is
        a dict with 'name', 'type' and, for arrays, 'array' (element count)
    :raises ValueError: if a line inside a struct is neither empty, a pure
        comment nor a field, or if a field does not start where the
        previous one ended (i.e. the struct contains padding)
    '''
    struct_re = re.compile(r'^struct ([^ ]+) \{([^\}]+)};', flags=re.M | re.S)
    field_re = re.compile(r'^\s*(?P<type>[^;\(]+)\s+(?P<name>[^;\[\]]+)(?:\[(?P<array>\d+)\])?;\s*\/\*(?P<comment>.*)\*\/\s*$')
    comment_re = re.compile(r'^\s*\/\*.*\*\/\s*$')

    out = {}

    for sname, body in struct_re.findall(pahole):
        fields = out.setdefault(sname, {}).setdefault('fields', [])

        # offset the next field has to start at if there is no padding
        next_offs = 0

        for line in body.strip().splitlines():
            if line.strip() == '':
                continue
            if comment_re.match(line) is not None:
                continue

            m = field_re.match(line)
            if m is None:
                raise ValueError('cannot process line: %s' % line)

            # the trailing comment on a field line is "<offset> <size>"
            offs, size = m.group('comment').strip().split()
            offs = int(offs)
            size = int(size)
            typ_ = m.group('type').strip()
            name = m.group('name')

            if name.startswith('(*'):
                # function pointer
                typ_ = typ_ + ' *'
                name = name[2:].split(')')[0]

            # offset & size are checked below but intentionally not stored
            field = {
                'name': name,
                'type': typ_,
            }
            if m.group('array'):
                field['array'] = int(m.group('array'))

            fields.append(field)
            if offs != next_offs:
                raise ValueError('%d padding bytes before struct %s.%s' % (offs - next_offs, sname, name))
            next_offs = offs + size

    return out

def extract(filename='lib/.libs/libfrr.so'):
    '''
    Convert output from "pahole" to JSON.

    Runs ``pahole -C <structs> <filename>`` and hands its output to
    parse_pahole(); see there for the result format and the ValueError
    conditions.

    :param filename: ELF file (with debug info) to run pahole on
    :return: dict ready to be dumped as JSON
    :raises subprocess.CalledProcessError: if pahole exits with an error
    '''
    pahole = subprocess.check_output(['pahole', '-C', ','.join(structs), filename]).decode('UTF-8')
    return parse_pahole(pahole)
+
class FieldApplicator(object):
    '''
    Fill ELFDissectStruct fields list from pahole/JSON

    Uses the JSON data created by extract() to fill in the struct fields
    in subclasses of ELFDissectStruct.

    Usage: create with the loaded JSON data, add() every class that needs
    its ``fields`` filled in, then call the object.

    :param data: dict as returned by extract() / loaded from the JSON file
    '''

    # only what we really need. add more as needed.
    packtypes = {
        'int': 'i',
        'uint8_t': 'B',
        'uint16_t': 'H',
        'uint32_t': 'I',
        'char': 's',
    }

    def __init__(self, data):
        self.data = data
        self.classes = []
        # struct name -> class, used to resolve "struct xyz" field types
        self.clsmap = {}

    def add(self, cls):
        '''
        Register a class; ``cls.struct`` names the struct it represents.
        '''
        self.classes.append(cls)
        self.clsmap[cls.struct] = cls

    def resolve(self, cls):
        '''
        Set ``cls.fields`` from the struct definition named by ``cls.struct``.

        Each entry is a tuple of the field name (after applying the
        optional ``cls.fieldrename`` mapping) followed by the type
        description: ``('P', target)`` for pointers, where target is str
        for ``char *``, the registered class for a pointer to a known
        struct and None otherwise; a 1-tuple with a format character or
        registered class for everything else.  Arrays of char become one
        ``'<n>s'`` field, other arrays are expanded to ``name_0``,
        ``name_1``, ...

        :raises ValueError: for an embedded struct that was not registered,
            or a type that cannot be decoded
        '''
        out = []

        fieldrename = getattr(cls, 'fieldrename', {})
        def mkname(n):
            return (fieldrename.get(n, n),)

        for field in self.data[cls.struct]['fields']:
            typs = field['type'].split()
            typs = [i for i in typs if i not in ['const']]

            # Field offsets are deliberately not used to insert padding:
            # that would break reuse of xrefstructs.json across 32bit &
            # 64bit platforms.

            # pretty hacky C types handling, but covers what we need

            ptrlevel = 0
            while typs[-1] == '*':
                typs.pop(-1)
                ptrlevel += 1

            if ptrlevel > 0:
                packtype = ('P', None)
                # only single-level pointers get a typed target
                if ptrlevel == 1:
                    if typs[0] == 'char':
                        packtype = ('P', str)
                    elif typs[0] == 'struct' and typs[1] in self.clsmap:
                        packtype = ('P', self.clsmap[typs[1]])
            elif typs[0] == 'enum':
                packtype = ('I',)
            elif typs[0] in self.packtypes:
                packtype = (self.packtypes[typs[0]],)
            elif typs[0] == 'struct':
                if typs[1] in self.clsmap:
                    packtype = (self.clsmap[typs[1]],)
                else:
                    raise ValueError('embedded struct %s not in extracted data' % (typs[1],))
            else:
                raise ValueError('cannot decode field %s in struct %s (%s)' % (
                        field['name'], cls.struct, field['type']))

            if 'array' in field and typs[0] == 'char':
                packtype = ('%ds' % field['array'],)
                out.append(mkname(field['name']) + packtype)
            elif 'array' in field:
                for i in range(0, field['array']):
                    out.append(mkname('%s_%d' % (field['name'], i)) + packtype)
            else:
                out.append(mkname(field['name']) + packtype)

        cls.fields = out

    def __call__(self):
        '''
        Resolve the fields of all registered classes.
        '''
        for cls in self.classes:
            self.resolve(cls)
+
def main():
    '''
    Command line entry point: extract struct definitions from an ELF file
    and save them as JSON.

    The output is written to "<output>.tmp" first and then renamed, so the
    output file is only replaced once the JSON has been written completely.
    '''
    parser = argparse.ArgumentParser(description = 'FRR DWARF structure extractor')
    parser.add_argument('-o', dest='output', type=str, help='write JSON output', default='python/xrefstructs.json')
    parser.add_argument('-i', dest='input', type=str, help='ELF file to read', default='lib/.libs/libfrr.so')
    opts = parser.parse_args()

    result = extract(opts.input)

    tmpname = opts.output + '.tmp'
    with open(tmpname, 'w') as fd:
        json.dump(result, fd, indent=2, sort_keys=True)
    os.rename(tmpname, opts.output)

if __name__ == '__main__':
    main()
diff --git a/python/xrefstructs.json b/python/xrefstructs.json
new file mode 100644
index 0000000000..25c48c9d56
--- /dev/null
+++ b/python/xrefstructs.json
@@ -0,0 +1,140 @@
+{
+ "cmd_element": {
+ "fields": [
+ {
+ "name": "string",
+ "type": "const char *"
+ },
+ {
+ "name": "doc",
+ "type": "const char *"
+ },
+ {
+ "name": "daemon",
+ "type": "int"
+ },
+ {
+ "name": "attr",
+ "type": "uint32_t"
+ },
+ {
+ "name": "func",
+ "type": "int *"
+ },
+ {
+ "name": "name",
+ "type": "const char *"
+ },
+ {
+ "name": "xref",
+ "type": "struct xref"
+ }
+ ]
+ },
+ "xref": {
+ "fields": [
+ {
+ "name": "xrefdata",
+ "type": "struct xrefdata *"
+ },
+ {
+ "name": "type",
+ "type": "enum xref_type"
+ },
+ {
+ "name": "line",
+ "type": "int"
+ },
+ {
+ "name": "file",
+ "type": "const char *"
+ },
+ {
+ "name": "func",
+ "type": "const char *"
+ }
+ ]
+ },
+ "xref_install_element": {
+ "fields": [
+ {
+ "name": "xref",
+ "type": "struct xref"
+ },
+ {
+ "name": "cmd_element",
+ "type": "const struct cmd_element *"
+ },
+ {
+ "name": "node_type",
+ "type": "enum node_type"
+ }
+ ]
+ },
+ "xref_logmsg": {
+ "fields": [
+ {
+ "name": "xref",
+ "type": "struct xref"
+ },
+ {
+ "name": "fmtstring",
+ "type": "const char *"
+ },
+ {
+ "name": "priority",
+ "type": "uint32_t"
+ },
+ {
+ "name": "ec",
+ "type": "uint32_t"
+ },
+ {
+ "name": "args",
+ "type": "const char *"
+ }
+ ]
+ },
+ "xref_threadsched": {
+ "fields": [
+ {
+ "name": "xref",
+ "type": "struct xref"
+ },
+ {
+ "name": "funcname",
+ "type": "const char *"
+ },
+ {
+ "name": "dest",
+ "type": "const char *"
+ },
+ {
+ "name": "thread_type",
+ "type": "uint32_t"
+ }
+ ]
+ },
+ "xrefdata": {
+ "fields": [
+ {
+ "name": "xref",
+ "type": "const struct xref *"
+ },
+ {
+ "array": 16,
+ "name": "uid",
+ "type": "char"
+ },
+ {
+ "name": "hashstr",
+ "type": "const char *"
+ },
+ {
+ "array": 2,
+ "name": "hashu32",
+ "type": "uint32_t"
+ }
+ ]
+ }
+} \ No newline at end of file
diff --git a/python/xrelfo.py b/python/xrelfo.py
new file mode 100644
index 0000000000..0ecd008579
--- /dev/null
+++ b/python/xrelfo.py
@@ -0,0 +1,424 @@
+# FRR ELF xref extractor
+#
+# Copyright (C) 2020 David Lamparter for NetDEF, Inc.
+#
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the Free
+# Software Foundation; either version 2 of the License, or (at your option)
+# any later version.
+#
+# This program is distributed in the hope that it will be useful, but WITHOUT
+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+# more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; see the file COPYING; if not, write to the Free Software
+# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+import sys
+import os
+import struct
+import re
+import traceback
+import json
+import argparse
+
+from clippy.uidhash import uidhash
+from clippy.elf import *
+from clippy import frr_top_src
+from tiabwarfo import FieldApplicator
+
# struct layouts come from a JSON file in the source tree; without it none
# of the classes below can be set up, so bail out with an explanation
try:
    with open(os.path.join(frr_top_src, 'python', 'xrefstructs.json'), 'r') as fd:
        xrefstructs = json.load(fd)
except FileNotFoundError:
    sys.stderr.write(
        '\n'
        'The "xrefstructs.json" file (created by running tiabwarfo.py with the pahole\n'
        'tool available) could not be found. It should be included with the sources.\n'
    )
    sys.exit(1)
+
# xref type codes; these are constants that need to be kept in sync
# manually...  They are used below as keys of Xref.containers.

XREFT_THREADSCHED = 0x100
XREFT_LOGMSG = 0x200
XREFT_DEFUN = 0x300
XREFT_INSTALL_ELEMENT = 0x301

# LOG_*
# prios is indexed with (priority & 7), priovals is looked up with
# (priority & 0x30) when dumping log message xrefs
priovals = {}
prios = list('012EWNID')
+
+
class XrelfoJson(object):
    '''
    Do-nothing defaults for the dump / check / to_dict methods of xref
    container classes.
    '''

    def dump(self):
        '''print the item; nothing by default'''
        return None

    def check(self, wopt):
        '''generator of warnings for the item; yields nothing by default'''
        yield from ()

    def to_dict(self, refs):
        '''add the item to the output dict; nothing by default'''
        return None
+
class Xref(ELFDissectStruct, XrelfoJson):
    '''
    The common "xref" struct that the specific xref structs embed.
    '''

    struct = 'xref'
    fieldrename = {'type': 'typ'}

    # xref type code -> class of the struct embedding this xref
    containers = {}

    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        self._container = None
        # let the xrefdata (if there is one) know which xref points at it
        if self.xrefdata:
            self.xrefdata.ref_from(self, self.typ)

    def container(self):
        '''
        Return the struct embedding this xref, as selected by its type code.

        The result is remembered; None is returned if no class is
        registered in ``containers`` for the type code.
        '''
        if self._container is None and self.typ in self.containers:
            wrapcls = self.containers[self.typ]
            self._container = self.container_of(wrapcls, 'xref')
        return self._container

    def check(self, *args, **kwargs):
        '''
        Forward to the container's check(), if container() has produced one.
        '''
        if not self._container:
            return
        yield from self._container.check(*args, **kwargs)
+
+
class Xrefdata(ELFDissectStruct):
    '''
    The "xrefdata" struct, carrying the inputs for the unique ID.
    '''

    struct = 'xrefdata'

    # uid is all zeroes in the data loaded from ELF
    fieldrename = {'uid': '_uid'}

    def ref_from(self, xref, typ):
        '''
        Remember the xref referring to this xrefdata (``typ`` is not used).
        '''
        self.xref = xref

    @property
    def uid(self):
        '''
        Unique ID computed with uidhash(); None if there is no hashstr.
        '''
        if self.hashstr is not None:
            return uidhash(self.xref.file, self.hashstr, self.hashu32_0, self.hashu32_1)
        return None
+
class XrefPtr(ELFDissectStruct):
    '''
    A single pointer to an Xref.
    '''

    fields = [('xref', 'P', Xref)]
+
class XrefThreadSched(ELFDissectStruct, XrelfoJson):
    '''
    The "xref_threadsched" struct; uses the XrelfoJson defaults throughout.
    '''

    struct = 'xref_threadsched'

Xref.containers[XREFT_THREADSCHED] = XrefThreadSched
+
class XrefLogmsg(ELFDissectStruct, XrelfoJson):
    '''
    The "xref_logmsg" struct describing one log message call.
    '''

    struct = 'xref_logmsg'

    def _warn_fmt(self, text):
        '''
        Yield one ((file, line), message) tuple for a warning text.

        The first line of ``text`` goes on the "file:line:" line together
        with the function name, remaining lines follow on lines of their
        own.
        '''
        xref = self.xref
        headline, *rest = text.split('\n')
        message = '%s:%d: %s (in %s())%s\n' % (
            xref.file, xref.line, headline, xref.func,
            ''.join('\n' + extra for extra in rest))
        yield ((xref.file, xref.line), message)

    # (regex, message); applied to the format string
    fmt_regexes = [
        (re.compile(r'([\n\t]+)'), 'error: log message contains tab or newline'),
    #   (re.compile(r'^(\s+)'), 'warning: log message starts with whitespace'),
        (re.compile(r'^((?:warn(?:ing)?|error):\s*)', re.I), 'warning: log message starts with severity'),
    ]
    # (regex, message, condition); applied to the argument string if
    # condition(self) holds
    arg_regexes = [
    # the (?<![\?:] ) avoids warning for x ? inet_ntop(...) : "(bla)"
        (re.compile(r'((?<![\?:] )inet_ntop\s*\(\s*(?:[AP]F_INET|2)\s*,)'), 'cleanup: replace inet_ntop(AF_INET, ...) with %pI4', lambda s: True),
        (re.compile(r'((?<![\?:] )inet_ntop\s*\(\s*(?:[AP]F_INET6|10)\s*,)'), 'cleanup: replace inet_ntop(AF_INET6, ...) with %pI6', lambda s: True),
        (re.compile(r'((?<![\?:] )inet_ntoa)'), 'cleanup: replace inet_ntoa(...) with %pI4', lambda s: True),
        (re.compile(r'((?<![\?:] )ipaddr2str)'), 'cleanup: replace ipaddr2str(...) with %pIA', lambda s: True),
        (re.compile(r'((?<![\?:] )prefix2str)'), 'cleanup: replace prefix2str(...) with %pFX', lambda s: True),
        (re.compile(r'((?<![\?:] )prefix_mac2str)'), 'cleanup: replace prefix_mac2str(...) with %pEA', lambda s: True),
        (re.compile(r'((?<![\?:] )sockunion2str)'), 'cleanup: replace sockunion2str(...) with %pSU', lambda s: True),

    #   (re.compile(r'^(\s*__(?:func|FUNCTION|PRETTY_FUNCTION)__\s*)'), 'error: debug message starts with __func__', lambda s: (s.priority & 7 == 7) ),
    ]

    def check(self, wopt):
        '''
        Yield warnings for this log message.

        :param wopt: object with ``Wlog_format`` and ``Wlog_args``
            attributes selecting the checks to run
        '''
        def fmt_msg(rex, itext):
            # plain excerpt unless stderr is a terminal; on a terminal the
            # parts matched by the regex's group are highlighted
            if not sys.stderr.isatty():
                return repr(itext)[1:-1]

            pieces = []
            # split() with one group: odd positions are the matched parts
            for pos, piece in enumerate(rex.split(itext)):
                shown = repr(piece)[1:-1]
                if (pos % 2) == 1:
                    shown = '\033[41;37;1m%s\033[m' % shown
                pieces.append(shown)
            return ''.join(pieces)

        if wopt.Wlog_format:
            for rex, msg in self.fmt_regexes:
                if rex.search(self.fmtstring):
                    excerpt = fmt_msg(rex, self.fmtstring)
                    yield from self._warn_fmt('%s: "%s"' % (msg, excerpt))

        if wopt.Wlog_args:
            for rex, msg, cond in self.arg_regexes:
                if cond(self) and rex.search(self.args):
                    excerpt = fmt_msg(rex, self.args)
                    yield from self._warn_fmt('%s:\n\t"%s",\n\t%s' % (msg, repr(self.fmtstring)[1:-1], excerpt))

    def dump(self):
        '''
        Print a one-line summary of the log message.
        '''
        xref = self.xref
        location = '%s:%d %s()' % (xref.file, xref.line, xref.func)
        print('%-60s %s%s %-25s [EC %d] %s' % (
            location,
            prios[self.priority & 7],
            priovals.get(self.priority & 0x30, ' '),
            xref.xrefdata.uid, self.ec, self.fmtstring))

    def to_dict(self, xrelfo):
        '''
        Append this log message to ``xrelfo['refs'][<unique ID>]``.
        '''
        jsobj = {key: getattr(self.xref, key) for key in ('file', 'line', 'func')}
        if self.ec != 0:
            jsobj['ec'] = self.ec
        jsobj['fmtstring'] = self.fmtstring
        jsobj['args'] = self.args
        # the low 3 bits are the priority, 0x10 and 0x20 are flag bits
        jsobj['priority'] = self.priority & 7
        jsobj['type'] = 'logmsg'
        jsobj['binary'] = self._elfsect._elfwrap.orig_filename

        flags = [name
                 for bit, name in ((0x10, 'errno'), (0x20, 'getaddrinfo'))
                 if self.priority & bit]
        if flags:
            jsobj['flags'] = flags

        xrelfo['refs'].setdefault(self.xref.xrefdata.uid, []).append(jsobj)

Xref.containers[XREFT_LOGMSG] = XrefLogmsg
+
class CmdElement(ELFDissectStruct, XrelfoJson):
    '''
    The "cmd_element" struct describing one CLI command definition.
    '''

    struct = 'cmd_element'

    # names for known attr values; other values are output unchanged
    cmd_attrs = { 0: None, 1: 'deprecated', 2: 'hidden'}

    def to_dict(self, xrelfo):
        '''
        Record the command under ``xrelfo['cli'][<name>][<binary>]``.
        '''
        percmd = xrelfo['cli'].setdefault(self.name, {})
        jsobj = percmd.setdefault(self._elfsect._elfwrap.orig_filename, {})

        jsobj['string'] = self.string
        jsobj['doc'] = self.doc

        # an attr of None (= value 0) is left out of the output entirely
        attr = self.cmd_attrs.get(self.attr, self.attr)
        if attr is None:
            jsobj.pop('attr', None)
        else:
            jsobj['attr'] = attr

        jsobj['defun'] = {key: getattr(self.xref, key) for key in ('file', 'line', 'func')}

Xref.containers[XREFT_DEFUN] = CmdElement
+
class XrefInstallElement(ELFDissectStruct, XrelfoJson):
    '''
    The "xref_install_element" struct: one installation of a CLI command
    on a node.
    '''

    struct = 'xref_install_element'

    def to_dict(self, xrelfo):
        '''
        Append this installation to the "nodes" list kept under
        ``xrelfo['cli'][<command name>][<binary>]``.
        '''
        percmd = xrelfo['cli'].setdefault(self.cmd_element.name, {})
        jsobj = percmd.setdefault(self._elfsect._elfwrap.orig_filename, {})

        where = {key: getattr(self.xref, key) for key in ('file', 'line', 'func')}
        jsobj.setdefault('nodes', []).append({
            'node': self.node_type,
            'install': where,
        })

Xref.containers[XREFT_INSTALL_ELEMENT] = XrefInstallElement
+
# shove in field defs: register every struct class, then fill in their
# "fields" from the loaded JSON data
fieldapply = FieldApplicator(xrefstructs)
for _xrefcls in (Xref, Xrefdata, XrefLogmsg, XrefThreadSched, CmdElement, XrefInstallElement):
    fieldapply.add(_xrefcls)
fieldapply()
+
+
class Xrelfo(dict):
    '''
    Collection of xrefs gathered from any number of input files.

    The dict itself is the JSON-serializable result with two keys:
    'refs' (unique ID -> list of reference dicts) and 'cli' (command name
    -> dict of per-binary information).  The Xref objects loaded from ELF
    files are additionally kept in ``_xrefs`` for check().
    '''

    def __init__(self):
        super().__init__({
            'refs': {},
            'cli': {},
        })
        self._xrefs = []

    @staticmethod
    def _libtool_target(filename):
        '''
        Return the path of the file a libtool .la/.lo file refers to.

        Uses the first non-empty ``library_names`` entry (located in the
        ".libs" subdirectory) or ``pic_object`` value, whichever comes
        first in the file.

        :raises ValueError: if the file has no usable entry
        '''
        dirname = os.path.dirname(filename)

        with open(filename, 'r') as fd:
            for line in fd:
                line = line.strip()
                if line.startswith('#') or line == '' or '=' not in line:
                    continue

                var, val = line.split('=', 1)
                if var not in ['library_names', 'pic_object']:
                    continue
                if val.startswith("'") or val.startswith('"'):
                    val = val[1:-1]

                if var == 'pic_object':
                    return os.path.join(dirname, val)

                names = val.split()
                if not names:
                    # library_names='' - nothing usable on this line
                    continue
                return os.path.join(dirname, '.libs', names[0])

        raise ValueError('could not process libtool file "%s"' % filename)

    def load_file(self, filename):
        '''
        Load xrefs from a file, detecting what kind of file it is.

        Handles libtool .la/.lo files (by loading the file they refer to),
        ELF files, JSON files previously written by this tool, and "#!"
        scripts (for which the file of the same name in the ".libs"
        subdirectory is tried instead).

        :raises ValueError: if the file type cannot be determined or a
            libtool file cannot be processed
        '''
        orig_filename = filename
        if filename.endswith('.la') or filename.endswith('.lo'):
            filename = self._libtool_target(filename)

        while True:
            with open(filename, 'rb') as fd:
                hdr = fd.read(4)

            if hdr == b'\x7fELF':
                self.load_elf(filename, orig_filename)
                return

            if hdr[:2] == b'#!':
                path, name = os.path.split(filename)
                filename = os.path.join(path, '.libs', name)
                continue

            if hdr[:1] == b'{':
                with open(filename, 'r') as fd:
                    self.load_json(fd)
                return

            raise ValueError('cannot determine file type for %s' % (filename))

    def load_elf(self, filename, orig_filename):
        '''
        Load all xrefs from an ELF file.

        The array of xref pointers is located through the "FRRouting" /
        "XREF" ELF note if there is one, through the "xref_array" section
        otherwise.

        :param filename: ELF file to read
        :param orig_filename: name to record as "binary" in the output
        :return: the ELFDissectFile created for the file
        :raises ValueError: if the file has neither note nor section
        '''
        edf = ELFDissectFile(filename)
        edf.orig_filename = orig_filename

        note = edf._elffile.find_note('FRRouting', 'XREF')
        if note is not None:
            endian = '>' if edf._elffile.bigendian else '<'
            mem = edf._elffile[note]
            # the note holds two pointer-sized values; presumably each is
            # relative to its own position within the note, hence the
            # extra 8 / 4 added for the second one - TODO confirm
            if edf._elffile.elfclass == 64:
                start, end = struct.unpack(endian + 'QQ', mem)
                start += note.start
                end += note.start + 8
            else:
                start, end = struct.unpack(endian + 'II', mem)
                start += note.start
                end += note.start + 4

            ptrs = edf.iter_data(XrefPtr, slice(start, end))

        else:
            xrefarray = edf.get_section('xref_array')
            if xrefarray is None:
                raise ValueError('file has neither xref note nor xref_array section')

            ptrs = xrefarray.iter_data(XrefPtr)

        for ptr in ptrs:
            if ptr.xref is None:
                print('NULL xref')
                continue
            self._xrefs.append(ptr.xref)

            container = ptr.xref.container()
            if container is None:
                continue
            container.to_dict(self)

        return edf

    def load_json(self, fd):
        '''
        Merge xrefs from JSON previously written by this tool.

        References already present under the same unique ID are not added
        a second time; CLI entries are merged per command with update().

        :param fd: file object to read the JSON from
        :return: the loaded JSON data
        '''
        data = json.load(fd)
        for uid, items in data['refs'].items():
            myitems = self['refs'].setdefault(uid, [])
            for item in items:
                if item in myitems:
                    continue
                myitems.append(item)

        for cmd, items in data['cli'].items():
            self['cli'].setdefault(cmd, {}).update(items)

        return data

    def check(self, checks):
        '''
        Yield the warnings of all xrefs loaded from ELF files.

        :param checks: passed through to each xref's check()
        '''
        for xref in self._xrefs:
            yield from xref.check(checks)
+
def main():
    '''
    Command line entry point: parse the arguments and run _main(), under
    cProfile if --profile was given.
    '''
    parser = argparse.ArgumentParser(description = 'FRR xref ELF extractor')
    parser.add_argument('-o', dest='output', type=str, help='write JSON output')
    parser.add_argument('--out-by-file', type=str, help='write by-file JSON output')
    parser.add_argument('-Wlog-format', action='store_const', const=True)
    parser.add_argument('-Wlog-args', action='store_const', const=True)
    parser.add_argument('--profile', action='store_const', const=True)
    parser.add_argument('binaries', metavar='BINARY', nargs='+', type=str, help='files to read (ELF files or libtool objects)')
    args = parser.parse_args()

    if not args.profile:
        _main(args)
        return

    import cProfile
    cProfile.runctx('_main(args)', globals(), {'args': args}, sort='cumtime')
+
def _main(args):
    '''
    Load all input files, print warnings and write the JSON outputs.

    Files that fail to load are reported on stderr with a traceback and do
    not stop processing of the remaining files, but make the program exit
    with status 1 before any output is written.

    :param args: parsed command line arguments, see main()
    '''
    errors = 0
    xrelfo = Xrelfo()

    for fn in args.binaries:
        try:
            xrelfo.load_file(fn)
        except Exception:
            # not a bare "except": Ctrl-C and sys.exit() need to get through
            errors += 1
            sys.stderr.write('while processing %s:\n' % (fn))
            traceback.print_exc()

    # the individual checks look at the -W options themselves
    if any(option.startswith('W') for option in dir(args)):
        # each item is ((file, line), message); sort by location
        checks = sorted(xrelfo.check(args))
        sys.stderr.write(''.join([c[-1] for c in checks]))

    refs = xrelfo['refs']

    # highlight unique IDs that are shared by different format strings
    for k, v in refs.items():
        strs = {i['fmtstring'] for i in v}
        if len(strs) != 1:
            print('\033[31;1m%s\033[m' % k)

    out = xrelfo

    # same references again, grouped by source file and sorted by line
    outbyfile = {}
    for uid, locs in refs.items():
        for loc in locs:
            filearray = outbyfile.setdefault(loc['file'], [])
            loc = dict(loc)
            del loc['file']
            filearray.append(loc)

    for k in outbyfile.keys():
        outbyfile[k] = sorted(outbyfile[k], key=lambda x: x['line'])

    if errors:
        sys.exit(1)

    # write to a temporary file and rename, so that the output files are
    # only replaced once they have been written completely
    if args.output:
        with open(args.output + '.tmp', 'w') as fd:
            json.dump(out, fd, indent=2, sort_keys=True)
        os.rename(args.output + '.tmp', args.output)

    if args.out_by_file:
        with open(args.out_by_file + '.tmp', 'w') as fd:
            json.dump(outbyfile, fd, indent=2, sort_keys=True)
        os.rename(args.out_by_file + '.tmp', args.out_by_file)

if __name__ == '__main__':
    main()