diff options
| -rwxr-xr-x | configure.ac | 14 | ||||
| -rw-r--r-- | debian/control | 1 | ||||
| -rw-r--r-- | lib/clippy.h | 2 | ||||
| -rw-r--r-- | lib/command_py.c | 2 | ||||
| -rw-r--r-- | lib/elf_py.c | 1301 | ||||
| -rw-r--r-- | lib/subdir.am | 4 | 
6 files changed, 1323 insertions, 1 deletions
diff --git a/configure.ac b/configure.ac index f3d1f38986..139fca7c42 100755 --- a/configure.ac +++ b/configure.ac @@ -794,6 +794,20 @@ fi  #  AS_IF([test "$host" = "$build"], [ +  AC_CHECK_HEADER([gelf.h], [], [ +    AC_MSG_ERROR([libelf headers are required for building clippy.  (Host only when cross-compiling.)]) +  ]) +  AC_CHECK_LIB([elf], [elf_memory], [], [ +    AC_MSG_ERROR([libelf is required for building clippy.  (Host only when cross-compiling.)]) +  ]) + +  AC_CHECK_LIB([elf], [elf_getdata_rawchunk], [ +    AC_DEFINE([HAVE_ELF_GETDATA_RAWCHUNK], [1], [Have elf_getdata_rawchunk()]) +  ]) +  AC_CHECK_LIB([elf], [gelf_getnote], [ +    AC_DEFINE([HAVE_GELF_GETNOTE], [1], [Have gelf_getnote()]) +  ]) +    FRR_PYTHON_DEV  ], [    FRR_PYTHON diff --git a/debian/control b/debian/control index b9e96b55d0..7a08cbbdb0 100644 --- a/debian/control +++ b/debian/control @@ -13,6 +13,7 @@ Build-Depends: bison,                 install-info,                 libc-ares-dev,                 libcap-dev, +               libelf-dev,                 libjson-c-dev | libjson0-dev,                 libpam0g-dev | libpam-dev,                 libpcre3-dev, diff --git a/lib/clippy.h b/lib/clippy.h index be4db6e638..95af274106 100644 --- a/lib/clippy.h +++ b/lib/clippy.h @@ -20,6 +20,7 @@  #ifndef _FRR_CLIPPY_H  #define _FRR_CLIPPY_H +#include <stdbool.h>  #include <Python.h>  #ifdef __cplusplus @@ -28,6 +29,7 @@ extern "C" {  extern PyObject *clippy_parse(PyObject *self, PyObject *args);  extern PyMODINIT_FUNC command_py_init(void); +extern bool elf_py_init(PyObject *pymod);  #ifdef __cplusplus  } diff --git a/lib/command_py.c b/lib/command_py.c index 4ec116df33..7f19008fbf 100644 --- a/lib/command_py.c +++ b/lib/command_py.c @@ -345,5 +345,7 @@ PyMODINIT_FUNC command_py_init(void)  	PyModule_AddObject(pymod, "GraphNode", (PyObject *)&typeobj_graph_node);  	Py_INCREF(&typeobj_graph);  	PyModule_AddObject(pymod, "Graph", (PyObject *)&typeobj_graph); +	if (!elf_py_init(pymod)) +		initret(NULL);  	initret(pymod);  } diff --git a/lib/elf_py.c b/lib/elf_py.c new file mode 100644 index 0000000000..0d8ad76e1c --- /dev/null +++ b/lib/elf_py.c @@ -0,0 +1,1301 @@ +/* + * fast ELF file accessor + * Copyright (C) 2018-2020  David Lamparter for NetDEF, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; see the file COPYING; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* Note: this wrapper is intended to be used as build-time helper.  While + * it should be generally correct and proper, there may be the occasional + * memory leak or SEGV for things that haven't been well-tested. + *     _ + *    / \    This code is NOT SUITABLE FOR UNTRUSTED ELF FILES.  It's used + *   / ! \   in FRR to read files created by its own build.  Don't take it out + *  /_____\  of FRR and use it to parse random ELF files you found somewhere. + * + * If you're working with this code (or even reading it), you really need to + * read a bunch of the ELF specs.  There's no way around it, things in here + * just represent pieces of ELF pretty much 1:1.  Also, readelf & objdump are + * your friends. + * + * Required reading: + *   https://refspecs.linuxfoundation.org/elf/elf.pdf + *   https://refspecs.linuxfoundation.org/elf/x86_64-SysV-psABI.pdf + * Recommended reading: + *   https://github.com/ARM-software/abi-aa/releases/download/2020Q4/aaelf64.pdf + * + * The core ELF spec is *not* enough, you should read at least one of the + * processor specific (psABI) docs.  They define what & how relocations work. + * Luckily we don't need to care about the processor specifics since this only + * does data relocations, but without looking at the psABI, some things aren't + * quite clear. + */ + +/* the API of this module roughly follows a very small subset of the one + * provided by the python elfutils package, which unfortunately is painfully + * slow. + */ + +#define PY_SSIZE_T_CLEAN + +#include <Python.h> +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif +#include "structmember.h" +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <fcntl.h> + +#if defined(__sun__) && (__SIZEOF_POINTER__ == 4) +/* Solaris libelf bails otherwise ... */ +#undef _FILE_OFFSET_BITS +#define _FILE_OFFSET_BITS 32 +#endif + +#include <elf.h> +#include <libelf.h> +#include <gelf.h> + +#include "typesafe.h" +#include "jhash.h" +#include "clippy.h" + +static bool debug; + +#define debugf(...)                                                            \ +	do {                                                                   \ +		if (debug)                                                     \ +			fprintf(stderr, __VA_ARGS__);                          \ +	} while (0) + +/* Exceptions */ +static PyObject *ELFFormatError; +static PyObject *ELFAccessError; + +/* most objects can only be created as return values from one of the methods */ +static PyObject *refuse_new(PyTypeObject *type, PyObject *args, PyObject *kwds) +{ +	PyErr_SetString(PyExc_ValueError, +			"cannot create instances of this type"); +	return NULL; +} + +struct elfreloc; +struct elfsect; + +PREDECL_HASH(elfrelocs) + +/* ELFFile and ELFSection intentionally share some behaviour, particularly + * subscript[123:456] access to file data.  This is because relocatables + * (.o files) do things section-based, but linked executables/libraries do + * things file-based.  Having the two behave similar allows simplifying the + * Python code. + */ + +/* class ELFFile: + * + * overall entry point, instantiated by reading in an ELF file + */ +struct elffile { +	PyObject_HEAD + +	char *filename; +	char *mmap, *mmend; +	size_t len; +	Elf *elf; + +	/* note from here on there are several instances of +	 * +	 *   GElf_Something *x, _x; +	 * +	 * this is a pattern used by libelf's generic ELF routines; the _x +	 * field is used to create a copy of the ELF structure from the file +	 * with 32/64bit and endianness adjusted. +	 */ + +	GElf_Ehdr *ehdr, _ehdr; +	Elf_Scn *symtab; +	size_t nsym, symstridx; +	Elf_Data *symdata; + +	PyObject **sects; +	size_t n_sect; + +	struct elfrelocs_head dynrelocs; + +	int elfclass; +	bool bigendian; +	bool has_symbols; +}; + +/* class ELFSection: + * + * note that executables and shared libraries can have their section headers + * removed, though in practice this is only used as an obfuscation technique. + */ +struct elfsect { +	PyObject_HEAD + +	const char *name; +	struct elffile *ef; + +	GElf_Shdr _shdr, *shdr; +	Elf_Scn *scn; +	unsigned long idx, len; + +	struct elfrelocs_head relocs; +}; + +/* class ELFReloc: + * + * note: relocations in object files (.o) are section-based while relocations + * in executables and shared libraries are file-based. + * + * Whenever accessing something that is a pointer in the ELF file, the Python + * code needs to check for a relocation;  if the pointer is pointing to some + * unresolved symbol the file will generally contain 0 bytes.  The relocation + * will tell what the pointer is actually pointing to. + * + * This represents both static (.o file) and dynamic (.so/exec) relocations. + */ +struct elfreloc { +	PyObject_HEAD + +	struct elfrelocs_item elfrelocs_item; + +	struct elfsect *es; +	struct elffile *ef; + +	/* there's also old-fashioned GElf_Rel; we're converting that to +	 * GElf_Rela in elfsect_add_relocations() +	 */ +	GElf_Rela _rela, *rela; +	GElf_Sym _sym, *sym; +	size_t symidx; +	const char *symname; + +	/* documented below in python docstrings */ +	bool symvalid, unresolved, relative; +	unsigned long long st_value; +}; + +static int elfreloc_cmp(const struct elfreloc *a, const struct elfreloc *b); +static uint32_t elfreloc_hash(const struct elfreloc *reloc); + +DECLARE_HASH(elfrelocs, struct elfreloc, elfrelocs_item, +	     elfreloc_cmp, elfreloc_hash) + +static Elf_Scn *elf_find_addr(struct elffile *ef, uint64_t addr, size_t *idx); +static PyObject *elffile_secbyidx(struct elffile *w, Elf_Scn *scn, size_t idx); +static PyObject *elfreloc_getsection(PyObject *self, PyObject *args); +static PyObject *elfreloc_getaddend(PyObject *obj, void *closure); + +/* --- end of declarations -------------------------------------------------- */ + +/* + * class ELFReloc: + */ + +static const char elfreloc_doc[] = +	"Represents an ELF relocation record\n" +	"\n" +	"(struct elfreloc * in elf_py.c)"; + +#define member(name, type, doc)                                                \ +	{                                                                      \ +		(char *)#name, type, offsetof(struct elfreloc, name), READONLY,\ +		(char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)"       \ +	} +static PyMemberDef members_elfreloc[] = { +	member(symname, T_STRING, +	       "Name of symbol this relocation refers to.\n" +	       "\n" +	       "Will frequently be `None` in executables and shared libraries." +	), +	member(symvalid, T_BOOL, +	       "Target symbol has a valid type, i.e. not STT_NOTYPE"), +	member(unresolved, T_BOOL, +	       "Target symbol refers to an existing section"), +	member(relative, T_BOOL, +	       "Relocation is a REL (not RELA) record and thus relative."), +	member(st_value, T_ULONGLONG, +	       "Target symbol's value, if known\n\n" +	       "Will be zero for unresolved/external symbols."), +	{} +}; +#undef member + +static PyGetSetDef getset_elfreloc[] = { +	{ .name = (char *)"r_addend", .get = elfreloc_getaddend, .doc = +		(char *)"Relocation addend value"}, +	{} +}; + +static PyMethodDef methods_elfreloc[] = { +	{"getsection", elfreloc_getsection, METH_VARARGS, +		"Find relocation target's ELF section\n\n" +		"Args: address of relocatee (TODO: fix/remove?)\n" +		"Returns: ELFSection or None\n\n" +		"Not possible if section headers have been stripped."}, +	{} +}; + +static int elfreloc_cmp(const struct elfreloc *a, const struct elfreloc *b) +{ +	if (a->rela->r_offset < b->rela->r_offset) +		return -1; +	if (a->rela->r_offset > b->rela->r_offset) +		return 1; +	return 0; +} + +static uint32_t elfreloc_hash(const struct elfreloc *reloc) +{ +	return jhash(&reloc->rela->r_offset, sizeof(reloc->rela->r_offset), +		     0xc9a2b7f4); +} + +static struct elfreloc *elfrelocs_get(struct elfrelocs_head *head, +				      GElf_Addr offset) +{ +	struct elfreloc dummy; + +	dummy.rela = &dummy._rela; +	dummy.rela->r_offset = offset; +	return elfrelocs_find(head, &dummy); +} + +static PyObject *elfreloc_getsection(PyObject *self, PyObject *args) +{ +	struct elfreloc *w = (struct elfreloc *)self; +	long data; + +	if (!PyArg_ParseTuple(args, "k", &data)) +		return NULL; + +	if (!w->es) +		Py_RETURN_NONE; + +	if (w->symidx == 0) { +		size_t idx = 0; +		Elf_Scn *scn; + +		data = (w->relative ? data : 0) + w->rela->r_addend; +		scn = elf_find_addr(w->es->ef, data, &idx); +		if (!scn) +			Py_RETURN_NONE; +		return elffile_secbyidx(w->es->ef, scn, idx); +	} +	return elffile_secbyidx(w->es->ef, NULL, w->sym->st_shndx); +} + +static PyObject *elfreloc_getaddend(PyObject *obj, void *closure) +{ +	struct elfreloc *w = (struct elfreloc *)obj; + +	return Py_BuildValue("K", (unsigned long long)w->rela->r_addend); +} + +static PyObject *elfreloc_repr(PyObject *arg) +{ +	struct elfreloc *w = (struct elfreloc *)arg; + +	return PyUnicode_FromFormat("<ELFReloc @%lu %s+%lu>", +				    (unsigned long)w->rela->r_offset, +				    (w->symname && w->symname[0]) ? w->symname +						: "[0]", +				    (unsigned long)w->rela->r_addend); +} + +static void elfreloc_free(void *arg) +{ +	struct elfreloc *w = arg; + +	(void)w; +} + +static PyTypeObject typeobj_elfreloc = { +	PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFReloc", +	.tp_basicsize = sizeof(struct elfreloc), +	.tp_flags = Py_TPFLAGS_DEFAULT, +	.tp_doc = elfreloc_doc, +	.tp_new = refuse_new, +	.tp_free = elfreloc_free, +	.tp_repr = elfreloc_repr, +	.tp_members = members_elfreloc, +	.tp_methods = methods_elfreloc, +	.tp_getset = getset_elfreloc, +}; + +/* + * class ELFSection: + */ + +static const char elfsect_doc[] = +	"Represents an ELF section\n" +	"\n" +	"To access section contents, use subscript notation, e.g.\n" +	"  section[123:456]\n" +	"To read null terminated C strings, replace the end with str:\n" +	"  section[123:str]\n\n" +	"(struct elfsect * in elf_py.c)"; + +static PyObject *elfsect_getaddr(PyObject *self, void *closure); + +#define member(name, type, doc)                                                \ +	{                                                                      \ +		(char *)#name, type, offsetof(struct elfsect, name), READONLY, \ +		(char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)"       \ +	} +static PyMemberDef members_elfsect[] = { +	member(name, T_STRING, +	       "Section name, e.g. \".text\""), +	member(idx, T_ULONG, +	       "Section index in file"), +	member(len, T_ULONG, +	       "Section length in bytes"), +	{}, +}; +#undef member + +static PyGetSetDef getset_elfsect[] = { +	{ .name = (char *)"sh_addr", .get = elfsect_getaddr, .doc = +		(char *)"Section virtual address (mapped program view)"}, +	{} +}; + +static PyObject *elfsect_getaddr(PyObject *self, void *closure) +{ +	struct elfsect *w = (struct elfsect *)self; + +	return Py_BuildValue("K", (unsigned long long)w->shdr->sh_addr); +} + + +static PyObject *elfsect_getreloc(PyObject *self, PyObject *args) +{ +	struct elfsect *w = (struct elfsect *)self; +	struct elfreloc *relw; +	unsigned long offs; +	PyObject *ret; + +	if (!PyArg_ParseTuple(args, "k", &offs)) +		return NULL; + +	relw = elfrelocs_get(&w->relocs, offs + w->shdr->sh_addr); +	if (!relw) +		Py_RETURN_NONE; + +	ret = (PyObject *)relw; +	Py_INCREF(ret); +	return ret; +} + +static PyMethodDef methods_elfsect[] = { +	{"getreloc", elfsect_getreloc, METH_VARARGS, +		"Check for / get relocation at offset into section\n\n" +		"Args: byte offset into section to check\n" +		"Returns: ELFReloc or None"}, +	{} +}; + +static PyObject *elfsect_subscript(PyObject *self, PyObject *key) +{ +	Py_ssize_t start, stop, step, sllen; +	struct elfsect *w = (struct elfsect *)self; +	PySliceObject *slice; +	unsigned long offs, len = ~0UL; + +	if (!PySlice_Check(key)) { +		PyErr_SetString(PyExc_IndexError, +				"ELFSection subscript must be slice"); +		return NULL; +	} +	slice = (PySliceObject *)key; +	if (PyLong_Check(slice->stop)) { +		if (PySlice_GetIndicesEx(key, w->shdr->sh_size, +					 &start, &stop, &step, &sllen)) +			return NULL; + +		if (step != 1) { +			PyErr_SetString(PyExc_IndexError, +					"ELFSection subscript slice step must be 1"); +			return NULL; +		} +		if ((GElf_Xword)stop > w->shdr->sh_size) { +			PyErr_Format(ELFAccessError, +				     "access (%lu) beyond end of section %lu/%s (%lu)", +				     stop, w->idx, w->name, w->shdr->sh_size); +			return NULL; +		} + +		offs = start; +		len = sllen; +	} else { +		if (slice->stop != (void *)&PyUnicode_Type +		    || !PyLong_Check(slice->start)) { +			PyErr_SetString(PyExc_IndexError, "invalid slice"); +			return NULL; +		} + +		offs = PyLong_AsUnsignedLongLong(slice->start); +		len = ~0UL; +	} + +	offs += w->shdr->sh_offset; +	if (offs > w->ef->len) { +		PyErr_Format(ELFAccessError, +			     "access (%lu) beyond end of file (%lu)", +			     offs, w->ef->len); +		return NULL; +	} +	if (len == ~0UL) +		len = strnlen(w->ef->mmap + offs, w->ef->len - offs); + +	Py_ssize_t pylen = len; + +#if PY_MAJOR_VERSION >= 3 +	return Py_BuildValue("y#", w->ef->mmap + offs, pylen); +#else +	return Py_BuildValue("s#", w->ef->mmap + offs, pylen); +#endif +} + +static PyMappingMethods mp_elfsect = { +	.mp_subscript = elfsect_subscript, +}; + +static void elfsect_free(void *arg) +{ +	struct elfsect *w = arg; + +	(void)w; +} + +static PyObject *elfsect_repr(PyObject *arg) +{ +	struct elfsect *w = (struct elfsect *)arg; + +	return PyUnicode_FromFormat("<ELFSection %s>", w->name); +} + +static PyTypeObject typeobj_elfsect = { +	PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFSection", +	.tp_basicsize = sizeof(struct elfsect), +	.tp_flags = Py_TPFLAGS_DEFAULT, +	.tp_doc = elfsect_doc, +	.tp_new = refuse_new, +	.tp_free = elfsect_free, +	.tp_repr = elfsect_repr, +	.tp_as_mapping = &mp_elfsect, +	.tp_members = members_elfsect, +	.tp_methods = methods_elfsect, +	.tp_getset = getset_elfsect, +}; + +static void elfsect_add_relocations(struct elfsect *w, Elf_Scn *rel, +				    GElf_Shdr *relhdr) +{ +	size_t i, entries; +	Elf_Scn *symtab = elf_getscn(w->ef->elf, relhdr->sh_link); +	GElf_Shdr _symhdr, *symhdr = gelf_getshdr(symtab, &_symhdr); +	Elf_Data *symdata = elf_getdata(symtab, NULL); +	Elf_Data *reldata = elf_getdata(rel, NULL); + +	entries = relhdr->sh_size / relhdr->sh_entsize; +	for (i = 0; i < entries; i++) { +		struct elfreloc *relw; +		size_t symidx; +		GElf_Rela *rela; +		GElf_Sym *sym; + +		relw = (struct elfreloc *)typeobj_elfreloc.tp_alloc( +				&typeobj_elfreloc, 0); +		relw->es = w; + +		if (relhdr->sh_type == SHT_REL) { +			GElf_Rel _rel, *rel; + +			rel = gelf_getrel(reldata, i, &_rel); +			relw->rela = &relw->_rela; +			relw->rela->r_offset = rel->r_offset; +			relw->rela->r_info = rel->r_info; +			relw->rela->r_addend = 0; +			relw->relative = true; +		} else +			relw->rela = gelf_getrela(reldata, i, &relw->_rela); + +		rela = relw->rela; +		if (rela->r_offset < w->shdr->sh_addr +		    || rela->r_offset >= w->shdr->sh_addr + w->shdr->sh_size) +			continue; + +		symidx = relw->symidx = GELF_R_SYM(rela->r_info); +		sym = relw->sym = gelf_getsym(symdata, symidx, &relw->_sym); +		if (sym) { +			relw->symname = elf_strptr(w->ef->elf, symhdr->sh_link, +						   sym->st_name); +			relw->symvalid = GELF_ST_TYPE(sym->st_info) +					!= STT_NOTYPE; +			relw->unresolved = sym->st_shndx == SHN_UNDEF; +			relw->st_value = sym->st_value; +		} else { +			relw->symname = NULL; +			relw->symvalid = false; +			relw->unresolved = false; +			relw->st_value = 0; +		} + +		debugf("reloc @ %016llx sym %5llu %016llx %s\n", +		       (long long)rela->r_offset, (unsigned long long)symidx, +		       (long long)rela->r_addend, relw->symname); + +		elfrelocs_add(&w->relocs, relw); +	} +} + +/* + * bindings & loading code between ELFFile and ELFSection + */ + +static PyObject *elfsect_wrap(struct elffile *ef, Elf_Scn *scn, size_t idx, +			      const char *name) +{ +	struct elfsect *w; +	size_t i; + +	w = (struct elfsect *)typeobj_elfsect.tp_alloc(&typeobj_elfsect, 0); +	if (!w) +		return NULL; + +	w->name = name; +	w->ef = ef; +	w->scn = scn; +	w->shdr = gelf_getshdr(scn, &w->_shdr); +	w->len = w->shdr->sh_size; +	w->idx = idx; +	elfrelocs_init(&w->relocs); + +	for (i = 0; i < ef->ehdr->e_shnum; i++) { +		Elf_Scn *scn = elf_getscn(ef->elf, i); +		GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr); + +		if (shdr->sh_type != SHT_RELA && shdr->sh_type != SHT_REL) +			continue; +		if (shdr->sh_info && shdr->sh_info != idx) +			continue; +		elfsect_add_relocations(w, scn, shdr); +	} + +	return (PyObject *)w; +} + +static Elf_Scn *elf_find_section(struct elffile *ef, const char *name, +		size_t *idx) +{ +	size_t i; +	const char *secname; + +	for (i = 0; i < ef->ehdr->e_shnum; i++) { +		Elf_Scn *scn = elf_getscn(ef->elf, i); +		GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr); + +		secname = elf_strptr(ef->elf, ef->ehdr->e_shstrndx, +				     shdr->sh_name); +		if (strcmp(secname, name)) +			continue; +		if (idx) +			*idx = i; +		return scn; +	} +	return NULL; +} + +static Elf_Scn *elf_find_addr(struct elffile *ef, uint64_t addr, size_t *idx) +{ +	size_t i; + +	for (i = 0; i < ef->ehdr->e_shnum; i++) { +		Elf_Scn *scn = elf_getscn(ef->elf, i); +		GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr); + +		if (addr < shdr->sh_addr || +		    addr >= shdr->sh_addr + shdr->sh_size) +			continue; + +		if (idx) +			*idx = i; +		return scn; +	} +	return NULL; +} + +/* + * class ELFFile: + */ + +static const char elffile_doc[] = +	"Represents an ELF file\n" +	"\n" +	"Args: filename to load\n" +	"\n" +	"To access raw file contents, use subscript notation, e.g.\n" +	"  file[123:456]\n" +	"To read null terminated C strings, replace the end with str:\n" +	"  file[123:str]\n\n" +	"(struct elffile * in elf_py.c)"; + + +#define member(name, type, doc)                                                \ +	{                                                                      \ +		(char *)#name, type, offsetof(struct elffile, name), READONLY, \ +		(char *)doc "\n\n(\"" #name "\", " #type " in elf_py.c)"       \ +	} +static PyMemberDef members_elffile[] = { +	member(filename, T_STRING, +	       "Original file name as given when opening"), +	member(elfclass, T_INT, +	       "ELF class (architecture bit size)\n\n" +	       "Either 32 or 64, straight integer."), +	member(bigendian, T_BOOL, +	       "ELF file is big-endian\n\n" +	       "All internal ELF structures are automatically converted."), +	member(has_symbols, T_BOOL, +	       "A symbol section is present\n\n" +	       "Note: only refers to .symtab/SHT_SYMTAB section, not DT_SYMTAB" +	), +	{}, +}; +#undef member + +static PyObject *elffile_secbyidx(struct elffile *w, Elf_Scn *scn, size_t idx) +{ +	const char *name; +	PyObject *ret; + +	if (!scn) +		scn = elf_getscn(w->elf, idx); +	if (!scn || idx >= w->n_sect) +		Py_RETURN_NONE; + +	if (!w->sects[idx]) { +		GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr); + +		name = elf_strptr(w->elf, w->ehdr->e_shstrndx, shdr->sh_name); +		w->sects[idx] = elfsect_wrap(w, scn, idx, name); +	} + +	ret = w->sects[idx]; +	Py_INCREF(ret); +	return ret; +} + +static PyObject *elffile_get_section(PyObject *self, PyObject *args) +{ +	const char *name; +	struct elffile *w = (struct elffile *)self; +	Elf_Scn *scn; +	size_t idx = 0; + +	if (!PyArg_ParseTuple(args, "s", &name)) +		return NULL; + +	scn = elf_find_section(w, name, &idx); +	return elffile_secbyidx(w, scn, idx); +} + +static PyObject *elffile_get_section_addr(PyObject *self, PyObject *args) +{ +	unsigned long long addr; +	struct elffile *w = (struct elffile *)self; +	Elf_Scn *scn; +	size_t idx = 0; + +	if (!PyArg_ParseTuple(args, "K", &addr)) +		return NULL; + +	scn = elf_find_addr(w, addr, &idx); +	return elffile_secbyidx(w, scn, idx); +} + +static PyObject *elffile_get_section_idx(PyObject *self, PyObject *args) +{ +	unsigned long long idx; +	struct elffile *w = (struct elffile *)self; + +	if (!PyArg_ParseTuple(args, "K", &idx)) +		return NULL; + +	return elffile_secbyidx(w, NULL, idx); +} + +static PyObject *elffile_get_symbol(PyObject *self, PyObject *args) +{ +	const char *name, *symname; +	struct elffile *w = (struct elffile *)self; +	GElf_Sym _sym, *sym; +	size_t i; + +	if (!PyArg_ParseTuple(args, "s", &name)) +		return NULL; + +	for (i = 0; i < w->nsym; i++) { +		sym = gelf_getsym(w->symdata, i, &_sym); +		if (sym->st_name == 0) +			continue; +		symname = elf_strptr(w->elf, w->symstridx, sym->st_name); +		if (strcmp(symname, name)) +			continue; + +		PyObject *pysect; +		Elf_Scn *scn = elf_getscn(w->elf, sym->st_shndx); + +		if (scn) +			pysect = elffile_secbyidx(w, scn, sym->st_shndx); +		else { +			pysect = Py_None; +			Py_INCREF(pysect); +		} +		return Py_BuildValue("sKN", symname, +				(unsigned long long)sym->st_value, pysect); +	} +	Py_RETURN_NONE; +} + +static PyObject *elffile_getreloc(PyObject *self, PyObject *args) +{ +	struct elffile *w = (struct elffile *)self; +	struct elfreloc *relw; +	unsigned long offs; +	PyObject *ret; + +	if (!PyArg_ParseTuple(args, "k", &offs)) +		return NULL; + +	relw = elfrelocs_get(&w->dynrelocs, offs); +	if (!relw) +		Py_RETURN_NONE; + +	ret = (PyObject *)relw; +	Py_INCREF(ret); +	return ret; +} + +static PyObject *elffile_find_note(PyObject *self, PyObject *args) +{ +#if defined(HAVE_GELF_GETNOTE) && defined(HAVE_ELF_GETDATA_RAWCHUNK) +	const char *owner; +	const uint8_t *ids; +	GElf_Word id; +	struct elffile *w = (struct elffile *)self; +	size_t i; + +	if (!PyArg_ParseTuple(args, "ss", &owner, &ids)) +		return NULL; + +	if (strlen((char *)ids) != 4) { +		PyErr_SetString(PyExc_ValueError, +				"ELF note ID must be exactly 4-byte string"); +		return NULL; +	} +	if (w->bigendian) +		id = (ids[0] << 24) | (ids[1] << 16) | (ids[2] << 8) | ids[3]; +	else +		id = (ids[3] << 24) | (ids[2] << 16) | (ids[1] << 8) | ids[0]; + +	for (i = 0; i < w->ehdr->e_phnum; i++) { +		GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr); +		Elf_Data *notedata; +		size_t offset; + +		if (phdr->p_type != PT_NOTE) +			continue; + +		notedata = elf_getdata_rawchunk(w->elf, phdr->p_offset, +						phdr->p_filesz, ELF_T_NHDR); + +		GElf_Nhdr nhdr[1]; +		size_t nameoffs, dataoffs; + +		offset = 0; +		while ((offset = gelf_getnote(notedata, offset, nhdr, +					      &nameoffs, &dataoffs))) { +			if (phdr->p_offset + nameoffs >= w->len) +				continue; + +			const char *name = w->mmap + phdr->p_offset + nameoffs; + +			if (strcmp(name, owner)) +				continue; +			if (id != nhdr->n_type) +				continue; + +			PyObject *s, *e; + +			s = PyLong_FromUnsignedLongLong( +				phdr->p_vaddr + dataoffs); +			e = PyLong_FromUnsignedLongLong( +				phdr->p_vaddr + dataoffs + nhdr->n_descsz); +			return PySlice_New(s, e, NULL); +		} +	} +#endif +	Py_RETURN_NONE; +} + +static bool elffile_virt2file(struct elffile *w, GElf_Addr virt, +			      GElf_Addr *offs) +{ +	*offs = 0; + +	for (size_t i = 0; i < w->ehdr->e_phnum; i++) { +		GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr); + +		if (phdr->p_type != PT_LOAD) +			continue; + +		if (virt < phdr->p_vaddr +		    || virt >= phdr->p_vaddr + phdr->p_memsz) +			continue; + +		if (virt >= phdr->p_vaddr + phdr->p_filesz) +			return false; + +		*offs = virt - phdr->p_vaddr + phdr->p_offset; +		return true; +	} + +	return false; +} + +static PyObject *elffile_subscript(PyObject *self, PyObject *key) +{ +	Py_ssize_t start, stop, step; +	PySliceObject *slice; +	struct elffile *w = (struct elffile *)self; +	bool str = false; + +	if (!PySlice_Check(key)) { +		PyErr_SetString(PyExc_IndexError, +				"ELFFile subscript must be slice"); +		return NULL; +	} +	slice = (PySliceObject *)key; +	stop = -1; +	step = 1; +	if (PyLong_Check(slice->stop)) { +		start = PyLong_AsSsize_t(slice->start); +		if (PyErr_Occurred()) +			return NULL; +		if (slice->stop != Py_None) { +			stop = PyLong_AsSsize_t(slice->stop); +			if (PyErr_Occurred()) +				return NULL; +		} +		if (slice->step != Py_None) { +			step = PyLong_AsSsize_t(slice->step); +			if (PyErr_Occurred()) +				return NULL; +		} +	} else { +		if (slice->stop != (void *)&PyUnicode_Type +		    || !PyLong_Check(slice->start)) { +			PyErr_SetString(PyExc_IndexError, "invalid slice"); +			return NULL; +		} + +		str = true; +		start = PyLong_AsUnsignedLongLong(slice->start); +	} +	if (step != 1) { +		PyErr_SetString(PyExc_IndexError, +				"ELFFile subscript slice step must be 1"); +		return NULL; +	} + +	GElf_Addr xstart = start, xstop = stop; + +	for (size_t i = 0; i < w->ehdr->e_phnum; i++) { +		GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr); + +		if (phdr->p_type != PT_LOAD) +			continue; + +		if (xstart < phdr->p_vaddr +		    || xstart >= phdr->p_vaddr + phdr->p_memsz) +			continue; +		if (!str && (xstop < phdr->p_vaddr +		    || xstop > phdr->p_vaddr + phdr->p_memsz)) { +			PyErr_Format(ELFAccessError, +				     "access (%llu) beyond end of program header (%llu)", +				     (long long)xstop, +				     (long long)(phdr->p_vaddr + +						 phdr->p_memsz)); +			return NULL; +		} + +		xstart = xstart - phdr->p_vaddr + phdr->p_offset; + +		if (str) +			xstop = strlen(w->mmap + xstart); +		else +			xstop = xstop - phdr->p_vaddr + phdr->p_offset; + +		Py_ssize_t pylen = xstop - xstart; + +#if PY_MAJOR_VERSION >= 3 +		return Py_BuildValue("y#", w->mmap + xstart, pylen); +#else +		return Py_BuildValue("s#", w->mmap + xstart, pylen); +#endif +	}; + +	return PyErr_Format(ELFAccessError, +			    "virtual address (%llu) not found in program headers", +			    (long long)start); +} + +static PyMethodDef methods_elffile[] = { +	{"find_note", elffile_find_note, METH_VARARGS, +		"find specific note entry"}, +	{"getreloc", elffile_getreloc, METH_VARARGS, +		"find relocation"}, +	{"get_symbol", elffile_get_symbol, METH_VARARGS, +		"find symbol by name"}, +	{"get_section", elffile_get_section, METH_VARARGS, +		"find section by name"}, +	{"get_section_addr", elffile_get_section_addr, METH_VARARGS, +		"find section by address"}, +	{"get_section_idx", elffile_get_section_idx, METH_VARARGS, +		"find section by index"}, +	{} +}; + +static PyObject *elffile_load(PyTypeObject *type, PyObject *args, +			     PyObject *kwds); + +static void elffile_free(void *arg) +{ +	struct elffile *w = arg; + +	elf_end(w->elf); +	munmap(w->mmap, w->len); +	free(w->filename); +} + +static PyMappingMethods mp_elffile = { +	.mp_subscript = elffile_subscript, +}; + +static PyTypeObject typeobj_elffile = { +	PyVarObject_HEAD_INIT(NULL, 0).tp_name = "_clippy.ELFFile", +	.tp_basicsize = sizeof(struct elffile), +	.tp_flags = Py_TPFLAGS_DEFAULT, +	.tp_doc = elffile_doc, +	.tp_new = elffile_load, +	.tp_free = elffile_free, +	.tp_as_mapping = &mp_elffile, +	.tp_members = members_elffile, +	.tp_methods = methods_elffile, +}; + +static char *elfdata_strptr(Elf_Data *data, size_t offset) +{ +	char *p; + +	if (offset >= data->d_size) +		return NULL; + +	p = (char *)data->d_buf + offset; +	if (strnlen(p, data->d_size - offset) >= data->d_size - offset) +		return NULL; + +	return p; +} + +static void elffile_add_dynreloc(struct elffile *w, Elf_Data *reldata, +				 size_t entries, Elf_Data *symdata, +				 Elf_Data *strdata) +{ +	size_t i; + +	for (i = 0; i < entries; i++) { +		struct elfreloc *relw; +		size_t symidx; +		GElf_Rela *rela; +		GElf_Sym *sym; + +		relw = (struct elfreloc *)typeobj_elfreloc.tp_alloc( +				&typeobj_elfreloc, 0); +		relw->ef = w; + +		rela = relw->rela = gelf_getrela(reldata, i, &relw->_rela); +		symidx = relw->symidx = GELF_R_SYM(rela->r_info); +		sym = relw->sym = gelf_getsym(symdata, symidx, &relw->_sym); +		if (sym) { +			relw->symname = elfdata_strptr(strdata, sym->st_name); +			relw->symvalid = GELF_ST_TYPE(sym->st_info) +					!= STT_NOTYPE; +			relw->unresolved = sym->st_shndx == SHN_UNDEF; +			relw->st_value = sym->st_value; +		} else { +			relw->symname = NULL; +			relw->symvalid = false; +			relw->unresolved = false; +			relw->st_value = 0; +		} + +		debugf("dynreloc @ %016llx sym %5llu %016llx %s\n", +		       (long long)rela->r_offset, (unsigned long long)symidx, +		       (long long)rela->r_addend, relw->symname); + +		elfrelocs_add(&w->dynrelocs, relw); +	} + +} + +/* primary (only, really) entry point to anything in this module */ +static PyObject *elffile_load(PyTypeObject *type, PyObject *args, +			      PyObject *kwds) +{ +	const char *filename; +	static const char * const kwnames[] = {"filename", NULL}; +	struct elffile *w; +	struct stat st; +	int fd, err; + +	w = (struct elffile *)typeobj_elffile.tp_alloc(&typeobj_elffile, 0); +	if (!w) +		return NULL; + +	if (!PyArg_ParseTupleAndKeywords(args, kwds, "s", (char **)kwnames, +					 &filename)) +		return NULL; + +	w->filename = strdup(filename); +	fd = open(filename, O_RDONLY | O_NOCTTY); +	if (fd < 0 || fstat(fd, &st)) { +		PyErr_SetFromErrnoWithFilename(PyExc_OSError, filename); +		close(fd); +		goto out; +	} +	w->len = st.st_size; +	w->mmap = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED, fd, 0); +	if (!w->mmap) { +		PyErr_SetFromErrnoWithFilename(PyExc_IOError, filename); +		close(fd); +		goto out; +	} +	close(fd); +	w->mmend = w->mmap + st.st_size; + +	if (w->len < EI_NIDENT || memcmp(w->mmap, ELFMAG, SELFMAG)) { +		PyErr_SetString(ELFFormatError, "invalid ELF signature"); +		goto out; +	} + +	switch (w->mmap[EI_CLASS]) { +	case ELFCLASS32: +		w->elfclass = 32; +		break; +	case ELFCLASS64: +		w->elfclass = 64; +		break; +	default: +		PyErr_SetString(ELFFormatError, "invalid ELF class"); +		goto out; +	} +	switch (w->mmap[EI_DATA]) { +	case ELFDATA2LSB: +		w->bigendian = false; +		break; +	case ELFDATA2MSB: +		w->bigendian = true; +		break; +	default: +		PyErr_SetString(ELFFormatError, "invalid ELF byte order"); +		goto out; +	} + +	w->elf = elf_memory(w->mmap, w->len); +	if (!w->elf) +		goto out_elferr; +	w->ehdr = gelf_getehdr(w->elf, &w->_ehdr); +	if (!w->ehdr) +		goto out_elferr; + +	for (size_t i = 0; i < w->ehdr->e_shnum; i++) { +		Elf_Scn *scn = elf_getscn(w->elf, i); +		GElf_Shdr _shdr, *shdr = gelf_getshdr(scn, &_shdr); + +		if (shdr->sh_type == SHT_SYMTAB) { +			w->symtab = scn; +			w->nsym = shdr->sh_size / shdr->sh_entsize; +			w->symdata = elf_getdata(scn, NULL); +			w->symstridx = shdr->sh_link; +			break; +		} +	} +	w->has_symbols = w->symtab && w->symstridx; +	elfrelocs_init(&w->dynrelocs); + +#ifdef HAVE_ELF_GETDATA_RAWCHUNK +	for (size_t i = 0; i < w->ehdr->e_phnum; i++) { +		GElf_Phdr _phdr, *phdr = gelf_getphdr(w->elf, i, &_phdr); + +		if (phdr->p_type != PT_DYNAMIC) +			continue; + +		Elf_Data *dyndata = elf_getdata_rawchunk(w->elf, +				phdr->p_offset, phdr->p_filesz, ELF_T_DYN); + +		GElf_Addr dynrela = 0, symtab = 0, strtab = 0; +		size_t dynrelasz = 0, dynrelaent = 0, strsz = 0; +		GElf_Dyn _dyn, *dyn; + +		for (size_t j = 0;; j++) { +			dyn = gelf_getdyn(dyndata, j, &_dyn); + +			if (dyn->d_tag == DT_NULL) +				break; + +			switch (dyn->d_tag) { +			case DT_SYMTAB: +				symtab = dyn->d_un.d_ptr; +				break; + +			case DT_STRTAB: +				strtab = dyn->d_un.d_ptr; +				break; +			case DT_STRSZ: +				strsz = dyn->d_un.d_val; +				break; + +			case DT_RELA: +				dynrela = dyn->d_un.d_ptr; +				break; +			case DT_RELASZ: +				dynrelasz = dyn->d_un.d_val; +				break; +			case DT_RELAENT: +				dynrelaent = dyn->d_un.d_val; +				break; + +			case DT_RELSZ: +				if (dyn->d_un.d_val) +					fprintf(stderr, +						"WARNING: ignoring non-empty DT_REL!\n"); +				break; +			} +		} + +		GElf_Addr offset; +		Elf_Data *symdata = NULL, *strdata = NULL, *reladata = NULL; + +		if (elffile_virt2file(w, symtab, &offset)) +			symdata = elf_getdata_rawchunk(w->elf, offset, +						       w->len - offset, +						       ELF_T_SYM); +		if (elffile_virt2file(w, strtab, &offset)) +			strdata = elf_getdata_rawchunk(w->elf, offset, +						       strsz, ELF_T_BYTE); + +		if (!dynrela || !dynrelasz || !dynrelaent) +			continue; + +		if (!elffile_virt2file(w, dynrela, &offset)) +			continue; + +		debugf("dynrela @%llx/%llx+%llx\n", (long long)dynrela, +		       (long long)offset, (long long)dynrelasz); + +		reladata = elf_getdata_rawchunk(w->elf, offset, dynrelasz, +						ELF_T_RELA); +		elffile_add_dynreloc(w, reladata, dynrelasz / dynrelaent, +				     symdata, strdata); +	} +#endif + +	w->sects = calloc(sizeof(PyObject *), w->ehdr->e_shnum); +	w->n_sect = w->ehdr->e_shnum; + +	return (PyObject *)w; + +out_elferr: +	err = elf_errno(); + +	PyErr_Format(ELFFormatError, "libelf error %d: %s", +		     err, elf_errmsg(err)); +out: +	if (w->elf) +		elf_end(w->elf); +	free(w->filename); +	return NULL; +} + +static PyObject *elfpy_debug(PyObject *self, PyObject *args) +{ +	int arg; + +	if (!PyArg_ParseTuple(args, "p", &arg)) +		return NULL; + +	debug = arg; + +	Py_RETURN_NONE; +} + +static PyMethodDef methods_elfpy[] = { +	{"elfpy_debug", elfpy_debug, METH_VARARGS, "switch debuging on/off"}, +	{} +}; + +bool elf_py_init(PyObject *pymod) +{ +	if (PyType_Ready(&typeobj_elffile) < 0) +		return false; +	if (PyType_Ready(&typeobj_elfsect) < 0) +		return false; +	if (PyType_Ready(&typeobj_elfreloc) < 0) +		return false; +	if (elf_version(EV_CURRENT) == EV_NONE) +		return false; + +#if PY_MAJOR_VERSION >= 3 && PY_MINOR_VERSION >= 5 +	PyModule_AddFunctions(pymod, methods_elfpy); +#else +	(void)methods_elfpy; +#endif + +	ELFFormatError = PyErr_NewException("_clippy.ELFFormatError", +					    PyExc_ValueError, NULL); +	PyModule_AddObject(pymod, "ELFFormatError", ELFFormatError); +	ELFAccessError = PyErr_NewException("_clippy.ELFAccessError", +					    PyExc_IndexError, NULL); +	PyModule_AddObject(pymod, "ELFAccessError", ELFAccessError); + +	Py_INCREF(&typeobj_elffile); +	PyModule_AddObject(pymod, "ELFFile", (PyObject *)&typeobj_elffile); +	Py_INCREF(&typeobj_elfsect); +	PyModule_AddObject(pymod, "ELFSection", (PyObject *)&typeobj_elfsect); +	Py_INCREF(&typeobj_elfreloc); +	PyModule_AddObject(pymod, "ELFReloc", (PyObject *)&typeobj_elfreloc); +	return true; +} diff --git a/lib/subdir.am b/lib/subdir.am index d5ffa08546..15c6492140 100644 --- a/lib/subdir.am +++ b/lib/subdir.am @@ -410,7 +410,7 @@ lib_grammar_sandbox_LDADD = \  lib_clippy_CPPFLAGS = $(AM_CPPFLAGS) -D_GNU_SOURCE -DBUILDING_CLIPPY  lib_clippy_CFLAGS = $(PYTHON_CFLAGS) -lib_clippy_LDADD = $(PYTHON_LIBS) $(UST_LIBS) +lib_clippy_LDADD = $(PYTHON_LIBS) $(UST_LIBS) -lelf  lib_clippy_LDFLAGS = -export-dynamic  lib_clippy_SOURCES = \  	lib/jhash.c \ @@ -420,9 +420,11 @@ lib_clippy_SOURCES = \  	lib/command_parse.y \  	lib/command_py.c \  	lib/defun_lex.l \ +	lib/elf_py.c \  	lib/graph.c \  	lib/libfrr_trace.c \  	lib/memory.c \ +	lib/typesafe.c \  	lib/vector.c \  	# end  | 
