Implement tree-shaking for Python sources

This commit is contained in:
Justine Tunney 2021-09-05 01:20:03 -07:00
parent 81287b7ec0
commit 44c87b83ff
110 changed files with 899 additions and 1922 deletions

View file

@ -25,6 +25,7 @@
#include "libc/fmt/conv.h"
#include "libc/log/check.h"
#include "libc/log/log.h"
#include "libc/macros.internal.h"
#include "libc/runtime/gc.internal.h"
#include "libc/runtime/runtime.h"
#include "libc/sysv/consts/o.h"
@ -34,11 +35,15 @@
#include "third_party/python/Include/compile.h"
#include "third_party/python/Include/fileutils.h"
#include "third_party/python/Include/import.h"
#include "third_party/python/Include/longobject.h"
#include "third_party/python/Include/marshal.h"
#include "third_party/python/Include/opcode.h"
#include "third_party/python/Include/pydebug.h"
#include "third_party/python/Include/pylifecycle.h"
#include "third_party/python/Include/pymacro.h"
#include "third_party/python/Include/pythonrun.h"
#include "third_party/python/Include/tupleobject.h"
#include "third_party/python/Include/unicodeobject.h"
#include "tool/build/lib/elfwriter.h"
#include "tool/build/lib/stripcomponents.h"
/* clang-format off */
@ -66,14 +71,43 @@ FLAGS\n\
-h help\n\
\n"
bool binonly;
int optimize;
char *pyfile;
char *outpath;
bool nocompress;
uint64_t image_base;
int strip_components;
const char *path_prefix;
const char *const kIgnoredModules[] = /* sorted */ {
"__main__", /* todo? */
"_dummy_threading", /* evil code */
"_overlapped", /* don't recognize if sys.platform yet */
"_scproxy", /* don't recognize if sys.platform yet */
"_thread",
"_winapi", /* don't recognize if sys.platform yet */
"asyncio.test_support", /* todo??? */
"builtins",
"concurrent.futures", /* asyncio's fault */
"concurrent.futures._base",
"concurrent.futures.process",
"concurrent.futures.thread",
"java", /* don't recognize if sys.platform yet */
"java.lang", /* don't recognize if sys.platform yet */
"msvcrt", /* don't recognize if sys.platform yet */
"nt", /* os module don't care */
"os.path", /* todo */
"sys",
"test.test_warnings.data", /* todo */
};
static bool binonly;
static int optimize;
static char *pyfile;
static char *outpath;
static struct stat st;
static PyObject *code;
static PyObject *marsh;
static bool nocompress;
static uint64_t image_base;
static int strip_components;
static struct ElfWriter *elf;
static const char *path_prefix;
static struct timespec timestamp;
static void
GetOpts(int argc, char *argv[])
@ -91,7 +125,7 @@ GetOpts(int argc, char *argv[])
path_prefix = optarg;
break;
case 'C':
strip_components = atoi(optarg);
strip_components = atoi(optarg);
break;
case 'b':
image_base = strtoul(optarg, NULL, 0);
@ -117,6 +151,10 @@ GetOpts(int argc, char *argv[])
exit(1);
}
pyfile = argv[optind];
if (stat(pyfile, &st)) {
perror(pyfile);
exit(1);
}
if (!outpath) {
outpath = xstrcat(pyfile, ".o");
}
@ -134,16 +172,6 @@ Dotify(char *s)
return s;
}
static void
Yoink(struct ElfWriter *elf, const char *symbol)
{
elfwriter_align(elf, 1, 0);
elfwriter_startsection(elf, ".yoink", SHT_PROGBITS,
SHF_ALLOC | SHF_EXECINSTR);
elfwriter_yoink(elf, symbol);
elfwriter_finishsection(elf);
}
static char *
GetZipFile(void)
{
@ -179,52 +207,131 @@ GetModName(bool *ispkg)
return mod;
}
int
main(int argc, char *argv[])
static char *
GetModSibling(const char *rel)
{
char *p, *mod, *sib;
assert(*rel);
mod = Dotify(xstripexts(StripComponents(pyfile, strip_components)));
if ((p = strrchr(mod, '.'))) {
p[1] = 0;
sib = xstrcat(mod, rel);
} else {
sib = strdup(rel);
}
free(mod);
return sib;
}
static bool
IsIgnoredModule(const char *s)
{
int m, l, r, x;
l = 0;
r = ARRAYLEN(kIgnoredModules) - 1;
while (l <= r) {
m = (l + r) >> 1;
x = strcmp(s, kIgnoredModules[m]);
if (x < 0) {
r = m - 1;
} else if (x > 0) {
l = m + 1;
} else {
return true;
}
}
return false;
}
static void
YoinkImports(void)
{
size_t i, n;
bool istry, isrel;
char *p, *mod, *symbol;
int x, y, op, arg, extra;
PyObject *co_code, *co_names, *co_consts, *name, *cnst;
co_code = PyObject_GetAttrString(code, "co_code");
co_names = PyObject_GetAttrString(code, "co_names");
co_consts = PyObject_GetAttrString(code, "co_consts");
n = PyBytes_GET_SIZE(co_code);
p = PyBytes_AS_STRING(co_code);
for (istry = isrel = extra = i = 0; i + 2 <= n; i += 2) {
x = p[i + 0] & 255;
y = p[i + 1] & 255;
op = x;
arg = y | extra;
extra = op == EXTENDED_ARG ? y << 8 : 0;
switch (op) {
case SETUP_EXCEPT:
istry = true;
break;
case POP_BLOCK:
istry = false;
break;
case LOAD_CONST:
if (PyLong_Check((cnst = PyTuple_GetItem(co_consts, arg)))) {
isrel = PyLong_AsLong(cnst) == 1;
}
break;
case IMPORT_NAME:
name = PyUnicode_AsUTF8String(PyTuple_GetItem(co_names, arg));
if (*PyBytes_AS_STRING(name)) { /* TODO: empty? */
if (isrel) {
mod = GetModSibling(PyBytes_AS_STRING(name));
} else {
mod = strdup(PyBytes_AS_STRING(name));
}
if (!IsIgnoredModule(mod)) {
symbol = xstrcat("pyc:", mod);
elfwriter_yoink(elf, symbol, istry ? STB_WEAK : STB_GLOBAL);
free(symbol);
}
free(mod);
}
Py_DECREF(name);
break;
default:
break;
}
}
Py_DECREF(co_consts);
Py_DECREF(co_names);
Py_DECREF(co_code);
}
static int
Objectify(void)
{
char t[12];
bool ispkg;
ssize_t rc;
struct stat st;
struct ElfWriter *elf;
struct timespec timestamp;
PyObject *code, *marshalled;
size_t i, pysize, pycsize, marsize;
char *s, *pydata, *pycdata, *mardata, *zipfile, *zipdir, *synfile, *modname;
char header[12];
size_t pysize, pycsize, marsize;
char *pydata, *pycdata, *mardata, *zipfile, *zipdir, *synfile, *modname;
ShowCrashReports();
GetOpts(argc, argv);
marshalled = 0;
if (stat(pyfile, &st) == -1) perror(pyfile), exit(1);
CHECK_NOTNULL((pydata = gc(xslurp(pyfile, &pysize))));
Py_NoUserSiteDirectory++;
Py_NoSiteFlag++;
Py_IgnoreEnvironmentFlag++;
Py_FrozenFlag++;
Py_SetProgramName(gc(utf8toutf32(argv[0], -1, 0)));
_Py_InitializeEx_Private(1, 0);
zipdir = gc(GetZipDir());
zipfile = gc(GetZipFile());
synfile = gc(GetSynFile());
modname = gc(GetModName(&ispkg));
code = Py_CompileStringExFlags(pydata, synfile, Py_file_input, NULL, optimize);
if (!code) goto error;
marshalled = PyMarshal_WriteObjectToString(code, Py_MARSHAL_VERSION);
Py_CLEAR(code);
if (!marshalled) goto error;
memset(&timestamp, 0, sizeof(timestamp));
assert(PyBytes_CheckExact(marshalled));
mardata = PyBytes_AS_STRING(marshalled);
marsize = PyBytes_GET_SIZE(marshalled);
CHECK_NOTNULL((pydata = gc(xslurp(pyfile, &pysize))));
if ((!(code = Py_CompileStringExFlags(pydata, synfile, Py_file_input,
NULL, optimize)) ||
!(marsh = PyMarshal_WriteObjectToString(code, Py_MARSHAL_VERSION)))) {
PyErr_Print();
return 1;
}
assert(PyBytes_CheckExact(marsh));
mardata = PyBytes_AS_STRING(marsh);
marsize = PyBytes_GET_SIZE(marsh);
WRITE16LE(header+0, 3379); /* Python 3.6rc1 */
WRITE16LE(header+2, READ16LE("\r\n"));
WRITE32LE(header+4, timestamp.tv_sec);
WRITE32LE(header+8, marsize);
pycsize = sizeof(header) + marsize;
pycdata = gc(malloc(pycsize));
memcpy(pycdata, header, sizeof(header));
memcpy(pycdata + sizeof(header), mardata, marsize);
elf = elfwriter_open(outpath, 0644);
elfwriter_cargoculting(elf);
WRITE16LE(t+0, 3379); /* Python 3.6rc1 */
WRITE16LE(t+2, READ16LE("\r\n"));
WRITE32LE(t+4, timestamp.tv_sec);
WRITE32LE(t+8, marsize);
pycsize = sizeof(t) + marsize;
pycdata = gc(malloc(pycsize));
memcpy(pycdata, t, sizeof(t));
memcpy(pycdata + sizeof(t), mardata, marsize);
if (ispkg) {
elfwriter_zip(elf, zipdir, zipdir, strlen(zipdir),
pydata, pysize, 040755, timestamp, timestamp,
@ -238,14 +345,33 @@ main(int argc, char *argv[])
elfwriter_zip(elf, gc(xstrcat("pyc:", modname)), gc(xstrcat(zipfile, 'c')),
strlen(zipfile) + 1, pycdata, pycsize, st.st_mode, timestamp,
timestamp, timestamp, nocompress, image_base);
Yoink(elf, "__zip_start");
elfwriter_align(elf, 1, 0);
elfwriter_startsection(elf, ".yoink", SHT_PROGBITS,
SHF_ALLOC | SHF_EXECINSTR);
YoinkImports();
if (path_prefix && !strchr(modname, '.')) {
elfwriter_yoink(elf, gc(xstrcat(path_prefix, "/")), STB_GLOBAL);
}
elfwriter_yoink(elf, "__zip_start", STB_GLOBAL);
elfwriter_finishsection(elf);
elfwriter_close(elf);
Py_CLEAR(marshalled);
Py_Finalize();
return 0;
error:
PyErr_Print();
Py_Finalize();
if (marshalled) Py_DECREF(marshalled);
return 1;
}
int
main(int argc, char *argv[])
{
int rc;
GetOpts(argc, argv);
Py_NoUserSiteDirectory++;
Py_NoSiteFlag++;
Py_IgnoreEnvironmentFlag++;
Py_FrozenFlag++;
Py_SetProgramName(gc(utf8toutf32(argv[0], -1, 0)));
_Py_InitializeEx_Private(1, 0);
rc = Objectify();
Py_XDECREF(code);
Py_XDECREF(marsh);
Py_Finalize();
return rc;
}