From d4be3083408ec405811f0ea61b01c109ed299247 Mon Sep 17 00:00:00 2001 From: ahgamut <41098605+ahgamut@users.noreply.github.com> Date: Sun, 17 Apr 2022 06:45:22 +0530 Subject: [PATCH] help gcc compile main loop better based on https://www.corsix.org/content/micro-optimisations-can-speed-up-cpython --- third_party/python/Python/ceval.c | 111 ++++++++++++++---------------- 1 file changed, 53 insertions(+), 58 deletions(-) diff --git a/third_party/python/Python/ceval.c b/third_party/python/Python/ceval.c index 67bf71c4e..e208fe428 100644 --- a/third_party/python/Python/ceval.c +++ b/third_party/python/Python/ceval.c @@ -741,7 +741,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) int opcode; /* Current opcode */ int oparg; /* Current opcode argument, if any */ enum why_code why; /* Reason for block stack unwind */ - PyObject **fastlocals, **freevars; + PyObject **freevars; PyObject *retval = NULL; /* Return value */ PyThreadState *tstate = PyThreadState_GET(); PyCodeObject *co; @@ -888,7 +888,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) /* Code access macros */ /* The integer overflow is checked by an assertion below. */ -#define INSTR_OFFSET() (sizeof(_Py_CODEUNIT) * (int)(next_instr - first_instr)) +#define INSTR_OFFSET() ( (char*)next_instr - (char*)first_instr ) #define NEXTOPARG() do { \ _Py_CODEUNIT word = *next_instr; \ opcode = _Py_OPCODE(word); \ @@ -982,7 +982,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) /* Local variable macros */ -#define GETLOCAL(i) (fastlocals[i]) +#define GETLOCAL(i) (f->f_localsplus[i]) /* The SETLOCAL() macro must not DECREF the local variable in-place and then store the new value; it must copy the old value to a temporary @@ -1068,7 +1068,6 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) co = f->f_code; names = co->co_names; consts = co->co_consts; - fastlocals = f->f_localsplus; freevars = f->f_localsplus + co->co_nlocals; assert(PyBytes_Check(co->co_code)); assert(PyBytes_GET_SIZE(co->co_code) <= INT_MAX); @@ -1269,7 +1268,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) FAST_DISPATCH(); LIKELY_TARGET(LOAD_FAST) { - PyObject *value = GETLOCAL(oparg); + PyObject *value = GETLOCAL(((unsigned)oparg)); if (UNLIKELY(value == NULL)) { format_exc_check_arg(PyExc_UnboundLocalError, UNBOUNDLOCAL_ERROR_MSG, @@ -1283,7 +1282,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) PREDICTED(LOAD_CONST); TARGET(LOAD_CONST) { - PyObject *value = GETITEM(consts, oparg); + PyObject *value = GETITEM(consts, ((unsigned)oparg)); Py_INCREF(value); PUSH(value); FAST_DISPATCH(); @@ -1292,7 +1291,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) PREDICTED(STORE_FAST); TARGET(STORE_FAST) { PyObject *value = POP(); - SETLOCAL(oparg, value); + SETLOCAL(((unsigned)oparg), value); FAST_DISPATCH(); } @@ -1573,7 +1572,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) TARGET(LIST_APPEND) { PyObject *v = POP(); - PyObject *list = PEEK(oparg); + PyObject *list = PEEK((size_t)(unsigned)oparg); int err; err = PyList_Append(list, v); Py_DECREF(v); @@ -1778,7 +1777,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) _Py_IDENTIFIER(__annotations__); PyObject *ann_dict; PyObject *ann = POP(); - PyObject *name = GETITEM(names, oparg); + PyObject *name = GETITEM(names, ((unsigned)oparg)); int err; if (f->f_locals == NULL) { PyErr_Format(PyExc_SystemError, @@ -2209,7 +2208,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) } TARGET(STORE_NAME) { - PyObject *name = GETITEM(names, oparg); + PyObject *name = GETITEM(names, ((unsigned)oparg)); PyObject *v = POP(); PyObject *ns = f->f_locals; int err; @@ -2230,7 +2229,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) } TARGET(DELETE_NAME) { - PyObject *name = GETITEM(names, oparg); + PyObject *name = GETITEM(names, ((unsigned)oparg)); PyObject *ns = f->f_locals; int err; if (ns == NULL) { @@ -2252,7 +2251,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) TARGET(UNPACK_SEQUENCE) { PyObject *seq = POP(), *item, **items; if (PyTuple_CheckExact(seq) && - PyTuple_GET_SIZE(seq) == oparg) { + PyTuple_GET_SIZE(seq) == ((unsigned)oparg)) { items = ((PyTupleObject *)seq)->ob_item; while (oparg--) { item = items[oparg]; @@ -2260,7 +2259,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) PUSH(item); } } else if (PyList_CheckExact(seq) && - PyList_GET_SIZE(seq) == oparg) { + PyList_GET_SIZE(seq) == (unsigned)oparg) { items = ((PyListObject *)seq)->ob_item; while (oparg--) { item = items[oparg]; @@ -2269,7 +2268,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) } } else if (unpack_iterable(seq, oparg, -1, stack_pointer + oparg)) { - STACKADJ(oparg); + STACKADJ(((unsigned)oparg)); } else { /* unpack_iterable() raised an exception */ Py_DECREF(seq); @@ -2295,7 +2294,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) } TARGET(STORE_ATTR) { - PyObject *name = GETITEM(names, oparg); + PyObject *name = GETITEM(names, ((unsigned)oparg)); PyObject *owner = TOP(); PyObject *v = SECOND(); int err; @@ -2309,7 +2308,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) } TARGET(DELETE_ATTR) { - PyObject *name = GETITEM(names, oparg); + PyObject *name = GETITEM(names, ((unsigned)oparg)); PyObject *owner = POP(); int err; err = PyObject_SetAttr(owner, name, (PyObject *)NULL); @@ -2320,7 +2319,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) } TARGET(STORE_GLOBAL) { - PyObject *name = GETITEM(names, oparg); + PyObject *name = GETITEM(names, ((unsigned)oparg)); PyObject *v = POP(); int err; err = PyDict_SetItem(f->f_globals, name, v); @@ -2331,7 +2330,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) } TARGET(DELETE_GLOBAL) { - PyObject *name = GETITEM(names, oparg); + PyObject *name = GETITEM(names, ((unsigned)oparg)); int err; err = PyDict_DelItem(f->f_globals, name); if (err != 0) { @@ -2343,7 +2342,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) } TARGET(LOAD_NAME) { - PyObject *name = GETITEM(names, oparg); + PyObject *name = GETITEM(names, ((unsigned)oparg)); PyObject *locals = f->f_locals; PyObject *v; if (locals == NULL) { @@ -2394,7 +2393,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) } TARGET(LOAD_GLOBAL) { - PyObject *name = GETITEM(names, oparg); + PyObject *name = GETITEM(names, ((unsigned)oparg)); PyObject *v; if (PyDict_CheckExact(f->f_globals) && PyDict_CheckExact(f->f_builtins)) @@ -2438,9 +2437,9 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) } TARGET(DELETE_FAST) { - PyObject *v = GETLOCAL(oparg); + PyObject *v = GETLOCAL(((unsigned)oparg)); if (v != NULL) { - SETLOCAL(oparg, NULL); + SETLOCAL(((unsigned)oparg), NULL); DISPATCH(); } format_exc_check_arg( @@ -2541,7 +2540,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) } TARGET(BUILD_TUPLE) { - PyObject *tup = PyTuple_New(oparg); + PyObject *tup = PyTuple_New(((unsigned)oparg)); if (tup == NULL) goto error; while (--oparg >= 0) { @@ -2553,7 +2552,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) } TARGET(BUILD_LIST) { - PyObject *list = PyList_New(oparg); + PyObject *list = PyList_New(((unsigned)oparg)); if (list == NULL) goto error; while (--oparg >= 0) { @@ -2619,7 +2618,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) err = PySet_Add(set, item); Py_DECREF(item); } - STACKADJ(-oparg); + STACKADJ(-(size_t)(unsigned)oparg); if (err != 0) { Py_DECREF(set); goto error; @@ -2649,7 +2648,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) TARGET(BUILD_MAP) { Py_ssize_t i; - PyObject *map = _PyDict_NewPresized((Py_ssize_t)oparg); + PyObject *map = _PyDict_NewPresized((size_t)(unsigned)oparg); if (map == NULL) goto error; for (i = oparg; i > 0; i--) { @@ -2732,12 +2731,12 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) PyObject *map; PyObject *keys = TOP(); if (!PyTuple_CheckExact(keys) || - PyTuple_GET_SIZE(keys) != (Py_ssize_t)oparg) { + PyTuple_GET_SIZE(keys) != (Py_ssize_t)(size_t)(unsigned)oparg) { PyErr_SetString(PyExc_SystemError, "bad BUILD_CONST_KEY_MAP keys argument"); goto error; } - map = _PyDict_NewPresized((Py_ssize_t)oparg); + map = _PyDict_NewPresized((size_t)(unsigned)oparg); if (map == NULL) { goto error; } @@ -2793,7 +2792,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) for (i = oparg; i > 0; i--) { PyObject *arg = PEEK(i); if (_PyDict_MergeEx(sum, arg, 2) < 0) { - PyObject *func = PEEK(2 + oparg); + PyObject *func = PEEK(2 + (size_t)(unsigned)oparg); if (PyErr_ExceptionMatches(PyExc_AttributeError)) { format_kwargs_mapping_error(func, arg); } @@ -2852,7 +2851,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) } TARGET(LOAD_ATTR) { - PyObject *name = GETITEM(names, oparg); + PyObject *name = GETITEM(names, (unsigned)oparg); PyObject *owner = TOP(); PyObject *res = PyObject_GetAttr(owner, name); Py_DECREF(owner); @@ -2877,7 +2876,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) } TARGET(IMPORT_NAME) { - PyObject *name = GETITEM(names, oparg); + PyObject *name = GETITEM(names, (unsigned)oparg); PyObject *fromlist = POP(); PyObject *level = TOP(); PyObject *res; @@ -2914,7 +2913,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) } TARGET(IMPORT_FROM) { - PyObject *name = GETITEM(names, oparg); + PyObject *name = GETITEM(names, (unsigned)oparg); PyObject *from = TOP(); PyObject *res; res = import_from(from, name); @@ -2925,7 +2924,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) } TARGET(JUMP_FORWARD) { - JUMPBY(oparg); + JUMPBY((unsigned)oparg); FAST_DISPATCH(); } @@ -2939,7 +2938,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) } if (cond == Py_False) { Py_DECREF(cond); - JUMPTO(oparg); + JUMPTO((unsigned)oparg); FAST_DISPATCH(); } err = PyObject_IsTrue(cond); @@ -2947,7 +2946,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) if (err > 0) err = 0; else if (err == 0) - JUMPTO(oparg); + JUMPTO((unsigned)oparg); else goto error; DISPATCH(); @@ -2963,14 +2962,14 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) } if (cond == Py_True) { Py_DECREF(cond); - JUMPTO(oparg); + JUMPTO((unsigned)oparg); FAST_DISPATCH(); } err = PyObject_IsTrue(cond); Py_DECREF(cond); if (err > 0) { err = 0; - JUMPTO(oparg); + JUMPTO((unsigned)oparg); } else if (err == 0) ; @@ -2988,7 +2987,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) FAST_DISPATCH(); } if (cond == Py_False) { - JUMPTO(oparg); + JUMPTO((unsigned)oparg); FAST_DISPATCH(); } err = PyObject_IsTrue(cond); @@ -2998,7 +2997,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) err = 0; } else if (err == 0) - JUMPTO(oparg); + JUMPTO((unsigned)oparg); else goto error; DISPATCH(); @@ -3013,13 +3012,13 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) FAST_DISPATCH(); } if (cond == Py_True) { - JUMPTO(oparg); + JUMPTO((unsigned)oparg); FAST_DISPATCH(); } err = PyObject_IsTrue(cond); if (err > 0) { err = 0; - JUMPTO(oparg); + JUMPTO((unsigned)oparg); } else if (err == 0) { STACKADJ(-1); @@ -3032,7 +3031,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) PREDICTED(JUMP_ABSOLUTE); TARGET(JUMP_ABSOLUTE) { - JUMPTO(oparg); + JUMPTO((unsigned)oparg); #if FAST_LOOPS /* Enabling this path speeds-up all while and for-loops by bypassing the per-loop checks for signals. By default, this should be turned-off @@ -3110,7 +3109,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) /* iterator ended normally */ STACKADJ(-1); Py_DECREF(iter); - JUMPBY(oparg); + JUMPBY((unsigned)oparg); PREDICT(POP_BLOCK); DISPATCH(); } @@ -3121,7 +3120,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag) } TARGET(CONTINUE_LOOP) { - retval = PyLong_FromLong(oparg); + retval = PyLong_FromLong((unsigned)oparg); if (retval == NULL) goto error; why = WHY_CONTINUE; @@ -3783,7 +3782,7 @@ format_missing(const char *kind, PyCodeObject *co, PyObject *names) static void missing_arguments(PyCodeObject *co, Py_ssize_t missing, Py_ssize_t defcount, - PyObject **fastlocals) + PyFrameObject *f) { Py_ssize_t i, j = 0; Py_ssize_t start, end; @@ -3820,7 +3819,7 @@ missing_arguments(PyCodeObject *co, Py_ssize_t missing, Py_ssize_t defcount, static void too_many_positional(PyCodeObject *co, Py_ssize_t given, Py_ssize_t defcount, - PyObject **fastlocals) + PyFrameObject *f) { int plural; Py_ssize_t kwonly_given = 0; @@ -3888,7 +3887,7 @@ _PyEval_EvalCodeWithName(PyObject *_co, PyObject *globals, PyObject *locals, PyCodeObject* co = (PyCodeObject*)_co; PyFrameObject *f; PyObject *retval = NULL; - PyObject **fastlocals, **freevars; + PyObject **freevars; PyThreadState *tstate; PyObject *x, *u; const Py_ssize_t total_args = co->co_argcount + co->co_kwonlyargcount; @@ -3906,7 +3905,6 @@ _PyEval_EvalCodeWithName(PyObject *_co, PyObject *globals, PyObject *locals, if (f == NULL) { return NULL; } - fastlocals = f->f_localsplus; freevars = f->f_localsplus + co->co_nlocals; /* Create a dictionary for keyword parameters (**kwags) */ if (co->co_flags & CO_VARKEYWORDS) { @@ -4002,7 +4000,7 @@ _PyEval_EvalCodeWithName(PyObject *_co, PyObject *globals, PyObject *locals, } /* Check the number of positional arguments */ if (argcount > co->co_argcount && !(co->co_flags & CO_VARARGS)) { - too_many_positional(co, argcount, defcount, fastlocals); + too_many_positional(co, argcount, defcount, f); goto fail; } /* Add missing positional arguments (copy default values from defs) */ @@ -4015,7 +4013,7 @@ _PyEval_EvalCodeWithName(PyObject *_co, PyObject *globals, PyObject *locals, } } if (missing) { - missing_arguments(co, missing, defcount, fastlocals); + missing_arguments(co, missing, defcount, f); goto fail; } if (n > m) @@ -4049,7 +4047,7 @@ _PyEval_EvalCodeWithName(PyObject *_co, PyObject *globals, PyObject *locals, missing++; } if (missing) { - missing_arguments(co, missing, -1, fastlocals); + missing_arguments(co, missing, -1, f); goto fail; } } @@ -4817,7 +4815,6 @@ _PyFunction_FastCall(PyCodeObject *co, PyObject **args, Py_ssize_t nargs, { PyFrameObject *f; PyThreadState *tstate = PyThreadState_GET(); - PyObject **fastlocals; Py_ssize_t i; PyObject *result; PCALL(PCALL_FASTER_FUNCTION); @@ -4831,10 +4828,9 @@ _PyFunction_FastCall(PyCodeObject *co, PyObject **args, Py_ssize_t nargs, if (f == NULL) { return NULL; } - fastlocals = f->f_localsplus; for (i = 0; i < nargs; i++) { Py_INCREF(*args); - fastlocals[i] = *args++; + f->f_localsplus[(size_t)i] = *args++; } result = PyEval_EvalFrameEx(f,0); ++tstate->recursion_depth; @@ -5348,9 +5344,8 @@ unicode_concatenate(PyObject *v, PyObject *w, switch (opcode) { case STORE_FAST: { - PyObject **fastlocals = f->f_localsplus; - if (GETLOCAL(oparg) == v) - SETLOCAL(oparg, NULL); + if (GETLOCAL((unsigned)oparg) == v) + SETLOCAL((unsigned)oparg, NULL); break; } case STORE_DEREF: @@ -5365,7 +5360,7 @@ unicode_concatenate(PyObject *v, PyObject *w, case STORE_NAME: { PyObject *names = f->f_code->co_names; - PyObject *name = GETITEM(names, oparg); + PyObject *name = GETITEM(names, (unsigned)oparg); PyObject *locals = f->f_locals; if (locals && PyDict_CheckExact(locals) && PyDict_GetItem(locals, name) == v) {