Help GCC compile the main interpreter loop better

Based on the micro-optimisations described in:

https://www.corsix.org/content/micro-optimisations-can-speed-up-cpython
This commit is contained in:
ahgamut 2022-04-17 06:45:22 +05:30
parent f6b6204b9e
commit d4be308340

View file

@ -741,7 +741,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
int opcode; /* Current opcode */
int oparg; /* Current opcode argument, if any */
enum why_code why; /* Reason for block stack unwind */
PyObject **fastlocals, **freevars;
PyObject **freevars;
PyObject *retval = NULL; /* Return value */
PyThreadState *tstate = PyThreadState_GET();
PyCodeObject *co;
@ -888,7 +888,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
/* Code access macros */
/* The integer overflow is checked by an assertion below. */
#define INSTR_OFFSET() (sizeof(_Py_CODEUNIT) * (int)(next_instr - first_instr))
#define INSTR_OFFSET() ( (char*)next_instr - (char*)first_instr )
#define NEXTOPARG() do { \
_Py_CODEUNIT word = *next_instr; \
opcode = _Py_OPCODE(word); \
@ -982,7 +982,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
/* Local variable macros */
#define GETLOCAL(i) (fastlocals[i])
#define GETLOCAL(i) (f->f_localsplus[i])
/* The SETLOCAL() macro must not DECREF the local variable in-place and
then store the new value; it must copy the old value to a temporary
@ -1068,7 +1068,6 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
co = f->f_code;
names = co->co_names;
consts = co->co_consts;
fastlocals = f->f_localsplus;
freevars = f->f_localsplus + co->co_nlocals;
assert(PyBytes_Check(co->co_code));
assert(PyBytes_GET_SIZE(co->co_code) <= INT_MAX);
@ -1269,7 +1268,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
FAST_DISPATCH();
LIKELY_TARGET(LOAD_FAST) {
PyObject *value = GETLOCAL(oparg);
PyObject *value = GETLOCAL(((unsigned)oparg));
if (UNLIKELY(value == NULL)) {
format_exc_check_arg(PyExc_UnboundLocalError,
UNBOUNDLOCAL_ERROR_MSG,
@ -1283,7 +1282,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
PREDICTED(LOAD_CONST);
TARGET(LOAD_CONST) {
PyObject *value = GETITEM(consts, oparg);
PyObject *value = GETITEM(consts, ((unsigned)oparg));
Py_INCREF(value);
PUSH(value);
FAST_DISPATCH();
@ -1292,7 +1291,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
PREDICTED(STORE_FAST);
TARGET(STORE_FAST) {
PyObject *value = POP();
SETLOCAL(oparg, value);
SETLOCAL(((unsigned)oparg), value);
FAST_DISPATCH();
}
@ -1573,7 +1572,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
TARGET(LIST_APPEND) {
PyObject *v = POP();
PyObject *list = PEEK(oparg);
PyObject *list = PEEK((size_t)(unsigned)oparg);
int err;
err = PyList_Append(list, v);
Py_DECREF(v);
@ -1778,7 +1777,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
_Py_IDENTIFIER(__annotations__);
PyObject *ann_dict;
PyObject *ann = POP();
PyObject *name = GETITEM(names, oparg);
PyObject *name = GETITEM(names, ((unsigned)oparg));
int err;
if (f->f_locals == NULL) {
PyErr_Format(PyExc_SystemError,
@ -2209,7 +2208,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
}
TARGET(STORE_NAME) {
PyObject *name = GETITEM(names, oparg);
PyObject *name = GETITEM(names, ((unsigned)oparg));
PyObject *v = POP();
PyObject *ns = f->f_locals;
int err;
@ -2230,7 +2229,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
}
TARGET(DELETE_NAME) {
PyObject *name = GETITEM(names, oparg);
PyObject *name = GETITEM(names, ((unsigned)oparg));
PyObject *ns = f->f_locals;
int err;
if (ns == NULL) {
@ -2252,7 +2251,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
TARGET(UNPACK_SEQUENCE) {
PyObject *seq = POP(), *item, **items;
if (PyTuple_CheckExact(seq) &&
PyTuple_GET_SIZE(seq) == oparg) {
PyTuple_GET_SIZE(seq) == ((unsigned)oparg)) {
items = ((PyTupleObject *)seq)->ob_item;
while (oparg--) {
item = items[oparg];
@ -2260,7 +2259,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
PUSH(item);
}
} else if (PyList_CheckExact(seq) &&
PyList_GET_SIZE(seq) == oparg) {
PyList_GET_SIZE(seq) == (unsigned)oparg) {
items = ((PyListObject *)seq)->ob_item;
while (oparg--) {
item = items[oparg];
@ -2269,7 +2268,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
}
} else if (unpack_iterable(seq, oparg, -1,
stack_pointer + oparg)) {
STACKADJ(oparg);
STACKADJ(((unsigned)oparg));
} else {
/* unpack_iterable() raised an exception */
Py_DECREF(seq);
@ -2295,7 +2294,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
}
TARGET(STORE_ATTR) {
PyObject *name = GETITEM(names, oparg);
PyObject *name = GETITEM(names, ((unsigned)oparg));
PyObject *owner = TOP();
PyObject *v = SECOND();
int err;
@ -2309,7 +2308,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
}
TARGET(DELETE_ATTR) {
PyObject *name = GETITEM(names, oparg);
PyObject *name = GETITEM(names, ((unsigned)oparg));
PyObject *owner = POP();
int err;
err = PyObject_SetAttr(owner, name, (PyObject *)NULL);
@ -2320,7 +2319,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
}
TARGET(STORE_GLOBAL) {
PyObject *name = GETITEM(names, oparg);
PyObject *name = GETITEM(names, ((unsigned)oparg));
PyObject *v = POP();
int err;
err = PyDict_SetItem(f->f_globals, name, v);
@ -2331,7 +2330,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
}
TARGET(DELETE_GLOBAL) {
PyObject *name = GETITEM(names, oparg);
PyObject *name = GETITEM(names, ((unsigned)oparg));
int err;
err = PyDict_DelItem(f->f_globals, name);
if (err != 0) {
@ -2343,7 +2342,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
}
TARGET(LOAD_NAME) {
PyObject *name = GETITEM(names, oparg);
PyObject *name = GETITEM(names, ((unsigned)oparg));
PyObject *locals = f->f_locals;
PyObject *v;
if (locals == NULL) {
@ -2394,7 +2393,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
}
TARGET(LOAD_GLOBAL) {
PyObject *name = GETITEM(names, oparg);
PyObject *name = GETITEM(names, ((unsigned)oparg));
PyObject *v;
if (PyDict_CheckExact(f->f_globals)
&& PyDict_CheckExact(f->f_builtins))
@ -2438,9 +2437,9 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
}
TARGET(DELETE_FAST) {
PyObject *v = GETLOCAL(oparg);
PyObject *v = GETLOCAL(((unsigned)oparg));
if (v != NULL) {
SETLOCAL(oparg, NULL);
SETLOCAL(((unsigned)oparg), NULL);
DISPATCH();
}
format_exc_check_arg(
@ -2541,7 +2540,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
}
TARGET(BUILD_TUPLE) {
PyObject *tup = PyTuple_New(oparg);
PyObject *tup = PyTuple_New(((unsigned)oparg));
if (tup == NULL)
goto error;
while (--oparg >= 0) {
@ -2553,7 +2552,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
}
TARGET(BUILD_LIST) {
PyObject *list = PyList_New(oparg);
PyObject *list = PyList_New(((unsigned)oparg));
if (list == NULL)
goto error;
while (--oparg >= 0) {
@ -2619,7 +2618,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
err = PySet_Add(set, item);
Py_DECREF(item);
}
STACKADJ(-oparg);
STACKADJ(-(size_t)(unsigned)oparg);
if (err != 0) {
Py_DECREF(set);
goto error;
@ -2649,7 +2648,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
TARGET(BUILD_MAP) {
Py_ssize_t i;
PyObject *map = _PyDict_NewPresized((Py_ssize_t)oparg);
PyObject *map = _PyDict_NewPresized((size_t)(unsigned)oparg);
if (map == NULL)
goto error;
for (i = oparg; i > 0; i--) {
@ -2732,12 +2731,12 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
PyObject *map;
PyObject *keys = TOP();
if (!PyTuple_CheckExact(keys) ||
PyTuple_GET_SIZE(keys) != (Py_ssize_t)oparg) {
PyTuple_GET_SIZE(keys) != (Py_ssize_t)(size_t)(unsigned)oparg) {
PyErr_SetString(PyExc_SystemError,
"bad BUILD_CONST_KEY_MAP keys argument");
goto error;
}
map = _PyDict_NewPresized((Py_ssize_t)oparg);
map = _PyDict_NewPresized((size_t)(unsigned)oparg);
if (map == NULL) {
goto error;
}
@ -2793,7 +2792,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
for (i = oparg; i > 0; i--) {
PyObject *arg = PEEK(i);
if (_PyDict_MergeEx(sum, arg, 2) < 0) {
PyObject *func = PEEK(2 + oparg);
PyObject *func = PEEK(2 + (size_t)(unsigned)oparg);
if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
format_kwargs_mapping_error(func, arg);
}
@ -2852,7 +2851,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
}
TARGET(LOAD_ATTR) {
PyObject *name = GETITEM(names, oparg);
PyObject *name = GETITEM(names, (unsigned)oparg);
PyObject *owner = TOP();
PyObject *res = PyObject_GetAttr(owner, name);
Py_DECREF(owner);
@ -2877,7 +2876,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
}
TARGET(IMPORT_NAME) {
PyObject *name = GETITEM(names, oparg);
PyObject *name = GETITEM(names, (unsigned)oparg);
PyObject *fromlist = POP();
PyObject *level = TOP();
PyObject *res;
@ -2914,7 +2913,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
}
TARGET(IMPORT_FROM) {
PyObject *name = GETITEM(names, oparg);
PyObject *name = GETITEM(names, (unsigned)oparg);
PyObject *from = TOP();
PyObject *res;
res = import_from(from, name);
@ -2925,7 +2924,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
}
TARGET(JUMP_FORWARD) {
JUMPBY(oparg);
JUMPBY((unsigned)oparg);
FAST_DISPATCH();
}
@ -2939,7 +2938,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
}
if (cond == Py_False) {
Py_DECREF(cond);
JUMPTO(oparg);
JUMPTO((unsigned)oparg);
FAST_DISPATCH();
}
err = PyObject_IsTrue(cond);
@ -2947,7 +2946,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
if (err > 0)
err = 0;
else if (err == 0)
JUMPTO(oparg);
JUMPTO((unsigned)oparg);
else
goto error;
DISPATCH();
@ -2963,14 +2962,14 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
}
if (cond == Py_True) {
Py_DECREF(cond);
JUMPTO(oparg);
JUMPTO((unsigned)oparg);
FAST_DISPATCH();
}
err = PyObject_IsTrue(cond);
Py_DECREF(cond);
if (err > 0) {
err = 0;
JUMPTO(oparg);
JUMPTO((unsigned)oparg);
}
else if (err == 0)
;
@ -2988,7 +2987,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
FAST_DISPATCH();
}
if (cond == Py_False) {
JUMPTO(oparg);
JUMPTO((unsigned)oparg);
FAST_DISPATCH();
}
err = PyObject_IsTrue(cond);
@ -2998,7 +2997,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
err = 0;
}
else if (err == 0)
JUMPTO(oparg);
JUMPTO((unsigned)oparg);
else
goto error;
DISPATCH();
@ -3013,13 +3012,13 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
FAST_DISPATCH();
}
if (cond == Py_True) {
JUMPTO(oparg);
JUMPTO((unsigned)oparg);
FAST_DISPATCH();
}
err = PyObject_IsTrue(cond);
if (err > 0) {
err = 0;
JUMPTO(oparg);
JUMPTO((unsigned)oparg);
}
else if (err == 0) {
STACKADJ(-1);
@ -3032,7 +3031,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
PREDICTED(JUMP_ABSOLUTE);
TARGET(JUMP_ABSOLUTE) {
JUMPTO(oparg);
JUMPTO((unsigned)oparg);
#if FAST_LOOPS
/* Enabling this path speeds-up all while and for-loops by bypassing
the per-loop checks for signals. By default, this should be turned-off
@ -3110,7 +3109,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
/* iterator ended normally */
STACKADJ(-1);
Py_DECREF(iter);
JUMPBY(oparg);
JUMPBY((unsigned)oparg);
PREDICT(POP_BLOCK);
DISPATCH();
}
@ -3121,7 +3120,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
}
TARGET(CONTINUE_LOOP) {
retval = PyLong_FromLong(oparg);
retval = PyLong_FromLong((unsigned)oparg);
if (retval == NULL)
goto error;
why = WHY_CONTINUE;
@ -3783,7 +3782,7 @@ format_missing(const char *kind, PyCodeObject *co, PyObject *names)
static void
missing_arguments(PyCodeObject *co, Py_ssize_t missing, Py_ssize_t defcount,
PyObject **fastlocals)
PyFrameObject *f)
{
Py_ssize_t i, j = 0;
Py_ssize_t start, end;
@ -3820,7 +3819,7 @@ missing_arguments(PyCodeObject *co, Py_ssize_t missing, Py_ssize_t defcount,
static void
too_many_positional(PyCodeObject *co, Py_ssize_t given, Py_ssize_t defcount,
PyObject **fastlocals)
PyFrameObject *f)
{
int plural;
Py_ssize_t kwonly_given = 0;
@ -3888,7 +3887,7 @@ _PyEval_EvalCodeWithName(PyObject *_co, PyObject *globals, PyObject *locals,
PyCodeObject* co = (PyCodeObject*)_co;
PyFrameObject *f;
PyObject *retval = NULL;
PyObject **fastlocals, **freevars;
PyObject **freevars;
PyThreadState *tstate;
PyObject *x, *u;
const Py_ssize_t total_args = co->co_argcount + co->co_kwonlyargcount;
@ -3906,7 +3905,6 @@ _PyEval_EvalCodeWithName(PyObject *_co, PyObject *globals, PyObject *locals,
if (f == NULL) {
return NULL;
}
fastlocals = f->f_localsplus;
freevars = f->f_localsplus + co->co_nlocals;
/* Create a dictionary for keyword parameters (**kwargs) */
if (co->co_flags & CO_VARKEYWORDS) {
@ -4002,7 +4000,7 @@ _PyEval_EvalCodeWithName(PyObject *_co, PyObject *globals, PyObject *locals,
}
/* Check the number of positional arguments */
if (argcount > co->co_argcount && !(co->co_flags & CO_VARARGS)) {
too_many_positional(co, argcount, defcount, fastlocals);
too_many_positional(co, argcount, defcount, f);
goto fail;
}
/* Add missing positional arguments (copy default values from defs) */
@ -4015,7 +4013,7 @@ _PyEval_EvalCodeWithName(PyObject *_co, PyObject *globals, PyObject *locals,
}
}
if (missing) {
missing_arguments(co, missing, defcount, fastlocals);
missing_arguments(co, missing, defcount, f);
goto fail;
}
if (n > m)
@ -4049,7 +4047,7 @@ _PyEval_EvalCodeWithName(PyObject *_co, PyObject *globals, PyObject *locals,
missing++;
}
if (missing) {
missing_arguments(co, missing, -1, fastlocals);
missing_arguments(co, missing, -1, f);
goto fail;
}
}
@ -4817,7 +4815,6 @@ _PyFunction_FastCall(PyCodeObject *co, PyObject **args, Py_ssize_t nargs,
{
PyFrameObject *f;
PyThreadState *tstate = PyThreadState_GET();
PyObject **fastlocals;
Py_ssize_t i;
PyObject *result;
PCALL(PCALL_FASTER_FUNCTION);
@ -4831,10 +4828,9 @@ _PyFunction_FastCall(PyCodeObject *co, PyObject **args, Py_ssize_t nargs,
if (f == NULL) {
return NULL;
}
fastlocals = f->f_localsplus;
for (i = 0; i < nargs; i++) {
Py_INCREF(*args);
fastlocals[i] = *args++;
f->f_localsplus[(size_t)i] = *args++;
}
result = PyEval_EvalFrameEx(f,0);
++tstate->recursion_depth;
@ -5348,9 +5344,8 @@ unicode_concatenate(PyObject *v, PyObject *w,
switch (opcode) {
case STORE_FAST:
{
PyObject **fastlocals = f->f_localsplus;
if (GETLOCAL(oparg) == v)
SETLOCAL(oparg, NULL);
if (GETLOCAL((unsigned)oparg) == v)
SETLOCAL((unsigned)oparg, NULL);
break;
}
case STORE_DEREF:
@ -5365,7 +5360,7 @@ unicode_concatenate(PyObject *v, PyObject *w,
case STORE_NAME:
{
PyObject *names = f->f_code->co_names;
PyObject *name = GETITEM(names, oparg);
PyObject *name = GETITEM(names, (unsigned)oparg);
PyObject *locals = f->f_locals;
if (locals && PyDict_CheckExact(locals) &&
PyDict_GetItem(locals, name) == v) {