[mypyc] Fix native tuple unboxing
Currently, unboxing tuples first involve doing a tuple cast to do error checks in advance. This hack was added because the error handling code for unboxing tuples is busted and would cause crashes when the unbox failed. (It only checks if the first item is uninitialized. If the second item is uninitialized while the first is not, no exception is raised, leading to crashes when it's accessed later.)
This commit reworks the tuple unbox logic so the cast isn't needed anymore. Instead ...
- Use emit_arg_check() to do a tuple type and length; and then
- Modify the error handler for the item unboxes/casts to jump to an error block that uninitializes the whole tuple[^1] and sets an exception.
I had to add support for custom error handlers to native integer and float unboxes so a jump to the tuple unbox error block is possible on failure.
NOTE: this doesn't affect tuple->tuple coercions because those are optimized away: each tuple item are coerced individually instead. This only affects coercions where the source is either Any or a tuple of variable length.
Fixes mypyc/mypyc#451.
[^1]: While uninitializing only the first element would suffice, it’s simpler and probably safer to uninitialize the entire tuple.
Here's the old and new C code for this test file:
from typing import Any, Tuple
from mypy_extensions import i64, i32
class A: pass
def f(arg: Any) -> None:
t: Tuple[str, str] = arg
t2: Tuple[int, float, bool, None] = arg
t3: Tuple[i64, i32] = arg
t4: Tuple[A] = arg
New code
char CPyDef_f(PyObject *cpy_r_arg) {
tuple_T2OO cpy_r_r0;
tuple_T2OO cpy_r_t;
tuple_T4IFCC cpy_r_r1;
tuple_T4IFCC cpy_r_t2;
tuple_T284 cpy_r_r2;
int64_t cpy_r_r3;
char cpy_r_r4;
tuple_T284 cpy_r_t3;
tuple_T1O cpy_r_r5;
PyObject *cpy_r_r6;
tuple_T1O cpy_r_t4;
char cpy_r_r7;
CPyL0: ;
if (!PyTuple_Check(cpy_r_arg) || PyTuple_Size(cpy_r_arg) != 2) {
CPy_TypeError("tuple[str, str]", cpy_r_arg); cpy_r_r0 = tuple_undefined_T2OO;
} else {
PyObject *__tmp3 = PyTuple_GET_ITEM(cpy_r_arg, 0);
if (likely(PyUnicode_Check(__tmp3)))
cpy_r_r0.f0 = __tmp3;
else {
goto __LL1;
}
PyObject *__tmp4 = PyTuple_GET_ITEM(cpy_r_arg, 1);
if (likely(PyUnicode_Check(__tmp4)))
cpy_r_r0.f1 = __tmp4;
else {
goto __LL1;
}
CPy_INCREF(cpy_r_r0.f0);
CPy_INCREF(cpy_r_r0.f1);
goto __LL2;
__LL1: ;
CPy_TypeError("tuple[str, str]", cpy_r_arg); cpy_r_r0 = tuple_undefined_T2OO;
}
__LL2: ;
if (unlikely(cpy_r_r0.f0 == NULL)) {
CPy_AddTraceback("temp/unbox.py", "f", 7, CPyStatic_globals);
goto CPyL6;
}
CPyL1: ;
cpy_r_t = cpy_r_r0;
CPy_DECREF(cpy_r_t.f0);
CPy_DECREF(cpy_r_t.f1);
if (!PyTuple_Check(cpy_r_arg) || PyTuple_Size(cpy_r_arg) != 4) {
CPy_TypeError("tuple[int, float, bool, None]", cpy_r_arg); cpy_r_r1 = tuple_undefined_T4IFCC;
} else {
PyObject *__tmp7 = PyTuple_GET_ITEM(cpy_r_arg, 0);
if (likely(PyLong_Check(__tmp7)))
cpy_r_r1.f0 = CPyTagged_BorrowFromObject(__tmp7);
else {
goto __LL5;
}
PyObject *__tmp8 = PyTuple_GET_ITEM(cpy_r_arg, 1);
cpy_r_r1.f1 = PyFloat_AsDouble(__tmp8);
if (cpy_r_r1.f1 == -1.0 && PyErr_Occurred()) {
goto __LL5;
}
PyObject *__tmp9 = PyTuple_GET_ITEM(cpy_r_arg, 2);
if (unlikely(!PyBool_Check(__tmp9))) {
goto __LL5;
} else
cpy_r_r1.f2 = __tmp9 == Py_True;
PyObject *__tmp10 = PyTuple_GET_ITEM(cpy_r_arg, 3);
if (unlikely(__tmp10 != Py_None)) {
goto __LL5;
} else
cpy_r_r1.f3 = 1;
CPyTagged_INCREF(cpy_r_r1.f0);
goto __LL6;
__LL5: ;
if (!PyErr_Occurred()) {
CPy_TypeError("tuple[int, float, bool, None]", cpy_r_arg);
}
cpy_r_r1 = tuple_undefined_T4IFCC;
}
__LL6: ;
if (unlikely(cpy_r_r1.f0 == CPY_INT_TAG)) {
CPy_AddTraceback("temp/unbox.py", "f", 8, CPyStatic_globals);
goto CPyL6;
}
CPyL2: ;
cpy_r_t2 = cpy_r_r1;
CPyTagged_DECREF(cpy_r_t2.f0);
if (!PyTuple_Check(cpy_r_arg) || PyTuple_Size(cpy_r_arg) != 2) {
CPy_TypeError("tuple[int64, int32]", cpy_r_arg); cpy_r_r2 = tuple_undefined_T284;
} else {
PyObject *__tmp13 = PyTuple_GET_ITEM(cpy_r_arg, 0);
cpy_r_r2.f0 = CPyLong_AsInt64(__tmp13);
if (cpy_r_r2.f0 == -113 && PyErr_Occurred()) {
goto __LL11;
}
PyObject *__tmp14 = PyTuple_GET_ITEM(cpy_r_arg, 1);
cpy_r_r2.f1 = CPyLong_AsInt32(__tmp14);
if (cpy_r_r2.f1 == -113 && PyErr_Occurred()) {
goto __LL11;
}
goto __LL12;
__LL11: ;
if (!PyErr_Occurred()) {
CPy_TypeError("tuple[int64, int32]", cpy_r_arg);
}
cpy_r_r2 = tuple_undefined_T284;
}
__LL12: ;
cpy_r_r3 = cpy_r_r2.f0;
cpy_r_r4 = cpy_r_r3 == -113;
if (unlikely(cpy_r_r4)) goto CPyL4;
CPyL3: ;
cpy_r_t3 = cpy_r_r2;
if (!PyTuple_Check(cpy_r_arg) || PyTuple_Size(cpy_r_arg) != 1) {
CPy_TypeError("tuple[unbox.A]", cpy_r_arg); cpy_r_r5 = tuple_undefined_T1O;
} else {
PyObject *__tmp17 = PyTuple_GET_ITEM(cpy_r_arg, 0);
if (likely(Py_TYPE(__tmp17) == CPyType_A))
cpy_r_r5.f0 = __tmp17;
else {
goto __LL15;
}
CPy_INCREF(cpy_r_r5.f0);
goto __LL16;
__LL15: ;
CPy_TypeError("tuple[unbox.A]", cpy_r_arg); cpy_r_r5 = tuple_undefined_T1O;
}
__LL16: ;
if (unlikely(cpy_r_r5.f0 == NULL)) {
CPy_AddTraceback("temp/unbox.py", "f", 10, CPyStatic_globals);
goto CPyL6;
} else
goto CPyL5;
CPyL4: ;
cpy_r_r6 = PyErr_Occurred();
if (unlikely(cpy_r_r6 != NULL)) {
CPy_AddTraceback("temp/unbox.py", "f", 9, CPyStatic_globals);
goto CPyL6;
} else
goto CPyL3;
CPyL5: ;
cpy_r_t4 = cpy_r_r5;
CPy_DECREF(cpy_r_t4.f0);
return 1;
CPyL6: ;
cpy_r_r7 = 2;
return cpy_r_r7;
}
Old code
char CPyDef_f(PyObject *cpy_r_arg) {
tuple_T2OO cpy_r_r0;
tuple_T2OO cpy_r_t;
tuple_T4IFCC cpy_r_r1;
tuple_T4IFCC cpy_r_t2;
tuple_T284 cpy_r_r2;
int64_t cpy_r_r3;
char cpy_r_r4;
tuple_T284 cpy_r_t3;
tuple_T1O cpy_r_r5;
PyObject *cpy_r_r6;
tuple_T1O cpy_r_t4;
char cpy_r_r7;
CPyL0: ;
PyObject *__tmp1;
if (unlikely(!(PyTuple_Check(cpy_r_arg) && PyTuple_GET_SIZE(cpy_r_arg) == 2))) {
__tmp1 = NULL;
goto __LL2;
}
if (likely(PyUnicode_Check(PyTuple_GET_ITEM(cpy_r_arg, 0))))
__tmp1 = PyTuple_GET_ITEM(cpy_r_arg, 0);
else {
__tmp1 = NULL;
}
if (__tmp1 == NULL) goto __LL2;
if (likely(PyUnicode_Check(PyTuple_GET_ITEM(cpy_r_arg, 1))))
__tmp1 = PyTuple_GET_ITEM(cpy_r_arg, 1);
else {
__tmp1 = NULL;
}
if (__tmp1 == NULL) goto __LL2;
__tmp1 = cpy_r_arg;
__LL2: ;
if (unlikely(__tmp1 == NULL)) {
CPy_TypeError("tuple[str, str]", cpy_r_arg); cpy_r_r0 = tuple_undefined_T2OO;
} else {
PyObject *__tmp3 = PyTuple_GET_ITEM(cpy_r_arg, 0);
CPy_INCREF(__tmp3);
PyObject *__tmp4;
if (likely(PyUnicode_Check(__tmp3)))
__tmp4 = __tmp3;
else {
CPy_TypeError("str", __tmp3);
__tmp4 = NULL;
}
cpy_r_r0.f0 = __tmp4;
PyObject *__tmp5 = PyTuple_GET_ITEM(cpy_r_arg, 1);
CPy_INCREF(__tmp5);
PyObject *__tmp6;
if (likely(PyUnicode_Check(__tmp5)))
__tmp6 = __tmp5;
else {
CPy_TypeError("str", __tmp5);
__tmp6 = NULL;
}
cpy_r_r0.f1 = __tmp6;
}
if (unlikely(cpy_r_r0.f0 == NULL)) {
CPy_AddTraceback("temp/unbox.py", "f", 7, CPyStatic_globals);
goto CPyL6;
}
CPyL1: ;
cpy_r_t = cpy_r_r0;
CPy_DECREF(cpy_r_t.f0);
CPy_DECREF(cpy_r_t.f1);
PyObject *__tmp7;
if (unlikely(!(PyTuple_Check(cpy_r_arg) && PyTuple_GET_SIZE(cpy_r_arg) == 4))) {
__tmp7 = NULL;
goto __LL8;
}
if (likely(PyLong_Check(PyTuple_GET_ITEM(cpy_r_arg, 0))))
__tmp7 = PyTuple_GET_ITEM(cpy_r_arg, 0);
else {
__tmp7 = NULL;
}
if (__tmp7 == NULL) goto __LL8;
if (likely(CPyFloat_Check(PyTuple_GET_ITEM(cpy_r_arg, 1))))
__tmp7 = PyTuple_GET_ITEM(cpy_r_arg, 1);
else {
__tmp7 = NULL;
}
if (__tmp7 == NULL) goto __LL8;
if (likely(PyBool_Check(PyTuple_GET_ITEM(cpy_r_arg, 2))))
__tmp7 = PyTuple_GET_ITEM(cpy_r_arg, 2);
else {
__tmp7 = NULL;
}
if (__tmp7 == NULL) goto __LL8;
if (likely(PyTuple_GET_ITEM(cpy_r_arg, 3) == Py_None))
__tmp7 = PyTuple_GET_ITEM(cpy_r_arg, 3);
else {
__tmp7 = NULL;
}
if (__tmp7 == NULL) goto __LL8;
__tmp7 = cpy_r_arg;
__LL8: ;
if (unlikely(__tmp7 == NULL)) {
CPy_TypeError("tuple[int, float, bool, None]", cpy_r_arg); cpy_r_r1 = tuple_undefined_T4IFCC;
} else {
PyObject *__tmp9 = PyTuple_GET_ITEM(cpy_r_arg, 0);
CPyTagged __tmp10;
if (likely(PyLong_Check(__tmp9)))
__tmp10 = CPyTagged_FromObject(__tmp9);
else {
CPy_TypeError("int", __tmp9); __tmp10 = CPY_INT_TAG;
}
cpy_r_r1.f0 = __tmp10;
PyObject *__tmp11 = PyTuple_GET_ITEM(cpy_r_arg, 1);
double __tmp12;
__tmp12 = PyFloat_AsDouble(__tmp11);
if (__tmp12 == -1.0 && PyErr_Occurred()) {
__tmp12 = -113.0;
}
cpy_r_r1.f1 = __tmp12;
PyObject *__tmp13 = PyTuple_GET_ITEM(cpy_r_arg, 2);
char __tmp14;
if (unlikely(!PyBool_Check(__tmp13))) {
CPy_TypeError("bool", __tmp13); __tmp14 = 2;
} else
__tmp14 = __tmp13 == Py_True;
cpy_r_r1.f2 = __tmp14;
PyObject *__tmp15 = PyTuple_GET_ITEM(cpy_r_arg, 3);
char __tmp16;
if (unlikely(__tmp15 != Py_None)) {
CPy_TypeError("None", __tmp15); __tmp16 = 2;
} else
__tmp16 = 1;
cpy_r_r1.f3 = __tmp16;
}
if (unlikely(cpy_r_r1.f0 == CPY_INT_TAG)) {
CPy_AddTraceback("temp/unbox.py", "f", 8, CPyStatic_globals);
goto CPyL6;
}
CPyL2: ;
cpy_r_t2 = cpy_r_r1;
CPyTagged_DECREF(cpy_r_t2.f0);
PyObject *__tmp17;
if (unlikely(!(PyTuple_Check(cpy_r_arg) && PyTuple_GET_SIZE(cpy_r_arg) == 2))) {
__tmp17 = NULL;
goto __LL18;
}
if (likely(PyLong_Check(PyTuple_GET_ITEM(cpy_r_arg, 0))))
__tmp17 = PyTuple_GET_ITEM(cpy_r_arg, 0);
else {
__tmp17 = NULL;
}
if (__tmp17 == NULL) goto __LL18;
if (likely(PyLong_Check(PyTuple_GET_ITEM(cpy_r_arg, 1))))
__tmp17 = PyTuple_GET_ITEM(cpy_r_arg, 1);
else {
__tmp17 = NULL;
}
if (__tmp17 == NULL) goto __LL18;
__tmp17 = cpy_r_arg;
__LL18: ;
if (unlikely(__tmp17 == NULL)) {
CPy_TypeError("tuple[int64, int32]", cpy_r_arg); cpy_r_r2 = tuple_undefined_T284;
} else {
PyObject *__tmp19 = PyTuple_GET_ITEM(cpy_r_arg, 0);
int64_t __tmp20;
__tmp20 = CPyLong_AsInt64(__tmp19);
cpy_r_r2.f0 = __tmp20;
PyObject *__tmp21 = PyTuple_GET_ITEM(cpy_r_arg, 1);
int32_t __tmp22;
__tmp22 = CPyLong_AsInt32(__tmp21);
cpy_r_r2.f1 = __tmp22;
}
cpy_r_r3 = cpy_r_r2.f0;
cpy_r_r4 = cpy_r_r3 == -113;
if (unlikely(cpy_r_r4)) goto CPyL4;
CPyL3: ;
cpy_r_t3 = cpy_r_r2;
PyObject *__tmp23;
if (unlikely(!(PyTuple_Check(cpy_r_arg) && PyTuple_GET_SIZE(cpy_r_arg) == 1))) {
__tmp23 = NULL;
goto __LL24;
}
if (likely(Py_TYPE(PyTuple_GET_ITEM(cpy_r_arg, 0)) == CPyType_A))
__tmp23 = PyTuple_GET_ITEM(cpy_r_arg, 0);
else {
__tmp23 = NULL;
}
if (__tmp23 == NULL) goto __LL24;
__tmp23 = cpy_r_arg;
__LL24: ;
if (unlikely(__tmp23 == NULL)) {
CPy_TypeError("tuple[unbox.A]", cpy_r_arg); cpy_r_r5 = tuple_undefined_T1O;
} else {
PyObject *__tmp25 = PyTuple_GET_ITEM(cpy_r_arg, 0);
CPy_INCREF(__tmp25);
PyObject *__tmp26;
if (likely(Py_TYPE(__tmp25) == CPyType_A))
__tmp26 = __tmp25;
else {
CPy_TypeError("unbox.A", __tmp25);
__tmp26 = NULL;
}
cpy_r_r5.f0 = __tmp26;
}
if (unlikely(cpy_r_r5.f0 == NULL)) {
CPy_AddTraceback("temp/unbox.py", "f", 10, CPyStatic_globals);
goto CPyL6;
} else
goto CPyL5;
CPyL4: ;
cpy_r_r6 = PyErr_Occurred();
if (unlikely(cpy_r_r6 != NULL)) {
CPy_AddTraceback("temp/unbox.py", "f", 9, CPyStatic_globals);
goto CPyL6;
} else
goto CPyL3;
CPyL5: ;
cpy_r_t4 = cpy_r_r5;
CPy_DECREF(cpy_r_t4.f0);
return 1;
CPyL6: ;
cpy_r_r7 = 2;
return cpy_r_r7;
}
Actually, I may as well double check the refcount management of this code while I'm fixing it.
I created a simple benchmark and it somewhat surprisingly seems about 20% slower with the new code (Python 3.11 and Apple M1 Max):
Good catch! Adding unlikely(...) and using the inline length function seems to have brought performance back to master levels. Unfortunately there's no significant speed up from this PR. If I left the refcounting alone, then a small speed up can be observed (~5%), but incrementing refcounts per item makes it really challenging to deref properly when the unbox fails (since what needs to deref-ed depends on which item failed). We would (probably) have to add custom decref code into the error handler for each item. I don't think that's worth it.
I have a (not yet complete) patch that merges the unbox error branches within the unbox op that depends on this PR. @JukkaL I'd appreciate if this could be looked at soon (especially since this seems merge conflict prone). Thanks!
--- build/__native.c 2023-07-20 12:47:55.132023572 -0400
+++ check-build/__native.c 2023-07-20 12:47:19.404279527 -0400
@@ -67,31 +67,22 @@
if (likely(PyLong_Check(cpy_r_arg)))
cpy_r_r0 = CPyTagged_FromObject(cpy_r_arg);
else {
- CPy_TypeError("int", cpy_r_arg); cpy_r_r0 = CPY_INT_TAG;
- }
- if (unlikely(cpy_r_r0 == CPY_INT_TAG)) {
- CPy_AddTraceback("temp/unbox_size.py", "main", 5, CPyStatic_globals);
+ CPy_TypeErrorTraceback("temp/unbox_size.py", "main", 5, CPyStatic_globals, "int", cpy_r_arg);
goto CPyL4;
}
cpy_r_integer = cpy_r_r0;
CPyTagged_DECREF(cpy_r_integer);
if (unlikely(!PyBool_Check(cpy_r_arg))) {
- CPy_TypeError("bool", cpy_r_arg); cpy_r_r1 = 2;
+ CPy_TypeErrorTraceback("temp/unbox_size.py", "main", 6, CPyStatic_globals, "bool", cpy_r_arg);
+ goto CPyL4;
} else
cpy_r_r1 = cpy_r_arg == Py_True;
- if (unlikely(cpy_r_r1 == 2)) {
- CPy_AddTraceback("temp/unbox_size.py", "main", 6, CPyStatic_globals);
- goto CPyL4;
- }
cpy_r_boolean = cpy_r_r1;
if (unlikely(cpy_r_arg != Py_None)) {
- CPy_TypeError("None", cpy_r_arg); cpy_r_r2 = 2;
+ CPy_TypeErrorTraceback("temp/unbox_size.py", "main", 7, CPyStatic_globals, "None", cpy_r_arg);
+ goto CPyL4;
} else
cpy_r_r2 = 1;
- if (unlikely(cpy_r_r2 == 2)) {
- CPy_AddTraceback("temp/unbox_size.py", "main", 7, CPyStatic_globals);
- goto CPyL4;
- }
cpy_r_none = cpy_r_r2;
return 1;
CPyL4: ;
@JukkaL, been a while since I've last been around. I hope you're well. Could you take a look at this?
Sorry for the slow response! It looks like the microbenchmark is still about 5% slower with this change (Applex M1 Max). I wonder if we can have something that is faster without too much extra complexity.