wzprof
wzprof copied to clipboard
Python: write a tool to generate struct offsets
Currently, field offsets in structs are hardcoded:
https://github.com/stealthrocket/wzprof/blob/ba3fa22d8ada53862bf029d023b7213bbcecc07c/python.go#L113-L153
They were generated by adding the following C code to Python/sysmodule.c in the CPython 3.11 source tree and then running the resulting interpreter:
void print_offsets(void) __attribute__((constructor))
{
printf("// --- start of cpython structs layout constants ---\n");
printf("// _PyRuntimeState\n");
printf("padTstateCurrentInRT = %d\n", offsetof(_PyRuntimeState, gilstate.tstate_current));
printf("// PyThreadState\n");
printf("padCframeInThreadState = %d\n", offsetof(PyThreadState, cframe));
printf("// _PyCFrame\n");
printf("padCurrentFrameInCFrame = %d\n", offsetof(_PyCFrame, current_frame));
printf("// _PyInterpreterFrame\n");
printf("padPreviousInFrame = %d\n", offsetof(_PyInterpreterFrame, previous));
printf("padCodeInFrame = %d\n", offsetof(_PyInterpreterFrame, f_code));
printf("padPrevInstrInFrame = %d\n", offsetof(_PyInterpreterFrame, prev_instr));
printf("padOwnerInFrame = %d\n", offsetof(_PyInterpreterFrame, owner));
printf("// PyCodeObject\n");
printf("padFilenameInCodeObject = %d\n", offsetof(PyCodeObject, co_filename));
printf("padNameInCodeObject = %d\n", offsetof(PyCodeObject, co_name));
printf("padCodeAdaptiveInCodeObject = %d\n", offsetof(PyCodeObject, co_code_adaptive));
printf("padFirstlinenoInCodeObject = %d\n", offsetof(PyCodeObject, co_firstlineno));
printf("padLinearrayInCodeObject = %d\n", offsetof(PyCodeObject, _co_linearray));
printf("padLinetableInCodeObject = %d\n", offsetof(PyCodeObject, co_linetable));
printf("padFirstTraceableInCodeObject = %d\n", offsetof(PyCodeObject, _co_firsttraceable));
printf("padQualNameInCodeObject = %d\n", offsetof(PyCodeObject, co_qualname));
printf("sizeCodeUnit = %d\n", sizeof(_Py_CODEUNIT));
printf("// PyASCIIObject\n");
printf("padStateInAsciiObject = %d\n", offsetof(PyASCIIObject, state));
printf("padLengthInAsciiObject = %d\n", offsetof(PyASCIIObject, length));
printf("sizeAsciiObject = %d\n", sizeof(PyASCIIObject));
printf("// PyBytesObject\n");
printf("padSvalInBytesObject = %d\n", offsetof(PyBytesObject, ob_sval));
printf("padSizeInBytesObject = %d\n", offsetof(PyBytesObject, ob_base.ob_size));
printf("// Enum constants\n");
printf("enumCodeLocation1 = %d\n", PY_CODE_LOCATION_INFO_ONE_LINE1);
printf("enumCodeLocation2 = %d\n", PY_CODE_LOCATION_INFO_ONE_LINE2);
printf("enumCodeLocationNoCol = %d\n", PY_CODE_LOCATION_INFO_NO_COLUMNS);
printf("enumCodeLocationLong = %d\n", PY_CODE_LOCATION_INFO_LONG);
printf("enumFrameOwnedByGenerator = %d\n", FRAME_OWNED_BY_GENERATOR);
printf("// --- end of cpython structs layout constants ---\n");
}
To support more versions of Python, we should build tooling that computes those offsets more easily. One approach may be to use CGo.
Another option would be to read the DWARF debug information, which should contain those offsets.