blog
blog copied to clipboard
Python中的栈帧对象
在Python真正执行的时候,它的虚拟机实际上面对的并不是一个PyCodeObject对象,而是另一个对象PyFrameObject。它就是我们所说的执行环境,也是Python在对系统栈帧的模拟。
0x00 栈帧的表示
/* Frame object: the execution environment in which a code object is run.
   Frames are chained to their predecessor through f_back.  The object is
   variable-sized: the trailing f_localsplus array holds the local slots
   followed by the value stack.
   A line consisting of "..." marks fields omitted from this excerpt. */
typedef struct _frame {
PyObject_VAR_HEAD
struct _frame *f_back; /* previous frame, or NULL */
PyCodeObject *f_code; /* code segment */
PyObject *f_builtins; /* builtin symbol table (PyDictObject) */
PyObject *f_globals; /* global symbol table (PyDictObject) */
PyObject *f_locals; /* local symbol table (any mapping) */
PyObject **f_valuestack; /* points after the last local */
/* Next free slot in f_valuestack. Frame creation sets to f_valuestack.
Frame evaluation usually NULLs it, but a frame that yields sets it
to the current stack top. */
PyObject **f_stacktop;
...
int f_lasti; /* Last instruction if called */
/* Call PyFrame_GetLineNumber() instead of reading this field
directly. As of 2.3 f_lineno is only valid when tracing is
active (i.e. when f_trace is set). At other times we use
PyCode_Addr2Line to calculate the line from the current
bytecode index. */
int f_lineno; /* Current line number */
int f_iblock; /* index in f_blockstack */
char f_executing; /* whether the frame is still executing */
PyTryBlock f_blockstack[CO_MAXBLOCKS]; /* for try and loop blocks */
PyObject *f_localsplus[1]; /* locals+stack, dynamically sized */
} PyFrameObject;
在Python实际的执行中,会产生很多PyFrameObject对象,而这些对象会被链接起来,形成一条执行环境链表。
f_back指向执行环境链的上一个栈帧,使新的栈帧在结束之后能顺利回到旧的栈帧中。f_code存放着一个PyCodeObject对象。f_builtins,f_globals,f_locals则是三个命名空间,f_builtins和f_globals都是PyDictObject对象,f_locals可以是任意的映射对象,一般情况下也是PyDictObject对象。f_valuestack和f_stacktop分别指向“运行时栈”的栈底和栈顶。f_lasti指向当前已经执行过的字节码指令的位置。f_lineno表示了当前执行PyCodeObject对象的行号。
PyFrameObject是一个可变长对象,大小取决于f_localsplus所申请的空间大小。从注释可以看出来,f_localsplus的大小由两部分组成:locals和stack。
locals部分用来储存局部变量;stack指的是“运行时栈”,是执行运算时所需要的内存空间。这个栈的大小存储在f_code->co_stacksize中,在编译PyCodeObject对象的时候就会计算出来。由于不同Code Block在执行时所需的栈空间大小是不同的,所以PyFrameObject的开头一定有一个PyObject_VAR_HEAD。
0x01 栈帧的创建
/* Create a frame for `code` and hand it to the garbage collector.
 *
 * The frame itself is built by _PyFrame_New_NoTrack(); this wrapper only
 * adds the _PyObject_GC_TRACK() step when that call succeeded.
 *
 * Returns the new frame, or NULL when _PyFrame_New_NoTrack() returned NULL.
 */
PyFrameObject*
PyFrame_New(PyThreadState *tstate, PyCodeObject *code,
            PyObject *globals, PyObject *locals)
{
    PyFrameObject *frame;

    frame = _PyFrame_New_NoTrack(tstate, code, globals, locals);
    if (frame == NULL) {
        /* Nothing was created, so there is nothing to track. */
        return NULL;
    }

    _PyObject_GC_TRACK(frame);
    return frame;
}
/* Build and initialise a frame object for `code` without registering it
   with the garbage collector (PyFrame_New performs the GC tracking on the
   returned frame).
   Returns the new frame, or NULL on the failure paths visible below.
   Lines consisting of "..." mark code omitted from this excerpt. */
PyFrameObject* _Py_HOT_FUNCTION
_PyFrame_New_NoTrack(PyThreadState *tstate, PyCodeObject *code,
PyObject *globals, PyObject *locals)
{
/* The thread's current frame becomes the new frame's f_back. */
PyFrameObject *back = tstate->frame;
PyFrameObject *f;
PyObject *builtins;
Py_ssize_t i;
...
// (elided above) fetch the builtins module dictionary into `builtins`
/* Branch for a code object that already carries a co_zombieframe;
   its body is omitted from this excerpt. */
if (code->co_zombieframe != NULL) {
...
}
else {
Py_ssize_t extras, ncells, nfrees;
// [1] number of variable-size slots: value stack + locals + cell vars + free vars
ncells = PyTuple_GET_SIZE(code->co_cellvars);
nfrees = PyTuple_GET_SIZE(code->co_freevars);
extras = code->co_stacksize + code->co_nlocals + ncells +
nfrees;
/* No recycled frame available: allocate a fresh variable-sized object. */
if (free_list == NULL) {
f = PyObject_GC_NewVar(PyFrameObject, &PyFrame_Type,
extras);
if (f == NULL) {
/* Release builtins before reporting the allocation failure. */
Py_DECREF(builtins);
return NULL;
}
}
else {
/* Pop the head of the free list; its entries are linked via f_back. */
assert(numfree > 0);
--numfree;
f = free_list;
free_list = free_list->f_back;
/* The recycled frame is too small for this code object: grow it. */
if (Py_SIZE(f) < extras) {
PyFrameObject *new_f = PyObject_GC_Resize(PyFrameObject, f, extras);
if (new_f == NULL) {
/* Resize failed: free the old frame, release builtins, give up. */
PyObject_GC_Del(f);
Py_DECREF(builtins);
return NULL;
}
f = new_f;
}
/* The recycled frame goes through _Py_NewReference before reuse. */
_Py_NewReference((PyObject *)f);
}
f->f_code = code;
/* From here on `extras` counts only the local slots (stack excluded). */
extras = code->co_nlocals + ncells + nfrees;
// [2] the value stack begins right after the local slots
f->f_valuestack = f->f_localsplus + extras;
/* Clear every local slot. */
for (i=0; i<extras; i++)
f->f_localsplus[i] = NULL;
f->f_locals = NULL;
f->f_trace = NULL;
}
// [3] the stack starts out empty: top == bottom
f->f_stacktop = f->f_valuestack;
// [4] initialise the remaining members
f->f_builtins = builtins;
/* back may be NULL, hence the X variant. */
Py_XINCREF(back);
f->f_back = back;
/* The frame keeps references to its code object and globals. */
Py_INCREF(code);
Py_INCREF(globals);
f->f_globals = globals;
/* Most functions have CO_NEWLOCALS and CO_OPTIMIZED set. */
if ((code->co_flags & (CO_NEWLOCALS | CO_OPTIMIZED)) ==
(CO_NEWLOCALS | CO_OPTIMIZED))
; /* f_locals = NULL; will be set by PyFrame_FastToLocals() */
else if (code->co_flags & CO_NEWLOCALS) {
/* CO_NEWLOCALS without CO_OPTIMIZED: give the frame a fresh dict. */
locals = PyDict_New();
if (locals == NULL) {
Py_DECREF(f);
return NULL;
}
f->f_locals = locals;
}
else {
/* Use the caller's locals, falling back to globals when none given. */
if (locals == NULL)
locals = globals;
Py_INCREF(locals);
f->f_locals = locals;
}
/* Initial execution state: -1 means no instruction has run yet
   (the evaluation loop then starts at the first instruction). */
f->f_lasti = -1;
f->f_lineno = code->co_firstlineno;
f->f_iblock = 0;
f->f_executing = 0;
f->f_gen = NULL;
f->f_trace_opcodes = 0;
f->f_trace_lines = 1;
return f;
}
可以看出,PyFrameObject对象也有内存池机制。
[1] 在创建PyFrameObject对象时,动态大小的内存由两部分组成:
- 局部变量 locals:
code->co_nlocals(Code Block 中局部变量的个数)、code->co_cellvars(Code Block 中嵌套函数所引用的局部变量名集合)、code->co_freevars(Code Block 中的自由变量名集合)
- 运行时栈 stack:
code->co_stacksize
[2] f_valuestack维护“运行时栈”的栈底,也就是f_localsplus+locals。
[3] f_stacktop维护“运行时栈”的栈顶,当前指向栈底位置。
[4] 初始化其他成员。
0x02 栈帧的执行
/* Evaluation loop for frame `f`: fetches instructions from the frame's
   code object one at a time and dispatches on the opcode in a switch.
   `throwflag` is not used in the visible part of this excerpt.
   Lines consisting of "..." mark code omitted from this excerpt. */
PyObject* _Py_HOT_FUNCTION
_PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)
{
...
/* Cache frequently used pieces of the frame and its code object. */
co = f->f_code;
names = co->co_names;
consts = co->co_consts;
fastlocals = f->f_localsplus;
/* The slots after the first co_nlocals entries of f_localsplus. */
freevars = f->f_localsplus + co->co_nlocals;
first_instr = (_Py_CODEUNIT *) PyBytes_AS_STRING(co->co_code);
/*
f->f_lasti refers to the index of the last instruction,
unless it's -1 in which case next_instr should be first_instr.
YIELD_FROM sets f_lasti to itself, in order to repeatedly yield
multiple values.
When the PREDICT() macros are enabled, some opcode pairs follow in
direct succession without updating f->f_lasti. A successful
prediction effectively links the two codes together as if they
were a single new opcode; accordingly,f->f_lasti will point to
the first code in the pair (for instance, GET_ITER followed by
FOR_ITER is effectively a single opcode and f->f_lasti will point
to the beginning of the combined pair.)
*/
assert(f->f_lasti >= -1);
next_instr = first_instr;
/* f_lasti is a byte offset; convert it to a code-unit index and
   continue with the instruction after it. */
if (f->f_lasti >= 0) {
assert(f->f_lasti % sizeof(_Py_CODEUNIT) == 0);
next_instr += f->f_lasti / sizeof(_Py_CODEUNIT) + 1;
}
/* Take over the stack top saved in the frame. */
stack_pointer = f->f_stacktop;
assert(stack_pointer != NULL);
f->f_stacktop = NULL; /* remains NULL unless yield suspends frame */
f->f_executing = 1;
why = WHY_NOT;
...
for (;;) {
...
/* TARGET(op) expands to the case label for an opcode handler. */
#define TARGET(op) \
case op:
/* DISPATCH restarts the for(;;) loop from its top; FAST_DISPATCH jumps
   straight to fast_next_opcode, skipping the loop code before that label. */
#define DISPATCH() continue
#define FAST_DISPATCH() goto fast_next_opcode
/* Code access macros */
/* The integer overflow is checked by an assertion below. */
/* INSTR_OFFSET: byte offset of next_instr from the first instruction. */
#define INSTR_OFFSET() \
(sizeof(_Py_CODEUNIT) * (int)(next_instr - first_instr))
/* NEXTOPARG: decode opcode and oparg from the current code unit, then
   advance next_instr by one unit. */
#define NEXTOPARG() do { \
_Py_CODEUNIT word = *next_instr; \
opcode = _Py_OPCODE(word); \
oparg = _Py_OPARG(word); \
next_instr++; \
} while (0)
...
fast_next_opcode:
/* Record the offset of the instruction about to be fetched. */
f->f_lasti = INSTR_OFFSET();
...
/* Extract opcode and argument */
NEXTOPARG();
dispatch_opcode:
...
switch (opcode) {
TARGET(NOP)
FAST_DISPATCH();
TARGET(...) {
...
/* Placeholder for the omitted handlers: each one finishes with
   one of the two dispatch macros shown below. */
// FAST_DISPATCH();
// DISPATCH();
}
...
default:
/* Unrecognised opcode: log it to stderr and raise SystemError. */
fprintf(stderr,
"XXX lineno: %d, opcode: %d\n",
PyFrame_GetLineNumber(f),
opcode);
PyErr_SetString(PyExc_SystemError, "unknown opcode");
goto error;
} /* switch */
}
}
PyFrameObject对象的执行过程就是一个for循环遍历co_code,通过NEXTOPARG获取一条字节码指令和参数,利用switch/case结构执行字节码指令。
first_instr指向字节码指令序列的开始位置,next_instr指向下一条待执行的字节码指令位置。
f_lasti表示当前执行的位置,初始化为-1,进入fast_next_opcode之后通过INSTR_OFFSET宏设置为0。
每条字节码指令执行之后,会根据指令不同执行FAST_DISPATCH或DISPATCH宏。
DISPATCH会回到for循环开始,在执行下一条指令之前可能会释放GIL,而FAST_DISPATCH直接进入下一条指令的执行。
需要注意的是why变量,why的取值范围在ceval.c中被定义,表示了Python结束字节码执行时的状态:
/* Reason the main interpreter loop is unwinding the stack.
   Every code occupies a bit of its own; bit 2 (0x0004) is not assigned. */
enum why_code {
    WHY_NOT       = 1 << 0,  /* No error */
    WHY_EXCEPTION = 1 << 1,  /* Exception occurred */
    WHY_RETURN    = 1 << 3,  /* 'return' statement */
    WHY_BREAK     = 1 << 4,  /* 'break' statement */
    WHY_CONTINUE  = 1 << 5,  /* 'continue' statement */
    WHY_YIELD     = 1 << 6,  /* 'yield' operator */
    WHY_SILENCED  = 1 << 7   /* Exception silenced by 'with' */
};
Python正常退出for循环时的状态是WHY_NOT,也有可能因为执行字节码时发生异常导致退出,这时候why变量就可能是WHY_EXCEPTION状态。
0x03 栈帧的获取
可以通过sys._getframe获取当前的PyFrameObject对象。
>>> import sys
>>> frame = sys._getframe()
>>> frame
<frame object at 0x103ab2d48>