blog
blog copied to clipboard
Python的运行
Python的入口定义在 Modules/main.c
/* Entry point taking a wide-character argument vector.
 *
 * Starts from _PyMain_INIT, records argc/argv in the wchar_t slot, clears
 * use_bytes_argv, and returns whatever pymain_main() returns.
 */
int
Py_Main(int argc, wchar_t **argv)
{
    _PyMain state = _PyMain_INIT;

    state.wchar_argv = argv;
    state.argc = argc;
    state.use_bytes_argv = 0;

    return pymain_main(&state);
}
/* Abridged listing of the main driver called by Py_Main().
 * A line consisting of "..." marks code omitted from this excerpt, so the
 * return statement and any error checks are not visible here.
 */
static int
pymain_main(_PyMain *pymain)
{
...
/* Core initialization; its result is stored in pymain->err. */
pymain->err = _Py_InitializeCore(&pymain->config);
...
/* Hand control to pymain_run_python() with the same state object. */
pymain_run_python(pymain);
...
}
0x00 初始化运行环境
通过_Py_InitializeCore来初始化Python的运行环境,定义在 Python/pylifecycle.c
/* Begin interpreter initialization
*
* On return, the first thread and interpreter state have been created,
* but the compiler, signal handling, multithreading and
* multiple interpreter support, and codec infrastructure are not yet
* available.
*
* The import system will support builtin and frozen modules only.
* The only supported io is writing to sys.stderr
*
* If any operation invoked by this function fails, a fatal error is
* issued and the function does not return.
*
* Any code invoked from this function should *not* assume it has access
* to the Python C API (unless the API is explicitly listed as being
* safe to call without calling Py_Initialize first)
*/
/* Abridged listing: "..." and the five one-line step comments below stand in
 * for code omitted from this excerpt.
 *
 * Returns a _PyInitError: an error value from _Py_INIT_ERR() or from a failed
 * sub-step, or _Py_INIT_OK() after setting _PyRuntime.core_initialized.
 */
_PyInitError
_Py_InitializeCore(const _PyCoreConfig *core_config)
{
assert(core_config != NULL);
PyInterpreterState *interp;
PyThreadState *tstate;
PyObject *bimod, *sysmod, *pstderr;
_PyInitError err;
/* NOTE(review): interp is passed to _PyImport_Init() further down but is
   never assigned in the visible lines -- presumably it is set in one of the
   omitted steps summarized here; confirm against the full source. */
// Initialize the Python runtime
// Initialize the Python process environment
// Initialize the Python thread environment
// Initialize the Python type system
// Initialize the modules system
/* Set up a preliminary stderr printer until we have enough
infrastructure for the io module in place. */
pstderr = PyFile_NewStdPrinter(fileno(stderr));
if (pstderr == NULL)
return _Py_INIT_ERR("can't set preliminary stderr");
/* Store the printer under the stderr id and under "__stderr__", then drop
   the local reference. */
_PySys_SetObjectId(&PyId_stderr, pstderr);
PySys_SetObject("__stderr__", pstderr);
Py_DECREF(pstderr);
/* Each remaining step returns a _PyInitError; a failure is passed straight
   back to the caller. */
err = _PyImport_Init(interp);
if (_Py_INIT_FAILED(err)) {
return err;
}
err = _PyImportHooks_Init();
if (_Py_INIT_FAILED(err)) {
return err;
}
...
/* Only when we get here is the runtime core fully initialized */
_PyRuntime.core_initialized = 1;
return _Py_INIT_OK();
}
_Py_InitializeCore 会初始化三个比较重要的数据结构:运行时状态、解释器状态与线程状态。
运行时状态
// Include/internal/pystate.h
/* Full Python runtime state */
/* Abridged listing: "..." marks members omitted from this excerpt. */
typedef struct pyruntimestate {
int initialized;
/* Set to 1 at the very end of _Py_InitializeCore(). */
int core_initialized;
PyThreadState *finalizing;
/* Bookkeeping for interpreter states.
   NOTE(review): head/main look like "first in list" and "main interpreter"
   pointers -- confirm against Python/pystate.c. */
struct pyinterpreters {
PyThread_type_lock mutex;
PyInterpreterState *head;
PyInterpreterState *main;
/* _next_interp_id is an auto-numbered sequence of small
integers. It gets initialized in _PyInterpreterState_Init(),
which is called in Py_Initialize(), and used in
PyInterpreterState_New(). A negative interpreter ID
indicates an error occurred. The main interpreter will
always have an ID of 0. Overflow results in a RuntimeError.
If that becomes a problem later then we can adjust, e.g. by
using a Python int. */
/* NOTE(review): the comment above says "_next_interp_id" but the member
   declared here is named next_id. */
int64_t next_id;
} interpreters;
...
/* Per-subsystem runtime state, one embedded struct each. */
struct _gc_runtime_state gc;
struct _warnings_runtime_state warnings;
struct _ceval_runtime_state ceval;
struct _gilstate_runtime_state gilstate;
// XXX Consolidate globals found via the check-c-globals script.
} _PyRuntimeState;
/* Static initializer: both initialization flags start at 0; members not
   named here are zero-initialized by the usual C initializer rules. */
#define _PyRuntimeState_INIT {.initialized = 0, .core_initialized = 0}
解释器状态
// Include/pystate.h
/* Interpreter state (PyInterpreterState).
 * Abridged listing: "..." marks members omitted from this excerpt.
 */
typedef struct _is {
/* NOTE(review): next presumably links interpreter states into a list and
   tstate_head presumably points at this interpreter's thread states --
   confirm against Python/pystate.c. */
struct _is *next;
struct _ts *tstate_head;
/* Interpreter id with its own refcount and lock. */
int64_t id;
int64_t id_refcount;
PyThread_type_lock id_mutex;
/* Module- and import-related objects held by this interpreter. */
PyObject *modules;
PyObject *modules_by_index;
PyObject *sysdict;
PyObject *builtins;
PyObject *importlib;
...
/* Codec registry objects and their "initialized" flags. */
PyObject *codec_search_path;
PyObject *codec_search_cache;
PyObject *codec_error_registry;
int codecs_initialized;
int fscodec_initialized;
/* Configuration stored by value inside the interpreter state. */
_PyCoreConfig core_config;
_PyMainInterpreterConfig config;
...
PyObject *builtins_copy;
PyObject *import_func;
/* Initialized to PyEval_EvalFrameDefault(). */
_PyFrameEvalFunction eval_frame;
...
/* AtExit module */
void (*pyexitfunc)(PyObject *);
PyObject *pyexitmodule;
/* NOTE(review): presumably the source of PyThreadState.id values -- confirm. */
uint64_t tstate_next_unique_id;
} PyInterpreterState;
线程状态
// Include/pystate.h
/* Thread state (PyThreadState).
 * Abridged listing: "..." marks members omitted from this excerpt.
 */
typedef struct _ts {
/* See Python/ceval.c for comments explaining most fields */
/* prev/next: links to neighbouring thread states. */
struct _ts *prev;
struct _ts *next;
/* The interpreter state this thread state points back to. */
PyInterpreterState *interp;
/* NOTE(review): presumably the frame currently executing in this thread --
   confirm against Python/ceval.c. */
struct _frame *frame;
int recursion_depth;
char overflowed; /* The stack has overflowed. Allow 50 more calls
to handle the runtime error. */
char recursion_critical; /* The current calls must not cause
a stack overflow. */
int stackcheck_counter;
...
/* The exception currently being raised */
PyObject *curexc_type;
PyObject *curexc_value;
PyObject *curexc_traceback;
/* The exception currently being handled, if no coroutines/generators
* are present. Always last element on the stack referred to by exc_info.
*/
_PyErr_StackItem exc_state;
/* Pointer to the top of the stack of the exceptions currently
* being handled */
_PyErr_StackItem *exc_info;
PyObject *dict; /* Stores per-thread state */
int gilstate_counter;
...
/* Unique thread state id. */
uint64_t id;
/* XXX signal handlers should also be here */
} PyThreadState;
0x01 运行Python
一旦完成了所有的初始化,Py_Main 函数会(经由 pymain_main)调用 pymain_run_python 函数开始运行。
/* Dispatch on how the interpreter was started.
 *
 * After pymain_header() and pymain_import_readline(), exactly one of three
 * paths runs: pymain->command, else pymain->module, else
 * pymain_run_filename(). pymain_repl() is always called last.
 * The command and module paths record their outcome in pymain->status.
 */
static void
pymain_run_python(_PyMain *pymain)
{
    PyCompilerFlags flags = {.cf_flags = 0};

    pymain_header(pymain);
    pymain_import_readline(pymain);

    if (!pymain->command && !pymain->module) {
        /* Neither a command nor a module was given. */
        pymain_run_filename(pymain, &flags);
    }
    else if (pymain->command) {
        /* A command takes precedence over a module. */
        pymain->status = pymain_run_command(pymain->command, &flags);
    }
    else {
        /* Normalize the module runner's result to 0 or 1. */
        int failed = (pymain_run_module(pymain->module, 1) != 0);
        pymain->status = failed;
    }

    pymain_repl(pymain, &flags);
}
pymain_run_python根据不同的运行方式调用的函数也不一样,我们只看两种运行方式。
- 脚本方式运行:
pymain_run_filename->pymain_run_file->PyRun_AnyFileExFlags
- 交互式运行:
pymain_repl->PyRun_AnyFileFlags->PyRun_AnyFileExFlags
PyRun_AnyFileExFlags定义在 Python/pythonrun.c
/* Parse input from a file and execute it.
 *
 * A NULL filename is replaced by "???". When Py_FdIsInteractive() reports
 * an interactive stream, the interactive loop runs and fp is closed here if
 * closeit is non-zero; otherwise the work (including closeit) is handed to
 * PyRun_SimpleFileExFlags(). Returns the status of whichever runner was used.
 */
int
PyRun_AnyFileExFlags(FILE *fp, const char *filename, int closeit,
                     PyCompilerFlags *flags)
{
    const char *name = (filename != NULL) ? filename : "???";

    if (!Py_FdIsInteractive(fp, name)) {
        return PyRun_SimpleFileExFlags(fp, name, closeit, flags);
    }

    int status = PyRun_InteractiveLoopFlags(fp, name, flags);
    if (closeit) {
        fclose(fp);
    }
    return status;
}
实际上PyRun_InteractiveLoopFlags和PyRun_SimpleFileExFlags都执行了类似的代码:
// Parse: build the AST (mod) from the input stream
mod = PyParser_ASTFromFileObject(fp, filename, NULL, start, 0, 0,
flags, NULL, arena);
// Execute: pass the AST to run_mod()
ret = run_mod(mod, filename, globals, locals, flags, arena);
解析语句或文件得到抽象语法树(AST),并调用run_mod执行。
/* Compile an AST to a code object and evaluate it.
 *
 * Returns NULL if PyAST_CompileObject() fails; otherwise returns the result
 * of PyEval_EvalCode(). The reference to the code object is released before
 * returning.
 */
static PyObject *
run_mod(mod_ty mod, PyObject *filename, PyObject *globals, PyObject *locals,
        PyCompilerFlags *flags, PyArena *arena)
{
    PyCodeObject *code = PyAST_CompileObject(mod, filename, flags, -1, arena);
    if (code == NULL) {
        return NULL;
    }

    PyObject *result = PyEval_EvalCode((PyObject *)code, globals, locals);
    Py_DECREF(code);
    return result;
}
run_mod函数基于AST编译得到PyCodeObject对象,并通过调用PyEval_EvalCode执行PyCodeObject对象中的字节码指令序列。
PyEval_EvalCode定义在 Python/ceval.c
/* Evaluate a code object with the given globals and locals.
 *
 * Thin wrapper over PyEval_EvalCodeEx(): every array argument is NULL with a
 * count of 0, and the last two arguments are NULL as well.
 */
PyObject *
PyEval_EvalCode(PyObject *co, PyObject *globals, PyObject *locals)
{
    PyObject **empty = NULL;

    return PyEval_EvalCodeEx(co, globals, locals,
                             empty, 0,
                             empty, 0,
                             empty, 0,
                             NULL, NULL);
}
/* Evaluate a code object with explicit argument, keyword and default arrays.
 *
 * Forwards to _PyEval_EvalCodeWithName(). The single kws array is passed
 * twice: once as-is and once offset by one element (NULL stays NULL), with a
 * step of 2. The last two arguments are passed as NULL.
 */
PyObject *
PyEval_EvalCodeEx(PyObject *_co, PyObject *globals, PyObject *locals,
                  PyObject *const *args, int argcount,
                  PyObject *const *kws, int kwcount,
                  PyObject *const *defs, int defcount,
                  PyObject *kwdefs, PyObject *closure)
{
    PyObject *const *kws_plus_one = NULL;

    if (kws != NULL) {
        kws_plus_one = kws + 1;
    }

    return _PyEval_EvalCodeWithName(_co, globals, locals,
                                    args, argcount,
                                    kws, kws_plus_one,
                                    kwcount, 2,
                                    defs, defcount,
                                    kwdefs, closure,
                                    NULL, NULL);
}
/* Abridged listing: "..." marks code omitted from this excerpt (including
 * the declaration of tstate and everything between frame creation and the
 * call that evaluates the frame).
 *
 * Visible flow: fetch the current thread state, create a frame for the code
 * object with the given globals/locals, evaluate that frame, and return the
 * result.
 */
PyObject *
_PyEval_EvalCodeWithName(PyObject *_co, PyObject *globals, PyObject *locals,
PyObject *const *args, Py_ssize_t argcount,
PyObject *const *kwnames, PyObject *const *kwargs,
Py_ssize_t kwcount, int kwstep,
PyObject *const *defs, Py_ssize_t defcount,
PyObject *kwdefs, PyObject *closure,
PyObject *name, PyObject *qualname)
{
PyCodeObject* co = (PyCodeObject*)_co;
PyFrameObject *f;
/* Stays NULL unless the frame evaluation below assigns it. */
PyObject *retval = NULL;
...
/* Create the frame */
tstate = PyThreadState_GET();
assert(tstate != NULL);
f = _PyFrame_New_NoTrack(tstate, co, globals, locals);
...
/* Evaluate the new frame; the second argument is passed as 0. */
retval = PyEval_EvalFrameEx(f,0);
...
return retval;
}
本质上PyEval_EvalCode函数是通过PyEval_EvalFrameEx来执行新创建的PyFrameObject对象。
一旦完成了所有的初始化,
Py_Main函数会调用pymain_run_file函数开始运行。
上面的pymain_run_file应该是pymain_run_python?
@roachsinai
上面的pymain_run_file应该是pymain_run_python?
thx, 修正了