blog icon indicating copy to clipboard operation
blog copied to clipboard

Python的运行

Open junnplus opened this issue 7 years ago • 4 comments

Python的入口定义在 Modules/main.c

int
Py_Main(int argc, wchar_t **argv)
{
    _PyMain pymain = _PyMain_INIT;
    pymain.use_bytes_argv = 0;
    pymain.argc = argc;
    pymain.wchar_argv = argv;

    return pymain_main(&pymain);
}

static int
pymain_main(_PyMain *pymain)
{
    ...
    pymain->err = _Py_InitializeCore(&pymain->config);
    ...
    pymain_run_python(pymain);
    ...
}

junnplus avatar Mar 31 '18 14:03 junnplus

0x00 初始化运行环境

通过_Py_InitializeCore来初始化Python的运行环境,定义在 Python/pylifecycle.c

/* Begin interpreter initialization
 *
 * On return, the first thread and interpreter state have been created,
 * but the compiler, signal handling, multithreading and
 * multiple interpreter support, and codec infrastructure are not yet
 * available.
 *
 * The import system will support builtin and frozen modules only.
 * The only supported io is writing to sys.stderr
 *
 * If any operation invoked by this function fails, a fatal error is
 * issued and the function does not return.
 *
 * Any code invoked from this function should *not* assume it has access
 * to the Python C API (unless the API is explicitly listed as being
 * safe to call without calling Py_Initialize first)
 */

_PyInitError
_Py_InitializeCore(const _PyCoreConfig *core_config)
{
    assert(core_config != NULL);

    PyInterpreterState *interp;
    PyThreadState *tstate;
    PyObject *bimod, *sysmod, *pstderr;
    _PyInitError err;

    // 初始化Python运行时

    // 初始化Python进程环境

    // 初始化Python线程环境

    // 初始化Python类型系统

    // 初始化Modules系统

    /* Set up a preliminary stderr printer until we have enough
       infrastructure for the io module in place. */
    pstderr = PyFile_NewStdPrinter(fileno(stderr));
    if (pstderr == NULL)
        return _Py_INIT_ERR("can't set preliminary stderr");
    _PySys_SetObjectId(&PyId_stderr, pstderr);
    PySys_SetObject("__stderr__", pstderr);
    Py_DECREF(pstderr);

    err = _PyImport_Init(interp);
    if (_Py_INIT_FAILED(err)) {
        return err;
    }

    err = _PyImportHooks_Init();
    if (_Py_INIT_FAILED(err)) {
        return err;
    }

...

    /* Only when we get here is the runtime core fully initialized */
    _PyRuntime.core_initialized = 1;
    return _Py_INIT_OK();
}

_Py_InitializeCore 会初始化三个比较重要的数据结构:运行时状态、解释器状态与线程状态。

运行时状态

// Include/internal/pystate.h
/* Full Python runtime state */

typedef struct pyruntimestate {
    int initialized;
    int core_initialized;
    PyThreadState *finalizing;

    struct pyinterpreters {
        PyThread_type_lock mutex;
        PyInterpreterState *head;
        PyInterpreterState *main;
        /* _next_interp_id is an auto-numbered sequence of small
           integers.  It gets initialized in _PyInterpreterState_Init(),
           which is called in Py_Initialize(), and used in
           PyInterpreterState_New().  A negative interpreter ID
           indicates an error occurred.  The main interpreter will
           always have an ID of 0.  Overflow results in a RuntimeError.
           If that becomes a problem later then we can adjust, e.g. by
           using a Python int. */
        int64_t next_id;
    } interpreters;
...
    struct _gc_runtime_state gc;
    struct _warnings_runtime_state warnings;
    struct _ceval_runtime_state ceval;
    struct _gilstate_runtime_state gilstate;

    // XXX Consolidate globals found via the check-c-globals script.
} _PyRuntimeState;

#define _PyRuntimeState_INIT {.initialized = 0, .core_initialized = 0}

解释器状态

// Include/pystate.h
typedef struct _is {

    struct _is *next;
    struct _ts *tstate_head;

    int64_t id;
    int64_t id_refcount;
    PyThread_type_lock id_mutex;

    PyObject *modules;
    PyObject *modules_by_index;
    PyObject *sysdict;
    PyObject *builtins;
    PyObject *importlib;

...
    PyObject *codec_search_path;
    PyObject *codec_search_cache;
    PyObject *codec_error_registry;
    int codecs_initialized;
    int fscodec_initialized;

    _PyCoreConfig core_config;
    _PyMainInterpreterConfig config;
...

    PyObject *builtins_copy;
    PyObject *import_func;
    /* Initialized to PyEval_EvalFrameDefault(). */
    _PyFrameEvalFunction eval_frame;

...
    /* AtExit module */
    void (*pyexitfunc)(PyObject *);
    PyObject *pyexitmodule;

    uint64_t tstate_next_unique_id;
} PyInterpreterState;

线程状态

// Include/pystate.h
typedef struct _ts {
    /* See Python/ceval.c for comments explaining most fields */

    struct _ts *prev;
    struct _ts *next;
    PyInterpreterState *interp;

    struct _frame *frame;
    int recursion_depth;
    char overflowed; /* The stack has overflowed. Allow 50 more calls
                        to handle the runtime error. */
    char recursion_critical; /* The current calls must not cause
                                a stack overflow. */
    int stackcheck_counter;

...

    /* The exception currently being raised */
    PyObject *curexc_type;
    PyObject *curexc_value;
    PyObject *curexc_traceback;

    /* The exception currently being handled, if no coroutines/generators
     * are present. Always last element on the stack referred to be exc_info.
     */
    _PyErr_StackItem exc_state;

    /* Pointer to the top of the stack of the exceptions currently
     * being handled */
    _PyErr_StackItem *exc_info;

    PyObject *dict;  /* Stores per-thread state */

    int gilstate_counter;

...

    /* Unique thread state id. */
    uint64_t id;

    /* XXX signal handlers should also be here */

} PyThreadState;

junnplus avatar Jun 13 '20 09:06 junnplus

0x01 运行Python

一旦完成了所有的初始化,Py_Main 函数会调用 pymain_run_python 函数开始运行。

static void
pymain_run_python(_PyMain *pymain)
{
    PyCompilerFlags cf = {.cf_flags = 0};

    pymain_header(pymain);
    pymain_import_readline(pymain);

    if (pymain->command) {
        pymain->status = pymain_run_command(pymain->command, &cf);
    }
    else if (pymain->module) {
        pymain->status = (pymain_run_module(pymain->module, 1) != 0);
    }
    else {
        pymain_run_filename(pymain, &cf);
    }

    pymain_repl(pymain, &cf);
}

pymain_run_python根据不同的运行方式调用的函数也不一样,我们只看两种运行方式。

  • 脚本方式运行:pymain_run_filename -> pymain_run_file -> PyRun_AnyFileExFlags
  • 交互式运行:pymain_repl -> PyRun_AnyFileFlags -> PyRun_AnyFileExFlags

PyRun_AnyFileExFlags定义在 Python/pythonrun.c

/* Parse input from a file and execute it */
int
PyRun_AnyFileExFlags(FILE *fp, const char *filename, int closeit,
                     PyCompilerFlags *flags)
{
    if (filename == NULL)
        filename = "???";
    if (Py_FdIsInteractive(fp, filename)) {
        int err = PyRun_InteractiveLoopFlags(fp, filename, flags);
        if (closeit)
            fclose(fp);
        return err;
    }
    else
        return PyRun_SimpleFileExFlags(fp, filename, closeit, flags);
}

实际上PyRun_InteractiveLoopFlagsPyRun_SimpleFileExFlags都执行了类似的代码:

// 编译
mod = PyParser_ASTFromFileObject(fp, filename, NULL, start, 0, 0,
                                 flags, NULL, arena);
// 执行
ret = run_mod(mod, filename, globals, locals, flags, arena);

编译语句或文件得到抽象语法树(AST),并调用run_mod执行。

static PyObject *
run_mod(mod_ty mod, PyObject *filename, PyObject *globals, PyObject *locals,
            PyCompilerFlags *flags, PyArena *arena)
{
    PyCodeObject *co;
    PyObject *v;
    co = PyAST_CompileObject(mod, filename, flags, -1, arena);
    if (co == NULL)
        return NULL;
    v = PyEval_EvalCode((PyObject*)co, globals, locals);
    Py_DECREF(co);
    return v;
}

run_mod函数基于AST编译得到PyCodeObject对象,并通过调用PyEval_EvalCode执行PyCodeObject对象中的字节码指令序列。

PyEval_EvalCode定义在 Python/ceval.c

PyObject *
PyEval_EvalCode(PyObject *co, PyObject *globals, PyObject *locals)
{
    return PyEval_EvalCodeEx(co,
                      globals, locals,
                      (PyObject **)NULL, 0,
                      (PyObject **)NULL, 0,
                      (PyObject **)NULL, 0,
                      NULL, NULL);
}

PyObject *
PyEval_EvalCodeEx(PyObject *_co, PyObject *globals, PyObject *locals,
                  PyObject *const *args, int argcount,
                  PyObject *const *kws, int kwcount,
                  PyObject *const *defs, int defcount,
                  PyObject *kwdefs, PyObject *closure)
{
    return _PyEval_EvalCodeWithName(_co, globals, locals,
                                    args, argcount,
                                    kws, kws != NULL ? kws + 1 : NULL,
                                    kwcount, 2,
                                    defs, defcount,
                                    kwdefs, closure,
                                    NULL, NULL);
}

PyObject *
_PyEval_EvalCodeWithName(PyObject *_co, PyObject *globals, PyObject *locals,
           PyObject *const *args, Py_ssize_t argcount,
           PyObject *const *kwnames, PyObject *const *kwargs,
           Py_ssize_t kwcount, int kwstep,
           PyObject *const *defs, Py_ssize_t defcount,
           PyObject *kwdefs, PyObject *closure,
           PyObject *name, PyObject *qualname)
{
    PyCodeObject* co = (PyCodeObject*)_co;
    PyFrameObject *f;
    PyObject *retval = NULL;
...
    /* Create the frame */
    tstate = PyThreadState_GET();
    assert(tstate != NULL);
    f = _PyFrame_New_NoTrack(tstate, co, globals, locals);
...
    retval = PyEval_EvalFrameEx(f,0);
...
    return retval;
}

本质上PyEval_EvalCode函数是通过PyEval_EvalFrameEx来执行新创建的PyFrameObject对象。

junnplus avatar Jun 13 '20 09:06 junnplus

一旦完成了所有的初始化,Py_Main函数会调用pymain_run_file函数开始运行。

上面的pymain_run_file应该是pymain_run_python

roachsinai avatar Jun 19 '20 04:06 roachsinai

@roachsinai

上面的pymain_run_file应该是pymain_run_python?

thx, 修正了

junnplus avatar Jun 19 '20 04:06 junnplus