leevis.com icon indicating copy to clipboard operation
leevis.com copied to clipboard

nginx script

Open vislee opened this issue 8 years ago • 0 comments

概述

nginx 通过 ngx_http_script.h 和 ngx_http_script.c 这两个文件封装了对脚本的支持。在 nginx 内部,脚本、正则和变量是混在一起使用的,例如 ngx_http_rewrite_module 中 if 指令和 rewrite 指令的实现。和动态语言类似,脚本需要先编译再执行。编译的过程基本就是向三个数组(flushes、lengths、values)中添加回调和内容;执行则是遍历其中的两个数组(lengths、values),依次调用编译阶段添加的回调函数。

代码分析

数据结构:

// Code entry for a literal (constant) piece of the source string.
typedef struct {
    ngx_http_script_code_pt     code;  // callback invoked for this entry
    uintptr_t                   len;   // presumably the byte length of the literal -- confirm
} ngx_http_script_copy_code_t;

// Code entry for a variable reference.
typedef struct {
    ngx_http_script_code_pt     code;   // callback invoked for this entry
    uintptr_t                   index;  // presumably the variable's index -- confirm against ngx_http_script_add_var_code
} ngx_http_script_var_code_t;

// Result of compiling a string that may contain variables.
typedef struct {
    ngx_str_t                   value;    // the source string itself; returned as-is when lengths is NULL
    ngx_uint_t                 *flushes;  // variable indices to flush, ended by (ngx_uint_t) -1; NULL when there are none
    void                       *lengths;  // code sequence that computes the result length; NULL for a plain constant
    void                       *values;   // code sequence that produces the result bytes
} ngx_http_complex_value_t;

// Input/output bundle for ngx_http_compile_complex_value().
typedef struct {
    ngx_conf_t                 *cf;             // configuration context; its pool is used for allocations
    ngx_str_t                  *value;          // string to compile
    ngx_http_complex_value_t   *complex_value;  // receives the compiled result

    unsigned                    zero:1;         // forwarded to the script compiler, which then appends a "\0" byte
    // When either prefix flag is set and the value does not start with '$',
    // ngx_conf_full_name() is applied at compile time and both flags are
    // cleared; otherwise the flags are forwarded to the script compiler.
    unsigned                    conf_prefix:1;
    unsigned                    root_prefix:1;
} ngx_http_compile_complex_value_t;

// State needed to run (execute) a compiled script.
typedef struct {
    u_char                     *ip;   // current position in the code sequence being executed
    u_char                     *pos;  // current write position in the output buffer
    ngx_http_variable_value_t  *sp;   // slot the value callbacks store into, advanced after each store

    ngx_str_t                   buf;   // memory that holds the result
    ngx_str_t                   line;  // input string of the run; the string that regexes are matched against

    /* the start of the rewritten arguments */
    u_char                     *args;

    unsigned                    flushed:1;
    unsigned                    skip:1;
    unsigned                    quote:1;
    unsigned                    is_args:1;
    unsigned                    log:1;

    ngx_int_t                   status;  // status returned by the run; assigned from code->status
    ngx_http_request_t         *request;
} ngx_http_script_engine_t;

// State needed to compile a script.
typedef struct {
    ngx_conf_t                 *cf;
    ngx_str_t                  *source;  /* the string that contains variables */

    // see ngx_http_script_flush_complex_value()
    ngx_array_t               **flushes;  /*  indices of the variables that must be flushed in r->variables; may be NULL */
    ngx_array_t               **lengths;  /*  callbacks that compute lengths */
    ngx_array_t               **values;  /*  callbacks that compute values */

    ngx_uint_t                  variables;  /* variable count with two uses: before compilation it is the number of '$' in the string (used to size the arrays); it is reset to 0 and after compilation holds the number of named variables actually found. Positional captures ($1..$9) are not counted */
    ngx_uint_t                  ncaptures;  /* highest positional capture number referenced in the source */
    ngx_uint_t                  captures_mask;  /*  bit n is set once $n has been referenced; used to detect repeated captures */
    ngx_uint_t                  size;  /*  total length of the literal text, e.g. in A_${host}_B_${remote_ip} the literal text is A__B_ and the two variables are host and remote_ip */

    void                       *main;  /*  its address is passed to ngx_http_script_add_code() when adding to values; NOTE(review): exact role not visible here */

    unsigned                    compile_args:1;  /* when set, the first '?' emits the args code and this flag is cleared */
    unsigned                    complete_lengths:1;  /* whether to finish the lengths array; if set, a NULL entry is appended to it */
    unsigned                    complete_values:1; /* whether a NULL entry must be appended to the values array */
    unsigned                    zero:1;  /* append a "\0" byte as a final literal */
    unsigned                    conf_prefix:1;
    unsigned                    root_prefix:1;

    unsigned                    dup_capture:1;  /*  set when the same positional capture is referenced more than once */
    unsigned                    args:1;  /* set when a '?' was seen in the source */
} ngx_http_script_compile_t;

实现的函数:

  • ngx_http_script_start_code(ngx_pool_t *pool, ngx_array_t **codes, size_t size) 初始化codes数组,从数组中分配大小为size的内存返回。

  • ngx_http_script_variables_count(ngx_str_t *value) 返回字符串中变量的个数。通过遍历字符串累加“$”的个数。

  • ngx_http_script_init_arrays(ngx_http_script_compile_t *sc) 初始化sc变量的三个数组(flushes、lengths、values)。

  • ngx_http_script_compile(ngx_http_script_compile_t *sc) 编译变量,把变量对应的回调添加到三个数组中(flushes、lengths、values)。

  • ngx_http_script_add_copy_code(ngx_http_script_compile_t *sc, ngx_str_t *value, ngx_uint_t last) 添加字符串常量回调。value是字符串常量,last标识是否是最后一个字符串。向lengths数组中添加ngx_http_script_copy_code_t结构体,回调函数是ngx_http_script_copy_len_code。向values添加ngx_http_script_copy_code_t结构体和字符串常量,回调函数是ngx_http_script_copy_code。

  • ngx_http_script_add_var_code(ngx_http_script_compile_t *sc, ngx_str_t *name) 添加普通变量回调。name是变量名。向sc变量的两个数组中(lengths、values)注册回调函数,分别是ngx_http_script_copy_var_len_code、ngx_http_script_copy_var_code。

  • ngx_http_script_add_capture_code(ngx_http_script_compile_t *sc, ngx_uint_t n) 添加捕获变量回调。向sc变量的两个数组中(lengths、values)注册回调函数,回调函数分别是ngx_http_script_copy_capture_len_code、ngx_http_script_copy_capture_code。

  • ngx_http_script_add_code(ngx_array_t *codes, size_t size, void *code) 在codes数组中分配一个大小为size的节点。

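  • ngx_http_script_done(ngx_http_script_compile_t *sc) 编译收尾。如果设置了zero,追加一个"\0"常量;如果设置了conf_prefix或root_prefix,添加full name回调;最后按complete_lengths、complete_values分别在lengths、values数组末尾添加NULL结束标志。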

  • ngx_http_script_regex_start_code(ngx_http_script_engine_t *e) 执行阶段的回调(参数是运行时的ngx_http_script_engine_t):开始执行一个带有正则表达式捕获变量的脚本。

  • ngx_http_script_regex_end_code(ngx_http_script_engine_t *e) 执行阶段的回调:带有正则表达式捕获变量的脚本执行结束,把计算后的结果赋值。

  • ngx_http_script_flush_no_cacheable_variables(ngx_http_request_t *r, ngx_array_t *indices) 根据flushes数组清除r->variables数组保存的变量值。

代码:

/*
 * Create whichever of the flushes / lengths / values arrays of the compile
 * context do not exist yet, sized from the expected variable count in
 * sc->variables, then reset sc->variables to 0.
 *
 * sc->flushes itself may be NULL (no flush array wanted); sc->lengths and
 * sc->values are always dereferenced.
 *
 * Returns NGX_OK, or NGX_ERROR when an array cannot be created.
 */
static ngx_int_t
ngx_http_script_init_arrays(ngx_http_script_compile_t *sc)
{
    ngx_uint_t   n, per_var;

    /* bytes reserved per expected variable: two copy codes and one var code */
    per_var = 2 * sizeof(ngx_http_script_copy_code_t)
              + sizeof(ngx_http_script_var_code_t);

    if (sc->flushes != NULL && *sc->flushes == NULL) {
        /* always reserve at least one slot */
        n = (sc->variables != 0) ? sc->variables : 1;

        *sc->flushes = ngx_array_create(sc->cf->pool, n, sizeof(ngx_uint_t));
        if (*sc->flushes == NULL) {
            return NGX_ERROR;
        }
    }

    if (*sc->lengths == NULL) {
        /* byte array: per-variable codes plus one terminating word */
        n = sc->variables * per_var + sizeof(uintptr_t);

        *sc->lengths = ngx_array_create(sc->cf->pool, n, 1);
        if (*sc->lengths == NULL) {
            return NGX_ERROR;
        }
    }

    if (*sc->values == NULL) {
        /* as above, plus room for the literal text that may be stored inline */
        n = sc->variables * per_var + sizeof(uintptr_t) + sc->source->len;

        /* round up to a multiple of sizeof(uintptr_t) */
        n = (n + sizeof(uintptr_t) - 1) & ~(sizeof(uintptr_t) - 1);

        *sc->values = ngx_array_create(sc->cf->pool, n, 1);
        if (*sc->values == NULL) {
            return NGX_ERROR;
        }
    }

    /* from here on sc->variables counts the variables actually compiled */
    sc->variables = 0;

    return NGX_OK;
}


// Compile sc->source into script code.
//
// The source string is scanned left to right and split into:
//   - "$1".."$9"           -> ngx_http_script_add_capture_code()
//   - "$name" / "${name}"  -> ngx_http_script_add_var_code()
//   - '?' while sc->compile_args is set -> ngx_http_script_add_args_code()
//   - any other run of bytes -> ngx_http_script_add_copy_code() (literal)
// and finished with ngx_http_script_done().
//
// Returns NGX_OK on success, NGX_ERROR on an allocation failure or a
// malformed variable reference (an error is logged for the latter).
ngx_int_t
ngx_http_script_compile(ngx_http_script_compile_t *sc)
{
    u_char       ch;
    ngx_str_t    name;
    ngx_uint_t   i, bracket;

    if (ngx_http_script_init_arrays(sc) != NGX_OK) {
        return NGX_ERROR;
    }

    // Examples of what sc->source holds:
    //   rewrite ^(/download/.*)/media/(.*)\..*$ $1/mp3/$2.mp3 last;
    //     source=$1/mp3/$2.mp3
    //   limit_req_zone A_${host}_$binary_remote_addr zone=zone:10m rate=1r/s;
    //     source=A_${host}_$binary_remote_addr
    for (i = 0; i < sc->source->len; /* void */ ) {

        name.len = 0;

        if (sc->source->data[i] == '$') {

            // a '$' at the very end of the string names nothing
            if (++i == sc->source->len) {
                goto invalid_variable;
            }

            if (sc->source->data[i] >= '1' && sc->source->data[i] <= '9') {
#if (NGX_PCRE)
                ngx_uint_t  n;

                n = sc->source->data[i] - '0';

                // the bit is already set: this capture is referenced again
                if (sc->captures_mask & ((ngx_uint_t) 1 << n)) {
                    sc->dup_capture = 1;
                }

                sc->captures_mask |= (ngx_uint_t) 1 << n;

                // positional regex capture; n is the placeholder number,
                // i.e. the n-th captured group
                if (ngx_http_script_add_capture_code(sc, n) != NGX_OK) {
                    return NGX_ERROR;
                }

                i++;

                continue;
#else
                // built without regular expression support
                ngx_conf_log_error(NGX_LOG_EMERG, sc->cf, 0,
                                   "using variable \"$%c\" requires "
                                   "PCRE library", sc->source->data[i]);
                return NGX_ERROR;
#endif
            }

            if (sc->source->data[i] == '{') {
                bracket = 1;

                // "${" at the very end of the string
                if (++i == sc->source->len) {
                    goto invalid_variable;
                }

                name.data = &sc->source->data[i];

            } else {
                bracket = 0;
                name.data = &sc->source->data[i];
            }

            // consume the name: letters, digits and '_'; a '}' closes a
            // "${...}" form and is skipped without being counted in name.len
            for ( /* void */ ; i < sc->source->len; i++, name.len++) {
                ch = sc->source->data[i];

                if (ch == '}' && bracket) {
                    i++;
                    bracket = 0;
                    break;
                }

                if ((ch >= 'A' && ch <= 'Z')
                    || (ch >= 'a' && ch <= 'z')
                    || (ch >= '0' && ch <= '9')
                    || ch == '_')
                {
                    continue;
                }

                break;
            }

            // still set: the name ended without the closing '}'
            if (bracket) {
                ngx_conf_log_error(NGX_LOG_EMERG, sc->cf, 0,
                                   "the closing bracket in \"%V\" "
                                   "variable is missing", &name);
                return NGX_ERROR;
            }

            if (name.len == 0) {
                goto invalid_variable;
            }

            // count of named variables found
            sc->variables++;
            // add the variable callbacks
            if (ngx_http_script_add_var_code(sc, &name) != NGX_OK) {
                return NGX_ERROR;
            }

            continue;
        }

        // start of the URI arguments; only the first '?' is handled here
        // because compile_args is cleared right away
        if (sc->source->data[i] == '?' && sc->compile_args) {
            sc->args = 1;
            sc->compile_args = 0;

            if (ngx_http_script_add_args_code(sc) != NGX_OK) {
                return NGX_ERROR;
            }

            i++;

            continue;
        }

        // literal text: runs up to the next '$', or up to a '?' that must
        // still be compiled as the arguments separator
        name.data = &sc->source->data[i];

        while (i < sc->source->len) {

            if (sc->source->data[i] == '$') {
                break;
            }

            if (sc->source->data[i] == '?') {

                sc->args = 1;

                if (sc->compile_args) {
                    break;
                }
            }

            i++;
            name.len++;
        }

        // running total of literal text length
        sc->size += name.len;
        // add the literal; the last argument tells whether it reaches the
        // end of the source string
        if (ngx_http_script_add_copy_code(sc, &name, (i == sc->source->len))
            != NGX_OK)
        {
            return NGX_ERROR;
        }
    }

    return ngx_http_script_done(sc);

invalid_variable:

    ngx_conf_log_error(NGX_LOG_EMERG, sc->cf, 0, "invalid variable name");

    return NGX_ERROR;
}

/*
 * Compile ccv->value into ccv->complex_value.
 *
 * A value with no '$' at all is stored as a plain constant: only
 * complex_value->value is set, and lengths/values/flushes stay NULL.
 * Otherwise the string is compiled with ngx_http_script_compile() and the
 * resulting code sequences are stored in the complex value.
 *
 * If conf_prefix or root_prefix is set and the value does not start with
 * '$', the value is expanded in place with ngx_conf_full_name() and both
 * flags are cleared.
 *
 * Returns NGX_OK on success and NGX_ERROR on failure.
 */
ngx_int_t
ngx_http_compile_complex_value(ngx_http_compile_complex_value_t *ccv)
{
    ngx_str_t                  *v;
    ngx_uint_t                  i, n, nv, nc;
    ngx_array_t                 flushes, lengths, values, *pf, *pl, *pv;
    ngx_http_script_compile_t   sc;

    v = ccv->value;

    nv = 0;  /* '$' occurrences that are not positional captures */
    nc = 0;  /* positional capture placeholders, "$1".."$9" */

    for (i = 0; i < v->len; i++) {
        if (v->data[i] != '$') {
            continue;
        }

        /*
         * Look at the byte after '$' only when it is still inside the
         * string: an ngx_str_t is not required to be null-terminated, so
         * data[len] must not be read.  A trailing '$' is counted as a
         * variable and is rejected later by ngx_http_script_compile().
         */
        if (i + 1 < v->len
            && v->data[i + 1] >= '1' && v->data[i + 1] <= '9')
        {
            nc++;

        } else {
            nv++;
        }
    }

    if ((v->len == 0 || v->data[0] != '$')
        && (ccv->conf_prefix || ccv->root_prefix))
    {
        if (ngx_conf_full_name(ccv->cf->cycle, v, ccv->conf_prefix) != NGX_OK) {
            return NGX_ERROR;
        }

        /* the prefix has been applied, do not apply it again at run time */
        ccv->conf_prefix = 0;
        ccv->root_prefix = 0;
    }

    ccv->complex_value->value = *v;      /* the source string */
    ccv->complex_value->flushes = NULL;  /* indices of variables to flush */
    ccv->complex_value->lengths = NULL;  /* length callbacks and their data */
    ccv->complex_value->values = NULL;   /* value callbacks and their data */

    /* neither variables nor captures: a plain constant, nothing to compile */
    if (nv == 0 && nc == 0) {
        return NGX_OK;
    }

    /* one extra slot for the (ngx_uint_t) -1 terminator stored below */
    n = nv + 1;

    if (ngx_array_init(&flushes, ccv->cf->pool, n, sizeof(ngx_uint_t))
        != NGX_OK)
    {
        return NGX_ERROR;
    }

    n = nv * (2 * sizeof(ngx_http_script_copy_code_t)
                  + sizeof(ngx_http_script_var_code_t))
        + sizeof(uintptr_t);

    if (ngx_array_init(&lengths, ccv->cf->pool, n, 1) != NGX_OK) {
        return NGX_ERROR;
    }

    /* same as above plus the literal text, rounded up to a word boundary */
    n = (nv * (2 * sizeof(ngx_http_script_copy_code_t)
                   + sizeof(ngx_http_script_var_code_t))
                + sizeof(uintptr_t)
                + v->len
                + sizeof(uintptr_t) - 1)
            & ~(sizeof(uintptr_t) - 1);

    if (ngx_array_init(&values, ccv->cf->pool, n, 1) != NGX_OK) {
        return NGX_ERROR;
    }

    /*
     * The array headers live on the stack; only their elts pointers are
     * kept in the complex value after compilation.
     */
    pf = &flushes;
    pl = &lengths;
    pv = &values;

    ngx_memzero(&sc, sizeof(ngx_http_script_compile_t));

    sc.cf = ccv->cf;
    sc.source = v;
    sc.flushes = &pf;
    sc.lengths = &pl;
    sc.values = &pv;
    sc.complete_lengths = 1;
    sc.complete_values = 1;
    sc.zero = ccv->zero;
    sc.conf_prefix = ccv->conf_prefix;
    sc.root_prefix = ccv->root_prefix;

    /* compile the script */
    if (ngx_http_script_compile(&sc) != NGX_OK) {
        return NGX_ERROR;
    }

    if (flushes.nelts) {
        ccv->complex_value->flushes = flushes.elts;
        /* terminate the index list */
        ccv->complex_value->flushes[flushes.nelts] = (ngx_uint_t) -1;
    }

    ccv->complex_value->lengths = lengths.elts;
    ccv->complex_value->values = values.elts;

    return NGX_OK;
}


// Emit the length and value codes for a positional regex capture, where n
// is the capture number.
// Example: ^(/download/.*)/media/(.*)\..*$ $1/mp3/$2.mp3
//   $1 refers to the first captured group, i.e. /download/.*
//
// Returns NGX_OK, or NGX_ERROR when a code entry cannot be added.
static ngx_int_t
ngx_http_script_add_capture_code(ngx_http_script_compile_t *sc, ngx_uint_t n)
{
    ngx_http_script_copy_capture_code_t  *len_code, *val_code;

    len_code = ngx_http_script_add_code(*sc->lengths, sizeof(*len_code), NULL);
    if (len_code == NULL) {
        return NGX_ERROR;
    }

    // n is doubled because the regex library (pcre) needs it that way --
    // presumably captures are stored as pairs of offsets; confirm
    len_code->n = 2 * n;
    len_code->code = (ngx_http_script_code_pt)
                          ngx_http_script_copy_capture_len_code;

    val_code = ngx_http_script_add_code(*sc->values, sizeof(*val_code),
                                        &sc->main);
    if (val_code == NULL) {
        return NGX_ERROR;
    }

    val_code->n = 2 * n;
    val_code->code = ngx_http_script_copy_capture_code;

    // remember the highest capture number referenced so far
    if (n > sc->ncaptures) {
        sc->ncaptures = n;
    }

    return NGX_OK;
}


// Finish a compilation: optionally append a "\0" literal and the full-name
// code, then append the NULL end markers to the lengths and values arrays.
//
// Returns NGX_OK, or NGX_ERROR when any entry cannot be added.
static ngx_int_t
ngx_http_script_done(ngx_http_script_compile_t *sc)
{
    ngx_str_t    nul;
    uintptr_t   *end;

    if (sc->zero) {
        // one-byte literal holding a single NUL character
        nul.data = (u_char *) "\0";
        nul.len = 1;

        if (ngx_http_script_add_copy_code(sc, &nul, 0) != NGX_OK) {
            return NGX_ERROR;
        }
    }

    if ((sc->conf_prefix || sc->root_prefix)
        && ngx_http_script_add_full_name_code(sc) != NGX_OK)
    {
        return NGX_ERROR;
    }

    // end marker for the length codes
    if (sc->complete_lengths) {
        end = ngx_http_script_add_code(*sc->lengths, sizeof(*end), NULL);
        if (end == NULL) {
            return NGX_ERROR;
        }

        *end = (uintptr_t) NULL;
    }

    // end marker for the value codes
    if (sc->complete_values) {
        end = ngx_http_script_add_code(*sc->values, sizeof(*end), &sc->main);
        if (end == NULL) {
            return NGX_ERROR;
        }

        *end = (uintptr_t) NULL;
    }

    return NGX_OK;
}

/*
 * Evaluate a compiled complex value for request r and store the result in
 * *value.
 *
 * A value without code (val->lengths == NULL) is a constant and is returned
 * as-is.  Otherwise the length codes are run first to size the buffer, the
 * buffer is allocated from r->pool, and the value codes are run to fill it.
 *
 * Returns NGX_OK, or NGX_ERROR when the buffer cannot be allocated.
 */
ngx_int_t
ngx_http_complex_value(ngx_http_request_t *r, ngx_http_complex_value_t *val,
    ngx_str_t *value)
{
    size_t                        total;
    ngx_http_script_code_pt       run;
    ngx_http_script_len_code_pt   measure;
    ngx_http_script_engine_t      e;

    if (val->lengths == NULL) {
        *value = val->value;
        return NGX_OK;
    }

    ngx_http_script_flush_complex_value(r, val);

    ngx_memzero(&e, sizeof(e));

    e.request = r;
    e.flushed = 1;

    /* pass 1: each length code advances e.ip itself and returns its size */
    total = 0;

    for (e.ip = val->lengths; *(uintptr_t *) e.ip != 0; /* void */ ) {
        measure = *(ngx_http_script_len_code_pt *) e.ip;
        total += measure(&e);
    }

    value->len = total;
    value->data = ngx_pnalloc(r->pool, total);
    if (value->data == NULL) {
        return NGX_ERROR;
    }

    /* pass 2: run the value codes against the freshly allocated buffer */
    e.buf = *value;
    e.pos = value->data;

    for (e.ip = val->values; *(uintptr_t *) e.ip != 0; /* void */ ) {
        run = *(ngx_http_script_code_pt *) e.ip;
        run(&e);
    }

    *value = e.buf;

    return NGX_OK;
}

常量字符串

例如: set $text "hello world";

// Code entry allocated for a constant value, e.g. set $text "hello world";
typedef struct {
    ngx_http_script_code_pt     code;  // callback
    uintptr_t                   value;    // original note says "number of variables"; NOTE(review): not read by ngx_http_script_value_code() -- likely the numeric value of the text instead, confirm
    uintptr_t                   text_len;  // length of the constant string
    uintptr_t                   text_data;  // the constant string (a pointer stored as an integer)
} ngx_http_script_value_code_t;

// Callback for a constant value: stores the constant text into the slot
// e->sp points at, then advances both the code position and e->sp.
void
ngx_http_script_value_code(ngx_http_script_engine_t *e)
{
    ngx_http_script_value_code_t  *vc;

    vc = (ngx_http_script_value_code_t *) e->ip;

    // expose the text without copying it
    e->sp->data = (u_char *) vc->text_data;
    e->sp->len = vc->text_len;

    // step over this code entry
    e->ip += sizeof(*vc);

    ngx_log_debug1(NGX_LOG_DEBUG_HTTP, e->request->connection->log, 0,
                   "http script value: \"%v\"", e->sp);

    e->sp++;
}

带有变量的字符串

带有变量的字符串比较复杂,需要分配个结构体保存赋值的回调函数,然后再编译带有变量的结果。 例如: set $text "hello $hostname";

// Code entry allocated for assigning a string that contains variables,
// e.g. set $text "hello $hostname";
typedef struct {
    ngx_http_script_code_pt     code;  // callback
    ngx_array_t                *lengths;  // array whose elts hold the length codes run to size the result
} ngx_http_script_complex_value_code_t;

// Callback that prepares the output buffer for a string with variables:
// runs the length codes in a scratch engine to get the total size,
// allocates e->buf from the request pool, points e->pos at its start and
// stores the (still unfilled) buffer into the slot at e->sp, advancing e->sp.
// On allocation failure the script is redirected to ngx_http_script_exit
// with status 500.
void
ngx_http_script_complex_value_code(ngx_http_script_engine_t *e)
{
    size_t                                 total;
    ngx_http_script_engine_t               le;
    ngx_http_script_len_code_pt            measure;
    ngx_http_script_complex_value_code_t  *cv;

    cv = (ngx_http_script_complex_value_code_t *) e->ip;

    // step over this code entry
    e->ip += sizeof(*cv);

    ngx_log_debug0(NGX_LOG_DEBUG_HTTP, e->request->connection->log, 0,
                   "http script complex value");

    // scratch engine used only for the length pass
    ngx_memzero(&le, sizeof(le));

    le.request = e->request;
    le.line = e->line;
    le.quote = e->quote;
    le.ip = cv->lengths->elts;

    total = 0;

    // every length code advances le.ip itself and returns its size
    while (*(uintptr_t *) le.ip) {
        measure = *(ngx_http_script_len_code_pt *) le.ip;
        total += measure(&le);
    }

    e->buf.len = total;
    e->buf.data = ngx_pnalloc(e->request->pool, total);

    if (e->buf.data == NULL) {
        e->status = NGX_HTTP_INTERNAL_SERVER_ERROR;
        e->ip = ngx_http_script_exit;
        return;
    }

    e->pos = e->buf.data;

    e->sp->len = e->buf.len;
    e->sp->data = e->buf.data;
    e->sp++;
}

vislee avatar Mar 31 '17 11:03 vislee