nginx-log-collector
nginx-log-collector copied to clipboard
http_x_real_ip is not inserted into nginx.access_log
select * from nginx.access_log ORDER BY event_datetime DESC limit 1;
SELECT *
FROM nginx.access_log
ORDER BY event_datetime DESC
LIMIT 1
┌──────event_datetime─┬─event_date─┬─server_name─┬─remote_user─┬─http_x_real_ip─┬─status─┬─scheme─┬─request_method─┬─request_uri─┬─server_protocol─┬─body_bytes_sent─┬─request_bytes─┬─http_referer─┬─http_user_agent─┬─request_time─┬─upstream_response_time─┬─hostname────────────────────────┬─host───┐
│ 2020-01-01 12:46:26 │ 2020-01-01 │ vhost3 │ │ 0 │ 304 │ http │ GET │ / │ HTTP/1.0 │ 0 │ 74 │ │ ApacheBench/2.3 │ 0.001 │ [0.001] │ nginx-grafana-apatsev.novalocal │ vhost3 │
I used nginx.conf from https://github.com/avito-tech/nginx-log-collector/blob/master/etc/examples/nginx.conf
And my nginx.conf
............
# HTTP context: MIME types, a JSON access-log format, syslog shipping of
# access/error logs to a local unix socket, and default proxy headers.
http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # One JSON object per request. escape=json makes nginx JSON-escape the
    # variable values. Note that body_bytes_sent is emitted unquoted (a JSON
    # number) while every other value is emitted as a quoted string.
    log_format avito_json escape=json
        '{'
            '"event_datetime": "$time_iso8601", '
            '"server_name": "$server_name", '
            '"remote_addr": "$remote_addr", '
            '"remote_user": "$remote_user", '
            '"http_x_real_ip": "$http_x_real_ip", '
            '"status": "$status", '
            '"scheme": "$scheme", '
            '"request_method": "$request_method", '
            '"request_uri": "$request_uri", '
            '"server_protocol": "$server_protocol", '
            '"body_bytes_sent": $body_bytes_sent, '
            '"http_referer": "$http_referer", '
            '"http_user_agent": "$http_user_agent", '
            '"request_bytes": "$request_length", '
            '"request_time": "$request_time", '
            '"upstream_response_time": "$upstream_response_time", '
            '"hostname": "$hostname", '
            '"host": "$host"'
        '}';

    # Both logs go to the same unix socket via syslog; the tag distinguishes
    # access entries ("nginx") from error entries ("nginx_error").
    access_log syslog:server=unix:/var/run/nginx_log.sock,nohostname,tag=nginx avito_json; #ClickHouse
    error_log syslog:server=unix:/var/run/nginx_log.sock,nohostname,tag=nginx_error; #ClickHouse

    # Headers passed to proxied upstreams. X-Real-IP is set here for the
    # upstream request; $http_x_real_ip in the log format above is the header
    # as received from the client, so it is empty when the client sends none.
    proxy_set_header Host $http_host;
    proxy_set_header X-Real-IP $remote_addr;
    proxy_set_header X-Scheme $scheme;
    proxy_set_header X-Forwarded-Host $host;
    proxy_set_header X-Forwarded-Server $host;
    proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;

    # Per-site configuration.
    include /etc/nginx/conf.d/*.conf;
}
I changed the SQL schema:
-- Per-shard storage table for nginx access-log rows.
--
-- NOTE(review): remote_addr is UInt32, but nginx logs $remote_addr as
-- dotted-quad text. The collector has to convert it (ipToUint32 transformer
-- on remote_addr) before upload; otherwise ClickHouse rejects the row with
-- "Cannot parse input ... (while read the value of key remote_addr)".
CREATE TABLE nginx.access_log_shard
(
    event_datetime         DateTime,
    event_date             Date,
    server_name            LowCardinality(String),
    remote_user            String,
    remote_addr            UInt32,          -- IPv4 address as a 32-bit integer
    status                 UInt16,
    scheme                 LowCardinality(String),
    request_method         LowCardinality(String),
    request_uri            String,
    server_protocol        LowCardinality(String),
    body_bytes_sent        UInt64,
    request_bytes          UInt64,
    http_referer           String,
    http_user_agent        LowCardinality(String),
    request_time           Float32,
    upstream_response_time Array(Float32),  -- one element per upstream attempt
    hostname               LowCardinality(String),
    host                   LowCardinality(String)
)
-- Explicit-clause equivalent of the deprecated positional form
-- MergeTree(event_date, (hostname, request_uri, event_date), 8192):
-- monthly partitions on the date column, same sorting key, same granularity.
ENGINE = MergeTree()
PARTITION BY toYYYYMM(event_date)
ORDER BY (hostname, request_uri, event_date)
SETTINGS index_granularity = 8192;
-- Cluster-wide entry point for nginx access-log rows: a Distributed table
-- that forwards reads and writes to nginx.access_log_shard on every shard of
-- the 'logs_cluster' cluster. rand() is the sharding key, so inserted rows
-- are spread across shards at random.
--
-- The column list must stay in sync with nginx.access_log_shard.
CREATE TABLE nginx.access_log
(
    event_datetime         DateTime,
    event_date             Date,
    server_name            LowCardinality(String),
    remote_user            String,
    remote_addr            UInt32,          -- IPv4 address as a 32-bit integer
    status                 UInt16,
    scheme                 LowCardinality(String),
    request_method         LowCardinality(String),
    request_uri            String,
    server_protocol        LowCardinality(String),
    body_bytes_sent        UInt64,
    request_bytes          UInt64,
    http_referer           String,
    http_user_agent        LowCardinality(String),
    request_time           Float32,
    upstream_response_time Array(Float32),
    hostname               LowCardinality(String),
    host                   LowCardinality(String)
)
ENGINE = Distributed('logs_cluster', 'nginx', 'access_log_shard', rand());
But I get a number in remote_addr:
select remote_addr from nginx.access_log ORDER BY event_datetime DESC limit 1;
SELECT remote_addr
FROM nginx.access_log
ORDER BY event_datetime DESC
LIMIT 1
┌─remote_addr─┐
│ 2887387607 │
└─────────────┘

And I get this error:
{"level":"warn","component":"backlog","error":"clickhouse response status 500: Code: 27, e.displayText() = DB::Exception: Cannot parse input: expected \" before: .26.9.215\", \"remote_user\": \"\", \"http_x_real_ip\": \"\", \"status\": \"200\", \"scheme\": \"http\", \"request_method\": \"GET\", \"request_uri\": \"/\", \"server_protocol\": \"HTTP/1.: (while read the value of key remote_addr): (at row 1)\n (version 19.17.6.36 (official build))\n","time":"2020-01-02T14:39:36Z","message":"unable to upload backlog file"}
{"level":"warn","component":"backlog","error":"clickhouse response status 500: Code: 27, e.displayText() = DB::Exception: Cannot parse input: expected \" before: .26.9.215\", \"remote_user\": \"\", \"http_x_real_ip\": \"\", \"status\": \"200\", \"scheme\": \"http\", \"request_method\": \"GET\", \"request_uri\": \"/\", \"server_protocol\": \"HTTP/1.: (while read the value of key remote_addr): (at row 1)\n (version 19.17.6.36 (official build))\n","time":"2020-01-02T14:39:36Z","message":"unable to upload backlog file"}
Today I built a new nginx-log-collector, ran it, and got these errors:
{"level":"warn","component":"backlog","error":"clickhouse response status 500: Code: 27, e.displayText() = DB::Exception: Cannot parse input: expected \" before: .0.0.1\", \"remote_user\": \"\", \"http_x_real_ip\": \"0\", \"status\": \"401\", \"scheme\": \"http\", \"request_method\": \"GET\", \"request_uri\": \"/\", \"server_protocol\": \"HTTP/1.0\": (while read the value of key remote_addr): (at row 1)\n (version 19.17.6.36 (official build))\n","time":"2020-01-14T09:40:41Z","message":"unable to upload backlog file"} {"level":"warn","component":"backlog","error":"clickhouse response status 500: Code: 27, e.displayText() = DB::Exception: Cannot parse input: expected \" before: .0.0.1\", \"remote_user\": \"\", \"http_x_real_ip\": \"0\", \"status\": \"404\", \"scheme\": \"http\", \"request_method\": \"GET\", \"request_uri\": \"/\", \"server_protocol\": \"HTTP/1.0\": (while read the value of key remote_addr): (at row 1)\n (version 19.17.6.36 (official build))\n","time":"2020-01-14T09:38:19Z","message":"unable to upload backlog file"}
{"level":"warn","component":"uploader","tag":"nginx:","url":"http://172.26.9.230:8123/?input_format_skip_unknown_fields=1&query=INSERT+INTO+nginx.access_log+FORMAT+JSONEachRow","error":"clickhouse response status 500: Code: 27, e.displayText() = DB::Exception: Cannot parse input: expected \" before: .0.0.1\", \"remote_user\": \"\", \"http_x_real_ip\": \"0\", \"status\": \"304\", \"scheme\": \"http\", \"request_method\": \"GET\", \"request_uri\": \"/\", \"server_protocol\": \"HTTP/1.0\": (while read the value of key remote_addr): (at row 1)\n (version 19.17.6.36 (official build))\n","time":"2020-01-14T09:40:39Z","message":"upload error; creating backlog job"} {"level":"warn","component":"uploader","tag":"nginx:","url":"http://172.26.9.230:8123/?input_format_skip_unknown_fields=1&query=INSERT+INTO+nginx.access_log+FORMAT+JSONEachRow","error":"clickhouse response status 500: Code: 27, e.displayText() = DB::Exception: Cannot parse input: expected \" before: .0.0.1\", \"remote_user\": \"\", \"http_x_real_ip\": \"0\", \"status\": \"401\", \"scheme\": \"http\", \"request_method\": \"GET\", \"request_uri\": \"/\", \"server_protocol\": \"HTTP/1.0\": (while read the value of key remote_addr): (at row 1)\n (version 19.17.6.36 (official build))\n","time":"2020-01-14T09:40:09Z","message":"upload error; creating backlog job"}
Конфиги стандартные отсюда https://github.com/avito-tech/nginx-log-collector/tree/master/etc/examples
Access log:
127.0.0.1 - - [14/Jan/2020:09:54:16 +0000] "GET / HTTP/1.0" 404 19 "-" "ApacheBench/2.3" "-"
127.0.0.1 - - [14/Jan/2020:09:54:16 +0000] "GET / HTTP/1.0" 404 19 "-" "ApacheBench/2.3" "-"
127.0.0.1 - - [14/Jan/2020:09:54:16 +0000] "GET / HTTP/1.0" 404 19 "-" "ApacheBench/2.3" "-"
127.0.0.1 - - [14/Jan/2020:09:54:16 +0000] "GET / HTTP/1.0" 404 19 "-" "ApacheBench/2.3" "-"
127.0.0.1 - - [14/Jan/2020:09:54:16 +0000] "GET / HTTP/1.0" 404 19 "-" "ApacheBench/2.3" "-"
127.0.0.1 - - [14/Jan/2020:09:54:16 +0000] "GET / HTTP/1.0" 404 19 "-" "ApacheBench/2.3" "-"
127.0.0.1 - - [14/Jan/2020:09:54:16 +0000] "GET / HTTP/1.0" 500 27 "-" "ApacheBench/2.3" "-"
127.0.0.1 - - [14/Jan/2020:09:54:16 +0000] "GET / HTTP/1.0" 500 27 "-" "ApacheBench/2.3" "-"
127.0.0.1 - - [14/Jan/2020:09:54:16 +0000] "GET / HTTP/1.0" 404 19 "-" "ApacheBench/2.3" "-"
127.0.0.1 - - [14/Jan/2020:09:54:16 +0000] "GET / HTTP/1.0" 404 19 "-" "ApacheBench/2.3" "-"
127.0.0.1 - - [14/Jan/2020:09:54:16 +0000] "GET / HTTP/1.0" 404 19 "-" "ApacheBench/2.3" "-"
127.0.0.1 - - [14/Jan/2020:09:54:16 +0000] "GET / HTTP/1.0" 404 19 "-" "ApacheBench/2.3" "-"
127.0.0.1 - - [14/Jan/2020:09:54:16 +0000] "GET / HTTP/1.0" 404 19 "-" "ApacheBench/2.3" "-"
127.0.0.1 - - [14/Jan/2020:09:54:16 +0000] "GET / HTTP/1.0" 404 19 "-" "ApacheBench/2.3" "-"
127.0.0.1 - - [14/Jan/2020:09:54:16 +0000] "GET / HTTP/1.0" 404 19 "-" "ApacheBench/2.3" "-"
127.0.0.1 - - [14/Jan/2020:09:54:16 +0000] "GET / HTTP/1.0" 404 19 "-" "ApacheBench/2.3" "-"
127.0.0.1 - - [14/Jan/2020:09:54:16 +0000] "GET / HTTP/1.0" 404 19 "-" "ApacheBench/2.3" "-"
127.0.0.1 - - [14/Jan/2020:09:54:16 +0000] "GET / HTTP/1.0" 404 19 "-" "ApacheBench/2.3" "-"
127.0.0.1 - - [14/Jan/2020:09:54:16 +0000] "GET / HTTP/1.0" 404 19 "-" "ApacheBench/2.3" "-"
127.0.0.1 - - [14/Jan/2020:09:54:16 +0000] "GET / HTTP/1.0" 404 19 "-" "ApacheBench/2.3" "-"
127.0.0.1 - - [14/Jan/2020:09:54:16 +0000] "GET / HTTP/1.0" 404 19 "-" "ApacheBench/2.3" "-"
I guess you should remove
http_x_real_ip:
ipToUint32:
from your config
Log
{"level":"warn","component":"backlog","error":"clickhouse response status 500: Code: 27, e.displayText() = DB::Exception: Cannot parse input: expected \" before: .0.0.1\", \"remote_user\": \"\", \"http_x_real_ip\": \"0\", \"status\": \"404\", \"scheme\": \"http\", \"request_method\": \"GET\", \"request_uri\": \"/\", \"server_protocol\": \"HTTP/1.0\": (while read the value of key remote_addr): (at row 1)\n (version 19.17.6.36 (official build))\n","time":"2020-01-14T15:40:15Z","message":"unable to upload backlog file"} {"level":"warn","component":"backlog","error":"clickhouse response status 500: Code: 27, e.displayText() = DB::Exception: Cannot parse input: expected \" before: .0.0.1\", \"remote_user\": \"\", \"http_x_real_ip\": \"0\", \"status\": \"404\", \"scheme\": \"http\", \"request_method\": \"GET\", \"request_uri\": \"/\", \"server_protocol\": \"HTTP/1.0\": (while read the value of key remote_addr): (at row 1)\n (version 19.17.6.36 (official build))\n","time":"2020-01-14T15:40:15Z","message":"unable to upload backlog file"}
Config:
# nginx-log-collector configuration.
# Nesting reconstructed: the pasted original had lost all indentation, which
# left duplicate top-level keys (addr, enabled) and detached the transformer
# and upload settings from their parents.
processor:
  workers: 8

receiver:
  addr: 0.0.0.0:4444

logging:
  level: debug

statsd:
  # prefix: resources.monitoring.nginx_log_collector
  prefix: complex.delete_me.nginx_log_collector
  addr: localhost:2003
  enabled: false

pprof:
  enabled: true
  addr: 0.0.0.0:6060

backlog:
  dir: /tmp/backlog

collected_logs:
  - tag: "nginx:"
    format: access # access | error
    buffer_size: 104857600
    # NOTE(review): no transformer is declared for remote_addr. The target
    # table stores remote_addr as UInt32 while nginx logs it as dotted-quad
    # text, so uploads fail with "Cannot parse input ... key remote_addr"
    # until a remote_addr entry using ipToUint32 is added below.
    transformers: # possible functions: ipToUint32 | limitMaxLength(int) | toArray | splitAndStore
      upstream_response_time:
        toArray:
      http_referer:
        limitMaxLength: 800
      request_uri:
        splitAndStore:
          delimiter: "?"
          store_to:
            request_uri: 0
            request_args: 1
    upload:
      table: nginx.access_log
      dsn: http://172.26.9.230:8123/

  - tag: "nginx_error:"
    format: error # access | error
    buffer_size: 1048576
    upload:
      table: nginx.error_log
      dsn: http://172.26.9.230:8123/
Если я правильно понял, то в примерах в table_schema.sql был столбец http_x_real_ip UInt32, и ты изменил его на remote_addr UInt32 (то есть просто переименовал).
Так как из nginx в http_x_real_ip приходили строковые значения, то в config.yaml была инструкция для преобразования строки в int:
http_x_real_ip: ipToUint32:
Так как ты переименовал столбец, то следует переименовать его и в config.yaml. То есть там должно оказаться что-то вроде
remote_addr: ipToUint32:
С последним конфигом и с этим pull request https://github.com/avito-tech/nginx-log-collector/pull/6 все работает