logstash-filter-grok
logstash-filter-grok copied to clipboard
IP pattern is very slow because of the IPV6 regexp
test script:
# encoding: utf-8
require 'logstash/event'
require 'logstash/environment'
require 'spec/filters/grok_spec'
# Baseline configuration: the client field is matched with IPORHOST.
base_config = {
  "match" => ["message", '%{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "%{WORD:verb} %{DATA:request} HTTP/%{NUMBER:httpversion}" %{NUMBER:response:int} (?:-|%{NUMBER:bytes:int}) %{QS:referrer} %{QS:agent}']
}
# Comparison configuration: identical except the client field uses IPV4ORHOST.
fast_config = {
  "match" => ["message", '%{IPV4ORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "%{WORD:verb} %{DATA:request} HTTP/%{NUMBER:httpversion}" %{NUMBER:response:int} (?:-|%{NUMBER:bytes:int}) %{QS:referrer} %{QS:agent}']
}

grok_base = LogStash::Filters::Grok.new(base_config)
grok_fast = LogStash::Filters::Grok.new(fast_config)

# Register both filters (base first, then fast) before any benchmarking.
[grok_base, grok_fast].each { |grok| grok.register }
# Runs every line of the file "logs" (in the current directory) through
# +filter+ and prints how long it took.
#
# Each line is wrapped in a LogStash::Event under the "message" key and handed
# to +filter.filter+. When the event then carries a "[bytes]" value, it is
# added to a running total.
#
# Prints two lines to stdout: the elapsed wall-clock seconds, then the total.
#
# @param filter [#filter] object whose +filter+ method is called with each event
def benchmark(filter)
  started_at = Time.now
  total = 0
  # Read with the default line separator. Passing "logs" as an argument to
  # each_line would make that string the record separator, so records would be
  # split on the substring "logs" instead of on newlines.
  File.foreach("logs") do |line|
    event = LogStash::Event.new("message" => line)
    filter.filter(event)
    bytes = event["[bytes]"]
    total += bytes if bytes
  end
  puts Time.now - started_at
  puts total
end
# Each filter is run twice: an unmeasured-in-spirit warmup pass, then the pass
# whose timing is the reported benchmark. Base runs before fast.
runs = [
  ["warmup base", grok_base],
  ["benchmark base", grok_base],
  ["warmup fast", grok_fast],
  ["benchmark fast", grok_fast]
]

runs.each do |label, grok|
  puts label
  benchmark(grok)
end
where IPV4ORHOST is a custom pattern defined as: IPV4ORHOST (?:%{IPV4}|%{HOSTNAME})
result:
% bundle exec ruby -J-Xmx6g bench.rb
warmup base
55.881
13026527862
benchmark base
54.502
13026527862
warmup fast
28.809
13026527862
benchmark fast
28.765
13026527862
This means that if you know your logs contain no IPv6 addresses, you can get roughly twice the throughput by using an IPv4-only pattern.