Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Register Fluent::Plugin::RegexpParser as parser plugin #1094

Merged
merged 10 commits into from
Jul 19, 2016
17 changes: 17 additions & 0 deletions lib/fluent/compat/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
require 'fluent/plugin/parser'
require 'fluent/mixin'

require 'fluent/plugin/parser_regexp'
require 'fluent/plugin/parser_json'
require 'fluent/plugin/parser_tsv'
require 'fluent/plugin/parser_ltsv'
Expand Down Expand Up @@ -121,6 +122,22 @@ class TimeParser < Fluent::Plugin::Parser::TimeParser

# Compatibility wrapper that keeps the legacy constructor signature
# (a Regexp plus an optional configuration) on top of
# Fluent::Plugin::RegexpParser.
class RegexpParser < Fluent::Plugin::RegexpParser
  # TODO: warn when deprecated

  # regexp - pattern stored in @regexp.
  # conf   - optional configuration. When non-empty it is handed to
  #          #configure, wrapped in a Config::Element first if it is not
  #          already one.
  #
  # NOTE(review): with an empty conf, #configure is not invoked here --
  # confirm callers configure the instance themselves before parsing.
  def initialize(regexp, conf = {})
    super()

    unless conf.empty?
      element = conf.is_a?(Config::Element) ? conf : Config::Element.new('default_regexp_conf', '', conf, [])
      configure(element)
    end

    # Assigned last, so it overrides anything #configure may have stored.
    @regexp = regexp
  end

  # Returns the pattern and the time format in a Hash keyed by
  # 'format' and 'time_format'.
  def patterns
    { 'format' => @regexp, 'time_format' => @time_format }
  end
end

class ValuesParser < Fluent::Plugin::ValuesParser
Expand Down
69 changes: 0 additions & 69 deletions lib/fluent/plugin/parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -97,75 +97,6 @@ def parse(value)
end
end

# Parser that builds a record (and, when present, the event time) from the
# named captures of a regular expression.
class RegexpParser < Parser
  include Fluent::TypeConverter

  config_param :time_key, :string, default: 'time'
  config_param :time_format, :string, default: nil

  # regexp - Regexp matched against every input passed to #parse.
  # conf   - optional configuration. When non-empty it is handed to
  #          #configure, wrapped in a Config::Element first if it is not
  #          already one.
  def initialize(regexp, conf={})
    super()

    unless conf.empty?
      element = conf.is_a?(Config::Element) ? conf : Config::Element.new('default_regexp_conf', '', conf, [])
      configure(element)
    end

    @regexp = regexp
    @time_parser = TimeParser.new(@time_format)
    @mutex = Mutex.new
  end

  # Applies the configuration, then rebuilds the time parser from the
  # resulting @time_format.
  def configure(conf)
    super
    @time_parser = TimeParser.new(@time_format)
  end

  # Returns the pattern and the time format in a Hash keyed by
  # 'format' and 'time_format'.
  def patterns
    { 'format' => @regexp, 'time_format' => @time_format }
  end

  # Matches text against @regexp and yields (time, record).
  # Yields (nil, nil) when the text does not match.
  def parse(text)
    match = @regexp.match(text)
    unless match
      yield nil, nil
      return
    end

    time = nil
    record = {}

    match.names.each do |name|
      value = match[name]
      # Captures that did not take part in the match are nil; leave them out.
      next unless value

      time_capture = (name == @time_key)
      # The time parser is only ever called while holding the mutex.
      time = @mutex.synchronize { @time_parser.parse(value) } if time_capture
      # The time capture is copied into the record only on request.
      next if time_capture && !@keep_time_key

      record[name] = @type_converters.nil? ? value : convert_type(name, value)
    end

    time ||= Fluent::EventTime.now if @estimate_current_event

    yield time, record
  end
end

class ValuesParser < Parser
include Fluent::TypeConverter

Expand Down
16 changes: 10 additions & 6 deletions lib/fluent/plugin/parser_apache.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,13 @@

require 'fluent/plugin/parser'

# Registers the 'apache' parser as a factory Proc: each call builds a
# RegexpParser from the literal access-log pattern below and a
# 'time_format' option for the bracketed time capture.
Fluent::Plugin.register_parser('apache', Proc.new{
Fluent::Plugin::RegexpParser.new(
/^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/,
{'time_format'=>"%d/%b/%Y:%H:%M:%S %z"}
)
})
module Fluent
module Plugin
# Parser plugin registered under the name "apache". It is a RegexpParser
# subclass that only overrides two configuration defaults.
class ApacheParser < RegexpParser
Plugin.register_parser("apache", self)

# Default pattern with named captures host, user, time, method, path, code,
# size, referer and agent; the path and the referer/agent pair sit in
# optional groups.
config_set_default :expression, %q{/^(?<host>[^ ]*) [^ ]* (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^ ]*) +\S*)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/}
# Default time format (day/abbreviated month/year:hour:minute:second zone).
config_set_default :time_format, "%d/%b/%Y:%H:%M:%S %z"
end
end
end
11 changes: 8 additions & 3 deletions lib/fluent/plugin/parser_apache_error.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@

require 'fluent/plugin/parser'

# Registers the 'apache_error' parser as a factory Proc: each call builds a
# RegexpParser from the literal error-log pattern below. No configuration
# hash is passed.
Fluent::Plugin.register_parser('apache_error', Proc.new {
Fluent::Plugin::RegexpParser.new(/^\[[^ ]* (?<time>[^\]]*)\] \[(?<level>[^\]]*)\](?: \[pid (?<pid>[^\]]*)\])?( \[client (?<client>[^\]]*)\])? (?<message>.*)$/)
})
module Fluent
module Plugin
# Parser plugin registered under the name "apache_error". It is a
# RegexpParser subclass that only overrides the default expression.
class ApacheErrorParser < RegexpParser
Plugin.register_parser("apache_error", self)
# Default pattern with named captures time, level, pid, client and message;
# the pid and client sections sit in optional groups. No time_format
# default is set here.
config_set_default :expression, %q{/^\[[^ ]* (?<time>[^\]]*)\] \[(?<level>[^\]]*)\](?: \[pid (?<pid>[^\]]*)\])?( \[client (?<client>[^\]]*)\])? (?<message>.*)$/}
end
end
end
8 changes: 5 additions & 3 deletions lib/fluent/plugin/parser_multiline.rb
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,13 @@ def configure(conf)

formats = parse_formats(conf).compact.map { |f| f[1..-2] }.join
begin
@regex = Regexp.new(formats, Regexp::MULTILINE)
if @regex.named_captures.empty?
regexp = Regexp.new(formats, Regexp::MULTILINE)
if regexp.named_captures.empty?
raise "No named captures"
end
@parser = RegexpParser.new(@regex, conf)
regexp_conf = Fluent::Config::Element.new("", "", { "expression" => "/#{formats}/", "multiline" => true }, [])
@parser = Fluent::Plugin::RegexpParser.new
@parser.configure(conf + regexp_conf)
rescue => e
raise ConfigError, "Invalid regexp '#{formats}': #{e}"
end
Expand Down
16 changes: 10 additions & 6 deletions lib/fluent/plugin/parser_nginx.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,13 @@

require 'fluent/plugin/parser'

# Registers the 'nginx' parser as a factory Proc: each call builds a
# RegexpParser from the literal access-log pattern below and a
# 'time_format' option for the bracketed time capture.
Fluent::Plugin.register_parser('nginx', Proc.new {
Fluent::Plugin::RegexpParser.new(
/^(?<remote>[^ ]*) (?<host>[^ ]*) (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?: +\S*)?)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/,
{'time_format'=>"%d/%b/%Y:%H:%M:%S %z"}
)
})
module Fluent
module Plugin
# Parser plugin registered under the name "nginx". It is a RegexpParser
# subclass that only overrides two configuration defaults.
class NginxParser < RegexpParser
Plugin.register_parser("nginx", self)

# Default pattern with named captures remote, host, user, time, method,
# path, code, size, referer and agent; the path (matched lazily up to an
# optional trailing token) and the referer/agent pair sit in optional groups.
config_set_default :expression, %q{/^(?<remote>[^ ]*) (?<host>[^ ]*) (?<user>[^ ]*) \[(?<time>[^\]]*)\] "(?<method>\S+)(?: +(?<path>[^\"]*?)(?: +\S*)?)?" (?<code>[^ ]*) (?<size>[^ ]*)(?: "(?<referer>[^\"]*)" "(?<agent>[^\"]*)")?$/}
# Default time format (day/abbreviated month/year:hour:minute:second zone).
config_set_default :time_format, "%d/%b/%Y:%H:%M:%S %z"
end
end
end
73 changes: 73 additions & 0 deletions lib/fluent/plugin/parser_regexp.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
module Fluent
  module Plugin
    # Parser plugin registered as "regexp". It matches input text against a
    # configured regular expression and turns the named captures into record
    # fields; the capture named by +time_key+ is parsed into the event time.
    class RegexpParser < Parser
      include Fluent::Compat::TypeConverter

      Plugin.register_parser("regexp", self)

      # Pattern written as "/.../"; the empty default leaves @regexp untouched.
      config_param :expression, :string, default: ""
      # Adds Regexp::IGNORECASE to the compiled pattern.
      config_param :ignorecase, :bool, default: false
      # Adds Regexp::MULTILINE to the compiled pattern.
      config_param :multiline, :bool, default: false
      # Name of the capture that holds the event time.
      config_param :time_key, :string, default: 'time'
      # Format handed to TimeParser for the time capture.
      config_param :time_format, :string, default: nil

      def initialize
        super
        @mutex = Mutex.new
      end

      # Builds the time parser and, when an expression is configured,
      # compiles it into @regexp.
      #
      # Raises Fluent::ConfigError when a non-empty expression is not wrapped
      # in a leading and a trailing `/`.
      def configure(conf)
        super
        @time_parser = TimeParser.new(@time_format)
        return if @expression.empty?

        # The size check rejects a lone "/": a single character would
        # otherwise satisfy both the leading and the trailing delimiter test
        # and silently compile to an empty pattern.
        delimited = @expression.size >= 2 && @expression.start_with?("/") && @expression.end_with?("/")
        unless delimited
          raise Fluent::ConfigError, "expression must start with `/` and end with `/`: #{@expression}"
        end

        regexp_option = 0
        regexp_option |= Regexp::IGNORECASE if @ignorecase
        regexp_option |= Regexp::MULTILINE if @multiline
        @regexp = Regexp.new(@expression[1..-2], regexp_option)
      end

      # Matches text against @regexp and yields (time, record).
      # Yields (nil, nil) when the text does not match.
      def parse(text)
        m = @regexp.match(text)
        unless m
          yield nil, nil
          return
        end

        time = nil
        record = {}

        m.names.each do |name|
          value = m[name]
          # Captures that did not take part in the match are nil; leave them out.
          next unless value

          time_capture = (name == @time_key)
          # The time parser is only ever called while holding the mutex.
          time = @mutex.synchronize { @time_parser.parse(value) } if time_capture
          # The time capture is copied into the record only on request.
          next if time_capture && !@keep_time_key

          record[name] = @type_converters.nil? ? value : convert_type(name, value)
        end

        time ||= Fluent::EventTime.now if @estimate_current_event

        yield time, record
      end
    end
  end
end
22 changes: 13 additions & 9 deletions test/plugin/test_parser_apache.rb
Original file line number Diff line number Diff line change
@@ -1,16 +1,20 @@
require_relative '../helper'
require 'fluent/test/driver/parser'
require 'fluent/plugin/parser'
require 'fluent/plugin/parser_apache'

class ApacheParserTest < ::Test::Unit::TestCase
def setup
Fluent::Test.setup
@parser = Fluent::Test::Driver::Parser.new(Fluent::Plugin.new_parser('apache'))
end

def create_driver(conf = {})
Fluent::Test::Driver::Parser.new(Fluent::Plugin::ApacheParser.new).configure(conf)
end

data('parse' => :parse, 'call' => :call)
def test_call(method_name)
m = @parser.instance.method(method_name)
d = create_driver
m = d.instance.method(method_name)
m.call('192.168.0.1 - - [28/Feb/2013:12:00:00 +0900] "GET / HTTP/1.1" 200 777') { |time, record|
assert_equal(event_time('28/Feb/2013:12:00:00 +0900', format: '%d/%b/%Y:%H:%M:%S %z'), time)
assert_equal({
Expand All @@ -25,13 +29,13 @@ def test_call(method_name)
end

def test_parse_with_keep_time_key
parser = Fluent::Test::Driver::Parser.new(Fluent::Plugin.new_parser('apache'))
parser.instance.configure(
'time_format'=>"%d/%b/%Y:%H:%M:%S %z",
'keep_time_key'=>'true',
)
conf = {
'time_format' => "%d/%b/%Y:%H:%M:%S %z",
'keep_time_key' => 'true',
}
d = create_driver(conf)
text = '192.168.0.1 - - [28/Feb/2013:12:00:00 +0900] "GET / HTTP/1.1" 200 777'
parser.instance.parse(text) do |time, record|
d.instance.parse(text) do |_time, record|
assert_equal "28/Feb/2013:12:00:00 +0900", record['time']
end
end
Expand Down
17 changes: 11 additions & 6 deletions test/plugin/test_parser_apache_error.rb
Original file line number Diff line number Diff line change
@@ -1,35 +1,40 @@
require_relative '../helper'
require 'fluent/test/driver/parser'
require 'fluent/plugin/parser'
require 'fluent/plugin/parser_apache_error'

class ApacheErrorParserTest < ::Test::Unit::TestCase
def setup
Fluent::Test.setup
@parser = Fluent::Test::Driver::Parser.new(Fluent::Plugin.new_parser('apache_error'))
@parser.configure({})
@expected = {
'level' => 'error',
'client' => '127.0.0.1',
'message' => 'client denied by server configuration'
}
end

def create_driver
Fluent::Test::Driver::Parser.new(Fluent::Plugin::ApacheErrorParser.new).configure({})
end

def test_parse
@parser.instance.parse('[Wed Oct 11 14:32:52 2000] [error] [client 127.0.0.1] client denied by server configuration') { |time, record|
d = create_driver
d.instance.parse('[Wed Oct 11 14:32:52 2000] [error] [client 127.0.0.1] client denied by server configuration') { |time, record|
assert_equal(event_time('Wed Oct 11 14:32:52 2000'), time)
assert_equal(@expected, record)
}
end

def test_parse_with_pid
@parser.instance.parse('[Wed Oct 11 14:32:52 2000] [error] [pid 1000] [client 127.0.0.1] client denied by server configuration') { |time, record|
d = create_driver
d.instance.parse('[Wed Oct 11 14:32:52 2000] [error] [pid 1000] [client 127.0.0.1] client denied by server configuration') { |time, record|
assert_equal(event_time('Wed Oct 11 14:32:52 2000'), time)
assert_equal(@expected.merge('pid' => '1000'), record)
}
end

def test_parse_without_client
@parser.instance.parse('[Wed Oct 11 14:32:52 2000] [notice] Apache/2.2.15 (Unix) DAV/2 configured -- resuming normal operations') { |time, record|
d = create_driver
d.instance.parse('[Wed Oct 11 14:32:52 2000] [notice] Apache/2.2.15 (Unix) DAV/2 configured -- resuming normal operations') { |time, record|
assert_equal(event_time('Wed Oct 11 14:32:52 2000'), time)
assert_equal({
'level' => 'notice',
Expand Down
16 changes: 11 additions & 5 deletions test/plugin/test_parser_nginx.rb
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
require_relative '../helper'
require 'fluent/test/driver/parser'
require 'fluent/plugin/parser'
require 'fluent/plugin/parser_nginx'
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto.


class NginxParserTest < ::Test::Unit::TestCase
def setup
Fluent::Test.setup
@parser = Fluent::Test::Driver::Parser.new(Fluent::Plugin.new_parser('nginx'))
@expected = {
'remote' => '127.0.0.1',
'host' => '192.168.0.1',
Expand All @@ -19,22 +18,29 @@ def setup
}
end

def create_driver
Fluent::Test::Driver::Parser.new(Fluent::Plugin::NginxParser.new).configure({})
end

def test_parse
@parser.instance.parse('127.0.0.1 192.168.0.1 - [28/Feb/2013:12:00:00 +0900] "GET / HTTP/1.1" 200 777 "-" "Opera/12.0"') { |time, record|
d = create_driver
d.instance.parse('127.0.0.1 192.168.0.1 - [28/Feb/2013:12:00:00 +0900] "GET / HTTP/1.1" 200 777 "-" "Opera/12.0"') { |time, record|
assert_equal(event_time('28/Feb/2013:12:00:00 +0900', format: '%d/%b/%Y:%H:%M:%S %z'), time)
assert_equal(@expected, record)
}
end

def test_parse_with_empty_included_path
@parser.instance.parse('127.0.0.1 192.168.0.1 - [28/Feb/2013:12:00:00 +0900] "GET /a[ ]b HTTP/1.1" 200 777 "-" "Opera/12.0"') { |time, record|
d = create_driver
d.instance.parse('127.0.0.1 192.168.0.1 - [28/Feb/2013:12:00:00 +0900] "GET /a[ ]b HTTP/1.1" 200 777 "-" "Opera/12.0"') { |time, record|
assert_equal(event_time('28/Feb/2013:12:00:00 +0900', format: '%d/%b/%Y:%H:%M:%S %z'), time)
assert_equal(@expected.merge('path' => '/a[ ]b'), record)
}
end

def test_parse_without_http_version
@parser.instance.parse('127.0.0.1 192.168.0.1 - [28/Feb/2013:12:00:00 +0900] "GET /" 200 777 "-" "Opera/12.0"') { |time, record|
d = create_driver
d.instance.parse('127.0.0.1 192.168.0.1 - [28/Feb/2013:12:00:00 +0900] "GET /" 200 777 "-" "Opera/12.0"') { |time, record|
assert_equal(event_time('28/Feb/2013:12:00:00 +0900', format: '%d/%b/%Y:%H:%M:%S %z'), time)
assert_equal(@expected, record)
}
Expand Down
Loading