From c476326fdd33cf166dd050701cdaa51a7d545ece Mon Sep 17 00:00:00 2001 From: Masahiro Nakagawa Date: Mon, 29 Jul 2019 14:10:16 +0900 Subject: [PATCH] formatter_csv: Improve the performance. 2x faster Avoid creating CSV object per format call. Signed-off-by: Masahiro Nakagawa --- lib/fluent/plugin/formatter_csv.rb | 16 ++++++++++------ test/plugin/test_formatter_csv.rb | 16 ++++++++++++++++ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/lib/fluent/plugin/formatter_csv.rb b/lib/fluent/plugin/formatter_csv.rb index 94286fd2ca..a4ae53f619 100644 --- a/lib/fluent/plugin/formatter_csv.rb +++ b/lib/fluent/plugin/formatter_csv.rb @@ -33,18 +33,22 @@ class CsvFormatter < Formatter def configure(conf) super + @fields = fields.select{|f| !f.empty? } raise ConfigError, "empty value is specified in fields parameter" if @fields.empty? - @generate_opts = {col_sep: @delimiter, force_quotes: @force_quotes} + @generate_opts = {col_sep: @delimiter, force_quotes: @force_quotes, headers: @fields, + row_sep: @add_newline ? :auto : "".force_encoding(Encoding::ASCII_8BIT)} + # Cache CSV object per thread to avoid internal state sharing + @cache = {} end def format(tag, time, record) - row = @fields.map do |key| - record[key] - end - line = CSV.generate_line(row, @generate_opts) - line.chomp! unless @add_newline + csv = (@cache[Thread.current] ||= CSV.new("".force_encoding(Encoding::ASCII_8BIT), @generate_opts)) + line = (csv << record).string.dup + # Need manual cleanup because CSV writer doesn't provide such method. + csv.rewind + csv.truncate(0) line end end diff --git a/test/plugin/test_formatter_csv.rb b/test/plugin/test_formatter_csv.rb index 5467e25df1..dceff4324d 100644 --- a/test/plugin/test_formatter_csv.rb +++ b/test/plugin/test_formatter_csv.rb @@ -108,4 +108,20 @@ def test_config_params_with_fields(data) d = create_driver('fields' => data) assert_equal %w(one two three), d.instance.fields end + + def test_format_with_multiple_records + d = create_driver("fields" => "message,message2") + r = {'message' => 'hello', 'message2' => 'fluentd'} + + formatted = d.instance.format(tag, @time, r) + assert_equal("\"hello\",\"fluentd\"\n", formatted) + + r = {'message' => 'hey', 'message2' => 'ho'} + formatted = d.instance.format(tag, @time, r) + assert_equal("\"hey\",\"ho\"\n", formatted) + + r = {'message' => 'longer message', 'message2' => 'longer longer message'} + formatted = d.instance.format(tag, @time, r) + assert_equal("\"longer message\",\"longer longer message\"\n", formatted) + end end