Skip to content

Commit

Permalink
fixes #132
Browse files Browse the repository at this point in the history
  • Loading branch information
kylewhite21 authored and beatrichartz committed Jan 2, 2025
1 parent 1a9f852 commit 9eec472
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 12 deletions.
2 changes: 1 addition & 1 deletion lib/csv/decoding/decoder.ex
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ defmodule CSV.Decoding.Decoder do
Must be a codepoint (syntax: ? + (your separator)).
* `:escape_character` – The escape character token to use, defaults to `?"`.
Must be a codepoint (syntax: ? + (your escape character)).
* `:escape_max_lines` – The number of lines an escape sequence is allowed
* `:escape_max_lines` – The number of lines an escape sequence is allowed
to span, defaults to 10.
* `:field_transform` – A function with arity 1 that will get called with
each field and can apply transformations. Defaults to identity function.
Expand Down
42 changes: 31 additions & 11 deletions lib/csv/decoding/parser.ex
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@ defmodule CSV.Decoding.Parser do
* `:separator` – The separator token to use, defaults to `?,`.
Must be a codepoint (syntax: ? + (your separator)).
* `:field_transform` – A function with arity 1 that will get called with
* `:field_transform` – A function with arity 1 that will get called with
each field and can apply transformations. Defaults to identity function.
This function will get called for every field and therefore should return
This function will get called for every field and therefore should return
quickly.
* `:unescape_formulas` – When set to `true`, will remove formula escaping
* `:unescape_formulas` – When set to `true`, will remove formula escaping
inserted to prevent [CSV Injection](https://owasp.org/www-community/attacks/CSV_Injection).
## Examples
Expand Down Expand Up @@ -167,7 +167,8 @@ defmodule CSV.Decoding.Parser do
{[], {fields, partial_field, parse_state, sequence}},
escape_character,
token_pattern,
field_transform
field_transform,
sequence
)

sequence, {fields, partial_field, parse_state, {leftover_state, leftover_sequence}} ->
Expand All @@ -190,16 +191,17 @@ defmodule CSV.Decoding.Parser do
end
end

@compile {:inline, parse_to_end: 4}
defp parse_to_end({rows, {[], _, _, {_, ""}} = parse_state}, _, _, _) do
@compile {:inline, parse_to_end: 5}
defp parse_to_end({rows, {[], _, _, {_, ""}} = parse_state}, _, _, _, _) do
{rows |> add_stream_halted_to_errors, parse_state}
end

defp parse_to_end(
{rows, {fields, partial_field, {:open, _, _} = parse_state, {_, sequence}}},
escape_character,
token_pattern,
field_transform
field_transform,
_
) do
tokens = :binary.matches(sequence, token_pattern)

Expand All @@ -220,7 +222,7 @@ defmodule CSV.Decoding.Parser do
{escape_character, :binary.matches(sequence, @newline) |> Enum.count()},
field_transform
)
|> parse_to_end(escape_character, token_pattern, field_transform)
|> parse_to_end(escape_character, token_pattern, field_transform, sequence)
end
end

Expand All @@ -231,7 +233,8 @@ defmodule CSV.Decoding.Parser do
{_, sequence}}},
_,
_,
field_transform
field_transform,
_
) do
case byte_size(sequence) - 1 do
^previous_token_position ->
Expand All @@ -255,11 +258,27 @@ defmodule CSV.Decoding.Parser do
end
end

# Stalled escape sequence at end of input: the parser is still inside an
# escaped field and the leftover sequence is identical to `last_sequence`
# (the recursive calls pass the sequence they just tokenized), so another
# pass would not make progress. Instead of recursing again, report a
# StrayEscapeCharacterError for the line where the escape was opened, mark
# the stream as halted, and reset the transform state.
defp parse_to_end(
       {rows, {_fields, _partial_field, {:escaped, _, _, line}, {_leftover_state, sequence}}},
       _escape_character,
       _token_pattern,
       _field_transform,
       last_sequence
     )
     when sequence == last_sequence do
  error_arguments = [line: line, sequence: sequence, stream_halted: true]

  {add_error(rows, StrayEscapeCharacterError, error_arguments), empty_transform_state()}
end

defp parse_to_end(
{rows, {fields, partial_field, {:escaped, _, _, line} = parse_state, {_, sequence}}},
escape_character,
token_pattern,
field_transform
field_transform,
_
) do
tokens = :binary.matches(sequence, token_pattern)

Expand Down Expand Up @@ -287,14 +306,15 @@ defmodule CSV.Decoding.Parser do
{escape_character, :binary.matches(sequence, @newline) |> Enum.count()},
field_transform
)
|> parse_to_end(escape_character, token_pattern, field_transform)
|> parse_to_end(escape_character, token_pattern, field_transform, sequence)
end
end

defp parse_to_end(
{rows, {[], _, {:errored, _, error_module, construct_arguments, _}, _}},
_,
_,
_,
_
) do
{rows
Expand Down
10 changes: 10 additions & 0 deletions test/decoding/parser_exceptions_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,16 @@ defmodule DecodingTests.ParserExceptionsTest do
end)
end

# An escape sequence that is opened on the last line and never closed
# (and has no trailing newline) must surface as a stray escape character
# error that halts the stream, after the preceding valid row is emitted.
test "includes an error for rows with unescaped quotes and no newline" do
  parsed =
    ["a,b\n", "c,\"d,e"]
    |> Parser.parse()
    |> Enum.to_list()

  stray_escape_error =
    {:error, CSV.StrayEscapeCharacterError, [line: 2, sequence: "d,e", stream_halted: true]}

  assert parsed == [{:ok, ["a", "b"]}, stray_escape_error]
end

test "includes an error for rows with unescaped quotes in escape sequences on the same line" do
stream = ["a,\"b\"e", "\"c,\"d", "\"e,f\"g\",h", "j,k"] |> to_line_stream
errors = stream |> Parser.parse() |> Enum.to_list()
Expand Down

0 comments on commit 9eec472

Please sign in to comment.