Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

internal/testrunner/runners/pipeline: unmarshal test results using UseNumber #717

Merged
merged 7 commits into from
Mar 16, 2022
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions internal/testrunner/runners/pipeline/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,8 @@ func (r *runner) run() ([]testrunner.TestResult, error) {
}
startTime := time.Now()

// TODO: Add tests to cover regressive use of json.Unmarshal in loadTestCaseFile.
// See https://github.com/elastic/elastic-package/pull/717.
tc, err := r.loadTestCaseFile(testCaseFile)
if err != nil {
err := errors.Wrap(err, "loading test case failed")
Expand Down Expand Up @@ -141,6 +143,8 @@ func (r *runner) run() ([]testrunner.TestResult, error) {
return nil, errors.Wrapf(err, "creating fields validator for data stream failed (path: %s, test case file: %s)", dataStreamPath, testCaseFile)
}

// TODO: Add tests to cover regressive use of json.Unmarshal in verifyResults.
// See https://github.com/elastic/elastic-package/pull/717.
err = r.verifyResults(testCaseFile, tc.config, result, fieldsValidator)
if e, ok := err.(testrunner.ErrTestCaseFailed); ok {
tr.FailureMsg = e.Error()
Expand Down Expand Up @@ -226,6 +230,8 @@ func (r *runner) verifyResults(testCaseFile string, config *testConfig, result *
testCasePath := filepath.Join(r.options.TestFolder.Path, testCaseFile)

if r.options.GenerateTestResult {
// TODO: Add tests to cover regressive use of json.Unmarshal in writeTestResult.
// See https://github.com/elastic/elastic-package/pull/717.
err := writeTestResult(testCasePath, result)
if err != nil {
return errors.Wrap(err, "writing test result failed")
Expand Down Expand Up @@ -275,7 +281,7 @@ func verifyDynamicFields(result *testResult, config *testConfig) error {
var multiErr multierror.Error
for _, event := range result.events {
var m common.MapStr
err := json.Unmarshal(event, &m)
err := jsonUnmarshalUsingNumber(event, &m)
if err != nil {
return errors.Wrap(err, "can't unmarshal event")
}
Expand Down Expand Up @@ -342,7 +348,7 @@ func checkErrorMessage(event json.RawMessage) error {
Message interface{}
}
}
err := json.Unmarshal(event, &pipelineError)
err := jsonUnmarshalUsingNumber(event, &pipelineError)
if err != nil {
return errors.Wrapf(err, "can't unmarshal event to check pipeline error: %#q", event)
}
Expand Down
108 changes: 108 additions & 0 deletions internal/testrunner/runners/pipeline/runner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ package pipeline

import (
"encoding/json"
"fmt"
"strings"
"testing"

"github.com/google/go-cmp/cmp"
Expand Down Expand Up @@ -318,3 +320,109 @@ func TestDiffUlite(t *testing.T) {
})
}
}

var jsonUnmarshalUsingNumberTests = []struct {
name string
msg string
}{
{
name: "empty",
msg: "", // Will error "unexpected end of JSON input".
},
{
name: "string",
msg: `"message"`,
},
{
name: "array",
msg: "[1,2,3,4,5]",
},
{
name: "object",
msg: `{"key":42}`,
},
{
name: "object",
msg: `{"key":42}answer`, // Will error "invalid character 'a' after top-level value".
},
// Test extra data whitespace parity with json.Unmarshal for error parity.
{
name: "object",
msg: `{"key":42} `,
},
{
name: "object",
msg: `{"key":42}` + "\t",
},
{
name: "object",
msg: `{"key":42}` + "\r",
},
{
name: "object",
msg: `{"key":42}` + "\n",
},
{
name: "0x1p52+1",
msg: fmt.Sprint(uint64(0x1p52) + 1),
},
{
name: "0x1p53-1",
msg: fmt.Sprint(uint64(0x1p53) - 1),
},
// The following three cases will fail if json.Unmarshal is used in place
// of jsonUnmarshalUsingNumber, as they are past the cutover.
{
name: "0x1p53+1",
msg: fmt.Sprint(uint64(0x1p53) + 1),
},
{
name: "0x1p54+1",
msg: fmt.Sprint(uint64(0x1p54) + 1),
},
{
name: "long",
msg: "9223372036854773807",
},
}

func TestJsonUnmarshalUsingNumberRoundTrip(t *testing.T) {
// This tests that jsonUnmarshalUsingNumber behaves the same
// way as json.Unmarshal with the exception that numbers are
// not unmarshaled through float64. This is important to avoid
// low-bit truncation of long numeric values that are greater
// than or equal to 0x1p53, the limit of bijective equivalence
// with 64 bit-integers.

for _, test := range jsonUnmarshalUsingNumberTests {
t.Run(test.name, func(t *testing.T) {
var val interface{}
err := jsonUnmarshalUsingNumber([]byte(test.msg), &val)

// Confirm that we get the same errors with jsonUnmarshalUsingNumber
// as are returned by json.Unmarshal.
jerr := json.Unmarshal([]byte(test.msg), new(interface{}))
// String comparison because we are not returning a real json.SyntaxError.
if fmt.Sprint(err) != fmt.Sprint(jerr) {
t.Errorf("unexpected error: got:%#v want:%#v", err, jerr)
}
if err != nil {
return
}

// Confirm that we round-trip the message correctly without
// alteration beyond trailing whitespace.
got, err := json.Marshal(val)
if err != nil {
t.Errorf("unexpected error: got:%#v want:%#v", err, jerr)
}
// Truncate trailing whitespace from the input since it won't
// be rendered in the output. This set of space characters is
// defined in encoding/json/scanner.go as func isSpace.
want := strings.TrimRight(test.msg, " \t\r\n")
if string(got) != want {
t.Errorf("unexpected result: got:%v want:%v", val, want)
}
})
}
}
4 changes: 2 additions & 2 deletions internal/testrunner/runners/pipeline/test_case.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ type testCaseDefinition struct {

func readTestCaseEntriesForEvents(inputData []byte) ([]json.RawMessage, error) {
var tcd testCaseDefinition
err := json.Unmarshal(inputData, &tcd)
err := jsonUnmarshalUsingNumber(inputData, &tcd)
if err != nil {
return nil, errors.Wrap(err, "unmarshalling input data failed")
}
Expand Down Expand Up @@ -59,7 +59,7 @@ func createTestCase(filename string, entries []json.RawMessage, config *testConf
var events []json.RawMessage
for _, entry := range entries {
var m common.MapStr
err := json.Unmarshal(entry, &m)
err := jsonUnmarshalUsingNumber(entry, &m)
if err != nil {
return nil, errors.Wrap(err, "can't unmarshal test case entry")
}
Expand Down
36 changes: 33 additions & 3 deletions internal/testrunner/runners/pipeline/test_result.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"bytes"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"strings"
Expand Down Expand Up @@ -188,7 +189,7 @@ func adjustTestResult(result *testResult, config *testConfig) (*testResult, erro
}

var m common.MapStr
err := json.Unmarshal(event, &m)
err := jsonUnmarshalUsingNumber(event, &m)
if err != nil {
return nil, errors.Wrapf(err, "can't unmarshal event: %s", string(event))
}
Expand All @@ -212,7 +213,7 @@ func adjustTestResult(result *testResult, config *testConfig) (*testResult, erro

func unmarshalTestResult(body []byte) (*testResult, error) {
var trd testResultDefinition
err := json.Unmarshal(body, &trd)
err := jsonUnmarshalUsingNumber(body, &trd)
if err != nil {
return nil, errors.Wrap(err, "unmarshalling test result failed")
}
Expand All @@ -222,6 +223,35 @@ func unmarshalTestResult(body []byte) (*testResult, error) {
return &tr, nil
}

// jsonUnmarshalUsingNumber is a drop-in replacement for json.Unmarshal that
// does not default to unmarshaling numeric values to float64 in order to
// prevent low bit truncation of values greater than 1<<53.
// See https://golang.org/cl/6202068 for details.
func jsonUnmarshalUsingNumber(data []byte, v interface{}) error {
dec := json.NewDecoder(bytes.NewReader(data))
dec.UseNumber()
err := dec.Decode(v)
if err != nil {
if err == io.EOF {
return errors.New("unexpected end of JSON input")
}
return err
}
// Make sure there is no invalid syntax after the message
// to match json.Unmarshal's behaviour.
remains, err := io.ReadAll(dec.Buffered())
if err != nil {
return err
}
for _, b := range remains {
if b > ' ' || (b != ' ' && b != '\t' && b != '\r' && b != '\n') {
// Mimic encoding/json error for this case, but without rigmarole.
return fmt.Errorf("invalid character %q after top-level value", b)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Wouldn't be enough to check if there is something else there with dec.More()?

Suggested change
remains, err := io.ReadAll(dec.Buffered())
if err != nil {
return err
}
for _, b := range remains {
if b > ' ' || (b != ' ' && b != '\t' && b != '\r' && b != '\n') {
// Mimic encoding/json error for this case, but without rigmarole.
return fmt.Errorf("invalid character %q after top-level value", b)
if dec.More() {
return fmt.Errorf("unexpected characters found after unmarshaling value")
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, this won't work. Some trailing bytes are acceptable, others are not; a JSON stream can have arbitrarily large quantities of whitespace outside literals and this extends to trailing and leading data.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dec.More() ignores whitespaces, are there other acceptable bytes? The code I propose passes the test cases added in this PR (after adapting the error strings).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Or something like this could be done to keep this error message:

        _, err = dec.Token()
        if err != nil && !errors.Is(err, io.EOF) {
                r, ok := dec.Buffered().(io.ByteReader)
                if !ok {
                        return err
                }
                b, err := r.ReadByte()
                if err != nil {
                        return err
                }
                // Mimic encoding/json error for this case, but without rigmarole.
                return fmt.Errorf("invalid character %q after top-level value", b)
        }

Copy link
Contributor Author

@efd6 efd6 Mar 9, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

dec.More() ignores whitespaces

Nice. I didn't notice that. To retain the error parity we'll need something like the second option or what I have. I'm not convinced that the assertion to io.ByteReader is simpler of lighter though. We expect that in the happy case that remaining will be ~0, and in the unhappy case remaining will still be ~small (it must have fit in memory), but we cannot guarantee that dec.Buffered() will return an io.ByteReader, so we may miss cases.

If we are happy to lose error parity, then dec.More() would be fine.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What I would like to avoid is to do parsing here (the if b > ' ' || (b != ' ' && b != '\t' && b != '\r' && b != '\n') line), and rely on the decoder for this.

Regarding the error message, I don't have a strong opinion, but at this level I think that it is ok if we don't show the found character.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. This will complicate the test code. I will do it next week.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need to complicate test code, actually I think that we could simplify it, and only check that it returns an error when expected, without checking the error message.

}
}
return nil
}

func marshalTestResultDefinition(result *testResult) ([]byte, error) {
var trd testResultDefinition
trd.Expected = result.events
Expand All @@ -241,7 +271,7 @@ func marshalNormalizedJSON(v testResultDefinition) ([]byte, error) {
return msg, err
}
var obj interface{}
err = json.Unmarshal(msg, &obj)
err = jsonUnmarshalUsingNumber(msg, &obj)
if err != nil {
return msg, err
}
Expand Down
11 changes: 9 additions & 2 deletions scripts/test-check-packages.sh
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,15 @@ for d in test/packages/${PACKAGE_TEST_TYPE:-other}/${PACKAGE_UNDER_TEST:-*}/; do
cd $d
elastic-package install -v

# defer-cleanup is set to a short period to verify that the option is available
elastic-package test -v --report-format xUnit --report-output file --defer-cleanup 1s --test-coverage
if [ "$(basename $d)" == "long_integers" ]; then
# Ensure that any change in unmarshaling behaviour is noticed; this will result in a dirty
# git state on exit if an inappropriate use of encoding/json.Unmarshal has been made.
elastic-package test -v -g --report-format xUnit --report-output file --defer-cleanup 1s --test-coverage
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the -g necessary here? Shouldn't it be caught due to differences compared to the expected.json file?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1, I think this if is not needed, elastic-package test should fail if there are regressions in unmarshalling logic. If it doesn't fail, this is a different bug to be addressed in a different PR.

Copy link
Contributor Author

@efd6 efd6 Mar 10, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, it won't be caught. This was the reason for the additional code previously. The issue is that it is not possible for a machine to measure the accuracy of some thing that is more accurate than it, and this is what that would be asking it to do; if the the code is regressed, then it becomes a less accurate machine and so become inadequate to test that the test cases (which are more accurate) are satisfied. This is all theoretically sound, but can be practically confirmed by running the tests at 9b93a10 with this change

diff --git a/internal/testrunner/runners/pipeline/test_result.go b/internal/testrunner/runners/pipeline/test_result.go
index d67853c..38916f8 100644
--- a/internal/testrunner/runners/pipeline/test_result.go
+++ b/internal/testrunner/runners/pipeline/test_result.go
@@ -228,6 +228,7 @@ func unmarshalTestResult(body []byte) (*testResult, error) {
 // prevent low bit truncation of values greater than 1<<53.
 // See https://golang.org/cl/6202068 for details.
 func jsonUnmarshalUsingNumber(data []byte, v interface{}) error {
+       return json.Unmarshal(data, v)
        dec := json.NewDecoder(bytes.NewReader(data))
        dec.UseNumber()
        err := dec.Decode(v)

They will pass.

Aleternatively, this also demonstrates the situation https://play.golang.com/p/CMsFSSjcd0f.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm.. to be honest, I'd accept that risk than introduce a sneaky workaround. It sounds to me like an edge case.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should I remove the tests? They essentially don't do anything without this.

Copy link
Contributor

@mtojek mtojek Mar 10, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let me challenge this. In this case, we're talking about the Go stdlib's nature, which unfortunately has this behavior. It means that we need to a test to detect a possible regression. I'm wondering if the unit test which calls jsonUnmarshalUsingNumber, then marshals back to []byte using the right json.Marshaller can detect this issue. WDYT?

EDIT:

https://play.golang.com/p/8xH6zmearh4 (it's similar/same? to what you have covered with your tests).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The question that has to be asked is what does that test? Depending where it's put that could adequately test for all regressions, but as the code stands you will end up mocking essentially all of (*runner).run in order to achieve that.

There are many moving parts here, breaking some of them would be detected, but breaking others will not. The fact that breaking some of them will be is a good reason to keep the package here. I'm at the end of an extremely long day, so I don't have the cognitive capacity to sort out which are visible and which are not right now.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, I see, you make a good point with the comparison of floats.

([ -z "$(git status --short)" ] || exit 1)
else
# defer-cleanup is set to a short period to verify that the option is available
elastic-package test -v --report-format xUnit --report-output file --defer-cleanup 1s --test-coverage
fi
)
cd -
done
3 changes: 3 additions & 0 deletions test/packages/other/long_integers/_dev/build/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
dependencies:
ecs:
reference: [email protected]
5 changes: 5 additions & 0 deletions test/packages/other/long_integers/_dev/build/docs/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Long Integer Tests

{{event "test"}}

{{fields "test"}}
6 changes: 6 additions & 0 deletions test/packages/other/long_integers/changelog.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# newer versions go on top
- version: "0.0.1"
changes:
- description: Initial draft of the package
type: enhancement
link: https://github.com/elastic/integrations/pull/0 # FIXME Replace with the real PR link
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
4503599627370497,9007199254740991,9007199254740993,18014398509481985,9223372036854773807
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
fields:
"@warning": "The values in sequence_number must match the values in message."
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"expected": [
{
"@warning": "The values in sequence_number must match the values in message.",
"message": "4503599627370497,9007199254740991,9007199254740993,18014398509481985,9223372036854773807",
"sequence_number": [
4503599627370497,
9007199254740991,
9007199254740993,
18014398509481985,
9223372036854773807
]
}
]
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
paths:
{{#each paths as |path i|}}
- {{path}}
{{/each}}
exclude_files: [".gz$"]
processors:
- add_locale: ~
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
---
description: Pipeline for processing sample logs
processors:
- split:
field: message
separator: ","
target_field: sequence_number
ignore_missing: true
- convert:
field: sequence_number
type: long

on_failure:
- set:
field: error.message
value: '{{ _ingest.on_failure_message }}'
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
- name: data_stream.type
type: constant_keyword
description: Data stream type.
- name: data_stream.dataset
type: constant_keyword
description: Data stream dataset.
- name: data_stream.namespace
type: constant_keyword
description: Data stream namespace.
- name: '@timestamp'
type: date
description: Event timestamp.
- name: '@warning'
type: keyword
description: Warning for devs.
- name: message
type: keyword
description: Original input.
- name: sequence_number
type: long
description: |
Log entry identifier that is incremented sequentially. Unique for each log type.
13 changes: 13 additions & 0 deletions test/packages/other/long_integers/data_stream/test/manifest.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
title: "Test"
type: logs
streams:
- input: logfile
title: Sample logs
description: Collect sample logs
vars:
- name: paths
type: text
title: Paths
multi: true
default:
- /var/log/*.log
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"@warning": "The values in sequence_number must match the values in message.",
"message": "4503599627370497,9007199254740991,9007199254740993,18014398509481985,9223372036854773807",
"sequence_number": [
4503599627370497,
9007199254740991,
9007199254740993,
18014398509481985,
9223372036854773807
]
}
Loading