From 78cf37ca054bc5c964ad22feb9a983838a1914d9 Mon Sep 17 00:00:00 2001 From: Dan Jaglowski Date: Mon, 21 Aug 2023 16:03:31 -0400 Subject: [PATCH 1/2] [pkg/stanza] Move encoding concerns to dedicated package --- pkg/stanza/operator/helper/encoding.go | 1 + 1 file changed, 1 insertion(+) diff --git a/pkg/stanza/operator/helper/encoding.go b/pkg/stanza/operator/helper/encoding.go index 2ebd4c7e5f11..9a625fd623f2 100644 --- a/pkg/stanza/operator/helper/encoding.go +++ b/pkg/stanza/operator/helper/encoding.go @@ -20,6 +20,7 @@ type EncodingConfig struct { // Deprecated: [v0.84.0] Use decoder.Decoder instead type Decoder = decoder.Decoder +// Deprecated: [v0.84.0] Use decoder.New instead var NewDecoder = decoder.New // Deprecated: [v0.84.0] Use decoder.LookupEncoding instead From 6e827f1b4a00fdbe41b590ccb745c0aecb5f4808 Mon Sep 17 00:00:00 2001 From: Dan Jaglowski Date: Mon, 21 Aug 2023 14:47:09 -0400 Subject: [PATCH 2/2] [chore] Move tokenization test into subpackage of tokenize --- .../operator/input/syslog/syslog_test.go | 54 +-- pkg/stanza/operator/internal/test_common.go | 119 ----- pkg/stanza/tokenize/multiline_test.go | 414 ++++++++---------- .../tokenize.go} | 46 +- 4 files changed, 242 insertions(+), 391 deletions(-) delete mode 100644 pkg/stanza/operator/internal/test_common.go rename pkg/stanza/tokenize/{util_test.go => tokenizetest/tokenize.go} (60%) diff --git a/pkg/stanza/operator/input/syslog/syslog_test.go b/pkg/stanza/operator/input/syslog/syslog_test.go index 4decaf3f5e12..a19e763cc01c 100644 --- a/pkg/stanza/operator/input/syslog/syslog_test.go +++ b/pkg/stanza/operator/input/syslog/syslog_test.go @@ -15,10 +15,10 @@ import ( "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/input/tcp" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/input/udp" - "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/internal" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/parser/syslog" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/pipeline" "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/testutil" + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/tokenize/tokenizetest" ) var ( @@ -182,68 +182,68 @@ func NewConfigWithUDP(syslogCfg *syslog.BaseConfig) *Config { } func TestOctetFramingSplitFunc(t *testing.T) { - testCases := []internal.TokenizerTestCase{ + testCases := []tokenizetest.TestCase{ { - Name: "OneLogSimple", - Raw: []byte(`17 my log LOGEND 123`), - ExpectedTokenized: []string{ + Name: "OneLogSimple", + Input: []byte(`17 my log LOGEND 123`), + ExpectedTokens: []string{ `17 my log LOGEND 123`, }, }, { - Name: "TwoLogsSimple", - Raw: []byte(`17 my log LOGEND 12317 my log LOGEND 123`), - ExpectedTokenized: []string{ + Name: "TwoLogsSimple", + Input: []byte(`17 my log LOGEND 12317 my log LOGEND 123`), + ExpectedTokens: []string{ `17 my log LOGEND 123`, `17 my log LOGEND 123`, }, }, { - Name: "NoMatches", - Raw: []byte(`no matches in it`), - ExpectedTokenized: []string{ + Name: "NoMatches", + Input: []byte(`no matches in it`), + ExpectedTokens: []string{ `no matches in it`, }, }, { - Name: "NonMatchesAfter", - Raw: []byte(`17 my log LOGEND 123my log LOGEND 12317 my log LOGEND 123`), - ExpectedTokenized: []string{ + Name: "NonMatchesAfter", + Input: []byte(`17 my log LOGEND 123my log LOGEND 12317 my log LOGEND 123`), + 
ExpectedTokens: []string{ `17 my log LOGEND 123`, `my log LOGEND 12317 my log LOGEND 123`, }, }, { Name: "HugeLog100", - Raw: func() []byte { - newRaw := internal.GeneratedByteSliceOfLength(100) + Input: func() []byte { + newRaw := tokenizetest.GenerateBytes(100) newRaw = append([]byte(`100 `), newRaw...) return newRaw }(), - ExpectedTokenized: []string{ - `100 ` + string(internal.GeneratedByteSliceOfLength(100)), + ExpectedTokens: []string{ + `100 ` + string(tokenizetest.GenerateBytes(100)), }, }, { Name: "maxCapacity", - Raw: func() []byte { - newRaw := internal.GeneratedByteSliceOfLength(4091) + Input: func() []byte { + newRaw := tokenizetest.GenerateBytes(4091) newRaw = append([]byte(`4091 `), newRaw...) return newRaw }(), - ExpectedTokenized: []string{ - `4091 ` + string(internal.GeneratedByteSliceOfLength(4091)), + ExpectedTokens: []string{ + `4091 ` + string(tokenizetest.GenerateBytes(4091)), }, }, { Name: "over capacity", - Raw: func() []byte { - newRaw := internal.GeneratedByteSliceOfLength(4092) + Input: func() []byte { + newRaw := tokenizetest.GenerateBytes(4092) newRaw = append([]byte(`5000 `), newRaw...) return newRaw }(), - ExpectedTokenized: []string{ - `5000 ` + string(internal.GeneratedByteSliceOfLength(4091)), + ExpectedTokens: []string{ + `5000 ` + string(tokenizetest.GenerateBytes(4091)), `j`, }, }, @@ -251,7 +251,7 @@ func TestOctetFramingSplitFunc(t *testing.T) { for _, tc := range testCases { splitFunc, err := OctetMultiLineBuilder(nil) require.NoError(t, err) - t.Run(tc.Name, tc.RunFunc(splitFunc)) + t.Run(tc.Name, tc.Run(splitFunc)) } } diff --git a/pkg/stanza/operator/internal/test_common.go b/pkg/stanza/operator/internal/test_common.go deleted file mode 100644 index 39210f43aef7..000000000000 --- a/pkg/stanza/operator/internal/test_common.go +++ /dev/null @@ -1,119 +0,0 @@ -// Copyright The OpenTelemetry Authors -// SPDX-License-Identifier: Apache-2.0 - -package internal // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/operator/internal" - -import ( - "bufio" - "io" - "testing" - "time" - - "github.com/stretchr/testify/assert" -) - -// state is going to keep processing state of the TestReader -type state struct { - ReadFrom int - Processed int -} - -// TestReader is a TestReader which keeps state of readed and processed data -type TestReader struct { - State *state - Data []byte -} - -// NewTestReader creates TestReader with empty state -func NewTestReader(data []byte) TestReader { - return TestReader{ - State: &state{ - ReadFrom: 0, - Processed: 0, - }, - Data: data, - } -} - -// Read reads data from TestReader and remebers where reading has been finished -func (r TestReader) Read(p []byte) (n int, err error) { - // return eof if data has been fully readed - if len(r.Data)-r.State.ReadFrom == 0 { - return 0, io.EOF - } - - // iterate over data char by char and write into p - // until p is full or no more data left to read - i := 0 - for ; i < len(r.Data)-r.State.ReadFrom; i++ { - if i == len(p) { - break - } - p[i] = r.Data[r.State.ReadFrom+i] - } - - // update state - r.State.ReadFrom += i - return i, nil -} - -// Reset resets TestReader state (sets last readed position to last processed position) -func (r *TestReader) Reset() { - r.State.ReadFrom = r.State.Processed -} - -func (r *TestReader) SplitFunc(splitFunc bufio.SplitFunc) bufio.SplitFunc { - return func(data []byte, atEOF bool) (advance int, token []byte, err error) { - advance, token, err = splitFunc(data, atEOF) - r.State.Processed += advance - return - } -} - -type 
TokenizerTestCase struct { - Name string - Pattern string - Raw []byte - ExpectedTokenized []string - ExpectedError error - Sleep time.Duration - AdditionalIterations int - PreserveLeadingWhitespaces bool - PreserveTrailingWhitespaces bool -} - -func (tc TokenizerTestCase) RunFunc(splitFunc bufio.SplitFunc) func(t *testing.T) { - reader := NewTestReader(tc.Raw) - - return func(t *testing.T) { - var tokenized []string - for i := 0; i < 1+tc.AdditionalIterations; i++ { - // sleep before next iterations - if i > 0 { - time.Sleep(tc.Sleep) - } - reader.Reset() - scanner := bufio.NewScanner(reader) - scanner.Split(reader.SplitFunc(splitFunc)) - for { - ok := scanner.Scan() - if !ok { - assert.Equal(t, tc.ExpectedError, scanner.Err()) - break - } - tokenized = append(tokenized, scanner.Text()) - } - } - - assert.Equal(t, tc.ExpectedTokenized, tokenized) - } -} - -func GeneratedByteSliceOfLength(length int) []byte { - chars := []byte(`abcdefghijklmnopqrstuvwxyz`) - newSlice := make([]byte, length) - for i := 0; i < length; i++ { - newSlice[i] = chars[i%len(chars)] - } - return newSlice -} diff --git a/pkg/stanza/tokenize/multiline_test.go b/pkg/stanza/tokenize/multiline_test.go index af0dffc69e35..23482f25ba9b 100644 --- a/pkg/stanza/tokenize/multiline_test.go +++ b/pkg/stanza/tokenize/multiline_test.go @@ -12,10 +12,11 @@ import ( "testing" "time" - "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "golang.org/x/text/encoding" "golang.org/x/text/encoding/unicode" + + "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/tokenize/tokenizetest" ) const ( @@ -25,29 +26,29 @@ const ( ) type MultiLineTokenizerTestCase struct { - TokenizerTestCase + tokenizetest.TestCase Flusher *Flusher } func TestLineStartSplitFunc(t *testing.T) { testCases := []MultiLineTokenizerTestCase{ { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "OneLogSimple", Pattern: `LOGSTART \d+ `, - Raw: []byte("LOGSTART 123 log1LOGSTART 123 a"), - ExpectedTokenized: []string{ + Input: []byte("LOGSTART 123 log1LOGSTART 123 a"), + ExpectedTokens: []string{ `LOGSTART 123 log1`, }, }, nil, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "TwoLogsSimple", Pattern: `LOGSTART \d+ `, - Raw: []byte(`LOGSTART 123 log1 LOGSTART 234 log2 LOGSTART 345 foo`), - ExpectedTokenized: []string{ + Input: []byte(`LOGSTART 123 log1 LOGSTART 234 log2 LOGSTART 345 foo`), + ExpectedTokens: []string{ `LOGSTART 123 log1`, `LOGSTART 234 log2`, }, @@ -55,11 +56,11 @@ func TestLineStartSplitFunc(t *testing.T) { nil, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "TwoLogsLineStart", Pattern: `^LOGSTART \d+ `, - Raw: []byte("LOGSTART 123 LOGSTART 345 log1\nLOGSTART 234 log2\nLOGSTART 345 foo"), - ExpectedTokenized: []string{ + Input: []byte("LOGSTART 123 LOGSTART 345 log1\nLOGSTART 234 log2\nLOGSTART 345 foo"), + ExpectedTokens: []string{ "LOGSTART 123 LOGSTART 345 log1", "LOGSTART 234 log2", }, @@ -67,19 +68,19 @@ func TestLineStartSplitFunc(t *testing.T) { nil, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "NoMatches", Pattern: `LOGSTART \d+ `, - Raw: []byte(`file that has no matches in it`), + Input: []byte(`file that has no matches in it`), }, nil, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "PrecedingNonMatches", Pattern: `LOGSTART \d+ `, - Raw: []byte(`part that doesn't match LOGSTART 123 part that matchesLOGSTART 123 foo`), - ExpectedTokenized: []string{ + Input: []byte(`part that doesn't match LOGSTART 123 part that matchesLOGSTART 123 foo`), + ExpectedTokens: []string{ 
`part that doesn't match`, `LOGSTART 123 part that matches`, }, @@ -87,57 +88,57 @@ func TestLineStartSplitFunc(t *testing.T) { nil, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "HugeLog100", Pattern: `LOGSTART \d+ `, - Raw: func() []byte { - newRaw := []byte(`LOGSTART 123 `) - newRaw = append(newRaw, GeneratedByteSliceOfLength(100)...) - newRaw = append(newRaw, []byte(`LOGSTART 234 endlog`)...) - return newRaw + Input: func() []byte { + newInput := []byte(`LOGSTART 123 `) + newInput = append(newInput, tokenizetest.GenerateBytes(100)...) + newInput = append(newInput, []byte(`LOGSTART 234 endlog`)...) + return newInput }(), - ExpectedTokenized: []string{ - `LOGSTART 123 ` + string(GeneratedByteSliceOfLength(100)), + ExpectedTokens: []string{ + `LOGSTART 123 ` + string(tokenizetest.GenerateBytes(100)), }, }, nil, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "HugeLog10000", Pattern: `LOGSTART \d+ `, - Raw: func() []byte { - newRaw := []byte(`LOGSTART 123 `) - newRaw = append(newRaw, GeneratedByteSliceOfLength(10000)...) - newRaw = append(newRaw, []byte(`LOGSTART 234 endlog`)...) - return newRaw + Input: func() []byte { + newInput := []byte(`LOGSTART 123 `) + newInput = append(newInput, tokenizetest.GenerateBytes(10000)...) + newInput = append(newInput, []byte(`LOGSTART 234 endlog`)...) + return newInput }(), - ExpectedTokenized: []string{ - `LOGSTART 123 ` + string(GeneratedByteSliceOfLength(10000)), + ExpectedTokens: []string{ + `LOGSTART 123 ` + string(tokenizetest.GenerateBytes(10000)), }, }, nil, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "ErrTooLong", Pattern: `LOGSTART \d+ `, - Raw: func() []byte { - newRaw := []byte(`LOGSTART 123 `) - newRaw = append(newRaw, GeneratedByteSliceOfLength(1000000)...) - newRaw = append(newRaw, []byte(`LOGSTART 234 endlog`)...) - return newRaw + Input: func() []byte { + newInput := []byte(`LOGSTART 123 `) + newInput = append(newInput, tokenizetest.GenerateBytes(1000000)...) + newInput = append(newInput, []byte(`LOGSTART 234 endlog`)...) 
+ return newInput }(), ExpectedError: errors.New("bufio.Scanner: token too long"), }, nil, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "MultipleMultilineLogs", Pattern: `^LOGSTART \d+`, - Raw: []byte("LOGSTART 12 log1\t \nLOGPART log1\nLOGPART log1\t \nLOGSTART 17 log2\nLOGPART log2\nanother line\nLOGSTART 43 log5"), - ExpectedTokenized: []string{ + Input: []byte("LOGSTART 12 log1\t \nLOGPART log1\nLOGPART log1\t \nLOGSTART 17 log2\nLOGPART log2\nanother line\nLOGSTART 43 log5"), + ExpectedTokens: []string{ "LOGSTART 12 log1\t \nLOGPART log1\nLOGPART log1", "LOGSTART 17 log2\nLOGPART log2\nanother line", }, @@ -145,19 +146,19 @@ func TestLineStartSplitFunc(t *testing.T) { nil, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "LogsWithoutFlusher", Pattern: `^LOGSTART \d+`, - Raw: []byte("LOGPART log1\nLOGPART log1\t \n"), + Input: []byte("LOGPART log1\nLOGPART log1\t \n"), }, nil, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "LogsWithFlusher", Pattern: `^LOGSTART \d+`, - Raw: []byte("LOGPART log1\nLOGPART log1\t \n"), - ExpectedTokenized: []string{ + Input: []byte("LOGPART log1\nLOGPART log1\t \n"), + ExpectedTokens: []string{ "LOGPART log1\nLOGPART log1", }, @@ -169,11 +170,11 @@ func TestLineStartSplitFunc(t *testing.T) { }, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "LogsWithFlusherWithMultipleLogsInBuffer", Pattern: `^LOGSTART \d+`, - Raw: []byte("LOGPART log1\nLOGSTART 123\nLOGPART log1\t \n"), - ExpectedTokenized: []string{ + Input: []byte("LOGPART log1\nLOGSTART 123\nLOGPART log1\t \n"), + ExpectedTokens: []string{ "LOGPART log1", "LOGSTART 123\nLOGPART log1", }, @@ -185,11 +186,11 @@ func TestLineStartSplitFunc(t *testing.T) { }, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "LogsWithLongFlusherWithMultipleLogsInBuffer", Pattern: `^LOGSTART \d+`, - Raw: []byte("LOGPART log1\nLOGSTART 123\nLOGPART log1\t \n"), - ExpectedTokenized: []string{ + Input: []byte("LOGPART log1\nLOGSTART 123\nLOGPART log1\t \n"), + ExpectedTokens: []string{ "LOGPART log1", }, AdditionalIterations: 1, @@ -200,11 +201,11 @@ func TestLineStartSplitFunc(t *testing.T) { }, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "LogsWithFlusherWithLogStartingWithWhiteChars", Pattern: `^LOGSTART \d+`, - Raw: []byte("\nLOGSTART 333"), - ExpectedTokenized: []string{ + Input: []byte("\nLOGSTART 333"), + ExpectedTokens: []string{ "", "LOGSTART 333", }, @@ -224,7 +225,7 @@ func TestLineStartSplitFunc(t *testing.T) { splitFunc, err := cfg.getSplitFunc(unicode.UTF8, false, tc.Flusher, 0, tc.PreserveLeadingWhitespaces, tc.PreserveTrailingWhitespaces) require.NoError(t, err) - t.Run(tc.Name, tc.RunFunc(splitFunc)) + t.Run(tc.Name, tc.Run(splitFunc)) } t.Run("FirstMatchHitsEndOfBuffer", func(t *testing.T) { @@ -250,22 +251,22 @@ func TestLineStartSplitFunc(t *testing.T) { func TestLineEndSplitFunc(t *testing.T) { testCases := []MultiLineTokenizerTestCase{ { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "OneLogSimple", Pattern: `LOGEND \d+`, - Raw: []byte(`my log LOGEND 123`), - ExpectedTokenized: []string{ + Input: []byte(`my log LOGEND 123`), + ExpectedTokens: []string{ `my log LOGEND 123`, }, }, nil, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "TwoLogsSimple", Pattern: `LOGEND \d+`, - Raw: []byte(`log1 LOGEND 123log2 LOGEND 234`), - ExpectedTokenized: []string{ + Input: []byte(`log1 LOGEND 123log2 LOGEND 234`), + ExpectedTokens: []string{ `log1 LOGEND 123`, `log2 LOGEND 234`, }, @@ -273,11 +274,11 @@ func TestLineEndSplitFunc(t *testing.T) { 
nil, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "TwoLogsLineEndSimple", Pattern: `LOGEND$`, - Raw: []byte("log1 LOGEND LOGEND\nlog2 LOGEND\n"), - ExpectedTokenized: []string{ + Input: []byte("log1 LOGEND LOGEND\nlog2 LOGEND\n"), + ExpectedTokens: []string{ "log1 LOGEND LOGEND", "log2 LOGEND", }, @@ -285,73 +286,73 @@ func TestLineEndSplitFunc(t *testing.T) { nil, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "NoMatches", Pattern: `LOGEND \d+`, - Raw: []byte(`file that has no matches in it`), + Input: []byte(`file that has no matches in it`), }, nil, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "NonMatchesAfter", Pattern: `LOGEND \d+`, - Raw: []byte(`part that matches LOGEND 123 part that doesn't match`), - ExpectedTokenized: []string{ + Input: []byte(`part that matches LOGEND 123 part that doesn't match`), + ExpectedTokens: []string{ `part that matches LOGEND 123`, }, }, nil, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "HugeLog100", Pattern: `LOGEND \d`, - Raw: func() []byte { - newRaw := GeneratedByteSliceOfLength(100) - newRaw = append(newRaw, []byte(`LOGEND 1 `)...) - return newRaw + Input: func() []byte { + newInput := tokenizetest.GenerateBytes(100) + newInput = append(newInput, []byte(`LOGEND 1 `)...) + return newInput }(), - ExpectedTokenized: []string{ - string(GeneratedByteSliceOfLength(100)) + `LOGEND 1`, + ExpectedTokens: []string{ + string(tokenizetest.GenerateBytes(100)) + `LOGEND 1`, }, }, nil, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "HugeLog10000", Pattern: `LOGEND \d`, - Raw: func() []byte { - newRaw := GeneratedByteSliceOfLength(10000) - newRaw = append(newRaw, []byte(`LOGEND 1 `)...) - return newRaw + Input: func() []byte { + newInput := tokenizetest.GenerateBytes(10000) + newInput = append(newInput, []byte(`LOGEND 1 `)...) + return newInput }(), - ExpectedTokenized: []string{ - string(GeneratedByteSliceOfLength(10000)) + `LOGEND 1`, + ExpectedTokens: []string{ + string(tokenizetest.GenerateBytes(10000)) + `LOGEND 1`, }, }, nil, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "HugeLog1000000", Pattern: `LOGEND \d`, - Raw: func() []byte { - newRaw := GeneratedByteSliceOfLength(1000000) - newRaw = append(newRaw, []byte(`LOGEND 1 `)...) - return newRaw + Input: func() []byte { + newInput := tokenizetest.GenerateBytes(1000000) + newInput = append(newInput, []byte(`LOGEND 1 `)...) 
+ return newInput }(), ExpectedError: errors.New("bufio.Scanner: token too long"), }, nil, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "MultipleMultilineLogs", Pattern: `^LOGEND.*$`, - Raw: []byte("LOGSTART 12 log1\t \nLOGPART log1\nLOGEND log1\t \nLOGSTART 17 log2\nLOGPART log2\nLOGEND log2\nLOGSTART 43 log5"), - ExpectedTokenized: []string{ + Input: []byte("LOGSTART 12 log1\t \nLOGPART log1\nLOGEND log1\t \nLOGSTART 17 log2\nLOGPART log2\nLOGEND log2\nLOGSTART 43 log5"), + ExpectedTokens: []string{ "LOGSTART 12 log1\t \nLOGPART log1\nLOGEND log1", "LOGSTART 17 log2\nLOGPART log2\nLOGEND log2", }, @@ -359,19 +360,19 @@ func TestLineEndSplitFunc(t *testing.T) { nil, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "LogsWithoutFlusher", Pattern: `^LOGEND.*$`, - Raw: []byte("LOGPART log1\nLOGPART log1\t \n"), + Input: []byte("LOGPART log1\nLOGPART log1\t \n"), }, &Flusher{}, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "LogsWithFlusher", Pattern: `^LOGEND.*$`, - Raw: []byte("LOGPART log1\nLOGPART log1\t \n"), - ExpectedTokenized: []string{ + Input: []byte("LOGPART log1\nLOGPART log1\t \n"), + ExpectedTokens: []string{ "LOGPART log1\nLOGPART log1", }, @@ -383,11 +384,11 @@ func TestLineEndSplitFunc(t *testing.T) { }, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "LogsWithFlusherWithMultipleLogsInBuffer", Pattern: `^LOGEND.*$`, - Raw: []byte("LOGPART log1\nLOGEND\nLOGPART log1\t \n"), - ExpectedTokenized: []string{ + Input: []byte("LOGPART log1\nLOGEND\nLOGPART log1\t \n"), + ExpectedTokens: []string{ "LOGPART log1\nLOGEND", "LOGPART log1", }, @@ -400,11 +401,11 @@ func TestLineEndSplitFunc(t *testing.T) { }, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "LogsWithLongFlusherWithMultipleLogsInBuffer", Pattern: `^LOGEND.*$`, - Raw: []byte("LOGPART log1\nLOGEND\nLOGPART log1\t \n"), - ExpectedTokenized: []string{ + Input: []byte("LOGPART log1\nLOGEND\nLOGPART log1\t \n"), + ExpectedTokens: []string{ "LOGPART log1\nLOGEND", }, @@ -416,11 +417,11 @@ func TestLineEndSplitFunc(t *testing.T) { }, }, { - TokenizerTestCase{ + tokenizetest.TestCase{ Name: "LogsWithFlusherWithLogStartingWithWhiteChars", Pattern: `LOGEND \d+$`, - Raw: []byte("\nLOGEND 333"), - ExpectedTokenized: []string{ + Input: []byte("\nLOGEND 333"), + ExpectedTokens: []string{ "LOGEND 333", }, @@ -440,33 +441,33 @@ func TestLineEndSplitFunc(t *testing.T) { splitFunc, err := cfg.getSplitFunc(unicode.UTF8, false, tc.Flusher, 0, tc.PreserveLeadingWhitespaces, tc.PreserveTrailingWhitespaces) require.NoError(t, err) - t.Run(tc.Name, tc.RunFunc(splitFunc)) + t.Run(tc.Name, tc.Run(splitFunc)) } } func TestNewlineSplitFunc(t *testing.T) { testCases := []MultiLineTokenizerTestCase{ { - TokenizerTestCase{Name: "OneLogSimple", - Raw: []byte("my log\n"), - ExpectedTokenized: []string{ + tokenizetest.TestCase{Name: "OneLogSimple", + Input: []byte("my log\n"), + ExpectedTokens: []string{ `my log`, }, }, nil, }, { - TokenizerTestCase{Name: "OneLogCarriageReturn", - Raw: []byte("my log\r\n"), - ExpectedTokenized: []string{ + tokenizetest.TestCase{Name: "OneLogCarriageReturn", + Input: []byte("my log\r\n"), + ExpectedTokens: []string{ `my log`, }, }, nil, }, { - TokenizerTestCase{Name: "TwoLogsSimple", - Raw: []byte("log1\nlog2\n"), - ExpectedTokenized: []string{ + tokenizetest.TestCase{Name: "TwoLogsSimple", + Input: []byte("log1\nlog2\n"), + ExpectedTokens: []string{ `log1`, `log2`, }, @@ -474,9 +475,9 @@ func TestNewlineSplitFunc(t *testing.T) { nil, }, { - TokenizerTestCase{Name: 
"TwoLogsCarriageReturn", - Raw: []byte("log1\r\nlog2\r\n"), - ExpectedTokenized: []string{ + tokenizetest.TestCase{Name: "TwoLogsCarriageReturn", + Input: []byte("log1\r\nlog2\r\n"), + ExpectedTokens: []string{ `log1`, `log2`, }, @@ -484,59 +485,59 @@ func TestNewlineSplitFunc(t *testing.T) { nil, }, { - TokenizerTestCase{Name: "NoTailingNewline", - Raw: []byte(`foo`), + tokenizetest.TestCase{Name: "NoTailingNewline", + Input: []byte(`foo`), }, nil, }, { - TokenizerTestCase{Name: "HugeLog100", - Raw: func() []byte { - newRaw := GeneratedByteSliceOfLength(100) - newRaw = append(newRaw, '\n') - return newRaw + tokenizetest.TestCase{Name: "HugeLog100", + Input: func() []byte { + newInput := tokenizetest.GenerateBytes(100) + newInput = append(newInput, '\n') + return newInput }(), - ExpectedTokenized: []string{ - string(GeneratedByteSliceOfLength(100)), + ExpectedTokens: []string{ + string(tokenizetest.GenerateBytes(100)), }, }, nil, }, { - TokenizerTestCase{Name: "HugeLog10000", - Raw: func() []byte { - newRaw := GeneratedByteSliceOfLength(10000) - newRaw = append(newRaw, '\n') - return newRaw + tokenizetest.TestCase{Name: "HugeLog10000", + Input: func() []byte { + newInput := tokenizetest.GenerateBytes(10000) + newInput = append(newInput, '\n') + return newInput }(), - ExpectedTokenized: []string{ - string(GeneratedByteSliceOfLength(10000)), + ExpectedTokens: []string{ + string(tokenizetest.GenerateBytes(10000)), }, }, nil, }, { - TokenizerTestCase{Name: "HugeLog1000000", - Raw: func() []byte { - newRaw := GeneratedByteSliceOfLength(1000000) - newRaw = append(newRaw, '\n') - return newRaw + tokenizetest.TestCase{Name: "HugeLog1000000", + Input: func() []byte { + newInput := tokenizetest.GenerateBytes(1000000) + newInput = append(newInput, '\n') + return newInput }(), ExpectedError: errors.New("bufio.Scanner: token too long"), }, nil, }, { - TokenizerTestCase{Name: "LogsWithoutFlusher", - Raw: []byte("LOGPART log1"), + tokenizetest.TestCase{Name: "LogsWithoutFlusher", + Input: []byte("LOGPART log1"), }, &Flusher{}, }, { - TokenizerTestCase{Name: "LogsWithFlusher", - Raw: []byte("LOGPART log1"), - ExpectedTokenized: []string{ + tokenizetest.TestCase{Name: "LogsWithFlusher", + Input: []byte("LOGPART log1"), + ExpectedTokens: []string{ "LOGPART log1", }, AdditionalIterations: 1, @@ -547,9 +548,9 @@ func TestNewlineSplitFunc(t *testing.T) { }, }, { - TokenizerTestCase{Name: "DefaultFlusherSplits", - Raw: []byte("log1\nlog2\n"), - ExpectedTokenized: []string{ + tokenizetest.TestCase{Name: "DefaultFlusherSplits", + Input: []byte("log1\nlog2\n"), + ExpectedTokens: []string{ "log1", "log2", }, @@ -557,9 +558,9 @@ func TestNewlineSplitFunc(t *testing.T) { nil, }, { - TokenizerTestCase{Name: "LogsWithLogStartingWithWhiteChars", - Raw: []byte("\nLOGEND 333\nAnother one"), - ExpectedTokenized: []string{ + tokenizetest.TestCase{Name: "LogsWithLogStartingWithWhiteChars", + Input: []byte("\nLOGEND 333\nAnother one"), + ExpectedTokens: []string{ "", "LOGEND 333", }, @@ -567,9 +568,9 @@ func TestNewlineSplitFunc(t *testing.T) { nil, }, { - TokenizerTestCase{Name: "PreserveLeadingWhitespaces", - Raw: []byte("\n LOGEND 333 \nAnother one "), - ExpectedTokenized: []string{ + tokenizetest.TestCase{Name: "PreserveLeadingWhitespaces", + Input: []byte("\n LOGEND 333 \nAnother one "), + ExpectedTokens: []string{ "", " LOGEND 333", }, @@ -578,9 +579,9 @@ func TestNewlineSplitFunc(t *testing.T) { nil, }, { - TokenizerTestCase{Name: "PreserveTrailingWhitespaces", - Raw: []byte("\n LOGEND 333 \nAnother one "), - 
ExpectedTokenized: []string{ + tokenizetest.TestCase{Name: "PreserveTrailingWhitespaces", + Input: []byte("\n LOGEND 333 \nAnother one "), + ExpectedTokens: []string{ "", "LOGEND 333 ", }, @@ -589,9 +590,9 @@ func TestNewlineSplitFunc(t *testing.T) { nil, }, { - TokenizerTestCase{Name: "PreserveBothLeadingAndTrailingWhitespaces", - Raw: []byte("\n LOGEND 333 \nAnother one "), - ExpectedTokenized: []string{ + tokenizetest.TestCase{Name: "PreserveBothLeadingAndTrailingWhitespaces", + Input: []byte("\n LOGEND 333 \nAnother one "), + ExpectedTokens: []string{ "", " LOGEND 333 ", }, @@ -608,99 +609,68 @@ func TestNewlineSplitFunc(t *testing.T) { if tc.Flusher != nil { splitFunc = tc.Flusher.SplitFunc(splitFunc) } - t.Run(tc.Name, tc.RunFunc(splitFunc)) - } -} - -type noSplitTestCase struct { - Name string - Raw []byte - ExpectedTokenized [][]byte -} - -func (tc noSplitTestCase) RunFunc(splitFunc bufio.SplitFunc) func(t *testing.T) { - return func(t *testing.T) { - scanner := bufio.NewScanner(bytes.NewReader(tc.Raw)) - scanner.Split(splitFunc) - var tokenized [][]byte - for { - ok := scanner.Scan() - if !ok { - break - } - tokenized = append(tokenized, scanner.Bytes()) - } - - assert.Equal(t, tc.ExpectedTokenized, tokenized) + t.Run(tc.Name, tc.Run(splitFunc)) } } func TestNoSplitFunc(t *testing.T) { const largeLogSize = 100 - testCases := []noSplitTestCase{ + testCases := []tokenizetest.TestCase{ { - Name: "OneLogSimple", - Raw: []byte("my log\n"), - ExpectedTokenized: [][]byte{ - []byte("my log\n"), - }, + Name: "OneLogSimple", + Input: []byte("my log\n"), + ExpectedTokens: []string{"my log\n"}, }, { - Name: "TwoLogsSimple", - Raw: []byte("log1\nlog2\n"), - ExpectedTokenized: [][]byte{ - []byte("log1\nlog2\n"), - }, + Name: "TwoLogsSimple", + Input: []byte("log1\nlog2\n"), + ExpectedTokens: []string{"log1\nlog2\n"}, }, { - Name: "TwoLogsCarriageReturn", - Raw: []byte("log1\r\nlog2\r\n"), - ExpectedTokenized: [][]byte{ - []byte("log1\r\nlog2\r\n"), - }, + Name: "TwoLogsCarriageReturn", + Input: []byte("log1\r\nlog2\r\n"), + ExpectedTokens: []string{"log1\r\nlog2\r\n"}, }, { - Name: "NoTailingNewline", - Raw: []byte(`foo`), - ExpectedTokenized: [][]byte{[]byte("foo")}, + Name: "NoTailingNewline", + Input: []byte(`foo`), + ExpectedTokens: []string{"foo"}, }, { Name: "HugeLog100", - Raw: func() []byte { - return GeneratedByteSliceOfLength(largeLogSize) + Input: func() []byte { + return tokenizetest.GenerateBytes(largeLogSize) }(), - ExpectedTokenized: [][]byte{ - GeneratedByteSliceOfLength(100), - }, + ExpectedTokens: []string{string(tokenizetest.GenerateBytes(largeLogSize))}, }, { Name: "HugeLog300", - Raw: func() []byte { - return GeneratedByteSliceOfLength(largeLogSize * 3) + Input: func() []byte { + return tokenizetest.GenerateBytes(largeLogSize * 3) }(), - ExpectedTokenized: [][]byte{ - []byte("abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuv"), - []byte("wxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqr"), - []byte("stuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmn"), + ExpectedTokens: []string{ + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuv", + "wxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqr", + "stuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmn", }, }, { Name: 
"EOFBeforeMaxLogSize", - Raw: func() []byte { - return GeneratedByteSliceOfLength(largeLogSize * 3.5) + Input: func() []byte { + return tokenizetest.GenerateBytes(largeLogSize * 3.5) }(), - ExpectedTokenized: [][]byte{ - []byte("abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuv"), - []byte("wxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqr"), - []byte("stuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmn"), - []byte("opqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijkl"), + ExpectedTokens: []string{ + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuv", + "wxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqr", + "stuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmn", + "opqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijkl", }, }, } for _, tc := range testCases { splitFunc := SplitNone(largeLogSize) - t.Run(tc.Name, tc.RunFunc(splitFunc)) + t.Run(tc.Name, tc.Run(splitFunc)) } } diff --git a/pkg/stanza/tokenize/util_test.go b/pkg/stanza/tokenize/tokenizetest/tokenize.go similarity index 60% rename from pkg/stanza/tokenize/util_test.go rename to pkg/stanza/tokenize/tokenizetest/tokenize.go index 9a357c5c27b7..996ced63ef50 100644 --- a/pkg/stanza/tokenize/util_test.go +++ b/pkg/stanza/tokenize/tokenizetest/tokenize.go @@ -1,7 +1,7 @@ // Copyright The OpenTelemetry Authors // SPDX-License-Identifier: Apache-2.0 -package tokenize +package tokenizetest // import "github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/tokenize/tokenizetest" import ( "bufio" @@ -12,21 +12,21 @@ import ( "github.com/stretchr/testify/assert" ) -// state is going to keep processing state of the TestReader +// state is going to keep processing state of the testReader type state struct { ReadFrom int Processed int } -// TestReader is a TestReader which keeps state of readed and processed data -type TestReader struct { +// testReader is a testReader which keeps state of readed and processed data +type testReader struct { State *state Data []byte } -// NewTestReader creates TestReader with empty state -func NewTestReader(data []byte) TestReader { - return TestReader{ +// newTestReader creates testReader with empty state +func newTestReader(data []byte) testReader { + return testReader{ State: &state{ ReadFrom: 0, Processed: 0, @@ -35,8 +35,8 @@ func NewTestReader(data []byte) TestReader { } } -// Read reads data from TestReader and remebers where reading has been finished -func (r TestReader) Read(p []byte) (n int, err error) { +// Read reads data from testReader and remebers where reading has been finished +func (r testReader) Read(p []byte) (n int, err error) { // return eof if data has been fully readed if len(r.Data)-r.State.ReadFrom == 0 { return 0, io.EOF @@ -57,24 +57,24 @@ func (r TestReader) Read(p []byte) (n int, err error) { return i, nil } -// Reset resets TestReader state (sets last readed position to last processed position) -func (r *TestReader) Reset() { +// Reset resets testReader state (sets last readed position to last processed position) +func (r *testReader) Reset() { r.State.ReadFrom = r.State.Processed } -func (r *TestReader) SplitFunc(splitFunc bufio.SplitFunc) bufio.SplitFunc { +func (r *testReader) splitFunc(split bufio.SplitFunc) bufio.SplitFunc { return func(data []byte, atEOF bool) (advance 
int, token []byte, err error) { - advance, token, err = splitFunc(data, atEOF) + advance, token, err = split(data, atEOF) r.State.Processed += advance return } } -type TokenizerTestCase struct { +type TestCase struct { Name string Pattern string - Raw []byte - ExpectedTokenized []string + Input []byte + ExpectedTokens []string ExpectedError error Sleep time.Duration AdditionalIterations int @@ -82,11 +82,11 @@ type TokenizerTestCase struct { PreserveTrailingWhitespaces bool } -func (tc TokenizerTestCase) RunFunc(splitFunc bufio.SplitFunc) func(t *testing.T) { - reader := NewTestReader(tc.Raw) +func (tc TestCase) Run(split bufio.SplitFunc) func(t *testing.T) { + reader := newTestReader(tc.Input) return func(t *testing.T) { - var tokenized []string + var tokens []string for i := 0; i < 1+tc.AdditionalIterations; i++ { // sleep before next iterations if i > 0 { @@ -94,22 +94,22 @@ func (tc TokenizerTestCase) RunFunc(splitFunc bufio.SplitFunc) func(t *testing.T } reader.Reset() scanner := bufio.NewScanner(reader) - scanner.Split(reader.SplitFunc(splitFunc)) + scanner.Split(reader.splitFunc(split)) for { ok := scanner.Scan() if !ok { assert.Equal(t, tc.ExpectedError, scanner.Err()) break } - tokenized = append(tokenized, scanner.Text()) + tokens = append(tokens, scanner.Text()) } } - assert.Equal(t, tc.ExpectedTokenized, tokenized) + assert.Equal(t, tc.ExpectedTokens, tokens) } } -func GeneratedByteSliceOfLength(length int) []byte { +func GenerateBytes(length int) []byte { chars := []byte(`abcdefghijklmnopqrstuvwxyz`) newSlice := make([]byte, length) for i := 0; i < length; i++ {
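
For context on how the relocated helpers are meant to be consumed after these patches, below is a minimal sketch of a split-function test written against the new tokenizetest package. It is illustrative only and not part of either patch: the package name example_test, the test name, and the use of the standard library's bufio.ScanLines as the split function under test are assumptions made so the example is self-contained; real callers such as syslog_test.go and multiline_test.go pass their own bufio.SplitFunc into TestCase.Run.

// Hypothetical usage sketch (not part of the patches above): exercising the
// relocated tokenizetest helpers from an arbitrary test package.
package example_test

import (
	"bufio"
	"testing"

	"github.com/open-telemetry/opentelemetry-collector-contrib/pkg/stanza/tokenize/tokenizetest"
)

func TestScanLinesTokens(t *testing.T) {
	tc := tokenizetest.TestCase{
		Name:  "TwoLines",
		Input: append(tokenizetest.GenerateBytes(10), []byte("\nsecond line\n")...),
		ExpectedTokens: []string{
			string(tokenizetest.GenerateBytes(10)), // "abcdefghij"
			"second line",
		},
	}
	// bufio.ScanLines stands in for the split function under test here;
	// real tests pass the tokenizer's own bufio.SplitFunc instead.
	t.Run(tc.Name, tc.Run(bufio.ScanLines))
}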