Revert PR #8723 which disabled the smart JSON chunker when the document contained large strings.

This is now safe.
nicktobey committed Feb 7, 2025
1 parent edfa9c7 commit 34f9158
Showing 4 changed files with 1 addition and 49 deletions.
7 changes: 0 additions & 7 deletions go/store/prolly/tree/json_chunker.go
@@ -56,13 +56,6 @@ func SerializeJsonToAddr(ctx context.Context, ns NodeStore, j sql.JSONWrapper) (
 	}
 	jsonChunker.appendJsonToBuffer(jsonBytes)
 	err = jsonChunker.processBuffer(ctx)
-	if largeJsonStringError.Is(err) {
-		// Due to current limits on chunk sizes, and an inability of older clients to read
-		// string keys and values split across multiple chunks, we can't use the JSON chunker for documents
-		// with extremely long strings.
-		node, _, err := serializeJsonToBlob(ctx, ns, j)
-		return node, err
-	}
 	if err != nil {
		return Node{}, err
	}
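For reference, the hunk above removes the blob fallback from SerializeJsonToAddr. Below is a minimal sketch of the pre-revert control flow; serializeIndexed, serializeBlob, and the hardcoded 32KB cutoff are hypothetical stand-ins, not Dolt's actual API:

```go
package main

import (
	"errors"
	"fmt"
)

var errLargeJSONString = errors.New("json string exceeds the max chunkable length")

// serializeIndexed stands in for the JSON chunker path; the 32KB cutoff
// mirrors the limit removed elsewhere in this commit but is hardcoded here.
func serializeIndexed(doc string) (string, error) {
	if len(doc) > 32*1024 {
		return "", errLargeJSONString
	}
	return "indexed:" + doc, nil
}

// serializeBlob stands in for serializeJsonToBlob: store the document as an
// unindexed blob.
func serializeBlob(doc string) (string, error) {
	return "blob:" + doc, nil
}

// serialize mirrors the pre-revert control flow deleted above: try the
// chunker first, and fall back to a plain blob on an oversized string.
func serialize(doc string) (string, error) {
	node, err := serializeIndexed(doc)
	if errors.Is(err, errLargeJSONString) {
		return serializeBlob(doc)
	}
	if err != nil {
		return "", err
	}
	return node, nil
}

func main() {
	fmt.Println(serialize("small document"))
}
```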
2 changes: 1 addition & 1 deletion go/store/prolly/tree/json_indexed_document.go
@@ -114,7 +114,7 @@ func tryWithFallback(
 	tryFunc func() error,
 	fallbackFunc func(document types.JSONDocument) error) error {
 	err := tryFunc()
-	if err == unknownLocationKeyError || err == unsupportedPathError || err == jsonParseError || largeJsonStringError.Is(err) {
+	if err == unknownLocationKeyError || err == unsupportedPathError || err == jsonParseError {
 		if err != unsupportedPathError {
 			if sqlCtx, ok := ctx.(*sql.Context); ok {
 				sqlCtx.GetLogger().Warn(err)
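The one-line change above narrows the set of errors that send tryWithFallback down the slow path. A simplified sketch of that dispatch shape, assuming plain function types in place of the real sql.Context and types.JSONDocument parameters:

```go
package main

import (
	"errors"
	"fmt"
)

// Hypothetical sentinel errors standing in for the ones in json_scanner.go.
var unknownLocationKeyError = errors.New("unknown location key")
var unsupportedPathError = errors.New("unsupported path")
var jsonParseError = errors.New("json parse error")

// tryWithFallback runs the fast indexed-document path first and re-runs the
// operation against a generic document only on a known, recoverable error.
func tryWithFallback(tryFunc func() error, fallbackFunc func() error) error {
	err := tryFunc()
	if err == unknownLocationKeyError || err == unsupportedPathError || err == jsonParseError {
		if err != unsupportedPathError {
			fmt.Println("warn:", err) // the real code logs via sqlCtx.GetLogger().Warn(err)
		}
		return fallbackFunc()
	}
	return err
}

func main() {
	err := tryWithFallback(
		func() error { return jsonParseError },
		func() error { fmt.Println("fallback ran"); return nil },
	)
	fmt.Println("result:", err)
}
```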
20 changes: 0 additions & 20 deletions go/store/prolly/tree/json_scanner.go
@@ -17,8 +17,6 @@ package tree
 import (
 	"fmt"
 	"io"
-
-	errorkinds "gopkg.in/src-d/go-errors.v1"
 )

// JsonScanner is a state machine that parses already-normalized JSON while keeping track of the path to the current value.

@@ -40,15 +38,7 @@ type JsonScanner struct {
 	valueOffset int
 }
 
-// The JSONChunker can't draw a chunk boundary within an object key.
-// This can lead to large chunks that may cause problems.
-// We've observed chunks getting written incorrectly if they exceed 48KB.
-// Since boundaries are always drawn once a chunk exceeds maxChunkSize (16KB),
-// this is the largest length that can be appended to a chunk without exceeding 48KB.
-var maxJsonStringLength = 48*1024 - maxChunkSize
-
 var jsonParseError = fmt.Errorf("encountered invalid JSON while reading JSON from the database, or while preparing to write JSON to the database. This is most likely a bug in JSON diffing")
-var largeJsonStringError = errorkinds.NewKind("encountered JSON key with length %s, larger than max allowed length %s")
 
 func (j JsonScanner) Clone() JsonScanner {
 	return JsonScanner{
@@ -215,9 +205,6 @@ func (s *JsonScanner) acceptKeyString() (stringBytes []byte, err error) {
 		}
 		s.valueOffset++
 	}
-	if s.valueOffset-stringStart > maxJsonStringLength {
-		return nil, largeJsonStringError.New(s.valueOffset-stringStart, maxJsonStringLength)
-	}
 	result := s.jsonBuffer[stringStart:s.valueOffset]
 	// Advance past the ending quotes
 	s.valueOffset++
@@ -233,20 +220,13 @@ func (s *JsonScanner) acceptValueString() (finishedString bool, err error) {
 }
 
 func (s *JsonScanner) acceptRestOfValueString() (finishedString bool, err error) {
-	stringStart := s.valueOffset
 	for s.current() != '"' {
 		switch s.current() {
 		case '\\':
 			s.valueOffset++
 		}
 		s.valueOffset++
 	}
-	// We don't currently split value strings across chunks because it causes issues being read by older clients.
-	// Instead, by returning largeJsonStringError, we trigger the fallback behavior where the JSON document
-	// gets treated as a non-indexed blob.
-	if s.valueOffset-stringStart > maxJsonStringLength {
-		return false, largeJsonStringError.New(s.valueOffset-stringStart, maxJsonStringLength)
-	}
 	// Advance past the ending quotes
 	s.valueOffset++
 	return true, nil
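The deleted comment documents the arithmetic behind the old limit: boundaries are only drawn once a chunk exceeds maxChunkSize (16KB), and chunks were observed to be written incorrectly past 48KB, so the longest string that could safely be appended was 48KB − 16KB = 32KB. A small sketch of the guard being deleted (the constants follow the removed lines; checkStringLength itself is a hypothetical helper):

```go
package main

import "fmt"

// maxChunkSize is an assumption taken from the deleted comment (16KB).
const maxChunkSize = 16 * 1024

// A chunk may already hold up to maxChunkSize bytes before a boundary is
// drawn, so the longest string that keeps any chunk under 48KB is 32KB.
const maxJSONStringLength = 48*1024 - maxChunkSize

// checkStringLength reproduces the length guard that this commit deletes
// from acceptKeyString and acceptRestOfValueString.
func checkStringLength(stringStart, valueOffset int) error {
	if valueOffset-stringStart > maxJSONStringLength {
		return fmt.Errorf("encountered JSON string with length %d, larger than max allowed length %d",
			valueOffset-stringStart, maxJSONStringLength)
	}
	return nil
}

func main() {
	fmt.Println(maxJSONStringLength)           // 32768
	fmt.Println(checkStringLength(0, 40*1024)) // over the limit: returns an error
}
```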
21 changes: 0 additions & 21 deletions integration-tests/bats/json.bats
@@ -298,24 +298,3 @@ SQL
     [ "$status" -eq 0 ]
     [[ "$output" =~ "Blob" ]] || false
 }
-
-# Older clients have a bug that prevents them from reading indexed JSON documents with strings split across chunks,
-# and making one large chunk can cause issues with the journal.
-# Thus, we expect that the document gets stored as a blob.
-@test "json: document with large string value doesn't get indexed" {
-    run dolt sql <<SQL
-CREATE TABLE js (
-    pk int PRIMARY KEY,
-    js json
-);
-insert into js values (1, '{"key": "`head -c 2097152 < /dev/zero | tr '\0' '\141'`" }');
-insert into js values (2, '{"`head -c 2097152 < /dev/zero | tr '\0' '\141'`": "value" }');
-SQL
-    # If the documents aren't put into an IndexedJsonDocument, they will contain the following hashes.
-    run dolt show 9erat0qpsqrmmr53fhu6b7gnjj5n1v9i
-    [ "$status" -eq 0 ]
-    [[ "$output" =~ "Blob" ]] || false
-    run dolt show 69cm31n0k392sch3snf5jc7ndfiaund8
-    [ "$status" -eq 0 ]
-    [[ "$output" =~ "Blob" ]] || false
-}
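The deleted test built its oversized payloads with a shell pipeline: `head -c 2097152 < /dev/zero | tr '\0' '\141'` emits 2MiB of the byte 0141 (octal for 'a'). A hypothetical Go equivalent for reproducing the same document outside of bats:

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// 2MiB of 'a' bytes, matching: head -c 2097152 </dev/zero | tr '\0' '\141'
	payload := strings.Repeat("a", 2097152)
	doc := fmt.Sprintf(`{"key": %q}`, payload)
	fmt.Println(len(doc)) // far above the old 32KB per-string limit
}
```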
