Commit bd8385e
Updating journal to allow chunk records larger than 1MB. Adding tests for JSON cases that triggered this.
fulghum committed Jan 8, 2025
1 parent dab16ea commit bd8385e
Showing 5 changed files with 68 additions and 8 deletions.
41 changes: 41 additions & 0 deletions go/libraries/doltcore/sqle/enginetest/dolt_queries.go
@@ -23,6 +23,7 @@ import (
 	"github.com/dolthub/go-mysql-server/sql/plan"
 	"github.com/dolthub/go-mysql-server/sql/types"
 	"github.com/dolthub/vitess/go/vt/sqlparser"
+	"github.com/hashicorp/go-uuid"

 	"github.com/dolthub/dolt/go/libraries/doltcore/sqle/dtablefunctions"
 )
@@ -4769,6 +4770,46 @@ var LargeJsonObjectScriptTests = []queries.ScriptTest{
 			},
 		},
 	},
+	{
+		// JSON chunking can't currently break chunks in a JSON value, so large string values can
+		// generate chunks that are larger than typical chunks.
+		Name: "JSON with large string (> 1MB)",
+		SetUpScript: []string{
+			"create table t (pk int primary key, j1 JSON)",
+		},
+		Assertions: []queries.ScriptTestAssertion{
+			{
+				// NOTE: This doesn't trigger the same error that we see with sql-server
+				// because the Golang enginetests use an in-memory chunk store, and
+				// not the filesystem journaling chunk store.
+				Query:    fmt.Sprintf(`insert into t (pk, j1) VALUES (1, '{"large_value": "%s"}');`, generateStringData(1024*1024*3)),
+				Expected: []sql.Row{{types.OkResult{RowsAffected: 1}}},
+			},
+			{
+				Skip: true,
+				// TODO: The JSON is coming back truncated for some reason and failing this test.
+				// When that's fixed, unskip this test, and fix the length value below.
+				Query:    `SELECT pk, length(j1) from t;`,
+				Expected: []sql.Row{{1, 123}},
+			},
+		},
+	},
 }
+
+// generateStringData generates random string data of length |length|. The data is generated
+// using UUIDs to avoid data that could be easily compressed.
+func generateStringData(length int) string {
+	var b strings.Builder
+	for length > 0 {
+		uuid, err := uuid.GenerateUUID()
+		if err != nil {
+			panic(err)
+		}
+		uuid = strings.ReplaceAll(uuid, "-", "")
+		b.WriteString(uuid)
+		length -= len(uuid)
+	}
+	return b.String()
+}
+
 var DoltTagTestScripts = []queries.ScriptTest{
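
The doc comment on generateStringData explains that UUID-derived data is used so the payload can't be easily compressed; a compressible value could shrink well below the interesting size threshold before it ever reaches the journal. A quick standalone check of that property (my sketch, not part of the commit: gzip stands in for whatever compression the chunk store applies, and hex-encoded crypto/rand bytes stand in for hashicorp/go-uuid output):

package main

import (
	"bytes"
	"compress/gzip"
	"crypto/rand"
	"encoding/hex"
	"fmt"
	"strings"
)

// gzipSize returns the gzip-compressed size of s in bytes.
func gzipSize(s string) int {
	var buf bytes.Buffer
	zw := gzip.NewWriter(&buf)
	zw.Write([]byte(s))
	zw.Close()
	return buf.Len()
}

func main() {
	const n = 1024 * 1024 // 1MB of string data

	// Trivially compressible: one repeated character.
	repeated := strings.Repeat("a", n)

	// UUID-like: hex digits of random bytes, similar in entropy to the
	// concatenated UUIDs that generateStringData produces.
	raw := make([]byte, n/2)
	rand.Read(raw)
	uuidLike := hex.EncodeToString(raw)

	fmt.Printf("repeated:  %d -> %d bytes gzipped\n", len(repeated), gzipSize(repeated))
	fmt.Printf("uuid-like: %d -> %d bytes gzipped\n", len(uuidLike), gzipSize(uuidLike))
}

The repeated string collapses to a few kilobytes while the UUID-style data compresses only modestly, which is why the test can rely on a 3MB input actually producing an oversized chunk record.
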
7 changes: 1 addition & 6 deletions go/store/nbs/journal_record.go
@@ -92,9 +92,6 @@
 	journalRecAddrSz      = 20
 	journalRecChecksumSz  = 4
 	journalRecTimestampSz = 8
-
-	// todo(andy): less arbitrary
-	journalRecMaxSz = 128 * 1024
 )

 // journalRecordTimestampGenerator returns the current time in Unix epoch seconds. This function is stored in a
Expand Down Expand Up @@ -248,9 +245,7 @@ func processJournalRecords(ctx context.Context, r io.ReadSeeker, off int64, cb f
}

l := readUint32(buf)
if l > journalRecMaxSz {
break
} else if buf, err = rdr.Peek(int(l)); err != nil {
if buf, err = rdr.Peek(int(l)); err != nil {
break
}

Expand Down
7 changes: 5 additions & 2 deletions go/store/nbs/journal_writer.go
@@ -38,8 +38,11 @@ const (
 	chunkJournalFileSize = 16 * 1024

 	// todo(andy): buffer must be able to hold an entire record,
-	// but we don't have a hard limit on record size right now
-	journalWriterBuffSize = 1024 * 1024
+	// but we don't have a hard limit on record size right now.
+	// JSON data has cases where it won't chunk down as small as other data,
+	// so we have increased this to 5MB. If/when JSON chunking handles those
+	// cases, we could decrease this size to 1MB again.
+	journalWriterBuffSize = 5 * 1024 * 1024

 	chunkJournalAddr = chunks.JournalFileID

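The buffer-size comment above is the other half of the journalRecMaxSz removal: processJournalRecords reads a record by peeking its full length out of a bufio.Reader, and bufio.Reader.Peek fails with ErrBufferFull for any request larger than the reader's buffer. A minimal standalone sketch of that failure mode (my illustration, not Dolt's actual record layout: it assumes a big-endian uint32 length prefix that counts itself, in the spirit of readUint32 above):

package main

import (
	"bufio"
	"bytes"
	"encoding/binary"
	"fmt"
)

func main() {
	// Frame one oversized record: a uint32 length prefix followed by a
	// 2MB payload, so the whole record exceeds a 1MB buffer.
	payload := bytes.Repeat([]byte{'x'}, 2*1024*1024)
	var journal bytes.Buffer
	binary.Write(&journal, binary.BigEndian, uint32(4+len(payload)))
	journal.Write(payload)

	// A reader sized like the old 1MB journalWriterBuffSize cannot Peek
	// the whole record back out.
	rdr := bufio.NewReaderSize(bytes.NewReader(journal.Bytes()), 1024*1024)
	buf, _ := rdr.Peek(4)
	l := binary.BigEndian.Uint32(buf)
	if _, err := rdr.Peek(int(l)); err != nil {
		fmt.Println("record larger than buffer:", err) // bufio: buffer full
	}
}

With the explicit cap gone, the 5MB buffer is what keeps reads of these larger JSON-driven records working; a record bigger than the buffer would presumably hit the same error again.
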
1 change: 1 addition & 0 deletions integration-tests/bats/json-large-value-insert.sql

Large diffs are not rendered by default.
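
The fixture's contents aren't rendered above, but from the bats test that consumes it below, it presumably holds a single INSERT whose JSON string value is larger than 1MB. A hypothetical way to regenerate such a file (my sketch; the committed fixture may differ), using the same incompressible style as generateStringData:

package main

import (
	"crypto/rand"
	"encoding/hex"
	"fmt"
	"os"
)

func main() {
	// 1.5MB of random bytes hex-encode to a 3MB string, matching the
	// size used in the enginetest above.
	raw := make([]byte, 1024*1024*3/2)
	if _, err := rand.Read(raw); err != nil {
		panic(err)
	}
	stmt := fmt.Sprintf("INSERT INTO t (pk, j1) VALUES (1, '{\"large_value\": %q}');\n", hex.EncodeToString(raw))
	if err := os.WriteFile("json-large-value-insert.sql", []byte(stmt), 0o644); err != nil {
		panic(err)
	}
}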

20 changes: 20 additions & 0 deletions integration-tests/bats/json.bats
@@ -261,3 +261,23 @@ SQL
     [ "$status" -eq 0 ]
     [ "${lines[1]}" = '1,"[{""a"":""<>&""}]"' ]
 }
+
+@test "json: insert large string value (> 1MB)" {
+    dolt sql <<SQL
+CREATE TABLE t (
+    pk int PRIMARY KEY,
+    j1 json
+);
+SQL
+
+    dolt sql -f $BATS_TEST_DIRNAME/json-large-value-insert.sql
+
+    # TODO: Retrieving the JSON errors with a JSON truncated message
+    # Unskip this once the JSON truncation issue is fixed and
+    # fill in the expected length below.
+    skip "Function Support is currently disabled"
+
+    run dolt sql -q "SELECT pk, length(j1) FROM t;" -r csv
+    [ "$status" -eq 0 ]
+    [ "${lines[1]}" = '1,???' ]
+}
