Skip to content

Commit

Permalink
apacheGH-39870: [Go] Include buffered pages in TotalBytesWritten (apa…
Browse files Browse the repository at this point in the history
…che#40105)

### Rationale for this change

Currently, buffered data pages are not included in `TotalBytesWritten`, which means that there is no accurate estimate of the current size.

### Are there any user-facing changes?
`RowGroupTotalBytesWritten` will now include the total bytes held in buffered data pages, excluding the headers of those buffered pages.

* Closes: apache#39870

Authored-by: Matthew McNew <[email protected]>
Signed-off-by: Matt Topol <[email protected]>
  • Loading branch information
matthewmcnew authored and thisisnic committed Mar 8, 2024
1 parent 0fdfbdc commit b52bbb3
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 5 deletions.
7 changes: 6 additions & 1 deletion go/parquet/file/column_writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,12 @@ func (w *columnWriter) TotalCompressedBytes() int64 {
}

func (w *columnWriter) TotalBytesWritten() int64 {
return w.totalBytesWritten
bufferedPagesBytes := int64(0)
for _, p := range w.pages {
bufferedPagesBytes += int64(len(p.Data()))
}

return w.totalBytesWritten + bufferedPagesBytes
}

func (w *columnWriter) RowsWritten() int {
Expand Down
14 changes: 10 additions & 4 deletions go/parquet/file/column_writer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,11 @@ func (p *PrimitiveWriterTestSuite) testDictionaryFallbackEncoding(version parque
}

func (p *PrimitiveWriterTestSuite) testDictionaryFallbackAndCompressedSize(version parquet.Version) {
// skip boolean as dictionary encoding is not used
if p.Typ.Kind() == reflect.Bool {
return
}

p.GenerateData(SmallSize)
props := parquet.DefaultColumnProperties()
props.DictionaryEnabled = true
Expand All @@ -440,13 +445,14 @@ func (p *PrimitiveWriterTestSuite) testDictionaryFallbackAndCompressedSize(versi
props.Encoding = parquet.Encodings.RLEDict
}

writer := p.buildWriter(SmallSize, props, parquet.WithVersion(version))
writer := p.buildWriter(SmallSize, props, parquet.WithVersion(version), parquet.WithDataPageSize(SmallSize-1))
p.WriteBatchValues(writer, nil, nil)
p.NotZero(writer.TotalBytesWritten())
writer.FallbackToPlain()
p.NotEqual(0, writer.TotalCompressedBytes())
p.NotZero(writer.TotalCompressedBytes())
writer.Close()
p.NotEqual(0, writer.TotalCompressedBytes())
p.NotEqual(0, writer.TotalBytesWritten())
p.NotZero(writer.TotalCompressedBytes())
p.NotZero(writer.TotalBytesWritten())
}

func (p *PrimitiveWriterTestSuite) TestRequiredPlain() {
Expand Down

0 comments on commit b52bbb3

Please sign in to comment.