Skip to content

Commit

Permalink
bump
Browse files Browse the repository at this point in the history
  • Loading branch information
James Cor committed Jul 25, 2024
2 parents fcbe0f3 + a7d4fce commit bc48aac
Show file tree
Hide file tree
Showing 7 changed files with 585 additions and 172 deletions.
108 changes: 93 additions & 15 deletions go/cmd/dolt/commands/admin/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ package admin

import (
"context"
"fmt"
"io"
"time"

"github.com/pkg/errors"

Expand All @@ -41,14 +43,17 @@ func (cmd ArchiveCmd) Name() string {
}

var docs = cli.CommandDocumentationContent{
ShortDesc: "Create archive files using native or cgo compression, then verify.",
LongDesc: `Run this command on a dolt database only after running 'dolt gc'. This command will create an archive file to the CWD. Suffix: .darc. After the new file is generated, it will read every chunk from the new file and verify that the chunk hashes to the correct addr.`,
ShortDesc: "Create archive files for greater compression, then verify all chunks.",
LongDesc: `Run this command on a dolt database only after running 'dolt gc'. This command will convert all 'oldgen'
table files into archives. Currently, for safety, table files are left in place.`,

Synopsis: []string{
`--no-group`,
`[--group-chunks]`,
},
}

const groupChunksFlag = "group-chunks"

// Description returns a description of the command
func (cmd ArchiveCmd) Description() string {
return "Hidden command to kick the tires with the new archive format."
Expand All @@ -63,11 +68,10 @@ func (cmd ArchiveCmd) Docs() *cli.CommandDocumentation {

// ArgParser builds the argument parser for the archive command. The parser is
// created with a max-args value of 0 and registers the group-chunks flag; the
// remaining flags listed in the TODO block below are not implemented yet.
func (cmd ArchiveCmd) ArgParser() *argparser.ArgParser {
ap := argparser.NewArgParserWithMaxArgs(cmd.Name(), 0)
ap.SupportsFlag(groupChunksFlag, "", "Attempt to group chunks. This will produce smaller archives, but can take much longer to build.")
/* TODO: Implement these flags
ap.SupportsFlag("raw", "", "Create an archive file with 0 compression")
ap.SupportsFlag("no-manifest", "", "Do not alter the manifest file. Generate the archive file only")
ap.SupportsFlag("no-grouping", "", "Do not attempt to group chunks. Default dictionary will be used for all chunks")
ap.SupportsFlag("verify-existing", "", "Skip generation altogether and just verify the existing archive file.")
ap.SupportsFlag("purge", "", "remove table files after archiving")
ap.SupportsFlag("revert", "", "Return to unpurged table files, or rebuilt table files from archives")
*/
return ap
}
Expand All @@ -78,7 +82,7 @@ func (cmd ArchiveCmd) Hidden() bool {
func (cmd ArchiveCmd) Exec(ctx context.Context, commandStr string, args []string, dEnv *env.DoltEnv, cliCtx cli.CliContext) int {
ap := cmd.ArgParser()
help, _ := cli.HelpAndUsagePrinters(cli.CommandDocsForCommandString(commandStr, docs, ap))
_ = cli.ParseArgsOrDie(ap, args, help)
apr := cli.ParseArgsOrDie(ap, args, help)

db := doltdb.HackDatasDatabaseFromDoltDB(dEnv.DoltDB)
cs := datas.ChunkStoreFromDatabase(db)
Expand All @@ -100,14 +104,18 @@ func (cmd ArchiveCmd) Exec(ctx context.Context, commandStr string, args []string
})

groupings := nbs.NewChunkRelations()
err = historicalFuzzyMatching(ctx, hs, &groupings, dEnv.DoltDB)
if err != nil {
cli.PrintErrln(err)
return 1
if apr.Contains(groupChunksFlag) {
err = historicalFuzzyMatching(ctx, hs, &groupings, dEnv.DoltDB)
if err != nil {
cli.PrintErrln(err)
return 1
}
}
cli.Printf("Found %d possible relations by walking history\n", groupings.Count())

err = nbs.BuildArchive(ctx, cs, &groupings)
progress := make(chan interface{}, 32)
handleProgress(ctx, progress)

err = nbs.BuildArchive(ctx, cs, &groupings, progress)
if err != nil {
cli.PrintErrln(err)
return 1
Expand All @@ -116,8 +124,78 @@ func (cmd ArchiveCmd) Exec(ctx context.Context, commandStr string, args []string
return 0
}

// handleProgress starts a background goroutine that renders archive build
// progress on the terminal, and returns immediately.
//
// The goroutine reads from progress until ctx is cancelled or the channel is
// closed. Each message is handled according to its dynamic type:
//   - string: printed right away as a "LOG: ..." line.
//   - nbs.ArchiveBuildProgressMsg: when Total == Completed the stage is
//     reported as "Done"; otherwise the message is remembered and rendered on
//     the next status refresh.
//   - any other type: printed as an "Unexpected Message" line.
//
// The status line (spinner followed by the latest progress text) is redrawn
// only when more than one second has elapsed since the previous redraw. If no
// message arrives for a second, an idle timer wakes the loop so the spinner
// keeps turning.
func handleProgress(ctx context.Context, progress chan interface{}) {
	go func() {
		// Spinner frames, indexed by rotation % len(spinner).
		spinner := [...]string{"- ", "\\ ", "| ", "/ "}

		rotation := 0
		p := cli.NewEphemeralPrinter()
		currentMessage := "Starting Archive Build"
		var lastProgressMsg *nbs.ArchiveBuildProgressMsg
		lastUpdateTime := time.Now()

		// A single reusable timer replaces a per-iteration time.After call, so a
		// busy progress channel does not allocate a new timer for every message.
		idle := time.NewTimer(time.Second)
		defer idle.Stop()

		for {
			// Re-arm the idle timer for this iteration. The non-blocking drain
			// discards a tick that fired while a message was being handled, so a
			// stale tick cannot wake the select below early.
			if !idle.Stop() {
				select {
				case <-idle.C:
				default:
				}
			}
			idle.Reset(time.Second)

			select {
			case <-ctx.Done():
				return
			case msg, ok := <-progress:
				if !ok {
					return
				}
				switch v := msg.(type) {
				case string:
					cli.Printf("LOG: %s\n", v)
				case nbs.ArchiveBuildProgressMsg:
					if v.Total == v.Completed {
						p.Printf("%s: Done\n", v.Stage)
						lastProgressMsg = nil
						currentMessage = ""
						p.Display()
						cli.Printf("\n")
					} else {
						lastProgressMsg = &v
					}
				default:
					cli.Printf("Unexpected Message: %v\n", v)
				}
			// If no events come in, we still want to update the progress bar every second.
			case <-idle.C:
			}

			now := time.Now()
			if now.Sub(lastUpdateTime) <= time.Second {
				// Redrawn too recently; wait for the next message or idle tick.
				continue
			}

			rotation++
			p.Printf("%s", spinner[rotation%len(spinner)])

			if lastProgressMsg != nil {
				// Guard against a zero total so the percentage never divides by zero.
				percentDone := 0.0
				if lastProgressMsg.Total > 0 {
					percentDone = float64(lastProgressMsg.Completed) / float64(lastProgressMsg.Total)
					percentDone *= 100.0
				}

				currentMessage = fmt.Sprintf("%s: %d/%d (%.2f%%)", lastProgressMsg.Stage, lastProgressMsg.Completed, lastProgressMsg.Total, percentDone)
			}

			// With no new progress the message is unchanged, but the spinner still turns.
			p.Printf("%s", currentMessage)
			lastUpdateTime = now

			p.Display()
		}
	}()
}

func historicalFuzzyMatching(ctx context.Context, heads hash.HashSet, groupings *nbs.ChunkRelations, db *doltdb.DoltDB) error {
hs := []hash.Hash{}
var hs []hash.Hash
for h := range heads {
_, err := db.ReadCommit(ctx, h)
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion go/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ require (
github.com/cespare/xxhash/v2 v2.2.0
github.com/creasty/defaults v1.6.0
github.com/dolthub/flatbuffers/v23 v23.3.3-dh.2
github.com/dolthub/go-mysql-server v0.18.2-0.20240724232948-d95a59fdb2b5
github.com/dolthub/go-mysql-server v0.18.2-0.20240725184149-27ed7002e0d9
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63
github.com/dolthub/swiss v0.1.0
github.com/goccy/go-json v0.10.2
Expand Down
4 changes: 2 additions & 2 deletions go/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,8 @@ github.com/dolthub/fslock v0.0.3 h1:iLMpUIvJKMKm92+N1fmHVdxJP5NdyDK5bK7z7Ba2s2U=
github.com/dolthub/fslock v0.0.3/go.mod h1:QWql+P17oAAMLnL4HGB5tiovtDuAjdDTPbuqx7bYfa0=
github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e h1:kPsT4a47cw1+y/N5SSCkma7FhAPw7KeGmD6c9PBZW9Y=
github.com/dolthub/go-icu-regex v0.0.0-20230524105445-af7e7991c97e/go.mod h1:KPUcpx070QOfJK1gNe0zx4pA5sicIK1GMikIGLKC168=
github.com/dolthub/go-mysql-server v0.18.2-0.20240724232948-d95a59fdb2b5 h1:os6CtaIf7xsCm2giSc8oGC34JV0OnPA/aOrZq23np/4=
github.com/dolthub/go-mysql-server v0.18.2-0.20240724232948-d95a59fdb2b5/go.mod h1:P6bG0p+3mH4LS4DLo3BySh10ZJTDqgWyfWBu8gGE3eU=
github.com/dolthub/go-mysql-server v0.18.2-0.20240725184149-27ed7002e0d9 h1:zi9hcxnydT3zdvSKu5qTLyMcvT1Xmissbfv3Z58rYUA=
github.com/dolthub/go-mysql-server v0.18.2-0.20240725184149-27ed7002e0d9/go.mod h1:P6bG0p+3mH4LS4DLo3BySh10ZJTDqgWyfWBu8gGE3eU=
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63 h1:OAsXLAPL4du6tfbBgK0xXHZkOlos63RdKYS3Sgw/dfI=
github.com/dolthub/gozstd v0.0.0-20240423170813-23a2903bca63/go.mod h1:lV7lUeuDhH5thVGDCKXbatwKy2KW80L4rMT46n+Y2/Q=
github.com/dolthub/ishell v0.0.0-20240701202509-2b217167d718 h1:lT7hE5k+0nkBdj/1UOSFwjWpNxf+LCApbRHgnCA17XE=
Expand Down
12 changes: 12 additions & 0 deletions go/store/nbs/archive.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,18 @@ const ( // afr = Archive FooteR
afrSigOffset = afrVersionOffset + 1
)

// Archive Metadata Data Keys name the fields of the archive metadata that is stored in the footer. The metadata
// holds semi-structured information about the archive. It is stored in JSON format, and all values are strings.
const ( // amdk = Archive Metadata Data Key
// amdkDoltVersion is the key for the version of Dolt that created the archive.
amdkDoltVersion = "dolt_version"
// amdkOriginTableFile is the key for the id of the table file that the archive was created from. This value can
// be used during the reverse process to quickly get back to the original table file if it is still available.
amdkOriginTableFile = "origin_table_file"
// amdkConversionTime is the key for the timestamp of when the archive was created.
amdkConversionTime = "conversion_time"
)

var ErrInvalidChunkRange = errors.New("invalid chunk range")
var ErrInvalidDictionaryRange = errors.New("invalid dictionary range")
var ErrInvalidFileSignature = errors.New("invalid file signature")
Expand Down
Loading

0 comments on commit bc48aac

Please sign in to comment.