diff --git a/db/revtree.go b/db/revtree.go index b3e8aefa5e..51524e2491 100644 --- a/db/revtree.go +++ b/db/revtree.go @@ -30,6 +30,11 @@ type RevInfo struct { Deleted bool Body []byte Channels base.Set + depth uint32 +} + +func (rev RevInfo) IsRoot() bool { + return rev.Parent == "" } func (rev RevInfo) IsRoot() bool { @@ -167,6 +172,17 @@ func (tree RevTree) getHistory(revid string) []string { // Returns the leaf revision IDs (those that have no children.) func (tree RevTree) GetLeaves() []string { + acceptAllLeavesFilter := func(revId string) bool { + return true + } + return tree.GetLeavesFiltered(acceptAllLeavesFilter) +} + + + + +func (tree RevTree) GetLeavesFiltered(filter func(revId string) bool) []string { + isParent := map[string]bool{} for _, info := range tree { isParent[info.Parent] = true @@ -174,10 +190,13 @@ func (tree RevTree) GetLeaves() []string { leaves := make([]string, 0, len(tree)-len(isParent)+1) for revid := range tree { if !isParent[revid] { - leaves = append(leaves, revid) + if filter(revid) { + leaves = append(leaves, revid) + } } } return leaves + } func (tree RevTree) forEachLeaf(callback func(*RevInfo)) { @@ -306,51 +325,150 @@ func (tree RevTree) copy() RevTree { return result } -// Removes older ancestor nodes from the tree; if there are no conflicts, the tree's depth will be -// <= maxDepth. The revision named by `keepRev` will not be pruned (unless `keepRev` is empty.) -// Returns the number of nodes pruned. +// Prune all branches so that they have a maximum depth of maxdepth. +// There is one exception to that, which is tombstoned (deleted) branches that have been deemed "too old" +// to keep around. 
The criteria for "too old" is as follows: +// +// - Find the generation of the shortest non-tombstoned branch (eg, 100) +// - Calculate the tombstone generation threshold based on this formula: +// tombstoneGenerationThreshold = genShortestNonTSBranch - maxDepth +// Ex: if maxDepth is 20, and genShortestNonTSBranch is 100, then tombstoneGenerationThreshold will be 80 +// - Check each tombstoned branch, and if the leaf node on that branch has a generation older (less) than +// tombstoneGenerationThreshold, then remove all nodes on that branch up to the root of the branch. func (tree RevTree) pruneRevisions(maxDepth uint32, keepRev string) (pruned int) { + if len(tree) <= int(maxDepth) { - return 0 + return } - // Find the minimum generation that has a non-deleted leaf: - minLeafGen := math.MaxInt32 - maxDeletedLeafGen := 0 - for _, revid := range tree.GetLeaves() { - gen := genOfRevID(revid) - if tree[revid].Deleted { - if gen > maxDeletedLeafGen { - maxDeletedLeafGen = gen + computedMaxDepth, leaves := tree.computeDepthsAndFindLeaves() + if computedMaxDepth <= maxDepth { + return + } + + // Calculate tombstoneGenerationThreshold + genShortestNonTSBranch := tree.FindShortestNonTombstonedBranch() + tombstoneGenerationThreshold := genShortestNonTSBranch - int(maxDepth) + + // Delete nodes whose depth is greater than maxDepth: + for revid, node := range tree { + if node.depth > maxDepth { + delete(tree, revid) + pruned++ + } + } + + // Snip dangling Parent links: + if pruned > 0 { + for _, node := range tree { + if node.Parent != "" { + if _, found := tree[node.Parent]; !found { + node.Parent = "" + } } - } else if gen > 0 && gen < minLeafGen { - minLeafGen = gen } } - if minLeafGen == math.MaxInt32 { - // If there are no non-deleted leaves, use the deepest leaf's generation - minLeafGen = maxDeletedLeafGen + // Delete any tombstoned branches that are too old + for _, leafRevId := range leaves { + leaf := tree[leafRevId] + if !leaf.Deleted { // Ignore 
non-tombstoned leaves + continue + } + leafGeneration, _ := ParseRevID(leaf.ID) + if leafGeneration < tombstoneGenerationThreshold { + pruned += tree.DeleteBranch(leaf) + } + } + + return + +} + +func (tree RevTree) DeleteBranch(node *RevInfo) (pruned int) { + + revId := node.ID + + for node := tree[revId]; node != nil; node = tree[node.Parent] { + delete(tree, node.ID) + pruned++ + } + + return pruned + +} + +func (tree RevTree) computeDepthsAndFindLeaves() (maxDepth uint32, leaves []string) { + + // Performance is somewhere between O(n) and O(n^2), depending on the branchiness of the tree. + for _, info := range tree { + info.depth = math.MaxUint32 } - minGenToKeep := int(minLeafGen) - int(maxDepth) + 1 - if gen := genOfRevID(keepRev); gen > 0 && gen < minGenToKeep { - // Make sure keepRev's generation isn't pruned - minGenToKeep = gen + // Walk from each leaf to its root, assigning ancestors consecutive depths, + // but stopping if we'd increase an already-visited ancestor's depth: + leaves = tree.GetLeaves() + for _, revid := range leaves { + + var depth uint32 = 1 + for node := tree[revid]; node != nil; node = tree[node.Parent] { + if node.depth <= depth { + break // This hierarchy already has a shorter path to another leaf + } + node.depth = depth + if depth > maxDepth { + maxDepth = depth + } + depth++ + } } + return maxDepth, leaves + +} - // Delete nodes whose generation is less than minGenToKeep: - if minGenToKeep > 1 { - for revid, node := range tree { - if gen := genOfRevID(revid); gen < minGenToKeep { - delete(tree, revid) - pruned++ - } else if gen == minGenToKeep { - node.Parent = "" +// Find the minimum generation that has a non-deleted leaf. 
For example in this rev tree: +// http://cbmobile-bucket.s3.amazonaws.com/diagrams/example-sync-gateway-revtrees/three_branches.png +// The minimum generation that has a non-deleted leaf is "7-non-winning unresolved" +func (tree RevTree) FindShortestNonTombstonedBranch() (generation int) { + return tree.FindShortestNonTombstonedBranchFromLeaves(tree.GetLeaves()) +} + +func (tree RevTree) FindShortestNonTombstonedBranchFromLeaves(leaves []string) (generation int) { + + genShortestNonTSBranch := math.MaxInt32 + for _, revid := range leaves { + + revInfo := tree[revid] + if revInfo.Deleted { + // This is a tombstoned branch, skip it + continue + } + gen := genOfRevID(revid) + if gen > 0 && gen < genShortestNonTSBranch { + genShortestNonTSBranch = gen + } + } + return genShortestNonTSBranch +} + +// Find the generation of the longest deleted branch. For example in this rev tree: +// http://cbmobile-bucket.s3.amazonaws.com/diagrams/example-sync-gateway-revtrees/four_branches_two_tombstoned.png +// The longest deleted branch has a generation of 10 +func (tree RevTree) FindLongestTombstonedBranch() (generation int) { + return tree.FindLongestTombstonedBranchFromLeaves(tree.GetLeaves()) +} + +func (tree RevTree) FindLongestTombstonedBranchFromLeaves(leaves []string) (generation int) { + genLongestTSBranch := 0 + for _, revid := range leaves { + gen := genOfRevID(revid) + if tree[revid].Deleted { + if gen > genLongestTSBranch { + genLongestTSBranch = gen } } } - return + return genLongestTSBranch } // Render the RevTree in Graphviz Dot format, which can then be used to generate a PNG diagram diff --git a/db/revtree_test.go b/db/revtree_test.go index 9fc3cb3a8e..c810d52fe8 100644 --- a/db/revtree_test.go +++ b/db/revtree_test.go @@ -21,20 +21,151 @@ import ( "github.com/couchbaselabs/go.assert" ) +// 1-one -- 2-two -- 3-three var testmap = RevTree{"3-three": {ID: "3-three", Parent: "2-two", Body: []byte("{}")}, "2-two": {ID: "2-two", Parent: "1-one", Channels: 
base.SetOf("ABC", "CBS")}, "1-one": {ID: "1-one", Channels: base.SetOf("ABC")}} -// 1-one -- 2-two -- 3-three - +// / 3-three +// 1-one -- 2-two +// \ 3-drei var branchymap = RevTree{"3-three": {ID: "3-three", Parent: "2-two"}, "2-two": {ID: "2-two", Parent: "1-one"}, "1-one": {ID: "1-one"}, "3-drei": {ID: "3-drei", Parent: "2-two"}} -// / 3-three -// 1-one -- 2-two -// \ 3-drei +var multiroot = RevTree{"3-a": {ID: "3-a", Parent: "2-a"}, + "2-a": {ID: "2-a", Parent: "1-a"}, + "1-a": {ID: "1-a"}, + "7-b": {ID: "7-b", Parent: "6-b"}, + "6-b": {ID: "6-b"}, +} + +type BranchSpec struct { + NumRevs int + LastRevisionIsTombstone bool + Digest string +} + +// / 3-a -- 4-a -- 5-a ...... etc (winning branch) +// 1-a -- 2-a +// \ 3-b -- 4-b ... etc (losing branch) +// +// NOTE: the 1-a -- 2-a unconflicted branch can be longer, depending on value of unconflictedBranchNumRevs +func getTwoBranchTestRevtree1(unconflictedBranchNumRevs, winningBranchNumRevs, losingBranchNumRevs int, tombstoneLosingBranch bool) RevTree { + + branchSpecs := []BranchSpec{ + { + NumRevs: losingBranchNumRevs, + Digest: "b", + LastRevisionIsTombstone: tombstoneLosingBranch, + }, + } + + return getMultiBranchTestRevtree1(unconflictedBranchNumRevs, winningBranchNumRevs, branchSpecs) + +} + +// / 3-a -- 4-a -- 5-a ...... etc (winning branch) +// 1-a -- 2-a +// \ 3-b -- 4-b ... etc (losing branch #1) +// \ 3-c -- 4-c ... etc (losing branch #2) +// \ 3-d -- 4-d ... 
etc (losing branch #n) +// +// NOTE: the 1-a -- 2-a unconflicted branch can be longer, depending on value of unconflictedBranchNumRevs +func getMultiBranchTestRevtree1(unconflictedBranchNumRevs, winningBranchNumRevs int, losingBranches []BranchSpec) RevTree { + + if unconflictedBranchNumRevs < 1 { + panic(fmt.Sprintf("Must have at least 1 unconflictedBranchNumRevs")) + } + + winningBranchDigest := "winning" + + const testJSON = `{ + "revs":[ + "1-winning" + ], + "parents":[ + -1 + ], + "channels":[ + null + ] + }` + + revTree := RevTree{} + if err := json.Unmarshal([]byte(testJSON), &revTree); err != nil { + panic(fmt.Sprintf("Error: %v", err)) + } + + if unconflictedBranchNumRevs > 1 { + // Add revs to unconflicted branch + addRevs( + revTree, + "1-winning", + unconflictedBranchNumRevs-1, + winningBranchDigest, + ) + } + + if winningBranchNumRevs > 0 { + + // Figure out which generation the conflicting branches will start at + generation := unconflictedBranchNumRevs + + // Figure out the starting revision id on winning and losing branches + winningBranchStartRev := fmt.Sprintf("%d-%s", generation, winningBranchDigest) + + // Add revs to winning branch + addRevs( + revTree, + winningBranchStartRev, + winningBranchNumRevs, + winningBranchDigest, + ) + + } + + for _, losingBranchSpec := range losingBranches { + + if losingBranchSpec.NumRevs > 0 { + + // Figure out which generation the conflicting branches will start at + generation := unconflictedBranchNumRevs + + losingBranchStartRev := fmt.Sprintf("%d-%s", generation, winningBranchDigest) // Start on last revision of the non-conflicting branch + + // Add revs to losing branch + addRevs( + revTree, + losingBranchStartRev, + losingBranchSpec.NumRevs, // Subtract 1 since we already added initial + losingBranchSpec.Digest, + ) + + generation += losingBranchSpec.NumRevs + + if losingBranchSpec.LastRevisionIsTombstone { + + newRevId := fmt.Sprintf("%v-%v", generation+1, losingBranchSpec.Digest) + parentRevId := 
fmt.Sprintf("%v-%v", generation, losingBranchSpec.Digest) + + revInfo := RevInfo{ + ID: newRevId, + Parent: parentRevId, + Deleted: true, + } + revTree.addRevision(revInfo) + + } + + } + + } + + return revTree + +} func testUnmarshal(t *testing.T, jsonString string) RevTree { gotmap := RevTree{} @@ -43,6 +174,29 @@ func testUnmarshal(t *testing.T, jsonString string) RevTree { return gotmap } +// Make sure that the getMultiBranchTestRevtree1() helper works as expected +// (added in reaction to bug where it created broken trees/forests) +func TestGetMultiBranchTestRevtree(t *testing.T) { + + branchSpecs := []BranchSpec{ + { + NumRevs: 60, + Digest: "left", + LastRevisionIsTombstone: false, + }, + { + NumRevs: 25, + Digest: "right", + LastRevisionIsTombstone: true, + }, + } + revTree := getMultiBranchTestRevtree1(50, 100, branchSpecs) + leaves := revTree.GetLeaves() + sort.Strings(leaves) + assert.DeepEquals(t, leaves, []string{"110-left", "150-winning", "76-right"}) + +} + func TestRevTreeUnmarshalOldFormat(t *testing.T) { const testJSON = `{"revs": ["3-three", "2-two", "1-one"], "parents": [1, 2, -1], "bodies": ["{}", "", ""], "channels": [null, ["ABC", "CBS"], ["ABC"]]}` gotmap := testUnmarshal(t, testJSON) @@ -140,11 +294,29 @@ func TestRevTreeWinningRev(t *testing.T) { } func TestPruneRevisions(t *testing.T) { - tempmap := branchymap.copy() + + tempmap := testmap.copy() + tempmap.computeDepthsAndFindLeaves() + assert.Equals(t, tempmap["3-three"].depth, uint32(1)) + assert.Equals(t, tempmap["2-two"].depth, uint32(2)) + assert.Equals(t, tempmap["1-one"].depth, uint32(3)) + + tempmap = branchymap.copy() + tempmap.computeDepthsAndFindLeaves() + assert.Equals(t, tempmap["3-three"].depth, uint32(1)) + assert.Equals(t, tempmap["3-drei"].depth, uint32(1)) + assert.Equals(t, tempmap["2-two"].depth, uint32(2)) + assert.Equals(t, tempmap["1-one"].depth, uint32(3)) + tempmap["4-vier"] = &RevInfo{ID: "4-vier", Parent: "3-drei"} - // / 3-three - // 1-one -- 2-two - // \ 3-drei 
-- 4-vier + tempmap.computeDepthsAndFindLeaves() + assert.Equals(t, tempmap["4-vier"].depth, uint32(1)) + assert.Equals(t, tempmap["3-drei"].depth, uint32(2)) + assert.Equals(t, tempmap["3-three"].depth, uint32(1)) + assert.Equals(t, tempmap["2-two"].depth, uint32(2)) + assert.Equals(t, tempmap["1-one"].depth, uint32(3)) + + // Prune: assert.Equals(t, tempmap.pruneRevisions(1000, ""), 0) assert.Equals(t, tempmap.pruneRevisions(3, ""), 0) assert.Equals(t, tempmap.pruneRevisions(2, ""), 1) @@ -152,42 +324,251 @@ func TestPruneRevisions(t *testing.T) { assert.Equals(t, tempmap["1-one"], (*RevInfo)(nil)) assert.Equals(t, tempmap["2-two"].Parent, "") - // Make sure leaves are never pruned: (note: by now 1-one is already gone) - assert.Equals(t, tempmap.pruneRevisions(1, ""), 1) - assert.Equals(t, len(tempmap), 3) + // Make sure leaves are never pruned: + assert.Equals(t, tempmap.pruneRevisions(1, ""), 2) + assert.Equals(t, len(tempmap), 2) assert.True(t, tempmap["3-three"] != nil) assert.Equals(t, tempmap["3-three"].Parent, "") assert.True(t, tempmap["4-vier"] != nil) - assert.Equals(t, tempmap["4-vier"].Parent, "3-drei") - assert.Equals(t, tempmap["3-drei"].Parent, "") + assert.Equals(t, tempmap["4-vier"].Parent, "") - // Make sure old merged conflicts don't prevent pruning: - tempmap = branchymap.copy() - tempmap["4-vier"] = &RevInfo{ID: "4-vier", Parent: "3-drei", Deleted: true} - tempmap["4-four"] = &RevInfo{ID: "4-four", Parent: "3-three"} - tempmap["5-five"] = &RevInfo{ID: "5-five", Parent: "4-four"} - tempmap["6-six"] = &RevInfo{ID: "6-six", Parent: "5-five"} - // / 3-three -- 4-four -- 5-five -- 6-six - // 1-one -- 2-two - // \ 3-drei -- [4-vier] - assert.Equals(t, tempmap.pruneRevisions(3, "1-one"), 0) - assert.Equals(t, tempmap.pruneRevisions(3, "2-two"), 1) - assert.Equals(t, tempmap.pruneRevisions(3, ""), 3) - assert.Equals(t, len(tempmap), 4) - assert.Equals(t, tempmap.pruneRevisions(2, ""), 2) - assert.Equals(t, len(tempmap), 2) - assert.Equals(t, 
tempmap["5-five"].Parent, "") - assert.Equals(t, tempmap["6-six"].Parent, "5-five") - // Check what happens when all revs are deleted: - tempmap = branchymap.copy() - tempmap["3-three"].Deleted = true - tempmap["3-drei"].Deleted = true - // / [3-three] - // 1-one -- 2-two - // \ [3-drei] - assert.Equals(t, tempmap.pruneRevisions(3, ""), 0) - assert.Equals(t, tempmap.pruneRevisions(2, ""), 1) +} + + +func TestPruneRevsSingleBranch(t *testing.T) { + + numRevs := 100 + + revTree := getMultiBranchTestRevtree1(numRevs, 0, []BranchSpec{}) + + maxDepth := uint32(20) + expectedNumPruned := numRevs - int(maxDepth) + + numPruned := revTree.pruneRevisions(maxDepth, "") + assert.Equals(t, numPruned, expectedNumPruned) + +} + +func TestPruneRevsOneWinningOneNonwinningBranch(t *testing.T) { + + branchSpecs := []BranchSpec{ + { + NumRevs: 1, + Digest: "non-winning unresolved", + LastRevisionIsTombstone: false, + }, + } + + unconflictedBranchNumRevs := 2 + winningBranchNumRevs := 4 + + revTree := getMultiBranchTestRevtree1(unconflictedBranchNumRevs, winningBranchNumRevs, branchSpecs) + + maxDepth := uint32(2) + + revTree.pruneRevisions(maxDepth, "") + + assert.Equals(t, revTree.LongestBranch(), int(maxDepth)) + + +} + +func TestPruneRevsOneWinningOneOldTombstonedBranch(t *testing.T) { + + branchSpecs := []BranchSpec{ + { + NumRevs: 1, + Digest: "non-winning tombstoned", + LastRevisionIsTombstone: true, + }, + } + + unconflictedBranchNumRevs := 1 + winningBranchNumRevs := 5 + + revTree := getMultiBranchTestRevtree1(unconflictedBranchNumRevs, winningBranchNumRevs, branchSpecs) + + maxDepth := uint32(2) + + revTree.pruneRevisions(maxDepth, "") + + assert.True(t, revTree.LongestBranch() == int(maxDepth)) + + // we shouldn't have any tombstoned branches, since the tombstoned branch was so old + // it should have been pruned away + assert.Equals(t, revTree.FindLongestTombstonedBranch(), 0) + + +} + +func TestPruneRevsOneWinningOneOldAndOneRecentTombstonedBranch(t *testing.T) { + + 
branchSpecs := []BranchSpec{ + { + NumRevs: 1, + Digest: "non-winning low-gen tombstoned", + LastRevisionIsTombstone: true, + }, + { + NumRevs: 4, + Digest: "non-winning high-gen tombstoned", + LastRevisionIsTombstone: true, + }, + } + + unconflictedBranchNumRevs := 1 + winningBranchNumRevs := 5 + + revTree := getMultiBranchTestRevtree1(unconflictedBranchNumRevs, winningBranchNumRevs, branchSpecs) + + maxDepth := uint32(2) + + revTree.pruneRevisions(maxDepth, "") + + assert.True(t, revTree.LongestBranch() == int(maxDepth)) + + // the "non-winning high-gen tombstoned" branch should still be around, but pruned to maxDepth + tombstonedLeaves := revTree.GetTombstonedLeaves() + assert.Equals(t, len(tombstonedLeaves), 1) + tombstonedLeaf := tombstonedLeaves[0] + + tombstonedBranch := revTree.getHistory(tombstonedLeaf) + assert.Equals(t, len(tombstonedBranch), int(maxDepth)) + + // The generation of the longest deleted branch is 6: + // 1 unconflictedBranchNumRevs + // + + // 4 revs in branchspec + // + + // 1 extra rev in branchspec since LastRevisionIsTombstone (that variable name is misleading) + expectedGenLongestTSd := 6 + assert.Equals(t, revTree.FindLongestTombstonedBranch(), expectedGenLongestTSd) + +} + + + +func TestGenerationShortestNonTombstonedBranch(t *testing.T) { + + branchSpecs := []BranchSpec{ + { + NumRevs: 4, + Digest: "non-winning unresolved", + LastRevisionIsTombstone: false, + }, + { + NumRevs: 2, + Digest: "non-winning tombstoned", + LastRevisionIsTombstone: true, + }, + } + + revTree := getMultiBranchTestRevtree1(3, 7, branchSpecs) + + generationShortestNonTombstonedBranch := revTree.FindShortestNonTombstonedBranch() + + // The "non-winning unresolved" branch has 7 revisions due to: + // 3 unconflictedBranchNumRevs + // + + // 4 from its BranchSpec + // Since the "non-winning tombstoned" is a deleted branch, it will be ignored by FindShortestNonTombstonedBranch() + // Also, the winning branch has more revisions (10 total), and so will be 
ignored too + expectedGenerationShortestNonTombstonedBranch := 7 + + assert.Equals(t, generationShortestNonTombstonedBranch, expectedGenerationShortestNonTombstonedBranch) + +} + + +func TestGenerationLongestTombstonedBranch(t *testing.T) { + + branchSpecs := []BranchSpec{ + { + NumRevs: 4, + Digest: "non-winning unresolved", + LastRevisionIsTombstone: false, + }, + { + NumRevs: 2, + Digest: "non-winning tombstoned #1", + LastRevisionIsTombstone: true, + }, + { + NumRevs: 100, + Digest: "non-winning tombstoned #2", + LastRevisionIsTombstone: true, + }, + } + + revTree := getMultiBranchTestRevtree1(3, 7, branchSpecs) + generationLongestTombstonedBranch := revTree.FindLongestTombstonedBranch() + + // The generation of the longest deleted branch is: + // 3 unconflictedBranchNumRevs + // + + // 100 revs in branchspec + // + + // 1 extra rev in branchspec since LastRevisionIsTombstone (that variable name is misleading) + expectedGenerationLongestTombstonedBranch := 3 + 100 + 1 + + assert.Equals(t, generationLongestTombstonedBranch, expectedGenerationLongestTombstonedBranch) + + +} + + +// Tests for updated pruning algorithm, post https://github.com/couchbase/sync_gateway/issues/2651 +func TestPruneRevisionsPostIssue2651ThreeBranches(t *testing.T) { + + // Try large rev tree with multiple branches + branchSpecs := []BranchSpec{ + { + NumRevs: 60, + Digest: "non-winning unresolved", + LastRevisionIsTombstone: false, + }, + { + NumRevs: 25, + Digest: "non-winning tombstoned", + LastRevisionIsTombstone: true, + }, + } + revTree := getMultiBranchTestRevtree1(50, 100, branchSpecs) + + maxDepth := uint32(50) + numPruned := revTree.pruneRevisions(maxDepth, "") + fmt.Printf("numPruned: %v", numPruned) + fmt.Printf("LongestBranch: %v", revTree.LongestBranch()) + + assert.True(t, uint32(revTree.LongestBranch()) == maxDepth) + +} + +func TestLongestBranch1(t *testing.T) { + + branchSpecs := []BranchSpec{ + { + NumRevs: 60, + Digest: "non-winning unresolved", + 
LastRevisionIsTombstone: false, + }, + { + NumRevs: 25, + Digest: "non-winning tombstoned", + LastRevisionIsTombstone: true, + }, + } + revTree := getMultiBranchTestRevtree1(50, 100, branchSpecs) + + assert.True(t, revTree.LongestBranch() == 150) + +} + +func TestLongestBranch2(t *testing.T) { + + assert.True(t, multiroot.LongestBranch() == 3) + } func TestParseRevisions(t *testing.T) { @@ -237,7 +618,7 @@ func TestTrimEncodedRevisionsToAncestor(t *testing.T) { assert.True(t, result) assert.DeepEquals(t, trimmedRevs, Body{"start": 5, "ids": []string{"huey", "dewey", "louie"}}) - result, trimmedRevs = trimEncodedRevisionsToAncestor(trimmedRevs, []string{"3-walter", "3-louie", "1-fooey"}, 3) + result, trimmedRevs = trimEncodedRevisionsToAncestor(trimmedRevs, []string{"3-walter", "3-louie", "1-fooey"}, 3) assert.True(t, result) assert.DeepEquals(t, trimmedRevs, Body{"start": 5, "ids": []string{"huey", "dewey", "louie"}}) @@ -257,6 +638,37 @@ func TestTrimEncodedRevisionsToAncestor(t *testing.T) { assert.DeepEquals(t, trimmedRevs, Body{"start": 5, "ids": []string{"huey", "dewey"}}) } +//////// BENCHMARK: + +func BenchmarkRevTreePruning(b *testing.B) { + + // Try large rev tree with multiple branches + branchSpecs := []BranchSpec{ + { + NumRevs: 60, + Digest: "non-winning unresolved", + LastRevisionIsTombstone: false, + }, + { + NumRevs: 25, + Digest: "non-winning tombstoned", + LastRevisionIsTombstone: true, + }, + } + + b.ResetTimer() + + for i := 0; i < b.N; i++ { + + b.StopTimer() + revTree := getMultiBranchTestRevtree1(50, 100, branchSpecs) + b.StartTimer() + + revTree.pruneRevisions(50, "") + } + +} + //////// HELPERS: func assertFailed(t testing.TB, message string) { @@ -292,3 +704,100 @@ func assertFalse(t *testing.T, failure bool, message string) { assertFailed(t, message) } } + +func addRevs(revTree RevTree, startingParentRevId string, numRevs int, revDigest string) { + + docSizeBytes := 1024 * 5 + body := createBodyContentAsMapWithSize(docSizeBytes) + 
bodyBytes, err := json.Marshal(body) + if err != nil { + panic(fmt.Sprintf("Error: %v", err)) + } + + channels := base.SetOf("ABC", "CBS") + + generation, _ := ParseRevID(startingParentRevId) + + for i := 0; i < numRevs; i++ { + + newRevId := fmt.Sprintf("%v-%v", generation+1, revDigest) + parentRevId := "" + if i == 0 { + parentRevId = startingParentRevId + } else { + parentRevId = fmt.Sprintf("%v-%v", generation, revDigest) + } + + revInfo := RevInfo{ + ID: newRevId, + Parent: parentRevId, + Body: bodyBytes, + Deleted: false, + Channels: channels, + } + revTree.addRevision(revInfo) + + generation += 1 + + } + +} + +func (tree RevTree) GetTombstonedLeaves() []string { + onlyTombstonedLeavesFilter := func(revId string) bool { + revInfo := tree[revId] + return revInfo.Deleted + } + return tree.GetLeavesFiltered(onlyTombstonedLeavesFilter) + +} + +// Find the length of the longest branch +func (tree RevTree) LongestBranch() int { + + longestBranch := 0 + + leafProcessor := func(leaf *RevInfo) { + + lengthOfBranch := 0 + + // Walk up the tree until we find a root, and append each node + node := leaf + for { + + // Increment length of branch + lengthOfBranch += 1 + + // Reached a root, we're done -- if this branch is longer than the + // current longest branch, record branch length as longestBranch + if node.IsRoot() { + if lengthOfBranch > longestBranch { + longestBranch = lengthOfBranch + } + break + } + + // Walk up the branch to the parent node + node = tree[node.Parent] + + } + } + + tree.forEachLeaf(leafProcessor) + + return longestBranch + +} + + +// Create body content as map of 100 byte entries. 
Rounds up to the nearest 100 bytes +func createBodyContentAsMapWithSize(docSizeBytes int) map[string]string { + + numEntries := int(docSizeBytes/100) + 1 + body := make(map[string]string, numEntries) + for i := 0; i < numEntries; i++ { + key := fmt.Sprintf("field_%d", i) + body[key] = "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + } + return body +}