Ordered edges (#11)

Rework to provide a number of improvements - Add bytes-based radix tree - Iteration done in lexical order making iteration deterministic - Zero allocation for all read operations, including walk - Remove mostly useless ability to skip during iteration - Get returns boolean to indicate the presence of value (can differentiate between no value and nil value) - No more stringer for walk function, provides a string. - Speed improvements - Remove Runes tree since Bytes makes it unneeded
gammazero · Jun 13, 2021 · fe38970 · fe38970
1 parent 48727be
commit fe38970
Show file tree

Hide file tree

Showing 11 changed files with 1,269 additions and 1,117 deletions.
diff --git a/README.md b/README.md
@@ -6,13 +6,19 @@
 [![codecov](https://codecov.io/gh/gammazero/radixtree/branch/master/graph/badge.svg)](https://codecov.io/gh/gammazero/radixtree)
 [![License](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
 
-Package `radixtree` implements multiple forms of an Adaptive [Radix Tree](https://en.wikipedia.org/wiki/Radix_tree), aka compressed [trie](https://en.wikipedia.org/wiki/Trie) or compact prefix tree.  This data structure is useful to quickly lookup data, using only the portion of the key that prefixes existing data.  It is also useful for finding items whose keys are a prefix of a search key (i.e. are found along the way when retrieving an item identified by a key), or when finding items whose keys are prefixed by the serach key (i.e. are found at or after a key).  When different values are stored using keys that have a common prefix, the common part of the key is only stored once.  Consider this when keys are similar to an OID, filepath, geohash, network address, etc.
+Package `radixtree` implements multiple forms of an Adaptive [Radix Tree](https://en.wikipedia.org/wiki/Radix_tree), aka compressed [trie](https://en.wikipedia.org/wiki/Trie) or compact prefix tree.  This data structure is useful to quickly look up data by key, find data whose keys have a common prefix, or find data whose keys are a prefix (i.e. found along the way) of a search key.
 
-This radix tree is adaptive in the sense that nodes are not constant size, having as few or many children as needed, up to the number of different key segments to traverse to the next branch or value.
+The implementations are optimized for Get performance and allocate 0 bytes of heap memory for any read operation (Get, Walk, WalkPath, etc.); therefore no garbage to collect.  Once a radix tree is built, it can be repeatedly searched quickly. Concurrent searches are safe since these do not modify the data structure. Access is not synchronized (not concurrent safe with writes), allowing the caller to synchronize, if needed, in whatever manner works best for the application.
 
-An iterator for each type of radix tree allows a tree to be traversed one key segment at a time.  This is useful for incremental lookups of partial keys.  Iterators can be copied in order to branched a search, and copies iterated concurrently.
+This radix tree offers the following features:
 
-The implementations are optimized for Get performance and allocates 0 bytes of heap memory per Get; therefore no garbage to collect.  Once the radix tree is built, it can be repeatedly searched quickly. Concurrent searches are safe since these do not modify the radixtree. Access is not synchronized (not concurrent safe with writes), allowing the caller to synchronize, if needed, in whatever manner works best for the application.
+- Multiple types of radix tree: Bytes, Paths
+- Efficient: Operations for all types of radix tree are O(k).  Zero memory allocation for all read operations.
+- Compact: When values are stored using keys that have a common prefix, the common part of the key is only stored once.  Consider this when keys are similar to a timestamp, OID, filepath, geohash, network address, etc. Nodes that do not branch or contain values are compressed out of the tree.
+- Adaptive: This radix tree is adaptive in the sense that nodes are not constant size, having only as many children as are needed, from zero to the maximum possible number of different key segments.
+- Iterators: An iterator for each type of radix tree allows a tree to be traversed one key segment at a time.  This is useful for incremental lookup.  Iterators can be copied in order to branch a search, and the copies can be iterated concurrently.
+- Able to store nil values: Get differentiates between nil value and missing value.
+- Ordered iteration: Walking and iterating the tree is done in lexical order, making the output deterministic.
 
 ## Install
 
@@ -31,57 +37,49 @@ import (
 )
 
 func main() {
-    rt := new(radixtree.Runes)
+    rt := radixtree.New()
     rt.Put("tomato", "TOMATO")
     rt.Put("tom", "TOM")
     rt.Put("tommy", "TOMMY")
     rt.Put("tornado", "TORNADO")
 
-    val := rt.Get("tom")
-    fmt.Println("Found", val)
+    val, found := rt.Get("tom")
+    if found {
+        fmt.Println("Found", val)
+    }
     // Output: Found TOM
 
     // Find all items whose keys start with "tom"
-    rt.Walk("tom", func(key fmt.Stringer, value interface{}) error {
+    rt.Walk("tom", func(key string, value interface{}) bool {
         fmt.Println(value)
-        return nil
+        return false
     })
     // Output:
     // TOM
     // TOMATO
     // TOMMY
 
     // Find all items whose keys are a prefix of "tomato"
-    rt.WalkPath("tomato", func(key string, value interface{}) error {
+    rt.WalkPath("tomato", func(key string, value interface{}) bool {
         fmt.Println(value)
-        return nil
+        return false
     })
     // Output:
     // TOM
     // TOMATO
 
-    // Find each item whose key is a prefix of "tomato", using iterator
-    iter := rt.NewIterator()
-    for _, r := range "tomato" {
-        if !iter.Next(r) {
-            break
-        }
-        if val := iter.Value(); val != nil {
-            fmt.Println(val)
-        }
-    }
-    // Output:
-    // TOM
-    // TOMATO
-
     if rt.Delete("tom") {
         fmt.Println("Deleted tom")
     }
     // Output: Deleted tom
 
-    val = rt.Get("tom")
-    fmt.Println("Found", val)
-    // Output: Found <nil>
+    val, found = rt.Get("tom")
+    if found {
+        fmt.Println("Found", val)
+    } else {
+        fmt.Println("not found")
+    }
+    // Output: not found
 }
 ```
 

diff --git a/bench_test.go b/bench_test.go
@@ -2,7 +2,6 @@ package radixtree
 
 import (
 	"bufio"
-	"fmt"
 	"os"
 	"testing"
 )
@@ -15,36 +14,50 @@ const (
 //
 // Benchmarks
 //
-func BenchmarkWordsRunesPut(b *testing.B) {
-	benchmarkPut(wordsPath, b)
+func BenchmarkWordsMapGet(b *testing.B) {
+	benchmarkMapToCompareWithGet(wordsPath, b)
 }
 
-func BenchmarkWordsRunesGet(b *testing.B) {
-	benchmarkGet(wordsPath, b)
+func BenchmarkWordsMapPut(b *testing.B) {
+	benchmarkMapToCompareWithPut(wordsPath, b)
 }
 
-func BenchmarkWordsRunesWalk(b *testing.B) {
-	benchmarkWalk(wordsPath, b)
+func BenchmarkWordsBytesGet(b *testing.B) {
+	benchmarkBytesGet(wordsPath, b)
 }
 
-func BenchmarkWordsRunesWalkPath(b *testing.B) {
-	benchmarkWalkPath(wordsPath, b)
+func BenchmarkWordsBytesPut(b *testing.B) {
+	benchmarkBytesPut(wordsPath, b)
 }
 
-func BenchmarkWeb2aRunesPut(b *testing.B) {
-	benchmarkPut(web2aPath, b)
+func BenchmarkWordsBytesWalk(b *testing.B) {
+	benchmarkBytesWalk(wordsPath, b)
 }
 
-func BenchmarkWeb2aRunesGet(b *testing.B) {
-	benchmarkGet(web2aPath, b)
+func BenchmarkWordsBytesWalkPath(b *testing.B) {
+	benchmarkBytesWalkPath(wordsPath, b)
 }
 
-func BenchmarkWeb2aRunesWalk(b *testing.B) {
-	benchmarkWalk(web2aPath, b)
+// ----- Web2a -----
+
+func BenchmarkWeb2aMap(b *testing.B) {
+	benchmarkMapToCompareWithGet(web2aPath, b)
+}
+
+func BenchmarkWeb2aBytesGet(b *testing.B) {
+	benchmarkBytesGet(web2aPath, b)
+}
+
+func BenchmarkWeb2aBytesPut(b *testing.B) {
+	benchmarkBytesPut(web2aPath, b)
 }
 
-func BenchmarkWeb2aRunesWalkPath(b *testing.B) {
-	benchmarkWalkPath(web2aPath, b)
+func BenchmarkWeb2aBytesWalk(b *testing.B) {
+	benchmarkBytesWalk(web2aPath, b)
+}
+
+func BenchmarkWeb2aBytesWalkPath(b *testing.B) {
+	benchmarkBytesWalkPath(web2aPath, b)
 }
 
 func BenchmarkWeb2aPathsPut(b *testing.B) {
@@ -63,45 +76,87 @@ func BenchmarkWeb2aPathsWalkPath(b *testing.B) {
 	benchmarkPathsWalkPath(web2aPath, b)
 }
 
-func benchmarkPut(filePath string, b *testing.B) {
+func benchmarkMapToCompareWithGet(filePath string, b *testing.B) {
+	words, err := loadWords(filePath)
+	if err != nil {
+		b.Skip(err.Error())
+	}
+	m := make(map[string]string, len(words))
+	for _, w := range words {
+		m[w] = w
+	}
+	b.ResetTimer()
+	b.ReportAllocs()
+	for n := 0; n < b.N; n++ {
+		for _, w := range words {
+			_, ok := m[w]
+			if !ok {
+				panic("missing value")
+			}
+		}
+	}
+}
+
+func benchmarkMapToCompareWithPut(filePath string, b *testing.B) {
+	words, err := loadWords(filePath)
+	if err != nil {
+		b.Skip(err.Error())
+	}
+
+	b.ResetTimer()
+	b.ReportAllocs()
+	for n := 0; n < b.N; n++ {
+		m := map[string]string{}
+		for _, w := range words {
+			m[w] = w
+		}
+		if len(m) != len(words) {
+			panic("wrong size map")
+		}
+	}
+}
+
+func benchmarkBytesPut(filePath string, b *testing.B) {
 	words, err := loadWords(filePath)
 	if err != nil {
 		b.Skip(err.Error())
 	}
 	b.ResetTimer()
 	b.ReportAllocs()
 	for n := 0; n < b.N; n++ {
-		tree := new(Runes)
+		tree := new(Bytes)
 		for _, w := range words {
 			tree.Put(w, w)
 		}
 	}
 }
 
-func benchmarkGet(filePath string, b *testing.B) {
+func benchmarkBytesGet(filePath string, b *testing.B) {
 	words, err := loadWords(filePath)
 	if err != nil {
 		b.Skip(err.Error())
 	}
-	tree := new(Runes)
+	tree := new(Bytes)
 	for _, w := range words {
 		tree.Put(w, w)
 	}
 	b.ResetTimer()
 	b.ReportAllocs()
 	for n := 0; n < b.N; n++ {
 		for _, w := range words {
-			tree.Get(w)
+			if _, ok := tree.Get(w); !ok {
+				panic("missing value")
+			}
 		}
 	}
 }
 
-func benchmarkWalk(filePath string, b *testing.B) {
+func benchmarkBytesWalk(filePath string, b *testing.B) {
 	words, err := loadWords(filePath)
 	if err != nil {
 		b.Skip(err.Error())
 	}
-	tree := new(Runes)
+	tree := new(Bytes)
 	for _, w := range words {
 		tree.Put(w, w)
 	}
@@ -110,22 +165,22 @@ func benchmarkWalk(filePath string, b *testing.B) {
 	var count int
 	for n := 0; n < b.N; n++ {
 		count = 0
-		_ = tree.Walk("", func(k fmt.Stringer, value interface{}) error {
+		tree.Walk("", func(k string, value interface{}) bool {
 			count++
-			return nil
+			return false
 		})
 	}
 	if count != len(words) {
 		panic("wrong count")
 	}
 }
 
-func benchmarkWalkPath(filePath string, b *testing.B) {
+func benchmarkBytesWalkPath(filePath string, b *testing.B) {
 	words, err := loadWords(filePath)
 	if err != nil {
 		b.Skip(err.Error())
 	}
-	tree := new(Runes)
+	tree := new(Bytes)
 	for _, w := range words {
 		tree.Put(w, w)
 	}
@@ -135,9 +190,9 @@ func benchmarkWalkPath(filePath string, b *testing.B) {
 	for n := 0; n < b.N; n++ {
 		count = 0
 		for _, w := range words {
-			_ = tree.WalkPath(w, func(key string, value interface{}) error {
+			tree.WalkPath(w, func(key string, value interface{}) bool {
 				count++
-				return nil
+				return false
 			})
 		}
 	}
@@ -193,9 +248,9 @@ func benchmarkPathsWalk(filePath string, b *testing.B) {
 	var count int
 	for n := 0; n < b.N; n++ {
 		count = 0
-		_ = tree.Walk("", func(k fmt.Stringer, value interface{}) error {
+		tree.Walk("", func(k string, value interface{}) bool {
 			count++
-			return nil
+			return false
 		})
 	}
 	if count != len(words) {
@@ -218,9 +273,9 @@ func benchmarkPathsWalkPath(filePath string, b *testing.B) {
 	for n := 0; n < b.N; n++ {
 		count = 0
 		for _, w := range words {
-			_ = tree.WalkPath(w, func(key string, value interface{}) error {
+			tree.WalkPath(w, func(key string, value interface{}) bool {
 				count++
-				return nil
+				return false
 			})
 		}
 	}