-
Notifications
You must be signed in to change notification settings - Fork 42
/
Copy pathindex_test.py
178 lines (152 loc) · 6.53 KB
/
index_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# -*- coding: utf-8 -*-
# Usage: py.test tests
from keyvi.index import Index, ReadOnlyIndex
import os
import random
import shutil
import tempfile
import gc
def test_open_index():
    """Store a key, drop the index object, reopen the path and find the key.

    The index lives in a directory below the system temp dir which is
    removed again when the test finishes, whether it passed or not.
    """
    workdir = os.path.join(tempfile.gettempdir(), "index_open_index")
    index_path = os.path.join(workdir, "index")
    try:
        if not os.path.exists(workdir):
            os.mkdir(workdir)

        writer = Index(index_path)
        writer.Set("a", "{}")
        del writer

        # required for pypy to ensure deletion/destruction of the index object
        gc.collect()

        reopened = Index(index_path)
        assert "a" in reopened
        del reopened
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
def test_some_indexing():
    """Insert many keys, delete the upper range, then verify membership.

    Keys ``key-0`` .. ``key-9999`` are written and flushed. Every key from
    ``split`` upwards is then checked and deleted; after a second flush only
    the keys below ``split`` must still be contained in the index.
    """
    workdir = os.path.join(tempfile.gettempdir(), "index_some_indexing")
    iterations = 10000
    split = 2000
    try:
        if not os.path.exists(workdir):
            os.mkdir(workdir)
        index = Index(os.path.join(workdir, "index"))

        for n in range(iterations):
            index.Set("key-{}".format(n), "value-{}".format(n))
        index.Flush()

        # remove the upper part, making sure each key was present beforehand
        for n in range(split, iterations):
            doomed = "key-{}".format(n)
            assert doomed in index
            index.Delete(doomed)
        index.Flush()

        for n in range(split):
            kept = "key-{}".format(n)
            assert kept in index

        for n in range(split, iterations):
            removed = "key-{}".format(n)
            assert removed not in index

        del index
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
def test_bulk_add():
    """Add key/value pairs in batches via ``MSet`` and spot-check the result.

    ``iterations`` batches of ``chunk_size`` pairs each are written, the
    index is flushed, and 50 randomly chosen keys from the written range
    must be contained in the index.

    The batch is handed to ``MSet`` once it holds exactly ``chunk_size``
    pairs. The earlier ``i % chunk_size == 0`` check fired at ``i == 0``,
    which produced a first batch of one pair and a trailing batch of
    ``chunk_size - 1`` pairs instead of evenly sized batches.
    """
    test_dir = os.path.join(tempfile.gettempdir(), "index_bulk_add")
    iterations = 10
    chunk_size = 1000
    total = chunk_size * iterations
    try:
        if not os.path.exists(test_dir):
            os.mkdir(test_dir)
        index = Index(os.path.join(test_dir, "index"))

        key_values = []
        for i in range(total):
            key_values.append(("key-{}".format(i), "value-{}".format(i)))
            # hand over the batch as soon as it is full
            if len(key_values) == chunk_size:
                index.MSet(key_values)
                key_values = []
        # write whatever is left over; nothing remains when total is a
        # multiple of chunk_size, so no empty batch is sent
        if key_values:
            index.MSet(key_values)
        index.Flush()

        # spot-check a random sample of the written keys
        for _ in range(50):
            assert "key-{}".format(random.randrange(0, total)) in index
        del index
    finally:
        shutil.rmtree(test_dir, ignore_errors=True)
def test_get_fuzzy():
    """Run GetFuzzy lookups on a writable index and a read-only view of it.

    Six keys are stored and flushed. The same set of lookups is then issued
    against both index objects and the matched strings are compared, in
    order, with the expected ones. Finally a key is deleted through the
    writable index and the lookup that used to find it must come back empty.
    """
    workdir = os.path.join(tempfile.gettempdir(), "index_test_fuzzy")
    index_path = os.path.join(workdir, "index")
    try:
        if not os.path.exists(workdir):
            os.mkdir(workdir)
        write_index = Index(index_path)
        for fruit in ("apple", "apples", "banana", "orange", "avocado", "peach"):
            write_index.Set(fruit, "{}")
        write_index.Flush()
        read_only_index = ReadOnlyIndex(index_path)

        # each entry: the three GetFuzzy arguments followed by the matched
        # strings expected back, in order
        lookups = [
            ("appe", 1, 2, [u'apple']),
            ("appes", 2, 2, [u'apple', u'apples']),
            ("apples", 1, 2, [u'apple', u'apples']),
            ("atocao", 2, 1, [u'avocado']),
        ]
        for reader in [write_index, read_only_index]:
            for query, second_arg, third_arg, expected in lookups:
                matches = list(reader.GetFuzzy(query, second_arg, third_arg))
                found = [match.matched_string for match in matches]
                assert expected == found

        # once deleted, the key must no longer be returned
        write_index.Delete("avocado")
        write_index.Flush()
        leftovers = list(write_index.GetFuzzy("atocao", 2, 1))
        assert leftovers == []

        del write_index
        del read_only_index
    finally:
        shutil.rmtree(workdir, ignore_errors=True)
def test_get_near():
    """Run GetNear lookups on a writable index and a read-only view of it.

    Seven geohash keys with small payload strings are stored and flushed.
    The same lookups are then issued against both index objects; for each
    one the number of matches and the leading matches (matched string and
    value) are checked.
    """
    workdir = os.path.join(tempfile.gettempdir(), "index_test_near")
    index_path = os.path.join(workdir, "index")
    try:
        if not os.path.exists(workdir):
            os.mkdir(workdir)
        write_index = Index(index_path)

        # the following geohashes are created from openstreetmap coordinates
        # and translated using a geohash encoder
        places = [
            ("u21xj502gs79", "{'city' : 'Kobarid', 'country': 'si'}"),
            ("u21xk2uxkhh2", "{'city' : 'Trnovo ob soci', 'country': 'si'}"),
            ("u21x75n34qrp", "{'city' : 'Srpnecia', 'country': 'si'}"),
            ("u21x6v1nx0c3", "{'city' : 'Zaga', 'country': 'si'}"),
            ("u21xs20w9ssu", "{'city' : 'Cezsoca', 'country': 'si'}"),
            ("u21x6yx5cqy6", "{'city' : 'Log Cezsoski', 'country': 'si'}"),
            ("u21xs7ses4s3", "{'city' : 'Bovec', 'country': 'si'}"),
        ]
        for geohash, payload in places:
            write_index.Set(geohash, payload)
        write_index.Flush()
        read_only_index = ReadOnlyIndex(index_path)

        kobarid = (u'u21xj502gs79', u"{'city' : 'Kobarid', 'country': 'si'}")
        cezsoca = (u'u21xs20w9ssu', u"{'city' : 'Cezsoca', 'country': 'si'}")
        bovec = (u'u21xs7ses4s3', u"{'city' : 'Bovec', 'country': 'si'}")

        # each entry: GetNear arguments, expected number of matches, and the
        # expected (matched_string, value) pairs of the leading matches
        lookups = [
            # some coordinate nearby, greedy false, so it prefers as close as possible
            (("u21xjjhhymt7", 4), 1, [kobarid]),
            # greedy match, still closest should be the 1st match
            (("u21xjjhhymt7", 4, True), 7, [kobarid]),
            # closer match near Bovec and Cezsoca but closer to Cezsoca
            (("u21xs20w9ssu", 5), 1, [cezsoca]),
            # greedy should return Bovec, but not the other locations due to the prefix
            (("u21xs20w9ssu", 5, True), 2, [cezsoca, bovec]),
        ]
        for reader in [write_index, read_only_index]:
            for arguments, expected_count, expected_head in lookups:
                matches = list(reader.GetNear(*arguments))
                assert len(matches) == expected_count
                for match, (geohash, payload) in zip(matches, expected_head):
                    assert geohash == match.matched_string
                    assert payload == match.value

        del write_index
        del read_only_index
    finally:
        shutil.rmtree(workdir, ignore_errors=True)