-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathdatabase.cpp
236 lines (207 loc) · 7.57 KB
/
database.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
// Copyright (C) 2014-2015 Jonathan Müller <[email protected]>
// This file is subject to the license terms in the LICENSE file
// found in the top-level directory of this distribution.
#include "database.hpp"
#include <cassert>
#include <cmath>
#include <cstring>
#include <string>
namespace sid = foonathan::string_id;
sid::basic_database::insert_status sid::basic_database::insert_prefix(hash_type hash, hash_type prefix,
const char *str, std::size_t length)
{
std::string prefix_str = lookup(prefix);
return insert(hash, (prefix_str + str).c_str(), prefix_str.size() + length);
}
namespace
{
// equivalent to prefix + str == other_str for std::string
// prefix and other_str are null-terminated
bool strequal(const char *prefix,
const char *str, std::size_t length, const char *other_str) FOONATHAN_NOEXCEPT
{
assert(prefix);
while (*prefix)
if (*prefix++ != *other_str++)
return false;
return std::strncmp(str, other_str, length) == 0;
}
}
/// \cond impl
class sid::map_database::node_list
{
struct node
{
std::size_t length; // length of string
hash_type hash;
node *next;
node(const char *str, std::size_t length,
hash_type h, node *next) FOONATHAN_NOEXCEPT
: length(length), hash(h), next(next)
{
void* mem = this;
auto dest = static_cast<char*>(mem) + sizeof(node);
std::strncpy(dest, str, length);
dest[length] = 0;
}
node(const char *prefix, std::size_t length_prefix,
const char *str, std::size_t length_string,
hash_type h, node *next) FOONATHAN_NOEXCEPT
: length(length_prefix + length_string), hash(h), next(next)
{
void* mem = this;
auto dest = static_cast<char*>(mem) + sizeof(node);
std::strncpy(dest, prefix, length_prefix);
dest += length_prefix;
std::strncpy(dest, str, length_string);
dest[length_string] = 0;
}
const char* get_str() const FOONATHAN_NOEXCEPT
{
const void *mem = this;
return static_cast<const char*>(mem) + sizeof(node);
}
};
struct insert_pos_t
{
bool exists; // if true: cur set, else: prev and next set
node*& prev;
node* next;
node* cur;
insert_pos_t(node*& prev, node *next) FOONATHAN_NOEXCEPT
: exists(false), prev(prev), next(next), cur(nullptr) {}
insert_pos_t(node *cur) FOONATHAN_NOEXCEPT
: exists(true), prev(this->cur), next(nullptr), cur(cur) {}
};
public:
node_list() FOONATHAN_NOEXCEPT
: head_(nullptr) {}
~node_list() FOONATHAN_NOEXCEPT
{
auto cur = head_;
while (cur)
{
auto next = cur->next;
::operator delete(cur);
cur = next;
}
}
basic_database::insert_status insert(hash_type hash, const char *str, std::size_t length)
{
auto pos = insert_pos(hash);
if (pos.exists)
return std::strncmp(str, pos.cur->get_str(), length) == 0 ?
basic_database::old_string : basic_database::collision;
auto mem = ::operator new(sizeof(node) + length + 1);
auto n = ::new(mem) node(str, length, hash, pos.next);
pos.prev = n;
return basic_database::new_string;
}
basic_database::insert_status insert_prefix(node_list &prefix_bucket, hash_type prefix,
hash_type hash, const char *str, std::size_t length)
{
auto prefix_node = prefix_bucket.find_node(prefix);
auto pos = insert_pos(hash);
if (pos.exists)
return strequal(prefix_node->get_str(), str, length, pos.cur->get_str()) ?
basic_database::old_string : basic_database::collision;
auto mem = ::operator new(sizeof(node) + prefix_node->length + length + 1);
auto n = ::new(mem) node(prefix_node->get_str(), prefix_node->length,
str, length, hash, pos.next);
pos.prev = n;
return basic_database::new_string;
}
// inserts all nodes into new buckets, this list is empty afterwards
void rehash(node_list *buckets, std::size_t size) FOONATHAN_NOEXCEPT
{
auto cur = head_;
while (cur)
{
auto next = cur->next;
auto pos = buckets[cur->hash % size].insert_pos(cur->hash);
assert(!pos.exists && "element can't be there already");
pos.prev = cur;
cur->next = pos.next;
cur = next;
}
head_ = nullptr;
}
// returns element with hash, there must be one
const char* lookup(hash_type h) const FOONATHAN_NOEXCEPT
{
return find_node(h)->get_str();
}
private:
node* find_node(hash_type h) const FOONATHAN_NOEXCEPT
{
assert(head_ && "hash not inserted");
auto cur = head_;
while (cur->hash < h)
{
cur = cur->next;
assert(cur && "hash not inserted");
}
assert(cur->hash == h && "hash not inserted");
return cur;
}
insert_pos_t insert_pos(hash_type hash) FOONATHAN_NOEXCEPT
{
node *cur = head_, *prev = nullptr;
while (cur && cur->hash <= hash)
{
if (cur->hash < hash)
{
prev = cur;
cur = cur->next;
}
else if (cur->hash == hash)
return {cur};
}
return {prev ? prev->next : head_, cur};
}
node *head_;
};
/// \endcond
sid::map_database::map_database(std::size_t size, double max_load_factor)
: buckets_(new node_list[size]),
no_items_(0u), no_buckets_(size),
max_load_factor_(max_load_factor),
next_resize_(static_cast<std::size_t>(std::floor(no_buckets_ * max_load_factor_)))
{}
sid::map_database::~map_database() FOONATHAN_NOEXCEPT {}
sid::basic_database::insert_status sid::map_database::insert(hash_type hash, const char *str, std::size_t length)
{
if (no_items_ + 1 >= next_resize_)
rehash();
auto status = buckets_[hash % no_buckets_].insert(hash, str, length);
if (status == insert_status::new_string)
++no_items_;
return status;
}
sid::basic_database::insert_status sid::map_database::insert_prefix(hash_type hash, hash_type prefix,
const char *str, std::size_t length)
{
if (no_items_ + 1 >= next_resize_)
rehash();
auto status = buckets_[hash % no_buckets_].insert_prefix(buckets_[prefix % no_buckets_], prefix,
hash, str, length);
if (status == insert_status::new_string)
++no_items_;
return status;
}
const char* sid::map_database::lookup(hash_type hash) const FOONATHAN_NOEXCEPT
{
return buckets_[hash % no_buckets_].lookup(hash);
}
void sid::map_database::rehash()
{
static FOONATHAN_CONSTEXPR auto growth_factor = 2;
auto new_size = growth_factor * no_buckets_;
auto buckets = new node_list[new_size]();
auto end = buckets_.get() + no_buckets_;
for (auto list = buckets_.get(); list != end; ++list)
list->rehash(buckets, new_size);
buckets_.reset(buckets);
no_buckets_ = new_size;
next_resize_ = static_cast<std::size_t>(std::floor(no_buckets_ * max_load_factor_));
}