Commit 162bba0

laanwj authored and PastaPastaPasta committed
Merge bitcoin#12048: Use best-fit strategy in Arena, now O(log(n)) instead of O(n)

5fbf7c4 fix nits: variable naming, typos (Martin Ankerl)
1e0ee90 Use best-fit strategy in Arena, now O(log(n)) instead of O(n) (Martin Ankerl)

Pull request description:

  This replaces the first-fit algorithm used in the Arena with a best-fit. According to "Dynamic Storage Allocation: A Survey and Critical Review", Wilson et al. 1995, http://www.scs.stanford.edu/14wi-cs140/sched/readings/wilson.pdf, both strategies work well in practice.

  The advantage of using best-fit is that we can switch the O(n) allocation to O(log(n)). Additionally, some previously O(log(n)) operations are now O(1) operations by using hash maps. The end effect is that the benchmark runs about 2.5 times faster on my machine:

  # Benchmark, evals, iterations, total, min, max, median
  old: BenchLockedPool, 5,  530, 5.25749, 0.00196938,  0.00199755,  0.00198172
  new: BenchLockedPool, 5, 1300, 5.11313, 0.000781493, 0.000793314, 0.00078606

  I've run all unit tests and benchmarks, and increased the number of iterations so that BenchLockedPool takes about 5 seconds again.

Tree-SHA512: 6551e384671f93f10c60df530a29a1954bd265cc305411f665a8756525e5afe2873a8032c797d00b6e8c07e16d9827465d0b662875433147381474a44119ccce
1 parent 4e17167 commit 162bba0
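The heart of the change, pulled out of the diffs below: free chunks are additionally indexed in a std::multimap keyed by chunk size, so the best-fit lookup becomes a single lower_bound call. A minimal standalone sketch of that lookup, with illustrative names rather than the commit's actual members:

    #include <cstddef>
    #include <iostream>
    #include <map>

    int main()
    {
        // Free chunks keyed by size; lower_bound(size) returns the smallest
        // chunk that still fits, in O(log(n)). The previous first-fit code
        // scanned an address-ordered map with std::find_if, which is O(n).
        std::multimap<std::size_t, char*> free_by_size;
        char arena[1024];
        free_by_size.emplace(sizeof(arena), arena);

        const std::size_t request = 100;
        auto it = free_by_size.lower_bound(request);
        if (it == free_by_size.end()) {
            std::cout << "arena exhausted\n";
            return 1;
        }
        std::cout << "best fit: " << it->first << "-byte chunk at offset "
                  << (it->second - arena) << '\n';
    }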

File tree

3 files changed: +60 -31 lines changed


src/bench/lockedpool.cpp (+1 -1)

@@ -43,4 +43,4 @@ static void BenchLockedPool(benchmark::State& state)
         addr.clear();
     }
 
-BENCHMARK(BenchLockedPool, 530);
+BENCHMARK(BenchLockedPool, 1300);

src/support/lockedpool.cpp (+45 -25)

@@ -47,7 +47,9 @@ Arena::Arena(void *base_in, size_t size_in, size_t alignment_in):
     base(static_cast<char*>(base_in)), end(static_cast<char*>(base_in) + size_in), alignment(alignment_in)
 {
     // Start with one free chunk that covers the entire arena
-    chunks_free.emplace(base, size_in);
+    auto it = size_to_free_chunk.emplace(size_in, base);
+    chunks_free.emplace(base, it);
+    chunks_free_end.emplace(base + size_in, it);
 }
 
 Arena::~Arena()
@@ -63,26 +65,30 @@ void* Arena::alloc(size_t size)
     if (size == 0)
         return nullptr;
 
-    // Pick a large enough free-chunk
-    auto it = std::find_if(chunks_free.begin(), chunks_free.end(),
-        [=](const std::map<char*, size_t>::value_type& chunk){ return chunk.second >= size; });
-    if (it == chunks_free.end())
+    // Pick a large enough free-chunk. Returns an iterator pointing to the first element that is not less than key.
+    // This allocation strategy is best-fit. According to "Dynamic Storage Allocation: A Survey and Critical Review",
+    // Wilson et. al. 1995, http://www.scs.stanford.edu/14wi-cs140/sched/readings/wilson.pdf, best-fit and first-fit
+    // policies seem to work well in practice.
+    auto size_ptr_it = size_to_free_chunk.lower_bound(size);
+    if (size_ptr_it == size_to_free_chunk.end())
         return nullptr;
 
     // Create the used-chunk, taking its space from the end of the free-chunk
-    auto alloced = chunks_used.emplace(it->first + it->second - size, size).first;
-    if (!(it->second -= size))
-        chunks_free.erase(it);
-    return reinterpret_cast<void*>(alloced->first);
-}
-
-/* extend the Iterator if other begins at its end */
-template <class Iterator, class Pair> bool extend(Iterator it, const Pair& other) {
-    if (it->first + it->second == other.first) {
-        it->second += other.second;
-        return true;
+    const size_t size_remaining = size_ptr_it->first - size;
+    auto alloced = chunks_used.emplace(size_ptr_it->second + size_remaining, size).first;
+    chunks_free_end.erase(size_ptr_it->second + size_ptr_it->first);
+    if (size_ptr_it->first == size) {
+        // whole chunk is used up
+        chunks_free.erase(size_ptr_it->second);
+    } else {
+        // still some memory left in the chunk
+        auto it_remaining = size_to_free_chunk.emplace(size_remaining, size_ptr_it->second);
+        chunks_free[size_ptr_it->second] = it_remaining;
+        chunks_free_end.emplace(size_ptr_it->second + size_remaining, it_remaining);
     }
-    return false;
+    size_to_free_chunk.erase(size_ptr_it);
+
+    return reinterpret_cast<void*>(alloced->first);
 }
 
 void Arena::free(void *ptr)
@@ -97,16 +103,30 @@ void Arena::free(void *ptr)
     if (i == chunks_used.end()) {
         throw std::runtime_error("Arena: invalid or double free");
     }
-    auto freed = *i;
+    std::pair<char*, size_t> freed = *i;
     chunks_used.erase(i);
 
-    // Add space to free map, coalescing contiguous chunks
-    auto next = chunks_free.upper_bound(freed.first);
-    auto prev = (next == chunks_free.begin()) ? chunks_free.end() : std::prev(next);
-    if (prev == chunks_free.end() || !extend(prev, freed))
-        prev = chunks_free.emplace_hint(next, freed);
-    if (next != chunks_free.end() && extend(prev, *next))
+    // coalesce freed with previous chunk
+    auto prev = chunks_free_end.find(freed.first);
+    if (prev != chunks_free_end.end()) {
+        freed.first -= prev->second->first;
+        freed.second += prev->second->first;
+        size_to_free_chunk.erase(prev->second);
+        chunks_free_end.erase(prev);
+    }
+
+    // coalesce freed with chunk after freed
+    auto next = chunks_free.find(freed.first + freed.second);
+    if (next != chunks_free.end()) {
+        freed.second += next->second->first;
+        size_to_free_chunk.erase(next->second);
         chunks_free.erase(next);
+    }
+
+    // Add/set space with coalesced free chunk
+    auto it = size_to_free_chunk.emplace(freed.second, freed.first);
+    chunks_free[freed.first] = it;
+    chunks_free_end[freed.first + freed.second] = it;
 }
 
 Arena::Stats Arena::stats() const
@@ -115,7 +135,7 @@ Arena::Stats Arena::stats() const
     for (const auto& chunk: chunks_used)
         r.used += chunk.second;
     for (const auto& chunk: chunks_free)
-        r.free += chunk.second;
+        r.free += chunk.second->first;
     r.total = r.used + r.free;
     return r;
 }
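What makes the free() path above cheap is the new chunks_free_end map: the neighbor that ends exactly at the freed address is found by a hash lookup rather than by stepping through an address-ordered map. A simplified sketch of that coalescing bookkeeping, storing plain sizes where the commit stores multimap iterators (names are illustrative, and the size-ordered index is elided):

    #include <cstddef>
    #include <unordered_map>

    struct FreeIndex {
        std::unordered_map<char*, std::size_t> by_begin; // begin of free chunk -> size
        std::unordered_map<char*, std::size_t> by_end;   // end of free chunk -> size

        // Record a freed chunk [p, p+n), merging with adjacent free chunks.
        void add_free(char* p, std::size_t n)
        {
            // Merge with the chunk that ends exactly where the freed one
            // begins: one hash lookup replaces a predecessor walk.
            auto prev = by_end.find(p);
            if (prev != by_end.end()) {
                p -= prev->second;
                n += prev->second;
                by_begin.erase(p);
                by_end.erase(prev);
            }
            // Merge with the chunk that begins exactly where the freed one ends.
            auto next = by_begin.find(p + n);
            if (next != by_begin.end()) {
                by_end.erase(next->first + next->second);
                n += next->second;
                by_begin.erase(next);
            }
            by_begin[p] = n;
            by_end[p + n] = n;
        }
    };

In the commit itself the mapped values are iterators into size_to_free_chunk, which lets the size-index entry of a merged neighbor be erased in the same step.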

src/support/lockedpool.h (+14 -5)

@@ -10,6 +10,7 @@
 #include <map>
 #include <mutex>
 #include <memory>
+#include <unordered_map>
 
 /**
  * OS-dependent allocation and deallocation of locked/pinned memory pages.
@@ -88,11 +89,19 @@ class Arena
      */
     bool addressInArena(void *ptr) const { return ptr >= base && ptr < end; }
 private:
-    /** Map of chunk address to chunk information. This class makes use of the
-     * sorted order to merge previous and next chunks during deallocation.
-     */
-    std::map<char*, size_t> chunks_free;
-    std::map<char*, size_t> chunks_used;
+    typedef std::multimap<size_t, char*> SizeToChunkSortedMap;
+    /** Map to enable O(log(n)) best-fit allocation, as it's sorted by size */
+    SizeToChunkSortedMap size_to_free_chunk;
+
+    typedef std::unordered_map<char*, SizeToChunkSortedMap::const_iterator> ChunkToSizeMap;
+    /** Map from begin of free chunk to its node in size_to_free_chunk */
+    ChunkToSizeMap chunks_free;
+    /** Map from end of free chunk to its node in size_to_free_chunk */
+    ChunkToSizeMap chunks_free_end;
+
+    /** Map from begin of used chunk to its size */
+    std::unordered_map<char*, size_t> chunks_used;
+
     /** Base address of arena */
     char* base;
     /** End address of arena */
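One reason this layout works: chunks_free and chunks_free_end store SizeToChunkSortedMap::const_iterator values, which is safe because std::multimap is node-based, so an iterator stays valid while other elements are inserted or erased, and erasing through a stored iterator costs amortized constant time. A tiny illustration of that guarantee (not from the commit):

    #include <cassert>
    #include <cstddef>
    #include <map>

    int main()
    {
        std::multimap<std::size_t, char*> m;
        char a = 0, b = 0;
        auto it = m.emplace(16, &a); // keep a handle to this node
        m.emplace(32, &b);           // unrelated insert: 'it' stays valid
        assert(it->first == 16);
        m.erase(it);                 // amortized O(1) erase via the handle
        assert(m.size() == 1);
    }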
