Skip to content

Commit b7c6504

Browse files
committed
hashindex: test cleanup, decide for INT32_MAX as the refcount limit
1 parent 0ac324c commit b7c6504

File tree

2 files changed

+48
-51
lines changed

2 files changed

+48
-51
lines changed

borg/hashindex.pyx

+18-22
Original file line numberDiff line numberDiff line change
@@ -161,21 +161,17 @@ cdef class NSKeyIterator:
161161
cdef int32_t *value = <int32_t *>(self.key + self.key_size)
162162
return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]))
163163

164-
165-
# to be safe from constants like EMPTY and DELETED
166-
# but don't decrease it any further, these *must* hold:
167-
# (1) max_ref > INT32_MAX/2
168-
# (2) max_ref > -INT32_MIN/2
169-
# If these don't hold the subtractions in the overflow checks themselves can overflow for values >max_ref
170-
cdef int32_t _MAX_REF = INT32_MAX
171-
# MAX_REF is visible to Python
172-
MAX_REF = _MAX_REF
173-
174164
assert INT32_MAX == 2**31-1
175-
assert _MAX_REF > (<long long> INT32_MAX) / 2
176-
assert _MAX_REF > -(<long long> INT32_MIN) / 2
165+
177166

178167
cdef class ChunkIndex(IndexBase):
168+
"""
169+
Mapping of 32 byte keys to (refcount, size, csize) (all 32 bit signed).
170+
171+
The reference count cannot overflow. If an overflow would occur, it is fixed to 2**31-1 and won't decrease
172+
with decref() anymore.
173+
"""
174+
179175
value_size = 12
180176

181177
def __getitem__(self, key):
@@ -206,9 +202,9 @@ cdef class ChunkIndex(IndexBase):
206202
if not data:
207203
raise KeyError(key)
208204
cdef int32_t refcount = _le32toh(data[0])
209-
if refcount != _MAX_REF:
210-
if _MAX_REF - refcount < 1:
211-
refcount = _MAX_REF
205+
if refcount != INT32_MAX:
206+
if INT32_MAX - refcount < 1:
207+
refcount = INT32_MAX
212208
else:
213209
refcount += 1
214210
data[0] = <int32_t> _htole32(refcount)
@@ -221,9 +217,9 @@ cdef class ChunkIndex(IndexBase):
221217
if not data:
222218
raise KeyError(key)
223219
cdef int32_t refcount = _le32toh(data[0])
224-
if refcount != _MAX_REF:
225-
if _MAX_REF - refcount < 1:
226-
refcount = _MAX_REF
220+
if refcount != INT32_MAX:
221+
if INT32_MAX - refcount < 1:
222+
refcount = INT32_MAX
227223
else:
228224
refcount -= 1
229225
data[0] = _htole32(refcount)
@@ -274,10 +270,10 @@ cdef class ChunkIndex(IndexBase):
274270
if values:
275271
refcount1 = _le32toh(values[0])
276272
refcount2 = _le32toh(data[0])
277-
if _MAX_REF - refcount1 < refcount2:
278-
values[0] = _htole32(_MAX_REF)
279-
elif _MAX_REF - refcount2 < refcount1:
280-
values[0] = _htole32(_MAX_REF)
273+
if INT32_MAX - refcount1 < refcount2:
274+
values[0] = _htole32(INT32_MAX)
275+
elif INT32_MAX - refcount2 < refcount1:
276+
values[0] = _htole32(INT32_MAX)
281277
else:
282278
values[0] = _htole32(refcount1 + refcount2)
283279
else:

borg/testsuite/hashindex.py

+30-29
Original file line numberDiff line numberDiff line change
@@ -116,54 +116,55 @@ def test_chunkindex_summarize(self):
116116
assert chunks == 1 + 2 + 3
117117
assert unique_chunks == 3
118118

119+
120+
class HashIndexRefcountingTestCase(BaseTestCase):
121+
MAX_REF = 2**31-1
122+
119123
def test_chunkindex_overflow(self):
120-
max_ref = hashindex.MAX_REF
121124
idx = ChunkIndex()
122-
idx[H(1)] = max_ref - 1, 1, 2
123-
124-
refcount, *_ = idx.incref(H(1))
125-
assert refcount == max_ref
125+
idx[H(1)] = self.MAX_REF - 1, 1, 2
126126

127-
refcount, *_ = idx.decref(H(1))
128-
assert refcount == max_ref
127+
# 5 is arbitrary, any number of incref/decrefs shouldn't move it once it maxes out
128+
for i in range(5):
129+
refcount, *_ = idx.incref(H(1))
130+
assert refcount == self.MAX_REF
131+
for i in range(5):
132+
refcount, *_ = idx.decref(H(1))
133+
assert refcount == self.MAX_REF
129134

130135
def test_chunkindex_merge_overflow1(self):
131-
# Check that it does not overflow at max_ref - 1
136+
# Check that it does not overflow at 2**31-2
132137
idx1 = ChunkIndex()
133-
half = int(hashindex.MAX_REF / 2)
134-
inverse_parity = 1 - hashindex.MAX_REF % 2
135-
# if max_ref is even, then the result is
136-
# 2 * floor(max_ref/2) - 1
137-
# ^^^^^^^^^^^^^^^^^^^^ = max_ref
138-
# if max_ref is odd, then
139-
# 2 * floor(max_ref/2) - 0
140-
# ^^^^^^^^^^^^^^^^^^^^ = max_ref - 1
141-
idx1[H(1)] = half - inverse_parity, 1, 2
138+
# n.b. 2**31-1 is odd
139+
half = self.MAX_REF // 2
140+
idx1[H(1)] = half, 1, 2
142141
idx2 = ChunkIndex()
143142
idx2[H(1)] = half, 1, 2
144143
idx1.merge(idx2)
145144
refcount, *_ = idx1[H(1)]
146-
assert refcount == hashindex.MAX_REF - 1
145+
assert refcount == self.MAX_REF - 1
146+
refcount, *_ = idx1.decref(H(1))
147+
assert refcount == self.MAX_REF - 2
147148

148149
def test_chunkindex_merge_overflow2(self):
149-
# Check that it goes to max_ref even with values that are larged than max_ref themselves
150150
idx1 = ChunkIndex()
151-
idx1[H(1)] = 147483647, 1, 2
151+
idx1[H(1)] = 2000000000, 1, 2
152152
idx2 = ChunkIndex()
153153
idx2[H(1)] = 2000000000, 1, 2
154154
idx1.merge(idx2)
155155
refcount, *_ = idx1[H(1)]
156-
assert refcount == hashindex.MAX_REF
156+
assert refcount == self.MAX_REF
157157

158158
def test_chunkindex_merge_overflow3(self):
159+
# Check that merging maxes out right at 2**31-1
159160
idx1 = ChunkIndex()
160-
half = hashindex.MAX_REF / 2
161+
half = self.MAX_REF // 2
161162
idx1[H(1)] = half + 1, 1, 2
162163
idx2 = ChunkIndex()
163164
idx2[H(1)] = half, 1, 2
164165
idx1.merge(idx2)
165166
refcount, *_ = idx1[H(1)]
166-
assert refcount == hashindex.MAX_REF
167+
assert refcount == self.MAX_REF
167168

168169
def test_chunkindex_add(self):
169170
idx1 = ChunkIndex()
@@ -178,16 +179,16 @@ def test_chunkindex_addinc(self):
178179
idx1.add(H(1), 2, 3, 4)
179180
assert idx1[H(1)] == (7, 6, 7)
180181

181-
def test_max_ref_transparent_inc(self):
182+
def test_transparent_inc(self):
182183
idx1 = ChunkIndex()
183-
idx1[H(1)] = (2**31-1, 6, 7)
184+
idx1[H(1)] = (self.MAX_REF, 6, 7)
184185
idx1.incref(H(1))
185186
refcount, *_ = idx1[H(1)]
186-
assert refcount == hashindex.MAX_REF
187+
assert refcount == self.MAX_REF
187188

188-
def test_max_ref_transparent_dec(self):
189+
def test_transparent_dec(self):
189190
idx1 = ChunkIndex()
190-
idx1[H(1)] = (2**31-1, 6, 7)
191+
idx1[H(1)] = (self.MAX_REF, 6, 7)
191192
idx1.decref(H(1))
192193
refcount, *_ = idx1[H(1)]
193-
assert refcount == hashindex.MAX_REF
194+
assert refcount == self.MAX_REF

0 commit comments

Comments
 (0)