Update set_global_shift_decrease! to shift weights rather than recomputing them (#112)

LilithHafner · web-flow · commit ca5d92d6e420 · 2025-02-24T12:23:59.000-06:00
* Update `set_global_shift_decrease!` to shift weights rather than recomputing them, taking advantage of the fact that the shift is always down (note: this includes a bug). Also add comments about invariants and a TODO.

* Add test

* Inline `recompute_weights!` into its only remaining call site (in `set_global_shift_increase!`)

Note: this does include some regressions, though the gestalt is positive and 5B′ is an improvement.
diff --git a/src/DynamicDiscreteSamplers.jl b/src/DynamicDiscreteSamplers.jl
@@ -318,7 +318,7 @@ function _set_from_zero!(m::Memory, v::Float64, i::Int)
     # update group total weight and total weight
     significand = 0x8000000000000000 | uv << 11
     weight_index = _convert(Int, exponent + 4)
-    significand_sum = update_significand_sum(m, weight_index, significand)
+    significand_sum = update_significand_sum(m, weight_index, significand) # Temporarily break the "weights are accurately computed" invariant
 
     if m[4] == 0 # if we were empty, set global shift (m[3]) so that m[4] will become ~2^40.
         m[3] = -24 - exponent
@@ -339,16 +339,18 @@ function _set_from_zero!(m::Memory, v::Float64, i::Int)
             # Base.top_set_bit(significand_sum)+signed(exponent) + signed(m[3]) == 48
             # signed(m[3]) == 48 - Base.top_set_bit(significand_sum) - signed(exponent)
             m3 = 48 - Base.top_set_bit(significand_sum) - exponent
+            # The "weights are accurately computed" invariant is broken for weight_index, but the "sum(weights) == m[4]" invariant still holds
+            # set_global_shift_decrease! will do something wrong to weight_index, but preserve the "sum(weights) == m[4]" invariant.
             set_global_shift_decrease!(m, m3) # TODO for perf: special case all call sites to this function to take advantage of known shift direction and/or magnitude; also try outlining
             shift = signed(exponent + m3)
         end
         weight = _convert(UInt64, significand_sum << shift) + 1
 
         old_weight = m[weight_index]
-        m[weight_index] = weight
-        m4 = m[4]
+        m[weight_index] = weight # The "weights are accurately computed" invariant is now restored
+        m4 = m[4] # The "sum(weights) == m[4]" invariant is broken
         m4 -= old_weight
-        m4, o = Base.add_with_overflow(m4, weight)
+        m4, o = Base.add_with_overflow(m4, weight) # The "sum(weights) == m4" invariant now holds, though the computation overflows
         if o
             # If weights overflow (>2^64) then shift down by 16 bits
             m3 = m[3]-0x10
@@ -491,8 +493,30 @@ function set_global_shift_increase!(m::Memory, m2, m3::UInt64, m4) # Increase sh
     i <= -signed(m3)-122+4
     So for -signed(m3)-118 < i, we could need to adjust the ith weight
     =#
-    recompute_range = max(5, -signed(m3)-117):m2 # TODO It would be possible to scale this range with length (m[1]) in which case testing could be stricter and performance could be (marginally) better, though not in large cases so possibly not worth doing at all)
-    m[4] = recompute_weights!(m, m3, m4, recompute_range)
+    r0 = max(5, -signed(m3)-117)
+    r1 = m2 # TODO It would be possible to scale this range with length (m[1]) in which case testing could be stricter and performance could be (marginally) better, though not in large cases so possibly not worth doing at all)
+
+    # shift = signed(i-4+m3)
+    # weight = significand_sum == 0 ? 0 : UInt64(significand_sum << shift) + 1
+    # shift < -64; the low 64 bits are shifted off.
+    # i < -60-signed(m3); the low 64 bits are shifted off.
+
+    checkbounds(m, r0:2r1+2042)
+    @inbounds for i in r0:min(r1, -61-signed(m3))
+        significand_sum_lo = m[_convert(Int, 2i+2041)]
+        significand_sum_hi = m[_convert(Int, 2i+2042)]
+        significand_sum_lo == significand_sum_hi == 0 && continue # in this case, the weight was and still is zero
+        shift = signed(i-4+m3) + 64
+        m4 += update_weight!(m, i, significand_sum_hi << shift)
+    end
+    @inbounds for i in max(r0,-60-signed(m3)):r1
+        significand_sum = get_significand_sum(m, i)
+        significand_sum == 0 && continue # in this case, the weight was and still is zero
+        shift = signed(i-4+m3)
+        m4 += update_weight!(m, i, significand_sum << shift)
+    end
+
+    m[4] = m4
 end
 
 function set_global_shift_decrease!(m::Memory, m3::UInt64, m4=m[4]) # Decrease shift, on insertion of elements
@@ -503,7 +527,7 @@ function set_global_shift_decrease!(m::Memory, m3::UInt64, m4=m[4]) # Decrease s
     # In the case of adding a giant element, call this first, then add the element.
     # In any case, this only adjusts elements at or before m[2]
     # from the first index that previously could have had a weight > 1 to min(m[2], the first index that can't have a weight > 1) (never empty), set weights to 1 or 0
-    # from the first index that could have a weight > 1 to m[2] (possibly empty), recompute weights.
+    # from the first index that could have a weight > 1 to m[2] (possibly empty), shift weights by delta.
     m2 = signed(m[2])
     i1 = -signed(m3)-117 # see above, this is the first index that could have weight > 1 (anything after this will have weight 1 or 0)
     i1_old = -signed(m3_old)-117 # anything before this is already weight 1 or 0
@@ -520,35 +544,18 @@ function set_global_shift_decrease!(m::Memory, m3::UInt64, m4=m[4]) # Decrease s
         m[i] = weight
         m4 += weight-old_weight
     end
-    m4 = recompute_weights!(m, m3, m4, recompute_range)
+
+    delta = m3_old-m3
+    checkbounds(m, recompute_range)
+    @inbounds for i in recompute_range
+        old_weight = m[i]
+        old_weight <= 1 && continue # in this case, the weight was and still is 0 or 1
+        m4 += update_weight!(m, i, (old_weight-1) >> delta)
+    end
 
     m[4] = m4
 end
 
-function recompute_weights!(m::Memory{UInt64}, m3::UInt64, m4::UInt64, range::UnitRange{Int64})
-    isempty(range) && return m4
-    r0,r1 = extrema(range)
-    # shift = signed(i-4+m3)
-    # weight = significand_sum == 0 ? 0 : UInt64(significand_sum << shift) + 1
-    # shift < -64; the low 64 bits are shifted off.
-    # i < -60-signed(m3); the low 64 bits are shifted off.
-
-    checkbounds(m, r0:2r1+2042)
-    @inbounds for i in r0:min(r1, -61-signed(m3))
-        significand_sum_lo = m[_convert(Int, 2i+2041)]
-        significand_sum_hi = m[_convert(Int, 2i+2042)]
-        significand_sum_lo == significand_sum_hi == 0 && continue # in this case, the weight was and still is zero
-        shift = signed(i-4+m3) + 64
-        m4 += update_weight!(m, i, significand_sum_hi << shift)
-    end
-    @inbounds for i in max(r0,-60-signed(m3)):r1
-        significand_sum = get_significand_sum(m, i)
-        significand_sum == 0 && continue # in this case, the weight was and still is zero
-        shift = signed(i-4+m3)
-        m4 += update_weight!(m, i, significand_sum << shift)
-    end
-    m4
-end
 Base.@propagate_inbounds function update_weight!(m::Memory{UInt64}, i, shifted_significand_sum)
     weight = _convert(UInt64, shifted_significand_sum) + 1
     old_weight = m[i]
diff --git a/test/weights.jl b/test/weights.jl
@@ -240,6 +240,13 @@ w[1] = 0.95
 w[2] = 6.41e14
 verify(w.m)
 
+# This test catches a bug that was not revealed by the RNG tests below
+w = DynamicDiscreteSamplers.FixedSizeWeights(3);
+w[1] = 1.5
+w[2] = prevfloat(1.5)
+w[3] = 2^25
+verify(w.m)
+
 # This test catches a bug that was not revealed by the RNG tests below.
 # The final line is calibrated to have about a 50% fail rate on that bug
 # and run in about 3 seconds: