Skip to content

Commit

Permalink
fix boundary sampler so it is true to given rate (#226)
Browse files Browse the repository at this point in the history
Fixes #225 

Solution works accurately on both amd64 and arm64 architectures. 

Adds test to ensure number of sampling decisions falls within a given
range.

Avoid float to int conversion which is platform dependent for high
values (such as trace IDs)

---------

Co-authored-by: Adrian Cole <[email protected]>
  • Loading branch information
jamesmoessis and codefromthecrypt authored Feb 6, 2025
1 parent e609ce4 commit 7c09a19
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 17 deletions.
8 changes: 6 additions & 2 deletions sample.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,15 @@ func NewBoundarySampler(rate float64, salt int64) (Sampler, error) {
}

var (
boundary = int64(rate * 10000)
// convert rate into a proportional boundary where values below it are sampled
// (e.g., 1% rate ≈ first 1% of space)
boundary = uint64(rate * (1 << 63))
usalt = uint64(salt)
)
return func(id uint64) bool {
return int64(math.Abs(float64(id^usalt)))%10000 < boundary
// XOR with salt provides deterministic randomization
// right shift ensures uniform distribution across [0, 2^63)
return ((id ^ usalt) >> 1) < boundary
}, nil
}

Expand Down
42 changes: 27 additions & 15 deletions sample_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,21 +31,16 @@ func TestBoundarySampler(t *testing.T) {
hasError bool
}
for input, sampled := range map[triple]bool{
{123, 456, 1.0, false}: true,
{123, 456, 999, true}: true,
{123, 456, 0.0, false}: false,
{123, 456, -42, true}: false,
{1229998, 0, 0.01, false}: false,
{1229999, 0, 0.01, false}: false,
{1230000, 0, 0.01, false}: true,
{1230001, 0, 0.01, false}: true,
{1230098, 0, 0.01, false}: true,
{1230099, 0, 0.01, false}: true,
{1230100, 0, 0.01, false}: false,
{1230101, 0, 0.01, false}: false,
{1, 9999999, 0.01, false}: false,
{999, 0, 0.99, false}: true,
{9999, 0, 0.99, false}: false,
{123, 456, 1.0, false}: true,
{123, 456, 999, true}: true,
{123, 456, 0.0, false}: false,
{123, 456, -42, true}: false,
{0xffffffffffffffff, 0, 0.01, false}: false,
{0xa000000000000000, 0, 0.01, false}: false,
{0x028f5c28f5c28f5f, 0, 0.01, false}: true,
{0x028f5c28f5c28f60, 0, 0.01, false}: false,
{1, 0xfffffffffffffff, 0.01, false}: false,
{999, 0, 0.99, false}: true,
} {
sampler, err := zipkin.NewBoundarySampler(input.rate, input.salt)
if want, have := input.hasError, (err != nil); want != have {
Expand All @@ -61,6 +56,23 @@ func TestBoundarySampler(t *testing.T) {
if want, have := sampled, sampler(input.id); want != have {
t.Errorf("%#+v: want %v, have %v", input, want, have)
}

}
}

func TestBoundarySamplerProducesSamplingDecisionsTrueToTheRate(t *testing.T) {
rand.Uint64()
c := 0
sampler, _ := zipkin.NewBoundarySampler(0.01, 0)
n := 10000
for i := 0; i < n; i++ {
id := rand.Uint64()
if sampler(id) {
c++
}
}
if !(c > 50 && c < 150) {
t.Error("should sample at 1%, should be in vicinity of 100")
}
}

Expand Down

0 comments on commit 7c09a19

Please sign in to comment.