Skip to content

Commit d592b0c

Browse files
authored
chore(deps): Swap out bloom crate for bloomy (vectordotdev#17911)
Signed-off-by: Jesse Szwedko <[email protected]> --------- Signed-off-by: Jesse Szwedko <[email protected]>
1 parent 45e24c7 commit d592b0c

File tree

7 files changed

+35
-45
lines changed

7 files changed

+35
-45
lines changed

Cargo.lock

+8-14
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

+2-2
Original file line number | Diff line number | Diff line change
@@ -239,7 +239,7 @@ async-compression = { version = "0.4.0", default-features = false, features = ["
239239
apache-avro = { version = "0.14.0", default-features = false, optional = true }
240240
axum = { version = "0.6.18", default-features = false }
241241
base64 = { version = "0.21.2", default-features = false, optional = true }
242-
bloom = { version = "0.3.2", default-features = false, optional = true }
242+
bloomy = { version = "1.2.0", default-features = false, optional = true }
243243
bollard = { version = "0.14.0", default-features = false, features = ["ssl", "chrono"], optional = true }
244244
bytes = { version = "1.4.0", default-features = false, features = ["serde"] }
245245
bytesize = { version = "1.2.0", default-features = false }
@@ -593,7 +593,7 @@ transforms-reduce = []
593593
transforms-remap = []
594594
transforms-route = []
595595
transforms-sample = []
596-
transforms-tag_cardinality_limit = ["dep:bloom", "dep:hashbrown"]
596+
transforms-tag_cardinality_limit = ["dep:bloomy", "dep:hashbrown"]
597597
transforms-throttle = ["dep:governor"]
598598

599599
# Sinks

LICENSE-3rdparty.csv

+1-1
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@ bitvec,https://github.com/bitvecto-rs/bitvec,MIT,The bitvec Authors
8787
block-buffer,https://github.com/RustCrypto/utils,MIT OR Apache-2.0,RustCrypto Developers
8888
block-padding,https://github.com/RustCrypto/utils,MIT OR Apache-2.0,RustCrypto Developers
8989
blocking,https://github.com/smol-rs/blocking,Apache-2.0 OR MIT,Stjepan Glavina <[email protected]>
90-
bloom,https://github.com/nicklan/bloom-rs,GPL-2.0,Nick Lanham <[email protected]>
90+
bloomy,https://docs.rs/bloomy/,MIT,"Aleksandr Bezobchuk <[email protected]>, Alexis Sellier <[email protected]>"
9191
bollard,https://github.com/fussybeaver/bollard,Apache-2.0,Bollard contributors
9292
borsh,https://github.com/near/borsh-rs,MIT OR Apache-2.0,Near Inc <[email protected]>
9393
borsh-derive,https://github.com/nearprotocol/borsh,Apache-2.0,Near Inc <[email protected]>

src/transforms/tag_cardinality_limit/config.rs

+2-2
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ use vector_core::config::LogNamespace;
1818
pub struct TagCardinalityLimitConfig {
1919
/// How many distinct values to accept for any given key.
2020
#[serde(default = "default_value_limit")]
21-
pub value_limit: u32,
21+
pub value_limit: usize,
2222

2323
#[configurable(derived)]
2424
#[serde(default = "default_limit_exceeded_action")]
@@ -81,7 +81,7 @@ const fn default_limit_exceeded_action() -> LimitExceededAction {
8181
LimitExceededAction::DropTag
8282
}
8383

84-
const fn default_value_limit() -> u32 {
84+
const fn default_value_limit() -> usize {
8585
500
8686
}
8787

src/transforms/tag_cardinality_limit/mod.rs

+3-3
Original file line number | Diff line number | Diff line change
@@ -55,11 +55,11 @@ impl TagCardinalityLimit {
5555
}
5656

5757
// Tag value not yet part of the accepted set.
58-
if tag_value_set.len() < self.config.value_limit as usize {
58+
if tag_value_set.len() < self.config.value_limit {
5959
// accept the new value
6060
tag_value_set.insert(value.clone());
6161

62-
if tag_value_set.len() == self.config.value_limit as usize {
62+
if tag_value_set.len() == self.config.value_limit {
6363
emit!(TagCardinalityValueLimitReached { key });
6464
}
6565

@@ -76,7 +76,7 @@ impl TagCardinalityLimit {
7676
self.accepted_tags
7777
.get(key)
7878
.map(|value_set| {
79-
!value_set.contains(value) && value_set.len() >= self.config.value_limit as usize
79+
!value_set.contains(value) && value_set.len() >= self.config.value_limit
8080
})
8181
.unwrap_or(false)
8282
}

src/transforms/tag_cardinality_limit/tag_value_set.rs

+17-21
Original file line number | Diff line number | Diff line change
@@ -1,19 +1,18 @@
11
use crate::event::metric::TagValueSet;
22
use crate::transforms::tag_cardinality_limit::config::Mode;
3-
use bloom::{BloomFilter, ASMS};
3+
use bloomy::BloomFilter;
44
use std::collections::HashSet;
55
use std::fmt;
66

77
/// Container for storing the set of accepted values for a given tag key.
88
#[derive(Debug)]
99
pub struct AcceptedTagValueSet {
1010
storage: TagValueSetStorage,
11-
num_elements: usize,
1211
}
1312

1413
enum TagValueSetStorage {
1514
Set(HashSet<TagValueSet>),
16-
Bloom(BloomFilter),
15+
Bloom(BloomFilter<TagValueSet>),
1716
}
1817

1918
impl fmt::Debug for TagValueSetStorage {
@@ -26,40 +25,37 @@ impl fmt::Debug for TagValueSetStorage {
2625
}
2726

2827
impl AcceptedTagValueSet {
29-
pub fn new(value_limit: u32, mode: &Mode) -> Self {
28+
pub fn new(value_limit: usize, mode: &Mode) -> Self {
3029
let storage = match &mode {
31-
Mode::Exact => TagValueSetStorage::Set(HashSet::with_capacity(value_limit as usize)),
30+
Mode::Exact => TagValueSetStorage::Set(HashSet::with_capacity(value_limit)),
3231
Mode::Probabilistic(config) => {
3332
let num_bits = config.cache_size_per_key / 8; // Convert bytes to bits
34-
let num_hashes = bloom::optimal_num_hashes(num_bits, value_limit);
35-
TagValueSetStorage::Bloom(BloomFilter::with_size(num_bits, num_hashes))
33+
TagValueSetStorage::Bloom(BloomFilter::with_size(num_bits))
3634
}
3735
};
38-
Self {
39-
storage,
40-
num_elements: 0,
41-
}
36+
Self { storage }
4237
}
4338

4439
pub fn contains(&self, value: &TagValueSet) -> bool {
4540
match &self.storage {
4641
TagValueSetStorage::Set(set) => set.contains(value),
47-
TagValueSetStorage::Bloom(bloom) => bloom.contains(&value),
42+
TagValueSetStorage::Bloom(bloom) => bloom.contains(value),
4843
}
4944
}
5045

51-
pub const fn len(&self) -> usize {
52-
self.num_elements
46+
pub fn len(&self) -> usize {
47+
match &self.storage {
48+
TagValueSetStorage::Set(set) => set.len(),
49+
TagValueSetStorage::Bloom(bloom) => bloom.count(),
50+
}
5351
}
5452

55-
pub fn insert(&mut self, value: TagValueSet) -> bool {
56-
let inserted = match &mut self.storage {
57-
TagValueSetStorage::Set(set) => set.insert(value),
53+
pub fn insert(&mut self, value: TagValueSet) {
54+
match &mut self.storage {
55+
TagValueSetStorage::Set(set) => {
56+
set.insert(value);
57+
}
5858
TagValueSetStorage::Bloom(bloom) => bloom.insert(&value),
5959
};
60-
if inserted {
61-
self.num_elements += 1
62-
}
63-
inserted
6460
}
6561
}

src/transforms/tag_cardinality_limit/tests.rs

+2-2
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ fn make_metric(tags: MetricTags) -> Event {
3535
}
3636

3737
const fn make_transform_hashset(
38-
value_limit: u32,
38+
value_limit: usize,
3939
limit_exceeded_action: LimitExceededAction,
4040
) -> TagCardinalityLimitConfig {
4141
TagCardinalityLimitConfig {
@@ -46,7 +46,7 @@ const fn make_transform_hashset(
4646
}
4747

4848
const fn make_transform_bloom(
49-
value_limit: u32,
49+
value_limit: usize,
5050
limit_exceeded_action: LimitExceededAction,
5151
) -> TagCardinalityLimitConfig {
5252
TagCardinalityLimitConfig {

0 commit comments

Comments
 (0)