Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

All instances of HashMap and HashSet now deterministic. #273

Merged
merged 1 commit into from
Mar 17, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ license = "Apache-2.0"
homepage = "https://github.com/CDCgov/ixa"

[dependencies]
fxhash = "^0.2.1"
rand = "^0.8.5"
csv = "^1.3.1"
serde = { version = "^1.0.217", features = ["derive"] }
Expand All @@ -32,6 +31,7 @@ reqwest = { version = "0.12.12", features = ["blocking", "json"] }
uuid = "1.12.1"
tower-http = { version = "0.6.2", features = ["full"] }
mime = "0.3.17"
rustc-hash = "^2.1.1"

[dev-dependencies]
rand_distr = "^0.4.3"
Expand All @@ -47,6 +47,7 @@ ixa_example_births_deaths = { path = "examples/births-deaths" }
[lints.clippy]
pedantic = { level = "warn", priority = -1 }
module-name-repetitions = "allow"
implicit_hasher = "allow"

[lib]
# Prevent Cargo from implicitly linking `libtest` for Criterion.rs compatibility.
Expand Down
2 changes: 1 addition & 1 deletion examples/births-deaths/src/infection_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ use ixa::global_properties::ContextGlobalPropertiesExt;
use ixa::people::{ContextPeopleExt, PersonId, PersonPropertyChangeEvent};
use ixa::plan::PlanId;
use ixa::random::ContextRandomExt;
use ixa::{HashMap, HashMapExt, HashSet, HashSetExt};
use rand_distr::Exp;
use std::collections::{HashMap, HashSet};

define_rng!(InfectionRng);
define_data_plugin!(
Expand Down
2 changes: 1 addition & 1 deletion examples/births-deaths/src/parameters_loader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ use ixa::context::Context;
use ixa::define_global_property;
use ixa::error::IxaError;
use ixa::global_properties::ContextGlobalPropertiesExt;
use ixa::HashMap;
use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::fmt::Debug;
use std::path::Path;

Expand Down
9 changes: 6 additions & 3 deletions examples/load-people/population_loader.rs
Original file line number Diff line number Diff line change
Expand Up @@ -74,10 +74,12 @@ mod tests {
fn test_creation_event_access_properties() {
let flag = Rc::new(RefCell::new(false));

// Define expected computed values for each person
// Define expected computed values for each person. The value for dosage will change for
// any change in the deterministic RNG.
let expected_computed = vec![
(20, RiskCategoryValue::Low, VaccineTypeValue::B, 0.8, 1),
(80, RiskCategoryValue::High, VaccineTypeValue::A, 0.9, 2),
// (age, risk_category, vaccine_type, efficacy, doses)
(20, RiskCategoryValue::Low, VaccineTypeValue::B, 0.8, 3),
(80, RiskCategoryValue::High, VaccineTypeValue::A, 0.9, 1),
];

let mut context = Context::new();
Expand Down Expand Up @@ -117,6 +119,7 @@ mod tests {
context.get_person_property(person, VaccineEfficacy),
efficacy
);
// This assert will break for any change that affects the deterministic hasher.
assert_eq!(context.get_person_property(person, VaccineDoses), doses);

*counter.borrow_mut() += 1;
Expand Down
4 changes: 3 additions & 1 deletion examples/time-varying-infection/exposure_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,8 @@ mod test {
let hazard_fcn = func!(move |t| foi_t(t, parameters.foi, parameters.foi_sin_shift));
let survival_fcn = func!(move |t| f64::exp(-integrate(&hazard_fcn, 0.0, t)));
let theoretical_mean = integrate(&survival_fcn, 0.0, 10000.0); // large enough upper bound
assert!((mean - theoretical_mean).abs() < 0.1);

// This can break with any change that affects the deterministic RNG.
assert!((mean - theoretical_mean).abs() < 0.2);
}
}
3 changes: 2 additions & 1 deletion src/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,10 @@
//!
//! Defines a `Context` that is intended to provide the foundational mechanism
//! for storing and manipulating the state of a given simulation.
use crate::{HashMap, HashMapExt};
use std::{
any::{Any, TypeId},
collections::{HashMap, VecDeque},
collections::VecDeque,
rc::Rc,
};

Expand Down
2 changes: 1 addition & 1 deletion src/debugger.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ use crate::IxaError;
use clap::{ArgMatches, Command, FromArgMatches, Parser, Subcommand};
use rustyline;

use crate::{HashMap, HashMapExt};
use log::trace;
use std::collections::HashMap;
use std::io::Write;

trait DebuggerCommand {
Expand Down
5 changes: 3 additions & 2 deletions src/global_properties.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,12 @@
//! Global properties can be read with [`Context::get_global_property_value()`]
use crate::context::Context;
use crate::error::IxaError;
use log::trace;
use crate::trace;
use crate::{HashMap, HashMapExt};
use serde::de::DeserializeOwned;
use std::any::{Any, TypeId};
use std::cell::RefCell;
use std::collections::{hash_map::Entry, HashMap};
use std::collections::hash_map::Entry;
use std::fmt::Debug;
use std::fs;
use std::io::BufReader;
Expand Down
50 changes: 50 additions & 0 deletions src/hashing.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
//! This module provides a deterministic hasher and `HashMap` and `HashSet` variants that use
//! it. The hashing data structures in the standard library are not deterministic:
//!
//! > By default, HashMap uses a hashing algorithm selected to provide
//! > resistance against HashDoS attacks. The algorithm is randomly seeded, and a
//! > reasonable best-effort is made to generate this seed from a high quality,
//! > secure source of randomness provided by the host without blocking the program.
//!
//! The standard library `HashMap` has a `new` method, but `HashMap<K, V, S>` does not have a `new`
//! method by default. Use `HashMap::default()` instead to create a new hashmap with the default
//! hasher. If you really need to keep the API the same across implementations, we provide the
//! `HashMapExt` trait extension. Similarly, for `HashSet` and `HashSetExt`. The traits need only
//! be in scope.
//!
//! The `hash_str` free function is a convenience function used in `crate::random::get_rng`.

pub use rustc_hash::FxHashMap as HashMap;
pub use rustc_hash::FxHashSet as HashSet;
use std::hash::Hasher;

/// Provides API parity with `std::collections::HashMap`.
///
/// The `HashMap` alias exported by this module has no inherent `new` constructor, so this
/// extension trait supplies one. The trait only needs to be in scope for `HashMap::new()`
/// to resolve.
pub trait HashMapExt {
/// Constructs a new instance of the implementing type. The implementation provided in
/// this module for `HashMap<K, V>` returns `HashMap::default()`.
fn new() -> Self;
}

/// Gives the deterministic `HashMap` alias a `new()` constructor.
impl<K, V> HashMapExt for HashMap<K, V> {
    /// Returns the `Default` value of the map type, mirroring
    /// `std::collections::HashMap::new()`.
    fn new() -> Self {
        Self::default()
    }
}

// Note that trait aliases are not yet stabilized in rustc.
// See https://github.com/rust-lang/rust/issues/41517
/// Provides API parity with `std::collections::HashSet`.
///
/// The `HashSet` alias exported by this module has no inherent `new` constructor, so this
/// extension trait supplies one. The trait only needs to be in scope for `HashSet::new()`
/// to resolve.
pub trait HashSetExt {
/// Constructs a new instance of the implementing type. The implementation provided in
/// this module for `HashSet<T>` returns `HashSet::default()`.
fn new() -> Self;
}

/// Gives the deterministic `HashSet` alias a `new()` constructor.
impl<T> HashSetExt for HashSet<T> {
    /// Returns the `Default` value of the set type, mirroring
    /// `std::collections::HashSet::new()`.
    fn new() -> Self {
        Self::default()
    }
}

/// Computes a 64-bit hash of a string slice.
///
/// The UTF-8 bytes of `data` are written into a default-constructed
/// `rustc_hash::FxHasher`, and the hasher's final state is returned.
pub fn hash_str(data: &str) -> u64 {
    let bytes = data.as_bytes();
    let mut fx_state = rustc_hash::FxHasher::default();
    Hasher::write(&mut fx_state, bytes);
    Hasher::finish(&fx_state)
}
4 changes: 4 additions & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -65,13 +65,17 @@ pub use log::{
};

pub mod external_api;
mod hashing;
pub mod web_api;

// Re-export for macros
pub use ctor;
pub use paste;
pub use rand;

// Deterministic hashing data structures
pub use crate::hashing::{HashMap, HashMapExt, HashSet, HashSetExt};

#[cfg(test)]
mod tests {
use assert_cmd::cargo::CargoError;
Expand Down
2 changes: 1 addition & 1 deletion src/log.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,14 @@

pub use log::{debug, error, info, trace, warn, LevelFilter};

use crate::HashMap;
use log4rs;
use log4rs::append::console::ConsoleAppender;
use log4rs::config::runtime::ConfigBuilder;
use log4rs::config::{Appender, Logger, Root};
use log4rs::encode::pattern::PatternEncoder;
use log4rs::{Config, Handle};
use std::collections::hash_map::Entry;
use std::collections::HashMap;
use std::sync::LazyLock;
use std::sync::{Mutex, MutexGuard};

Expand Down
6 changes: 2 additions & 4 deletions src/network.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,13 @@
//! arbitrary number of outgoing edges of a given type, with each edge
//! having a weight. Edge types can also specify their own per-type
//! data which will be stored along with the edge.
use crate::HashMap;
use crate::{
context::Context, define_data_plugin, error::IxaError, people::PersonId,
random::ContextRandomExt, random::RngId,
};
use rand::Rng;
use std::{
any::{Any, TypeId},
collections::HashMap,
};
use std::any::{Any, TypeId};

#[derive(Copy, Clone, Debug, PartialEq)]
/// An edge in network graph. Edges are directed, so the
Expand Down
2 changes: 1 addition & 1 deletion src/people/context_extension.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@ use crate::{
Context, ContextRandomExt, IxaError, PersonCreatedEvent, PersonId, PersonProperty,
PersonPropertyChangeEvent, RngId, Tabulator,
};
use crate::{HashMap, HashMapExt, HashSet, HashSetExt};
use rand::Rng;
use std::any::TypeId;
use std::cell::Ref;
use std::collections::{HashMap, HashSet};

use crate::people::methods::Methods;

Expand Down
2 changes: 1 addition & 1 deletion src/people/data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,9 @@ use crate::people::index::Index;
use crate::people::methods::Methods;
use crate::people::InitializationList;
use crate::{Context, IxaError, PersonId, PersonProperty, PersonPropertyChangeEvent};
use crate::{HashMap, HashSet, HashSetExt};
use std::any::{Any, TypeId};
use std::cell::{Ref, RefCell, RefMut};
use std::collections::{HashMap, HashSet};

type ContextCallback = dyn FnOnce(&mut Context);

Expand Down
5 changes: 2 additions & 3 deletions src/people/external_api.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
use std::any::TypeId;
use std::collections::HashMap;

use crate::people::ContextPeopleExt;
use crate::people::PeoplePlugin;
use crate::Context;
use crate::IxaError;
use crate::PersonId;
use crate::{HashMap, HashMapExt};
use std::any::TypeId;

pub(crate) trait ContextPeopleExtCrate {
fn get_person_property_by_name(
Expand Down
2 changes: 1 addition & 1 deletion src/people/index.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use super::methods::Methods;
use crate::{Context, ContextPeopleExt, PersonId, PersonProperty};
use crate::{HashMap, HashSet, HashSetExt};
use bincode::serialize;
use serde::Serialize;
use std::collections::{HashMap, HashSet};

#[derive(Clone, PartialEq, Eq, Hash, Debug)]
// The lookup key for entries in the index. This is a serialized version of the value.
Expand Down
7 changes: 2 additions & 5 deletions src/people/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -88,15 +88,12 @@ pub use property::{
PersonProperty,
};

use crate::{HashMap, HashMapExt, HashSet, HashSetExt};
use seq_macro::seq;
use serde::{Deserialize, Serialize};
use std::cell::RefCell;
use std::fmt::{Debug, Display, Formatter};
use std::{
any::TypeId,
collections::{HashMap, HashSet},
hash::Hash,
};
use std::{any::TypeId, hash::Hash};

define_data_plugin!(
PeoplePlugin,
Expand Down
8 changes: 3 additions & 5 deletions src/plan.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,9 @@
//! This queue is used by `Context` to store future events where some callback
//! closure `FnOnce(&mut Context)` will be executed at a given point in time.

use log::trace;
use std::{
cmp::Ordering,
collections::{BinaryHeap, HashMap},
};
use crate::trace;
use crate::{HashMap, HashMapExt};
use std::{cmp::Ordering, collections::BinaryHeap};

/// A priority queue that stores arbitrary data sorted by time
///
Expand Down
22 changes: 15 additions & 7 deletions src/random.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
use crate::context::Context;
use crate::hashing::hash_str;
use crate::{HashMap, HashMapExt};
use log::trace;
use rand::distributions::uniform::{SampleRange, SampleUniform};
use rand::distributions::WeightedIndex;
use rand::prelude::Distribution;
use rand::{Rng, SeedableRng};
use std::any::{Any, TypeId};
use std::cell::{RefCell, RefMut};
use std::collections::HashMap;

/// Use this to define a unique type which will be used as a key to retrieve
/// an independent rng instance when calling `.get_rng`.
Expand Down Expand Up @@ -86,9 +87,11 @@ fn get_rng<R: RngId + 'static>(context: &Context) -> RefMut<R::RngType> {
TypeId::of::<R>()
);
let base_seed = data_container.base_seed;
let seed_offset = fxhash::hash64(R::get_name());
let seed_offset = hash_str(R::get_name());
RngHolder {
rng: Box::new(R::RngType::seed_from_u64(base_seed + seed_offset)),
rng: Box::new(R::RngType::seed_from_u64(
base_seed.wrapping_add(seed_offset),
)),
}
})
.rng
Expand Down Expand Up @@ -279,7 +282,9 @@ mod test {
fn sampler_function_closure_capture() {
let mut context = Context::new();
context.init_random(42);
// Initialize weighted sampler

// Initialize weighted sampler. Zero is selected with probability 1/3, one with a
// probability of 2/3.
*context.get_data_container_mut(SamplerData) = WeightedIndex::new(vec![1.0, 2.0]).unwrap();

let parameters = context.get_data_container(SamplerData).unwrap();
Expand All @@ -291,15 +296,17 @@ mod test {
zero_counter += 1;
}
}
assert!((zero_counter - 1000_i32).abs() < 30);
// The expected value of `zero_counter` is 1000.
assert!((zero_counter - 1000_i32).abs() < 100);
}

#[test]
fn sample_distribution() {
let mut context = Context::new();
context.init_random(42);

// Initialize weighted sampler
// Initialize weighted sampler. Zero is selected with probability 1/3, one with a
// probability of 2/3.
*context.get_data_container_mut(SamplerData) = WeightedIndex::new(vec![1.0, 2.0]).unwrap();

let parameters = context.get_data_container(SamplerData).unwrap();
Expand All @@ -311,7 +318,8 @@ mod test {
zero_counter += 1;
}
}
assert!((zero_counter - 1000_i32).abs() < 30);
// The expected value of `zero_counter` is 1000.
assert!((zero_counter - 1000_i32).abs() < 100);
}

#[test]
Expand Down
5 changes: 3 additions & 2 deletions src/report.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ use crate::error::IxaError;
use crate::people::ContextPeopleExt;
use crate::Tabulator;
use crate::{error, trace};
use crate::{HashMap, HashMapExt};
use csv::Writer;
use serde::Serializer;
use std::any::TypeId;
use std::cell::{RefCell, RefMut};
use std::collections::HashMap;
use std::env;
use std::fs::File;
use std::path::PathBuf;
Expand Down Expand Up @@ -273,7 +273,7 @@ impl ContextReportExt for Context {

#[cfg(test)]
mod test {
use crate::define_person_property_with_default;
use crate::{define_person_property_with_default, info};

use super::*;
use core::convert::TryInto;
Expand Down Expand Up @@ -525,6 +525,7 @@ mod test {
let mut context2 = Context::new();
let config = context2.report_options();
config.file_prefix("prefix1_".to_string()).directory(path);
info!("The next 'file already exists' error is intended for a passing test.");
let result = context2.add_report::<SampleReport>("sample_report");
assert!(result.is_err());
let error = result.err().unwrap();
Expand Down
Loading