From 62cd375a0122ce8eaa072e3ad876c8a0ab72f3a1 Mon Sep 17 00:00:00 2001 From: ivan-aksamentov Date: Fri, 8 Dec 2023 04:18:59 +0100 Subject: [PATCH] feat: optimize minimizer index storage format --- packages_rs/nextclade/src/sort/minimizer_index.rs | 12 +++++------- packages_rs/nextclade/src/sort/minimizer_search.rs | 6 ++++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/packages_rs/nextclade/src/sort/minimizer_index.rs b/packages_rs/nextclade/src/sort/minimizer_index.rs index 291b88ce8..4bdcefd7d 100644 --- a/packages_rs/nextclade/src/sort/minimizer_index.rs +++ b/packages_rs/nextclade/src/sort/minimizer_index.rs @@ -14,7 +14,7 @@ pub const MINIMIZER_INDEX_SCHEMA_VERSION_FROM: &str = "3.0.0"; pub const MINIMIZER_INDEX_SCHEMA_VERSION_TO: &str = "3.0.0"; pub const MINIMIZER_INDEX_ALGO_VERSION: &str = "1"; -pub type MinimizerMap = BTreeMap; +pub type MinimizerMap = BTreeMap>; /// Contains external configuration and data specific for a particular pathogen #[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)] @@ -43,24 +43,22 @@ pub struct MinimizerIndexJson { pub other: serde_json::Value, } -/// Serde serializer for Letter sequences pub fn serde_serialize_minimizers(minimizers: &MinimizerMap, s: S) -> Result { let mut map = s.serialize_map(Some(minimizers.len()))?; for (k, v) in minimizers { - map.serialize_entry(&k.to_string(), &v.to_string())?; + map.serialize_entry(&k.to_string(), &v)?; } map.end() } -/// Serde deserializer for Letter sequences pub fn serde_deserialize_minimizers<'de, D: Deserializer<'de>>(deserializer: D) -> Result { - let map = BTreeMap::::deserialize(deserializer)?; + let map = BTreeMap::>::deserialize(deserializer)?; let res = map .into_iter() .map(|(k, v)| Ok((u64::from_str(&k)?, v))) .collect::>() - .unwrap(); + .map_err(serde::de::Error::custom)?; Ok(res) } @@ -121,6 +119,6 @@ impl MinimizerIndexJson { warn!("Version of the minimizer index data ({version}) is greater than maximum supported by this version of Nextclade ({MINIMIZER_INDEX_ALGO_VERSION}). This may lead to errors or incorrect results. Please try to update your version of Nextclade and/or contact dataset maintainers for more details."); } - json_parse(s) + json_parse(s).wrap_err("When parsing minimizer index") } } diff --git a/packages_rs/nextclade/src/sort/minimizer_search.rs b/packages_rs/nextclade/src/sort/minimizer_search.rs index bc01dbf1b..1a45a8e6e 100644 --- a/packages_rs/nextclade/src/sort/minimizer_search.rs +++ b/packages_rs/nextclade/src/sort/minimizer_search.rs @@ -45,8 +45,10 @@ pub fn run_minimizer_search( let mut hit_counts = vec![0; n_refs]; for m in minimizers { if let Some(mz) = index.minimizers.get(&m) { - for i in 0..n_refs { - hit_counts[i] += u64::from_str(&mz[i..=i])?; + for (ri, hit_count) in hit_counts.iter_mut().enumerate() { + if mz.contains(&ri) { + *hit_count += 1; + } } } }