Skip to content

Commit

Permalink
Merge pull request #1332 from nextstrain/feat/optimize-minimizer-storage
Browse files: browse the repository at this point in the history
  • Loading branch information
ivan-aksamentov authored Dec 8, 2023
2 parents 699d44f + 62cd375 commit 559e477
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 9 deletions.
12 changes: 5 additions & 7 deletions packages_rs/nextclade/src/sort/minimizer_index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ pub const MINIMIZER_INDEX_SCHEMA_VERSION_FROM: &str = "3.0.0";
pub const MINIMIZER_INDEX_SCHEMA_VERSION_TO: &str = "3.0.0";
pub const MINIMIZER_INDEX_ALGO_VERSION: &str = "1";

pub type MinimizerMap = BTreeMap<u64, String>;
pub type MinimizerMap = BTreeMap<u64, Vec<usize>>;

/// Contains external configuration and data specific for a particular pathogen
#[derive(Debug, Clone, Serialize, Deserialize, JsonSchema)]
Expand Down Expand Up @@ -43,24 +43,22 @@ pub struct MinimizerIndexJson {
pub other: serde_json::Value,
}

/// Serde serializer for Letter sequences
pub fn serde_serialize_minimizers<S: Serializer>(minimizers: &MinimizerMap, s: S) -> Result<S::Ok, S::Error> {
let mut map = s.serialize_map(Some(minimizers.len()))?;
for (k, v) in minimizers {
map.serialize_entry(&k.to_string(), &v.to_string())?;
map.serialize_entry(&k.to_string(), &v)?;
}
map.end()
}

/// Serde deserializer for Letter sequences
pub fn serde_deserialize_minimizers<'de, D: Deserializer<'de>>(deserializer: D) -> Result<MinimizerMap, D::Error> {
let map = BTreeMap::<String, String>::deserialize(deserializer)?;
let map = BTreeMap::<String, Vec<usize>>::deserialize(deserializer)?;

let res = map
.into_iter()
.map(|(k, v)| Ok((u64::from_str(&k)?, v)))
.collect::<Result<MinimizerMap, Report>>()
.unwrap();
.map_err(serde::de::Error::custom)?;

Ok(res)
}
Expand Down Expand Up @@ -121,6 +119,6 @@ impl MinimizerIndexJson {
warn!("Version of the minimizer index data ({version}) is greater than maximum supported by this version of Nextclade ({MINIMIZER_INDEX_ALGO_VERSION}). This may lead to errors or incorrect results. Please try to update your version of Nextclade and/or contact dataset maintainers for more details.");
}

json_parse(s)
json_parse(s).wrap_err("When parsing minimizer index")
}
}
6 changes: 4 additions & 2 deletions packages_rs/nextclade/src/sort/minimizer_search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,10 @@ pub fn run_minimizer_search(
let mut hit_counts = vec![0; n_refs];
for m in minimizers {
if let Some(mz) = index.minimizers.get(&m) {
for i in 0..n_refs {
hit_counts[i] += u64::from_str(&mz[i..=i])?;
for (ri, hit_count) in hit_counts.iter_mut().enumerate() {
if mz.contains(&ri) {
*hit_count += 1;
}
}
}
}
Expand Down

1 comment on commit 559e477

@vercel
Copy link

@vercel vercel bot commented on 559e477 Dec 8, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Successfully deployed to the following URLs:

nextclade – ./

nextclade.vercel.app
nextclade-git-master-nextstrain.vercel.app
nextclade-nextstrain.vercel.app

Please sign in to comment.