From 5eac092c92b3f0d44a49e2b3cbc340540c14a7a5 Mon Sep 17 00:00:00 2001 From: ivan-aksamentov Date: Thu, 7 Dec 2023 11:22:26 +0100 Subject: [PATCH 1/2] feat(web): sort dataset suggestions by mean score across sequences --- .../src/hooks/useRunSeqAutodetect.ts | 36 +++++++++++++++++-- .../src/state/autodetect.state.ts | 13 +------ 2 files changed, 34 insertions(+), 15 deletions(-) diff --git a/packages_rs/nextclade-web/src/hooks/useRunSeqAutodetect.ts b/packages_rs/nextclade-web/src/hooks/useRunSeqAutodetect.ts index 24aceb5c6..23a4b0db2 100644 --- a/packages_rs/nextclade-web/src/hooks/useRunSeqAutodetect.ts +++ b/packages_rs/nextclade-web/src/hooks/useRunSeqAutodetect.ts @@ -1,4 +1,4 @@ -import { first, get, isNil, sortBy } from 'lodash' +import { first, get, isNil, mean, sortBy, uniq } from 'lodash' import type { Subscription } from 'observable-fns' import { useMemo } from 'react' import { useRecoilCallback, useRecoilValue } from 'recoil' @@ -9,7 +9,6 @@ import { autodetectResultsAtom, AutodetectRunState, autodetectRunStateAtom, - groupByDatasets, minimizerIndexAtom, } from 'src/state/autodetect.state' import { datasetsAtom, minimizerIndexVersionAtom } from 'src/state/dataset.state' @@ -114,6 +113,32 @@ export class SeqAutodetectWasmWorker { } } +export function groupByDatasets(records: MinimizerSearchRecord[]) { + const names = uniq(records.flatMap((record) => record.result.datasets.map((dataset) => dataset.name))) + let byDataset: Record = {} + // eslint-disable-next-line no-loops/no-loops + for (const name of names) { + // Find sequence records which match this dataset + const selectedRecords = records.filter((record) => record.result.datasets.some((dataset) => dataset.name === name)) + + // Get scores for sequence records which match this dataset + const scores = selectedRecords.map((record) => { + const dataset = record.result.datasets.find((ds) => ds.name === name) + return dataset?.score ?? 0 + }) + const meanScore = mean(scores) + + byDataset = { ...byDataset, [name]: { records: selectedRecords, meanScore } } + } + + console.info( + sortBy(Object.entries(byDataset), ([_, val]) => -val.meanScore) + .map(([name, val]) => `${name.padEnd(60)} ${val.meanScore.toFixed(4)}`) + .join('\n'), + ) + return byDataset +} + export function useDatasetSuggestionResults() { const { datasets } = useRecoilValue(datasetsAtom) const autodetectResults = useRecoilValue(autodetectResultsAtom) @@ -128,7 +153,12 @@ export function useDatasetSuggestionResults() { Object.entries(recordsByDataset).some(([dataset, _]) => dataset === candidate.path), ) - itemsInclude = sortBy(itemsInclude, (dataset) => -get(recordsByDataset, dataset.path, []).length) + itemsInclude = sortBy(itemsInclude, (dataset) => { + const record = get(recordsByDataset, dataset.path) + return -record.meanScore ?? 0 + }) + + itemsInclude = sortBy(itemsInclude, (dataset) => -(get(recordsByDataset, dataset.path)?.records?.length ?? 0)) const itemsNotInclude = datasets.filter((candidate) => !itemsInclude.map((it) => it.path).includes(candidate.path)) diff --git a/packages_rs/nextclade-web/src/state/autodetect.state.ts b/packages_rs/nextclade-web/src/state/autodetect.state.ts index 127d0932a..5f8e2bd5e 100644 --- a/packages_rs/nextclade-web/src/state/autodetect.state.ts +++ b/packages_rs/nextclade-web/src/state/autodetect.state.ts @@ -1,5 +1,4 @@ -/* eslint-disable no-loops/no-loops */ -import { isEmpty, isNil, uniq } from 'lodash' +import { isEmpty, isNil } from 'lodash' import { atom, atomFamily, DefaultValue, selector, selectorFamily } from 'recoil' import { isDefaultValue } from 'src/state/utils/isDefaultValue' import type { MinimizerIndexJson, MinimizerSearchRecord } from 'src/types' @@ -49,16 +48,6 @@ export const autodetectResultByIndexAtom = selectorFamily { - const names = uniq(records.flatMap((record) => record.result.datasets.map((dataset) => dataset.name))) - let byDataset = {} - for (const name of names) { - const selectedRecords = records.filter((record) => record.result.datasets.some((dataset) => dataset.name === name)) - byDataset = { ...byDataset, [name]: selectedRecords } - } - return byDataset -} - // Select autodetect results by dataset name export const autodetectResultsByDatasetAtom = selectorFamily({ key: 'autodetectResultByDatasetAtom', From e2d713ca62dae37422ad8d5ab70ffd69f90643e8 Mon Sep 17 00:00:00 2001 From: ivan-aksamentov Date: Thu, 7 Dec 2023 11:54:48 +0100 Subject: [PATCH 2/2] refactor: lint --- packages_rs/nextclade-web/src/hooks/useRunSeqAutodetect.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages_rs/nextclade-web/src/hooks/useRunSeqAutodetect.ts b/packages_rs/nextclade-web/src/hooks/useRunSeqAutodetect.ts index 23a4b0db2..b6d642d6e 100644 --- a/packages_rs/nextclade-web/src/hooks/useRunSeqAutodetect.ts +++ b/packages_rs/nextclade-web/src/hooks/useRunSeqAutodetect.ts @@ -155,7 +155,7 @@ export function useDatasetSuggestionResults() { itemsInclude = sortBy(itemsInclude, (dataset) => { const record = get(recordsByDataset, dataset.path) - return -record.meanScore ?? 0 + return -record.meanScore }) itemsInclude = sortBy(itemsInclude, (dataset) => -(get(recordsByDataset, dataset.path)?.records?.length ?? 0))