From e6227c07f2ef5a45d0e6e8b946862c52779e02a4 Mon Sep 17 00:00:00 2001 From: Nan Wang Date: Mon, 13 Apr 2020 18:22:58 +0800 Subject: [PATCH] feat(ranker): fix the idf --- jina/executors/rankers/tfidf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jina/executors/rankers/tfidf.py b/jina/executors/rankers/tfidf.py index 51387b3ad8080..838491496d0a7 100644 --- a/jina/executors/rankers/tfidf.py +++ b/jina/executors/rankers/tfidf.py @@ -178,7 +178,7 @@ def get_idf(match_idx): """ q_df, q_id = TfIdfRanker._get_df(match_idx) total_df = np.sum(q_df) - return {idx: np.log10((total_df + 1.) / (df + 0.5)) for idx, df in zip(q_id, q_df)} + return {idx: np.log10((total_df + 1.) / (df + 0.5)) ** 2 for idx, df in zip(q_id, q_df)} def get_tf(self, match_idx, match_chunk_meta): """Get the tf dictionary for query chunks that matched a given doc.