Skip to content

Commit

Permalink
Refactor QLD
Browse files Browse the repository at this point in the history
Make the implementation cleaner and compute the term component only once per
term scorer instead of recomputing it for every score.

Signed-off-by: Michal Siedlaczek <[email protected]>
  • Loading branch information
elshize committed Feb 8, 2024
1 parent c48386e commit aa8900b
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 9 deletions.
18 changes: 10 additions & 8 deletions include/pisa/scorer/qld.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,14 +23,16 @@ struct qld: public index_scorer<Wand> {
qld(const Wand& wdata, const float mu) : index_scorer<Wand>(wdata), m_mu(mu) {}

term_scorer_t term_scorer(uint64_t term_id) const override {
auto s = [&, term_id](uint32_t doc, uint32_t freq) {
float numerator = 1
+ freq
/ (this->m_mu
* ((float)this->m_wdata.term_occurrence_count(term_id)
/ this->m_wdata.collection_len()));
float denominator = this->m_mu / (this->m_wdata.doc_len(doc) + this->m_mu);
return std::max(0.F, std::log(numerator) + std::log(denominator));
float mu = this->m_mu;
float collection_len = this->m_wdata.collection_len();
float term_occurrences = this->m_wdata.term_occurrence_count(term_id);
float term_component = collection_len / (mu * term_occurrences);

auto s = [this, mu, term_component, term_id](uint32_t doc, uint32_t freq) {
float doclen = this->m_wdata.doc_len(doc);
float a = std::log(mu / (doclen + mu));
float b = std::log1p(freq * term_component);
return std::max(0.F, a + b);
};
return s;
}
Expand Down
4 changes: 3 additions & 1 deletion include/pisa/scorer/quantized.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ class QuantizingScorer {
-> std::function<std::uint32_t(std::uint32_t, std::uint32_t)> {
return
[this, scorer = m_scorer->term_scorer(term_id)](std::uint32_t doc, std::uint32_t freq) {
return this->m_quantizer(scorer(doc, freq));
auto score = scorer(doc, freq);
assert(score >= 0.0);
return this->m_quantizer(score);
};
}
};
Expand Down

0 comments on commit aa8900b

Please sign in to comment.