From 5564f01b3b9925ed53d5d4d32d0e4e5991f930e1 Mon Sep 17 00:00:00 2001 From: Max Bachmann Date: Thu, 28 Dec 2023 20:39:48 +0100 Subject: [PATCH] cleanup duplicated code for score_cutoff conversion --- src/rapidfuzz/cpp_common.pxd | 14 +-- src/rapidfuzz/distance/metrics_cpp.pyx | 129 +++++++++++-------------- src/rapidfuzz/fuzz_cpp.pyx | 6 +- src/rapidfuzz/process_cpp_impl.pyx | 100 ++++++++++--------- 4 files changed, 116 insertions(+), 133 deletions(-) diff --git a/src/rapidfuzz/cpp_common.pxd b/src/rapidfuzz/cpp_common.pxd index 0b647ade..ae1bbf79 100644 --- a/src/rapidfuzz/cpp_common.pxd +++ b/src/rapidfuzz/cpp_common.pxd @@ -4,7 +4,7 @@ from cpython.object cimport PyObject from cpython.pycapsule cimport PyCapsule_GetPointer, PyCapsule_IsValid, PyCapsule_New from libc.stddef cimport wchar_t -from libc.stdint cimport int64_t, uint64_t +from libc.stdint cimport int64_t, uint64_t, SIZE_MAX from libc.stdlib cimport free, malloc from libcpp cimport bool from libcpp.utility cimport move, pair @@ -332,9 +332,7 @@ cdef inline RF_String conv_sequence(seq) except *: else: return move(hash_sequence(seq)) -cdef inline double get_score_cutoff_f64(score_cutoff, const RF_ScorerFlags* scorer_flags) except *: - cdef float worst_score = scorer_flags.worst_score.f64 - cdef float optimal_score = scorer_flags.optimal_score.f64 +cdef inline double get_score_cutoff_f64(score_cutoff, float worst_score, float optimal_score) except *: cdef float c_score_cutoff = worst_score if score_cutoff is not None: @@ -350,9 +348,7 @@ cdef inline double get_score_cutoff_f64(score_cutoff, const RF_ScorerFlags* scor return c_score_cutoff -cdef inline int64_t get_score_cutoff_i64(score_cutoff, const RF_ScorerFlags* scorer_flags) except *: - cdef int64_t worst_score = scorer_flags.worst_score.i64 - cdef int64_t optimal_score = scorer_flags.optimal_score.i64 +cdef inline int64_t get_score_cutoff_i64(score_cutoff, int64_t worst_score, int64_t optimal_score) except *: cdef int64_t c_score_cutoff = worst_score if score_cutoff is not None: @@ -368,9 +364,7 @@ cdef inline int64_t get_score_cutoff_i64(score_cutoff, const RF_ScorerFlags* sco return c_score_cutoff -cdef inline size_t get_score_cutoff_size_t(score_cutoff, const RF_ScorerFlags* scorer_flags) except *: - cdef size_t worst_score = scorer_flags.worst_score.sizet - cdef size_t optimal_score = scorer_flags.optimal_score.sizet +cdef inline size_t get_score_cutoff_size_t(score_cutoff, size_t worst_score, size_t optimal_score) except *: cdef size_t c_score_cutoff = worst_score if score_cutoff is not None: diff --git a/src/rapidfuzz/distance/metrics_cpp.pyx b/src/rapidfuzz/distance/metrics_cpp.pyx index 6f85d621..14b68852 100644 --- a/src/rapidfuzz/distance/metrics_cpp.pyx +++ b/src/rapidfuzz/distance/metrics_cpp.pyx @@ -21,6 +21,11 @@ from rapidfuzz cimport ( from ._initialize_cpp cimport Editops, RfEditops +try: + from pandas import NA as pandas_NA +except: + pandas_NA = None + # required for preprocess_strings from array import array @@ -32,6 +37,8 @@ from cpp_common cimport ( SetFuncAttrs, SetScorerAttrs, preprocess_strings, + get_score_cutoff_f64, + get_score_cutoff_size_t, ) from libcpp.cmath cimport isnan from libc.stdint cimport SIZE_MAX, int64_t @@ -177,8 +184,9 @@ cdef extern from "metrics.hpp": bool PostfixSimilarityInit( RF_ScorerFunc*, const RF_Kwargs*, int64_t, const RF_String*) except False nogil bool PostfixNormalizedSimilarityInit(RF_ScorerFunc*, const RF_Kwargs*, int64_t, const RF_String*) except False nogil + cdef inline bool is_none(s): - if s is None: + if s is None or s is pandas_NA: return True if isinstance(s, float) and isnan(s): @@ -186,33 +194,6 @@ cdef inline bool is_none(s): return False -cdef size_t get_score_cutoff_size_t(score_cutoff, size_t default) except? -1: - cdef size_t c_score_cutoff = default - if score_cutoff is None: - return c_score_cutoff - - c_score_cutoff = score_cutoff - return c_score_cutoff - -cdef double get_score_cutoff_f64(score_cutoff, double default) except -1: - cdef double c_score_cutoff = default - if score_cutoff is None: - return c_score_cutoff - - c_score_cutoff = score_cutoff - if c_score_cutoff < 0: - raise ValueError("score_cutoff has to be >= 0") - - return c_score_cutoff - -cdef size_t get_score_hint_size_t(score_hint, size_t default) except? -1: - cdef size_t c_score_hint = default - if score_hint is None: - return c_score_hint - - c_score_hint = score_hint - return c_score_hint - def levenshtein_distance(s1, s2, *, weights=(1,1,1), processor=None, score_cutoff=None, score_hint=None): cdef RF_StringWrapper s1_proc, s2_proc @@ -221,8 +202,8 @@ def levenshtein_distance(s1, s2, *, weights=(1,1,1), processor=None, score_cutof if weights is not None: insertion, deletion, substitution = weights - cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, SIZE_MAX) - cdef size_t c_score_hint = get_score_cutoff_size_t(score_hint, SIZE_MAX) + cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, SIZE_MAX, 0) + cdef size_t c_score_hint = get_score_cutoff_size_t(score_hint, SIZE_MAX, 0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return levenshtein_distance_func(s1_proc.string, s2_proc.string, insertion, deletion, substitution, c_score_cutoff, c_score_hint) @@ -234,8 +215,8 @@ def levenshtein_similarity(s1, s2, *, weights=(1,1,1), processor=None, score_cut if weights is not None: insertion, deletion, substitution = weights - cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, 0) - cdef size_t c_score_hint = get_score_cutoff_size_t(score_hint, 0) + cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, 0, SIZE_MAX) + cdef size_t c_score_hint = get_score_cutoff_size_t(score_hint, 0, SIZE_MAX) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return levenshtein_similarity_func(s1_proc.string, s2_proc.string, insertion, deletion, substitution, c_score_cutoff, c_score_hint) @@ -250,8 +231,8 @@ def levenshtein_normalized_distance(s1, s2, *, weights=(1,1,1), processor=None, if weights is not None: insertion, deletion, substitution = weights - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0) - cdef double c_score_hint = get_score_cutoff_f64(score_hint, 1.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0, 0.0) + cdef double c_score_hint = get_score_cutoff_f64(score_hint, 1.0, 0.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return levenshtein_normalized_distance_func(s1_proc.string, s2_proc.string, insertion, deletion, substitution, c_score_cutoff, c_score_hint) @@ -266,8 +247,8 @@ def levenshtein_normalized_similarity(s1, s2, *, weights=(1,1,1), processor=None if weights is not None: insertion, deletion, substitution = weights - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0) - cdef double c_score_hint = get_score_cutoff_f64(score_hint, 0.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0, 1.0) + cdef double c_score_hint = get_score_cutoff_f64(score_hint, 0.0, 1.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return levenshtein_normalized_similarity_func(s1_proc.string, s2_proc.string, insertion, deletion, substitution, c_score_cutoff, c_score_hint) @@ -275,7 +256,7 @@ def levenshtein_normalized_similarity(s1, s2, *, weights=(1,1,1), processor=None def levenshtein_editops(s1, s2, *, processor=None, score_hint=None): cdef RF_StringWrapper s1_proc, s2_proc cdef Editops ops = Editops.__new__(Editops) - cdef size_t c_score_hint = get_score_hint_size_t(score_hint, SIZE_MAX) + cdef size_t c_score_hint = get_score_cutoff_size_t(score_hint, SIZE_MAX, 0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) ops.editops = levenshtein_editops_func(s1_proc.string, s2_proc.string, c_score_hint) @@ -285,7 +266,7 @@ def levenshtein_editops(s1, s2, *, processor=None, score_hint=None): def levenshtein_opcodes(s1, s2, *, processor=None, score_hint=None): cdef RF_StringWrapper s1_proc, s2_proc cdef Editops ops = Editops.__new__(Editops) - cdef size_t c_score_hint = get_score_hint_size_t(score_hint, SIZE_MAX) + cdef size_t c_score_hint = get_score_cutoff_size_t(score_hint, SIZE_MAX, 0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) ops.editops = levenshtein_editops_func(s1_proc.string, s2_proc.string, c_score_hint) @@ -373,14 +354,14 @@ SetFuncAttrs(levenshtein_editops, metrics_py.levenshtein_editops) SetFuncAttrs(levenshtein_opcodes, metrics_py.levenshtein_opcodes) def damerau_levenshtein_distance(s1, s2, *, processor=None, score_cutoff=None): - cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, SIZE_MAX) + cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, SIZE_MAX, 0) cdef RF_StringWrapper s1_proc, s2_proc preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return damerau_levenshtein_distance_func(s1_proc.string, s2_proc.string, c_score_cutoff) def damerau_levenshtein_similarity(s1, s2, *, processor=None, score_cutoff=None): - cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, 0) + cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, 0, SIZE_MAX) cdef RF_StringWrapper s1_proc, s2_proc preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return damerau_levenshtein_similarity_func(s1_proc.string, s2_proc.string, c_score_cutoff) @@ -391,7 +372,7 @@ def damerau_levenshtein_normalized_distance(s1, s2, *, processor=None, score_cut if is_none(s1) or is_none(s2): return 1.0 - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0, 0.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return damerau_levenshtein_normalized_distance_func(s1_proc.string, s2_proc.string, c_score_cutoff) @@ -401,7 +382,7 @@ def damerau_levenshtein_normalized_similarity(s1, s2, *, processor=None, score_c if is_none(s1) or is_none(s2): return 0.0 - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0, 1.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return damerau_levenshtein_normalized_similarity_func(s1_proc.string, s2_proc.string, c_score_cutoff) @@ -443,14 +424,14 @@ SetScorerAttrs(damerau_levenshtein_normalized_similarity, metrics_py.damerau_lev def lcs_seq_distance(s1, s2, *, processor=None, score_cutoff=None): - cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, SIZE_MAX) + cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, SIZE_MAX, 0) cdef RF_StringWrapper s1_proc, s2_proc preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return lcs_seq_distance_func(s1_proc.string, s2_proc.string, c_score_cutoff) def lcs_seq_similarity(s1, s2, *, processor=None, score_cutoff=None): - cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, 0) + cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, 0, SIZE_MAX) cdef RF_StringWrapper s1_proc, s2_proc preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return lcs_seq_similarity_func(s1_proc.string, s2_proc.string, c_score_cutoff) @@ -461,7 +442,7 @@ def lcs_seq_normalized_distance(s1, s2, *, processor=None, score_cutoff=None): if is_none(s1) or is_none(s2): return 1.0 - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0, 0.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return lcs_seq_normalized_distance_func(s1_proc.string, s2_proc.string, c_score_cutoff) @@ -471,7 +452,7 @@ def lcs_seq_normalized_similarity(s1, s2, *, processor=None, score_cutoff=None): if is_none(s1) or is_none(s2): return 0.0 - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0, 1.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return lcs_seq_normalized_similarity_func(s1_proc.string, s2_proc.string, c_score_cutoff) @@ -546,14 +527,14 @@ SetFuncAttrs(lcs_seq_editops, metrics_py.lcs_seq_editops) SetFuncAttrs(lcs_seq_opcodes, metrics_py.lcs_seq_opcodes) def indel_distance(s1, s2, *, processor=None, score_cutoff=None): - cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, SIZE_MAX) + cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, SIZE_MAX, 0) cdef RF_StringWrapper s1_proc, s2_proc preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return indel_distance_func(s1_proc.string, s2_proc.string, c_score_cutoff) def indel_similarity(s1, s2, *, processor=None, score_cutoff=None): - cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, 0) + cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, 0, SIZE_MAX) cdef RF_StringWrapper s1_proc, s2_proc preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return indel_similarity_func(s1_proc.string, s2_proc.string, c_score_cutoff) @@ -564,7 +545,7 @@ def indel_normalized_distance(s1, s2, *, processor=None, score_cutoff=None): if is_none(s1) or is_none(s2): return 1.0 - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0, 0.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return indel_normalized_distance_func(s1_proc.string, s2_proc.string, c_score_cutoff) @@ -574,7 +555,7 @@ def indel_normalized_similarity(s1, s2, *, processor=None, score_cutoff=None): if is_none(s1) or is_none(s2): return 0.0 - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0, 1.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return indel_normalized_similarity_func(s1_proc.string, s2_proc.string, c_score_cutoff) @@ -652,14 +633,14 @@ SetFuncAttrs(indel_editops, metrics_py.indel_editops) SetFuncAttrs(indel_opcodes, metrics_py.indel_opcodes) def hamming_distance(s1, s2, *, pad=True, processor=None, score_cutoff=None): - cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, SIZE_MAX) + cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, SIZE_MAX, 0) cdef RF_StringWrapper s1_proc, s2_proc preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return hamming_distance_func(s1_proc.string, s2_proc.string, pad, c_score_cutoff) def hamming_similarity(s1, s2, *, pad=True, processor=None, score_cutoff=None): - cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, 0) + cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, 0, SIZE_MAX) cdef RF_StringWrapper s1_proc, s2_proc preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) @@ -670,7 +651,7 @@ def hamming_normalized_distance(s1, s2, *, pad=True, processor=None, score_cutof if is_none(s1) or is_none(s2): return 1.0 - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0, 0.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return hamming_normalized_distance_func(s1_proc.string, s2_proc.string, pad, c_score_cutoff) @@ -680,7 +661,7 @@ def hamming_normalized_similarity(s1, s2, *, pad=True, processor=None, score_cut if is_none(s1) or is_none(s2): return 0.0 - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0, 1.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return hamming_normalized_similarity_func(s1_proc.string, s2_proc.string, pad, c_score_cutoff) @@ -753,14 +734,14 @@ SetFuncAttrs(hamming_editops, metrics_py.hamming_editops) SetFuncAttrs(hamming_opcodes, metrics_py.hamming_opcodes) def osa_distance(s1, s2, *, processor=None, score_cutoff=None): - cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, SIZE_MAX) + cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, SIZE_MAX, 0) cdef RF_StringWrapper s1_proc, s2_proc preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return osa_distance_func(s1_proc.string, s2_proc.string, c_score_cutoff) def osa_similarity(s1, s2, *, processor=None, score_cutoff=None): - cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, 0) + cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, 0, SIZE_MAX) cdef RF_StringWrapper s1_proc, s2_proc preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) @@ -771,7 +752,7 @@ def osa_normalized_distance(s1, s2, *, processor=None, score_cutoff=None): if is_none(s1) or is_none(s2): return 1.0 - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0, 0.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return osa_normalized_distance_func(s1_proc.string, s2_proc.string, c_score_cutoff) @@ -781,7 +762,7 @@ def osa_normalized_similarity(s1, s2, *, processor=None, score_cutoff=None): if is_none(s1) or is_none(s2): return 0.0 - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0, 1.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return osa_normalized_similarity_func(s1_proc.string, s2_proc.string, c_score_cutoff) @@ -843,7 +824,7 @@ def jaro_distance(s1, s2, *, processor=None, score_cutoff=None): if is_none(s1) or is_none(s2): return 1.0 - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0, 0.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return jaro_distance_func(s1_proc.string, s2_proc.string, c_score_cutoff) @@ -852,7 +833,7 @@ def jaro_similarity(s1, s2, *, processor=None, score_cutoff=None): if is_none(s1) or is_none(s2): return 0.0 - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0, 1.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return jaro_similarity_func(s1_proc.string, s2_proc.string, c_score_cutoff) @@ -861,7 +842,7 @@ def jaro_normalized_distance(s1, s2, *, processor=None, score_cutoff=None): if is_none(s1) or is_none(s2): return 1.0 - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0, 0.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return jaro_normalized_distance_func(s1_proc.string, s2_proc.string, c_score_cutoff) @@ -871,7 +852,7 @@ def jaro_normalized_similarity(s1, s2, *, processor=None, score_cutoff=None): if is_none(s1) or is_none(s2): return 0.0 - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0, 1.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return jaro_normalized_similarity_func(s1_proc.string, s2_proc.string, c_score_cutoff) @@ -911,7 +892,7 @@ def jaro_winkler_distance(s1, s2, *, double prefix_weight=0.1, processor=None, s if is_none(s1) or is_none(s2): return 1.0 - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0, 0.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return jaro_winkler_distance_func(s1_proc.string, s2_proc.string, prefix_weight, c_score_cutoff) @@ -920,7 +901,7 @@ def jaro_winkler_similarity(s1, s2, *, double prefix_weight=0.1, processor=None, if is_none(s1) or is_none(s2): return 0.0 - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0, 1.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return jaro_winkler_similarity_func(s1_proc.string, s2_proc.string, prefix_weight, c_score_cutoff) @@ -929,7 +910,7 @@ def jaro_winkler_normalized_distance(s1, s2, *, double prefix_weight=0.1, proces if is_none(s1) or is_none(s2): return 1.0 - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0, 0.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return jaro_winkler_normalized_distance_func(s1_proc.string, s2_proc.string, prefix_weight, c_score_cutoff) @@ -938,7 +919,7 @@ def jaro_winkler_normalized_similarity(s1, s2, *, double prefix_weight=0.1, proc if is_none(s1) or is_none(s2): return 0.0 - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0, 1.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return jaro_winkler_normalized_similarity_func(s1_proc.string, s2_proc.string, prefix_weight, c_score_cutoff) @@ -984,14 +965,14 @@ SetScorerAttrs(jaro_winkler_normalized_similarity, metrics_py.jaro_winkler_norma ############################################### def postfix_distance(s1, s2, *, processor=None, score_cutoff=None): - cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, SIZE_MAX) + cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, SIZE_MAX, 0) cdef RF_StringWrapper s1_proc, s2_proc preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return postfix_distance_func(s1_proc.string, s2_proc.string, c_score_cutoff) def postfix_similarity(s1, s2, *, processor=None, score_cutoff=None): - cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, 0) + cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, 0, SIZE_MAX) cdef RF_StringWrapper s1_proc, s2_proc preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) @@ -1002,7 +983,7 @@ def postfix_normalized_distance(s1, s2, *, processor=None, score_cutoff=None): if is_none(s1) or is_none(s2): return 1.0 - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0, 0.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return postfix_normalized_distance_func(s1_proc.string, s2_proc.string, c_score_cutoff) @@ -1012,7 +993,7 @@ def postfix_normalized_similarity(s1, s2, *, processor=None, score_cutoff=None): if is_none(s1) or is_none(s2): return 0.0 - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0, 1.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return postfix_normalized_similarity_func(s1_proc.string, s2_proc.string, c_score_cutoff) @@ -1058,14 +1039,14 @@ SetScorerAttrs(postfix_normalized_similarity, metrics_py.postfix_normalized_simi ############################################### def prefix_distance(s1, s2, *, processor=None, score_cutoff=None): - cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, SIZE_MAX) + cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, SIZE_MAX, 0) cdef RF_StringWrapper s1_proc, s2_proc preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return prefix_distance_func(s1_proc.string, s2_proc.string, c_score_cutoff) def prefix_similarity(s1, s2, *, processor=None, score_cutoff=None): - cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, 0) + cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, 0, SIZE_MAX) cdef RF_StringWrapper s1_proc, s2_proc preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) @@ -1076,7 +1057,7 @@ def prefix_normalized_distance(s1, s2, *, processor=None, score_cutoff=None): if is_none(s1) or is_none(s2): return 1.0 - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 1.0, 0.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return prefix_normalized_distance_func(s1_proc.string, s2_proc.string, c_score_cutoff) @@ -1086,7 +1067,7 @@ def prefix_normalized_similarity(s1, s2, *, processor=None, score_cutoff=None): if is_none(s1) or is_none(s2): return 0.0 - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, 0.0, 1.0) preprocess_strings(s1, s2, processor, &s1_proc, &s2_proc) return prefix_normalized_similarity_func(s1_proc.string, s2_proc.string, c_score_cutoff) diff --git a/src/rapidfuzz/fuzz_cpp.pyx b/src/rapidfuzz/fuzz_cpp.pyx index 5b28828a..ce8c6b71 100644 --- a/src/rapidfuzz/fuzz_cpp.pyx +++ b/src/rapidfuzz/fuzz_cpp.pyx @@ -34,6 +34,10 @@ from libcpp cimport bool from array import array +try: + from pandas import NA as pandas_NA +except: + pandas_NA = None cdef extern from "fuzz_cpp.hpp": double ratio_func( const RF_String&, const RF_String&, double) except + nogil @@ -63,7 +67,7 @@ cdef extern from "fuzz_cpp.hpp": bool RatioMultiStringSupport(const RF_Kwargs*) nogil cdef inline bool is_none(s): - if s is None: + if s is None or s is pandas_NA: return True if isinstance(s, float) and isnan(s): diff --git a/src/rapidfuzz/process_cpp_impl.pyx b/src/rapidfuzz/process_cpp_impl.pyx index b751c65b..65f7c086 100644 --- a/src/rapidfuzz/process_cpp_impl.pyx +++ b/src/rapidfuzz/process_cpp_impl.pyx @@ -137,6 +137,7 @@ cdef extern from "process_cpp.hpp": RfMatrix cdist_two_lists_impl[T]( const RF_ScorerFlags* scorer_flags, const RF_Kwargs*, RF_Scorer*, const vector[RF_StringWrapper]&, const vector[RF_StringWrapper]&, MatrixType, int, T, T, T) except + + cdef inline bool is_none(s): if s is None or s is pandas_NA: return True @@ -146,6 +147,7 @@ cdef inline bool is_none(s): return False + cdef inline vector[DictStringElem] preprocess_dict(queries, processor) except *: cdef vector[DictStringElem] proc_queries cdef int64_t queries_len = len(queries) @@ -198,6 +200,7 @@ cdef inline vector[DictStringElem] preprocess_dict(queries, processor) except *: return move(proc_queries) + cdef inline vector[ListStringElem] preprocess_list(queries, processor) except *: cdef vector[ListStringElem] proc_queries cdef int64_t queries_len = len(queries) @@ -247,6 +250,7 @@ cdef inline vector[ListStringElem] preprocess_list(queries, processor) except *: return move(proc_queries) + cdef inline extractOne_dict_f64( query, choices, RF_Scorer* scorer, const RF_ScorerFlags* scorer_flags, processor, @@ -264,8 +268,8 @@ cdef inline extractOne_dict_f64( processor_context = PyCapsule_GetPointer(processor_capsule, NULL) cdef RF_StringWrapper proc_query = move(RF_StringWrapper(conv_sequence(query))) - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, scorer_flags) - cdef double c_score_hint = get_score_cutoff_f64(score_hint, scorer_flags) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, scorer_flags.worst_score.f64, scorer_flags.optimal_score.f64) + cdef double c_score_hint = get_score_cutoff_f64(score_hint, scorer_flags.worst_score.f64, scorer_flags.optimal_score.f64) cdef RF_ScorerFunc scorer_func scorer.scorer_func_init(&scorer_func, scorer_kwargs, 1, &proc_query.string) @@ -335,8 +339,8 @@ cdef inline extractOne_dict_i64( processor_context = PyCapsule_GetPointer(processor_capsule, NULL) cdef RF_StringWrapper proc_query = move(RF_StringWrapper(conv_sequence(query))) - cdef int64_t c_score_cutoff = get_score_cutoff_i64(score_cutoff, scorer_flags) - cdef int64_t c_score_hint = get_score_cutoff_i64(score_hint, scorer_flags) + cdef int64_t c_score_cutoff = get_score_cutoff_i64(score_cutoff, scorer_flags.worst_score.i64, scorer_flags.optimal_score.i64) + cdef int64_t c_score_hint = get_score_cutoff_i64(score_hint, scorer_flags.worst_score.i64, scorer_flags.optimal_score.i64) cdef RF_ScorerFunc scorer_func scorer.scorer_func_init(&scorer_func, scorer_kwargs, 1, &proc_query.string) @@ -406,8 +410,8 @@ cdef inline extractOne_dict_size_t( processor_context = PyCapsule_GetPointer(processor_capsule, NULL) cdef RF_StringWrapper proc_query = move(RF_StringWrapper(conv_sequence(query))) - cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, scorer_flags) - cdef size_t c_score_hint = get_score_cutoff_size_t(score_hint, scorer_flags) + cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, scorer_flags.worst_score.sizet, scorer_flags.optimal_score.sizet) + cdef size_t c_score_hint = get_score_cutoff_size_t(score_hint, scorer_flags.worst_score.sizet, scorer_flags.optimal_score.sizet) cdef RF_ScorerFunc scorer_func scorer.scorer_func_init(&scorer_func, scorer_kwargs, 1, &proc_query.string) @@ -503,8 +507,8 @@ cdef inline extractOne_list_f64( processor_context = PyCapsule_GetPointer(processor_capsule, NULL) cdef RF_StringWrapper proc_query = move(RF_StringWrapper(conv_sequence(query))) - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, scorer_flags) - cdef double c_score_hint = get_score_cutoff_f64(score_hint, scorer_flags) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, scorer_flags.worst_score.f64, scorer_flags.optimal_score.f64) + cdef double c_score_hint = get_score_cutoff_f64(score_hint, scorer_flags.worst_score.f64, scorer_flags.optimal_score.f64) cdef RF_ScorerFunc scorer_func scorer.scorer_func_init(&scorer_func, scorer_kwargs, 1, &proc_query.string) @@ -572,8 +576,8 @@ cdef inline extractOne_list_i64( processor_context = PyCapsule_GetPointer(processor_capsule, NULL) cdef RF_StringWrapper proc_query = move(RF_StringWrapper(conv_sequence(query))) - cdef int64_t c_score_cutoff = get_score_cutoff_i64(score_cutoff, scorer_flags) - cdef int64_t c_score_hint = get_score_cutoff_i64(score_hint, scorer_flags) + cdef int64_t c_score_cutoff = get_score_cutoff_i64(score_cutoff, scorer_flags.worst_score.i64, scorer_flags.optimal_score.i64) + cdef int64_t c_score_hint = get_score_cutoff_i64(score_hint, scorer_flags.worst_score.i64, scorer_flags.optimal_score.i64) cdef RF_ScorerFunc scorer_func scorer.scorer_func_init(&scorer_func, scorer_kwargs, 1, &proc_query.string) @@ -641,8 +645,8 @@ cdef inline extractOne_list_size_t( processor_context = PyCapsule_GetPointer(processor_capsule, NULL) cdef RF_StringWrapper proc_query = move(RF_StringWrapper(conv_sequence(query))) - cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, scorer_flags) - cdef size_t c_score_hint = get_score_cutoff_size_t(score_hint, scorer_flags) + cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, scorer_flags.worst_score.sizet, scorer_flags.optimal_score.sizet) + cdef size_t c_score_hint = get_score_cutoff_size_t(score_hint, scorer_flags.worst_score.sizet, scorer_flags.optimal_score.sizet) cdef RF_ScorerFunc scorer_func scorer.scorer_func_init(&scorer_func, scorer_kwargs, 1, &proc_query.string) @@ -863,8 +867,8 @@ cdef inline extract_dict_f64( cdef vector[DictMatchElem[double]] results = extract_dict_impl[double]( scorer_kwargs, scorer_flags, scorer, proc_query, proc_choices, - get_score_cutoff_f64(score_cutoff, scorer_flags), - get_score_cutoff_f64(score_hint, scorer_flags) + get_score_cutoff_f64(score_cutoff, scorer_flags.worst_score.f64, scorer_flags.optimal_score.f64), + get_score_cutoff_f64(score_hint, scorer_flags.worst_score.f64, scorer_flags.optimal_score.f64) ) # due to score_cutoff not always completely filled @@ -902,8 +906,8 @@ cdef inline extract_dict_i64( cdef vector[DictMatchElem[int64_t]] results = extract_dict_impl[int64_t]( scorer_kwargs, scorer_flags, scorer, proc_query, proc_choices, - get_score_cutoff_i64(score_cutoff, scorer_flags), - get_score_cutoff_i64(score_hint, scorer_flags) + get_score_cutoff_i64(score_cutoff, scorer_flags.worst_score.i64, scorer_flags.optimal_score.i64), + get_score_cutoff_i64(score_hint, scorer_flags.worst_score.i64, scorer_flags.optimal_score.i64) ) # due to score_cutoff not always completely filled @@ -941,8 +945,8 @@ cdef inline extract_dict_size_t( cdef vector[DictMatchElem[size_t]] results = extract_dict_impl[size_t]( scorer_kwargs, scorer_flags, scorer, proc_query, proc_choices, - get_score_cutoff_size_t(score_cutoff, scorer_flags), - get_score_cutoff_size_t(score_hint, scorer_flags) + get_score_cutoff_size_t(score_cutoff, scorer_flags.worst_score.sizet, scorer_flags.optimal_score.sizet), + get_score_cutoff_size_t(score_hint, scorer_flags.worst_score.sizet, scorer_flags.optimal_score.sizet) ) # due to score_cutoff not always completely filled @@ -1008,8 +1012,8 @@ cdef inline extract_list_f64( cdef vector[ListMatchElem[double]] results = extract_list_impl[double]( scorer_kwargs, scorer_flags, scorer, proc_query, proc_choices, - get_score_cutoff_f64(score_cutoff, scorer_flags), - get_score_cutoff_f64(score_hint, scorer_flags) + get_score_cutoff_f64(score_cutoff, scorer_flags.worst_score.f64, scorer_flags.optimal_score.f64), + get_score_cutoff_f64(score_hint, scorer_flags.worst_score.f64, scorer_flags.optimal_score.f64) ) # due to score_cutoff not always completely filled @@ -1047,8 +1051,8 @@ cdef inline extract_list_i64( cdef vector[ListMatchElem[int64_t]] results = extract_list_impl[int64_t]( scorer_kwargs, scorer_flags, scorer, proc_query, proc_choices, - get_score_cutoff_i64(score_cutoff, scorer_flags), - get_score_cutoff_i64(score_hint, scorer_flags) + get_score_cutoff_i64(score_cutoff, scorer_flags.worst_score.i64, scorer_flags.optimal_score.i64), + get_score_cutoff_i64(score_hint, scorer_flags.worst_score.i64, scorer_flags.optimal_score.i64) ) # due to score_cutoff not always completely filled @@ -1086,8 +1090,8 @@ cdef inline extract_list_size_t( cdef vector[ListMatchElem[size_t]] results = extract_list_impl[size_t]( scorer_kwargs, scorer_flags, scorer, proc_query, proc_choices, - get_score_cutoff_size_t(score_cutoff, scorer_flags), - get_score_cutoff_size_t(score_hint, scorer_flags) + get_score_cutoff_size_t(score_cutoff, scorer_flags.worst_score.sizet, scorer_flags.optimal_score.sizet), + get_score_cutoff_size_t(score_hint, scorer_flags.worst_score.sizet, scorer_flags.optimal_score.sizet) ) # due to score_cutoff not always completely filled @@ -1273,8 +1277,8 @@ def extract_iter(query, choices, *, scorer=WRatio, processor=None, score_cutoff= float64 """ cdef RF_String proc_str - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, &scorer_flags) - cdef double c_score_hint = get_score_cutoff_f64(score_hint, &scorer_flags) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, scorer_flags.worst_score.f64, scorer_flags.optimal_score.f64) + cdef double c_score_hint = get_score_cutoff_f64(score_hint, scorer_flags.worst_score.f64, scorer_flags.optimal_score.f64) query_proc = RF_StringWrapper(conv_sequence(query)) cdef RF_ScorerFunc scorer_func @@ -1316,8 +1320,8 @@ def extract_iter(query, choices, *, scorer=WRatio, processor=None, score_cutoff= int64_t """ cdef RF_String proc_str - cdef int64_t c_score_cutoff = get_score_cutoff_i64(score_cutoff, &scorer_flags) - cdef int64_t c_score_hint = get_score_cutoff_i64(score_hint, &scorer_flags) + cdef int64_t c_score_cutoff = get_score_cutoff_i64(score_cutoff, scorer_flags.worst_score.i64, scorer_flags.optimal_score.i64) + cdef int64_t c_score_hint = get_score_cutoff_i64(score_hint, scorer_flags.worst_score.i64, scorer_flags.optimal_score.i64) query_proc = RF_StringWrapper(conv_sequence(query)) cdef RF_ScorerFunc scorer_func @@ -1359,8 +1363,8 @@ def extract_iter(query, choices, *, scorer=WRatio, processor=None, score_cutoff= size_t """ cdef RF_String proc_str - cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, &scorer_flags) - cdef size_t c_score_hint = get_score_cutoff_size_t(score_hint, &scorer_flags) + cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, scorer_flags.worst_score.sizet, scorer_flags.optimal_score.sizet) + cdef size_t c_score_hint = get_score_cutoff_size_t(score_hint, scorer_flags.worst_score.sizet, scorer_flags.optimal_score.sizet) query_proc = RF_StringWrapper(conv_sequence(query)) cdef RF_ScorerFunc scorer_func @@ -1402,8 +1406,8 @@ def extract_iter(query, choices, *, scorer=WRatio, processor=None, score_cutoff= float64 """ cdef RF_String proc_str - cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, &scorer_flags) - cdef double c_score_hint = get_score_cutoff_f64(score_hint, &scorer_flags) + cdef double c_score_cutoff = get_score_cutoff_f64(score_cutoff, scorer_flags.worst_score.f64, scorer_flags.optimal_score.f64) + cdef double c_score_hint = get_score_cutoff_f64(score_hint, scorer_flags.worst_score.f64, scorer_flags.optimal_score.f64) query_proc = RF_StringWrapper(conv_sequence(query)) cdef RF_ScorerFunc scorer_func @@ -1445,8 +1449,8 @@ def extract_iter(query, choices, *, scorer=WRatio, processor=None, score_cutoff= int64_t """ cdef RF_String proc_str - cdef int64_t c_score_cutoff = get_score_cutoff_i64(score_cutoff, &scorer_flags) - cdef int64_t c_score_hint = get_score_cutoff_i64(score_hint, &scorer_flags) + cdef int64_t c_score_cutoff = get_score_cutoff_i64(score_cutoff, scorer_flags.worst_score.i64, scorer_flags.optimal_score.i64) + cdef int64_t c_score_hint = get_score_cutoff_i64(score_hint, scorer_flags.worst_score.i64, scorer_flags.optimal_score.i64) query_proc = RF_StringWrapper(conv_sequence(query)) cdef RF_ScorerFunc scorer_func @@ -1488,8 +1492,8 @@ def extract_iter(query, choices, *, scorer=WRatio, processor=None, score_cutoff= size_t """ cdef RF_String proc_str - cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, &scorer_flags) - cdef size_t c_score_hint = get_score_cutoff_size_t(score_hint, &scorer_flags) + cdef size_t c_score_cutoff = get_score_cutoff_size_t(score_cutoff, scorer_flags.worst_score.sizet, scorer_flags.optimal_score.sizet) + cdef size_t c_score_hint = get_score_cutoff_size_t(score_hint, scorer_flags.worst_score.sizet, scorer_flags.optimal_score.sizet) query_proc = RF_StringWrapper(conv_sequence(query)) cdef RF_ScorerFunc scorer_func @@ -1787,8 +1791,8 @@ cdef Matrix cdist_two_lists( scorer_kwargs, scorer, proc_queries, proc_choices, dtype_to_type_num_f64(dtype), c_workers, - get_score_cutoff_f64(score_cutoff, scorer_flags), - get_score_cutoff_f64(score_hint, scorer_flags), + get_score_cutoff_f64(score_cutoff, scorer_flags.worst_score.f64, scorer_flags.optimal_score.f64), + get_score_cutoff_f64(score_hint, scorer_flags.worst_score.f64, scorer_flags.optimal_score.f64), score_multiplier, scorer_flags.worst_score.f64, ) @@ -1798,8 +1802,8 @@ cdef Matrix cdist_two_lists( scorer_kwargs, scorer, proc_queries, proc_choices, dtype_to_type_num_size_t(dtype), c_workers, - get_score_cutoff_size_t(score_cutoff, scorer_flags), - get_score_cutoff_size_t(score_hint, scorer_flags), + get_score_cutoff_size_t(score_cutoff, scorer_flags.worst_score.sizet, scorer_flags.optimal_score.sizet), + get_score_cutoff_size_t(score_hint, scorer_flags.worst_score.sizet, scorer_flags.optimal_score.sizet), score_multiplier, scorer_flags.worst_score.sizet ) @@ -1809,8 +1813,8 @@ cdef Matrix cdist_two_lists( scorer_kwargs, scorer, proc_queries, proc_choices, dtype_to_type_num_i64(dtype), c_workers, - get_score_cutoff_i64(score_cutoff, scorer_flags), - get_score_cutoff_i64(score_hint, scorer_flags), + get_score_cutoff_i64(score_cutoff, scorer_flags.worst_score.i64, scorer_flags.optimal_score.i64), + get_score_cutoff_i64(score_hint, scorer_flags.worst_score.i64, scorer_flags.optimal_score.i64), score_multiplier, scorer_flags.worst_score.i64 ) @@ -1841,8 +1845,8 @@ cdef Matrix cdist_single_list( scorer_kwargs, scorer, proc_queries, dtype_to_type_num_f64(dtype), c_workers, - get_score_cutoff_f64(score_cutoff, scorer_flags), - get_score_cutoff_f64(score_hint, scorer_flags), + get_score_cutoff_f64(score_cutoff, scorer_flags.worst_score.f64, scorer_flags.optimal_score.f64), + get_score_cutoff_f64(score_hint, scorer_flags.worst_score.f64, scorer_flags.optimal_score.f64), score_multiplier, scorer_flags.worst_score.f64 ) @@ -1852,8 +1856,8 @@ cdef Matrix cdist_single_list( scorer_kwargs, scorer, proc_queries, dtype_to_type_num_size_t(dtype), c_workers, - get_score_cutoff_size_t(score_cutoff, scorer_flags), - get_score_cutoff_size_t(score_hint, scorer_flags), + get_score_cutoff_size_t(score_cutoff, scorer_flags.worst_score.sizet, scorer_flags.optimal_score.sizet), + get_score_cutoff_size_t(score_hint, scorer_flags.worst_score.sizet, scorer_flags.optimal_score.sizet), score_multiplier, scorer_flags.worst_score.sizet ) @@ -1863,8 +1867,8 @@ cdef Matrix cdist_single_list( scorer_kwargs, scorer, proc_queries, dtype_to_type_num_i64(dtype), c_workers, - get_score_cutoff_i64(score_cutoff, scorer_flags), - get_score_cutoff_i64(score_hint, scorer_flags), + get_score_cutoff_i64(score_cutoff, scorer_flags.worst_score.i64, scorer_flags.optimal_score.i64), + get_score_cutoff_i64(score_hint, scorer_flags.worst_score.i64, scorer_flags.optimal_score.i64), score_multiplier, scorer_flags.worst_score.i64 )