Merge pull request #472 from jina-ai/refactor-ranker-439

refactor: make BaseRanker abstract
jina-ai · Jun 1, 2020 · 35aa9e5 · 35aa9e5
2 parents ef9ead5 + 9de7dfd
commit 35aa9e5
Show file tree

Hide file tree

Showing 9 changed files with 29 additions and 22 deletions.
diff --git a/docs/chapters/all_driver.md b/docs/chapters/all_driver.md
@@ -19,8 +19,8 @@ This version of Jina includes 35 Drivers.
          - `SegmentDriver`
       - `BaseEncodeDriver`
          - `EncodeDriver`
-      - `BaseScoreDriver`
-         - `Chunk2DocScoreDriver`
+      - `BaseRankDriver`
+         - `Chunk2DocRankDriver`
       - `BaseSearchDriver`
          - `KVSearchDriver`
             - `ChunkKVSearchDriver`
@@ -48,9 +48,9 @@ This version of Jina includes 35 Drivers.
 | `BaseEncodeDriver` | `jina.drivers.index` |
 | `BaseExecutableDriver` | `jina.drivers.control` |
 | `BaseIndexDriver` | `jina.drivers.index` |
-| `BaseScoreDriver` | `jina.drivers.index` |
+| `BaseRankDriver` | `jina.drivers.score` |
 | `BaseSearchDriver` | `jina.drivers.index` |
-| `Chunk2DocScoreDriver` | `jina.drivers.score` |
+| `Chunk2DocRankDriver` | `jina.drivers.score` |
 | `ChunkCraftDriver` | `jina.drivers.craft` |
 | `ChunkKVIndexDriver` | `jina.drivers.index` |
 | `ChunkKVSearchDriver` | `jina.drivers.search` |

diff --git a/docs/chapters/all_exec.md b/docs/chapters/all_exec.md
@@ -74,7 +74,7 @@ This version of Jina includes 80 Executors.
          - `BaseAudioEncoder`
          - `BaseImageEncoder`
       - `BaseTransformerEncoder`
-   - `BaseRanker`
+   - `Chunk2DocRanker`
       - `MaxRanker`
       - `MinRanker`
       - `TfIdfRanker`
@@ -117,7 +117,7 @@ This version of Jina includes 80 Executors.
 | `BasePaddleExecutor` | `jina.executors.frameworks` |
 | `BasePaddlehubEncoder` |   |
 | `BasePbIndexer` | `jina.executors.indexers` |
-| `BaseRanker` | `jina.executors.encoders` |
+| `Chunk2DocRanker` | `jina.executors.rankers` |
 | `BaseSegmenter` | `jina.executors.crafters` |
 | `BaseTFEncoder` |   |
 | `BaseTFExecutor` |   |

diff --git a/docs/chapters/extend/executor.rst b/docs/chapters/extend/executor.rst
@@ -64,7 +64,7 @@ If your algorithm is so unique and does not fit any of the categories below, y
 
           You want to segment the documents into chunks.
 
-    * :class:`jina.executors.BaseRanker`
+    * :class:`jina.executors.Chunk2DocRanker`
 
       You want to aggregate the chunk-level scores into document-level scores.
 
@@ -155,7 +155,7 @@ Each :class:`Executor` has a core method, which defines the algorithmic behavior
 +----------------------+----------------------------+
 | :class:`BaseIndexer` | :meth:`add`, :meth:`query` |
 +----------------------+----------------------------+
-| :class:`BaseRanker`  | :meth:`score`              |
+| :class:`Chunk2DocRanker`  | :meth:`score`              |
 +----------------------+----------------------------+
 
 

diff --git a/jina/drivers/score.py b/jina/drivers/score.py
@@ -8,14 +8,14 @@
 from .helper import pb_obj2dict
 
 
-class BaseScoreDriver(BaseExecutableDriver):
+class BaseRankDriver(BaseExecutableDriver):
     """Drivers inherited from this Driver will bind :meth:`score` by default """
 
     def __init__(self, executor: str = None, method: str = 'score', *args, **kwargs):
         super().__init__(executor, method, *args, **kwargs)
 
 
-class Chunk2DocScoreDriver(BaseScoreDriver):
+class Chunk2DocRankDriver(BaseRankDriver):
     """Extract chunk-level score and use the executor to compute the doc-level score
 
     """

diff --git a/jina/executors/rankers/__init__.py b/jina/executors/rankers/__init__.py
@@ -9,11 +9,18 @@
 
 
 class BaseRanker(BaseExecutor):
-    """The base class for a `Ranker`. A `Ranker` translates the chunk-wise score (distance) to the doc-wise score.
+    """The base class for a `Ranker`"""
 
-    In the query-time, :class:`BaseRanker` is an almost-always required component.
+    def score(self, *args, **kwargs):
+        raise NotImplementedError
+
+
+class Chunk2DocRanker(BaseRanker):
+    """ A :class:`Chunk2DocRanker` translates the chunk-wise score (distance) to the doc-wise score.
+
+    In the query-time, :class:`Chunk2DocRanker` is an almost-always required component.
     Because in the end we want to retrieve top-k documents of given query-document not top-k chunks of
-    given query-chunks. The purpose of :class:`BaseRanker` is to aggregate the already existed top-k chunks
+    given query-chunks. The purpose of :class:`Chunk2DocRanker` is to aggregate the already existed top-k chunks
     into documents.
 
     The key function here is :func:`score`.
@@ -89,7 +96,7 @@ def get_doc_id(self, match_with_same_doc_id):
         return match_with_same_doc_id[0, self.col_doc_id]
 
 
-class MaxRanker(BaseRanker):
+class MaxRanker(Chunk2DocRanker):
     """
     :class:`MaxRanker` calculates the score of the matched doc from the matched chunks. For each matched doc, the score
         is the maximal score from all the matched chunks belonging to this doc.
@@ -101,7 +108,7 @@ def _get_score(self, match_idx, query_chunk_meta, match_chunk_meta, *args, **kwa
         return self.get_doc_id(match_idx), match_idx[:, self.col_score].max()
 
 
-class MinRanker(BaseRanker):
+class MinRanker(Chunk2DocRanker):
     """
     :class:`MinRanker` calculates the score of the matched doc from the matched chunks. For each matched doc, the score
         is `1 / (1 + s)`, where `s` is the minimal score from all the matched chunks belonging to this doc.

diff --git a/jina/executors/rankers/bi_match.py b/jina/executors/rankers/bi_match.py
@@ -3,10 +3,10 @@
 
 import numpy as np
 
-from . import BaseRanker
+from . import Chunk2DocRanker
 
 
-class BiMatchRanker(BaseRanker):
+class BiMatchRanker(Chunk2DocRanker):
     """The :class:`BiMatchRanker` counts the best chunk-hit from both query and doc perspective.
 
     .. warning:: Here we suppose that the smaller chunk score means the more similar.

diff --git a/jina/executors/rankers/tfidf.py b/jina/executors/rankers/tfidf.py
@@ -5,10 +5,10 @@
 
 import numpy as np
 
-from . import BaseRanker
+from . import Chunk2DocRanker
 
 
-class TfIdfRanker(BaseRanker):
+class TfIdfRanker(Chunk2DocRanker):
     """
     :class:`TfIdfRanker` calculates the weighted score from the matched chunks. The weights of each chunk is based on
         the tf-idf algorithm. Each query chunk is considered as a ``term``, and the frequency of the query chunk in a

diff --git a/jina/resources/executors.requests.BaseRanker.yml b/jina/resources/executors.requests.BaseRanker.yml
@@ -3,5 +3,5 @@ on:
     - !ControlReqDriver {}
   SearchRequest:
     - !ChunkPruneDriver {}  # embedding info on chunk is used and no need anymore
-    - !Chunk2DocScoreDriver {}
+    - !Chunk2DocRankDriver {}
     - !DocPruneDriver {}  # no need on chunk-level info anymore
diff --git a/tests/test_driver_yaml.py b/tests/test_driver_yaml.py
@@ -82,7 +82,7 @@ def test_resource_executor(self):
     def test_multiple_executor(self):
         from jina.executors.encoders import BaseEncoder
         from jina.executors.indexers import BaseIndexer
-        from jina.executors.rankers import BaseRanker
+        from jina.executors.rankers import Chunk2DocRanker
         from jina.executors.crafters import BaseDocCrafter
         from jina.executors.crafters import BaseChunkCrafter
 
@@ -98,7 +98,7 @@ class D2(BaseIndexer):
         d2 = D2('dummy.bin')
         self.assertEqual(len(d2._drivers), 1)
 
-        class D3(BaseRanker):
+        class D3(Chunk2DocRanker):
             pass
 
         d3 = D3()