Skip to content

Commit

Permalink
Merge pull request #472 from jina-ai/refactor-ranker-439
Browse files Browse the repository at this point in the history
refactor: make BaseRanker abstract
  • Loading branch information
hanxiao authored Jun 1, 2020
2 parents ef9ead5 + 9de7dfd commit 35aa9e5
Show file tree
Hide file tree
Showing 9 changed files with 29 additions and 22 deletions.
8 changes: 4 additions & 4 deletions docs/chapters/all_driver.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@ This version of Jina includes 35 Drivers.
- `SegmentDriver`
- `BaseEncodeDriver`
- `EncodeDriver`
- `BaseScoreDriver`
- `Chunk2DocScoreDriver`
- `BaseRankDriver`
- `Chunk2DocRankDriver`
- `BaseSearchDriver`
- `KVSearchDriver`
- `ChunkKVSearchDriver`
Expand Down Expand Up @@ -48,9 +48,9 @@ This version of Jina includes 35 Drivers.
| `BaseEncodeDriver` | `jina.drivers.index` |
| `BaseExecutableDriver` | `jina.drivers.control` |
| `BaseIndexDriver` | `jina.drivers.index` |
| `BaseScoreDriver` | `jina.drivers.index` |
| `BaseRankDriver` | `jina.drivers.score` |
| `BaseSearchDriver` | `jina.drivers.index` |
| `Chunk2DocScoreDriver` | `jina.drivers.score` |
| `Chunk2DocRankDriver` | `jina.drivers.score` |
| `ChunkCraftDriver` | `jina.drivers.craft` |
| `ChunkKVIndexDriver` | `jina.drivers.index` |
| `ChunkKVSearchDriver` | `jina.drivers.search` |
Expand Down
4 changes: 2 additions & 2 deletions docs/chapters/all_exec.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ This version of Jina includes 80 Executors.
- `BaseAudioEncoder`
- `BaseImageEncoder`
- `BaseTransformerEncoder`
- `BaseRanker`
- `Chunk2DocRanker`
- `MaxRanker`
- `MinRanker`
- `TfIdfRanker`
Expand Down Expand Up @@ -117,7 +117,7 @@ This version of Jina includes 80 Executors.
| `BasePaddleExecutor` | `jina.executors.frameworks` |
| `BasePaddlehubEncoder` | |
| `BasePbIndexer` | `jina.executors.indexers` |
| `BaseRanker` | `jina.executors.encoders` |
| `Chunk2DocRanker` | `jina.executors.rankers` |
| `BaseSegmenter` | `jina.executors.crafters` |
| `BaseTFEncoder` | |
| `BaseTFExecutor` | |
Expand Down
4 changes: 2 additions & 2 deletions docs/chapters/extend/executor.rst
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ If your algorithm is so unique and does not fit any of the categories below, y

You want to segment the documents into chunks.

* :class:`jina.executors.BaseRanker`
* :class:`jina.executors.Chunk2DocRanker`

You want to segment/transform the documents and chunks.

Expand Down Expand Up @@ -155,7 +155,7 @@ Each :class:`Executor` has a core method, which defines the algorithmic behavior
+----------------------+----------------------------+
| :class:`BaseIndexer` | :meth:`add`, :meth:`query` |
+----------------------+----------------------------+
| :class:`BaseRanker` | :meth:`score` |
| :class:`Chunk2DocRanker` | :meth:`score` |
+----------------------+----------------------------+


Expand Down
4 changes: 2 additions & 2 deletions jina/drivers/score.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,14 @@
from .helper import pb_obj2dict


class BaseScoreDriver(BaseExecutableDriver):
class BaseRankDriver(BaseExecutableDriver):
"""Drivers inherited from this Driver will bind :meth:`score` by default """

def __init__(self, executor: str = None, method: str = 'score', *args, **kwargs):
super().__init__(executor, method, *args, **kwargs)


class Chunk2DocScoreDriver(BaseScoreDriver):
class Chunk2DocRankDriver(BaseRankDriver):
"""Extract chunk-level score and use the executor to compute the doc-level score
"""
Expand Down
17 changes: 12 additions & 5 deletions jina/executors/rankers/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,18 @@


class BaseRanker(BaseExecutor):
"""The base class for a `Ranker`. A `Ranker` translates the chunk-wise score (distance) to the doc-wise score.
"""The base class for a `Ranker`"""

In the query-time, :class:`BaseRanker` is an almost-always required component.
def score(self, *args, **kwargs):
raise NotImplementedError


class Chunk2DocRanker(BaseRanker):
""" A :class:`Chunk2DocRanker` translates the chunk-wise score (distance) to the doc-wise score.
At query time, :class:`Chunk2DocRanker` is an almost-always required component,
because in the end we want to retrieve the top-k documents for a given query-document, not the top-k chunks of
given query-chunks. The purpose of :class:`BaseRanker` is to aggregate the already existed top-k chunks
given query-chunks. The purpose of :class:`Chunk2DocRanker` is to aggregate the already-existing top-k chunks
into documents.
The key function here is :func:`score`.
Expand Down Expand Up @@ -89,7 +96,7 @@ def get_doc_id(self, match_with_same_doc_id):
return match_with_same_doc_id[0, self.col_doc_id]


class MaxRanker(BaseRanker):
class MaxRanker(Chunk2DocRanker):
"""
:class:`MaxRanker` calculates the score of the matched doc from the matched chunks. For each matched doc, the score
is the maximal score from all the matched chunks belonging to this doc.
Expand All @@ -101,7 +108,7 @@ def _get_score(self, match_idx, query_chunk_meta, match_chunk_meta, *args, **kwa
return self.get_doc_id(match_idx), match_idx[:, self.col_score].max()


class MinRanker(BaseRanker):
class MinRanker(Chunk2DocRanker):
"""
:class:`MinRanker` calculates the score of the matched doc from the matched chunks. For each matched doc, the score
is `1 / (1 + s)`, where `s` is the minimal score from all the matched chunks belonging to this doc.
Expand Down
4 changes: 2 additions & 2 deletions jina/executors/rankers/bi_match.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,10 @@

import numpy as np

from . import BaseRanker
from . import Chunk2DocRanker


class BiMatchRanker(BaseRanker):
class BiMatchRanker(Chunk2DocRanker):
"""The :class:`BiMatchRanker` counts the best chunk-hit from both query and doc perspective.
.. warning:: Here we suppose that the smaller chunk score means the more similar.
Expand Down
4 changes: 2 additions & 2 deletions jina/executors/rankers/tfidf.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@

import numpy as np

from . import BaseRanker
from . import Chunk2DocRanker


class TfIdfRanker(BaseRanker):
class TfIdfRanker(Chunk2DocRanker):
"""
:class:`TfIdfRanker` calculates the weighted score from the matched chunks. The weights of each chunk is based on
the tf-idf algorithm. Each query chunk is considered as a ``term``, and the frequency of the query chunk in a
Expand Down
2 changes: 1 addition & 1 deletion jina/resources/executors.requests.BaseRanker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@ on:
- !ControlReqDriver {}
SearchRequest:
- !ChunkPruneDriver {} # embedding info on chunk is used and no need anymore
- !Chunk2DocScoreDriver {}
- !Chunk2DocRankDriver {}
- !DocPruneDriver {} # no need on chunk-level info anymore
4 changes: 2 additions & 2 deletions tests/test_driver_yaml.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def test_resource_executor(self):
def test_multiple_executor(self):
from jina.executors.encoders import BaseEncoder
from jina.executors.indexers import BaseIndexer
from jina.executors.rankers import BaseRanker
from jina.executors.rankers import Chunk2DocRanker
from jina.executors.crafters import BaseDocCrafter
from jina.executors.crafters import BaseChunkCrafter

Expand All @@ -98,7 +98,7 @@ class D2(BaseIndexer):
d2 = D2('dummy.bin')
self.assertEqual(len(d2._drivers), 1)

class D3(BaseRanker):
class D3(Chunk2DocRanker):
pass

d3 = D3()
Expand Down

0 comments on commit 35aa9e5

Please sign in to comment.