Skip to content

Commit

Permalink
custom mapping fix for issue deepset-ai#293
Browse files Browse the repository at this point in the history
  • Loading branch information
karimjp committed Aug 7, 2020
1 parent efe97e3 commit 277bada
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 8 deletions.
10 changes: 3 additions & 7 deletions haystack/database/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,28 +42,24 @@ def to_dict(self):
return self.__dict__

@classmethod
def from_dict(cls, dict, field_map=None):
    """
    Build an instance from a flat dictionary.

    Keys that are constructor arguments ("text", "id", "query_score", "question",
    "meta", "embedding") are passed through unchanged. Keys listed in `field_map`
    are renamed to the mapped constructor argument. All remaining keys are
    collected under "meta".

    :param dict: Source dictionary. Neither it nor a "meta" dict it contains is modified.
    :param field_map: Optional mapping from custom key names in `dict` to constructor
                      argument names, e.g. {"context": "text"}. Defaults to no renaming.
    :return: Instance created via `cls(**kwargs)`.
    """
    # A `None` default avoids sharing one mutable dict object between calls.
    if field_map is None:
        field_map = {}
    init_args = ("text", "id", "query_score", "question", "meta", "embedding")

    # Work on a private copy of "meta" so that adding the extra fields below
    # never leaks into the caller's dictionary. A missing or None "meta" is
    # treated as empty. (`dict` is the parameter here, not the builtin, hence
    # the literal-based copy.)
    source_meta = dict.get("meta")
    meta = {} if source_meta is None else {**source_meta}

    kwargs = {}
    for key, value in dict.items():
        if key in init_args:
            # Constructor arguments win over `field_map` entries of the same name.
            kwargs[key] = value
        elif key in field_map:
            kwargs[field_map[key]] = value
        else:
            # Unknown top-level fields are preserved as metadata.
            meta[key] = value
    kwargs["meta"] = meta

    return cls(**kwargs)


Expand Down
10 changes: 9 additions & 1 deletion haystack/database/elasticsearch.py
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,14 @@ def _create_label_index(self, index_name):
}
self.client.indices.create(index=index_name, ignore=400, body=mapping)

# TODO: Add flexibility to define other non-meta and meta fields expected by the Document class
def _create_document_field_map(self) -> Dict:
    """
    Return the mapping from this store's configured field names to Document argument names.

    The configured text field maps to "text", the embedding field to "embedding",
    and the FAQ question field to "question".
    """
    # Fall back to the literal "question" key when no FAQ question field is configured.
    question_field = self.faq_question_field or "question"

    field_map = {}
    field_map[self.text_field] = "text"
    field_map[self.embedding_field] = "embedding"
    field_map[question_field] = "question"
    return field_map

def get_document_by_id(self, id: str, index=None) -> Optional[Document]:
if index is None:
index = self.index
Expand Down Expand Up @@ -161,7 +169,7 @@ def write_documents(self, documents: Union[List[dict], List[Document]], index: O
index = self.index

# Make sure we comply with the Document class format
documents_objects = [Document.from_dict(d) if isinstance(d, dict) else d for d in documents]
documents_objects = [Document.from_dict(d, self._create_document_field_map()) if isinstance(d, dict) else d for d in documents]

documents_to_index = []
for doc in documents_objects:
Expand Down

0 comments on commit 277bada

Please sign in to comment.