Skip to content

Commit e9794ca

Browse files
committed
Added: working document_id overrides for TinyDB (#15)
1 parent 8d29c66 commit e9794ca

File tree

1 file changed

+135
-4
lines changed

1 file changed

+135
-4
lines changed

libreforms_fastapi/utils/document_database.py

+135-4
Original file line numberDiff line numberDiff line change
@@ -2,11 +2,141 @@
22
from bson import ObjectId
33
from datetime import datetime
44
from zoneinfo import ZoneInfo
5-
from tinydb import TinyDB, Query
5+
from tinydb import (
6+
TinyDB,
7+
Query,
8+
Storage
9+
)
10+
from tinydb.table import (
11+
Table as TinyTable,
12+
Document
13+
)
14+
15+
from typing import (
16+
Mapping,
17+
Union,
18+
Iterable,
19+
List,
20+
)
621
from abc import ABC, abstractmethod
722

823
from libreforms_fastapi.utils.logging import set_logger
924

25+
# We want to modify TinyDB to use string representations of bson
26+
# ObjectIDs. As such, we will need to modify some underlying behavior,
27+
# see https://github.com/signebedi/libreforms-fastapi/issues/15.
28+
class CustomTable(TinyTable):
    """TinyDB table that keys documents by string IDs.

    Document IDs default to the string form of a freshly generated BSON
    ``ObjectId``, but callers may supply their own IDs on insert. See
    https://github.com/signebedi/libreforms-fastapi/issues/15.
    """

    document_id_class = str  # Use string IDs instead of integers

    def _get_next_id(self, document_id: Union[str, bool, None] = None) -> str:
        """
        Return the document ID to use for the next inserted document.

        :param document_id: an ID supplied by the caller; any falsy value
            (``None``, ``False``, ``''``) means "generate one for me"
        :returns: ``document_id`` if given, else a new BSON ObjectId string
        """
        # The ObjectId must be created inside the body: a default of
        # ``str(ObjectId())`` in the signature is evaluated only once, when
        # the function is defined, so every call would share the same ID.
        if document_id:
            return document_id
        return str(ObjectId())

    def insert(self, document: Mapping, document_id: Union[str, bool] = False) -> str:
        """
        Insert a new document into the table.

        :param document: the document to insert
        :param document_id: optional ID to store the document under; when
            falsy a new BSON ObjectId string is generated. Ignored when
            ``document`` is a ``Document`` instance, whose own ``doc_id``
            is used instead.
        :returns: the inserted document's ID
        :raises ValueError: if ``document`` is not a ``Mapping`` or a
            document with the same ID already exists
        """

        # Make sure the document implements the ``Mapping`` interface
        if not isinstance(document, Mapping):
            raise ValueError('Document is not a Mapping')

        # First, we get the document ID for the new document
        if isinstance(document, Document):
            # For a `Document` object we use the specified ID
            doc_id = document.doc_id

            # We also reset the stored next ID so the next insert won't
            # re-use document IDs by accident when storing an old value
            self._next_id = None
        else:
            # In all other cases we use the supplied ID or generate one
            doc_id = self._get_next_id(document_id=document_id)

        # Now, we update the table and add the document
        def updater(table: dict):
            if doc_id in table:
                raise ValueError(f'Document with ID {str(doc_id)} '
                                 f'already exists')

            # By calling ``dict(document)`` we convert the data we got to a
            # ``dict`` instance even if it was a different class that
            # implemented the ``Mapping`` interface
            table[doc_id] = dict(document)

        # ``_update_table`` is inherited from TinyDB's ``Table``
        self._update_table(updater)

        return doc_id

    def insert_multiple(self, documents: Iterable[Mapping], document_ids: Union[List, bool] = False) -> List[str]:
        """
        Insert multiple documents into the table.

        :param documents: an Iterable of documents to insert
        :param document_ids: optional list of IDs, one per document and in
            the same order; when falsy a new BSON ObjectId string is
            generated for each document. Entries are ignored for
            ``Document`` instances, which keep their own ``doc_id``.
        :returns: a list containing the inserted documents' IDs
        :raises ValueError: if ``document_ids`` is given with a different
            length than ``documents``, if any document is not a ``Mapping``,
            or if any resulting ID already exists in the table
        """
        # Materialize once so that generators and other one-shot iterables
        # can be measured with ``len`` and then iterated.
        documents = list(documents)

        if document_ids and len(document_ids) != len(documents):
            raise ValueError("When inserting multiple and passing your own document_ids, "
                             "the list must be the same length as the document list")

        doc_ids = []

        def updater(table: dict):
            for i, document in enumerate(documents):

                # Make sure the document implements the ``Mapping`` interface
                if not isinstance(document, Mapping):
                    raise ValueError('Document is not a Mapping')

                if isinstance(document, Document):
                    # A `Document` object carries its own ID
                    doc_id = document.doc_id
                else:
                    # Use the caller-supplied ID for this position when
                    # available, otherwise generate a fresh one
                    supplied_id = document_ids[i] if document_ids else None
                    doc_id = self._get_next_id(document_id=supplied_id)

                # Never silently overwrite an existing document, whichever
                # way the ID was obtained (this also catches duplicates
                # inside ``document_ids`` itself)
                if doc_id in table:
                    raise ValueError(
                        f'Document with ID {str(doc_id)} '
                        f'already exists'
                    )

                # Store the doc_id, so we can return all document IDs
                # later, then save the document under that doc_id
                doc_ids.append(doc_id)
                table[doc_id] = dict(document)

        # ``_update_table`` is inherited from TinyDB's ``Table``
        self._update_table(updater)

        return doc_ids
133+
134+
# Subclass TinyDB and override the table_class attribute with our new logic
135+
class CustomTinyDB(TinyDB):
    """TinyDB subclass whose ``table_class`` is overridden with ``CustomTable``."""

    # Point TinyDB at our table implementation instead of its default one
    table_class = CustomTable
137+
138+
139+
10140
class CollectionDoesNotExist(Exception):
11141
"""Exception raised when attempting to access a collection that does not exist."""
12142
def __init__(self, form_name):
@@ -133,7 +263,8 @@ def _initialize_database_collections(self):
133263
# Initialize databases
134264
self.databases = {}
135265
for form_name in self.config.keys():
136-
self.databases[form_name] = TinyDB(self._get_db_path(form_name))
266+
# self.databases[form_name] = TinyDB(self._get_db_path(form_name))
267+
self.databases[form_name] = CustomTinyDB(self._get_db_path(form_name))
137268

138269
def _get_db_path(self, form_name:str):
139270
"""Constructs a file path for the given form's database."""
@@ -160,7 +291,7 @@ def create_document(self, form_name:str, json_data, metadata={}):
160291
data_dict = {
161292
"data": convert_data_to_dict,
162293
"metadata": {
163-
self.document_id_field: document_id,
294+
# self.document_id_field: document_id,
164295
self.is_deleted_field: metadata.get(self.is_deleted_field, False),
165296
self.timezone_field: metadata.get(self.timezone_field, self.timezone.key),
166297
self.created_at_field: metadata.get(self.created_at_field, current_timestamp.isoformat()),
@@ -176,7 +307,7 @@ def create_document(self, form_name:str, json_data, metadata={}):
176307
}
177308

178309
# document_id = self.databases[form_name].insert(data_dict)
179-
_ = self.databases[form_name].insert(data_dict)
310+
_ = self.databases[form_name].insert(data_dict, document_id=document_id)
180311

181312
if self.use_logger:
182313
self.logger.info(f"Inserted document for {form_name} with document_id {document_id}")

0 commit comments

Comments
 (0)