Skip to content

Commit

Permalink
Revert "🔧 fix: improve error handling, type hints, and code structure"
Browse files (browse the repository at this point in the history)
This reverts commit bb4f82d.
  • Loading branch information
pdodds committed Feb 20, 2025
1 parent 99a9613 commit 48a61d2
Show file tree
Hide file tree
Showing 3 changed files with 543 additions and 1,324 deletions.
32 changes: 14 additions & 18 deletions kodexa/model/persistence.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import tempfile
import time
import uuid
from typing import List, Optional, Any, Dict, Union
from typing import List, Optional

import msgpack

Expand Down Expand Up @@ -74,7 +74,7 @@ class SqliteDocumentPersistence(object):
The Sqlite persistence engine to support large scale documents (part of the V4 Kodexa Document Architecture)
"""

def __init__(self, document: Document, filename: Optional[str] = None, delete_on_close: bool = False, inmemory: bool = False, persistence_manager: Optional[Any] = None):
def __init__(self, document: Document, filename: str = None, delete_on_close=False, inmemory=False, persistence_manager=None):
self.document = document

self.node_types = {}
Expand Down Expand Up @@ -244,9 +244,9 @@ def get_content_nodes(self, node_type, parent_node: ContentNode, include_childre
parent_node(id, pid, nt, idx, path) AS (
VALUES (?,?,?,?,?)
UNION ALL
SELECT cns.id, cns.pid, cns.nt, cns.idx, parent_node.path || substr('0000000' || cns.idx, -6, 6)
SELECT cns.id, cns.pid, cns.nt, cns.idx, parent_node.path || substr('0000000' || cns.idx, -6, 6)
FROM cn cns, parent_node
WHERE parent_node.id = cns.pid
WHERE parent_node.id = cns.pid
)
SELECT id, pid, nt, idx, path from parent_node order by path
"""
Expand Down Expand Up @@ -276,9 +276,9 @@ def get_content_nodes(self, node_type, parent_node: ContentNode, include_childre
parent_node(id, pid, nt, idx, path) AS (
VALUES (?,?,?,?,?)
UNION ALL
SELECT cns.id, cns.pid, cns.nt, cns.idx, parent_node.path || substr('000000' || cns.idx, -6, 6)
SELECT cns.id, cns.pid, cns.nt, cns.idx, parent_node.path || substr('000000' || cns.idx, -6, 6)
FROM cn cns, parent_node
WHERE parent_node.id = cns.pid
WHERE parent_node.id = cns.pid
)
SELECT id, pid, nt, idx, path from parent_node where nt=? order by path
"""
Expand Down Expand Up @@ -1245,7 +1245,7 @@ def __ensure_ed_table_exists(self):
"""
# First check if the old table exists and has key column
old_table = self.cursor.execute("""
SELECT name FROM sqlite_master
SELECT name FROM sqlite_master
WHERE type='table' AND name='ed'
""").fetchone()

Expand Down Expand Up @@ -1457,10 +1457,10 @@ class PersistenceManager(object):
This is implemented to allow us to work with large complex documents in a performance centered way.
"""

def __init__(self, document: Document, filename: Optional[str] = None, delete_on_close: bool = False, inmemory: bool = False):
def __init__(self, document: Document, filename: str = None, delete_on_close=False, inmemory=False):
self.document = document
self.node_cache = SimpleObjectCache()
self.child_cache: Dict[str, List[str]] = {}
self.child_cache = {}
self.child_id_cache = {}
self.feature_cache = {}
self.content_parts_cache = {}
Expand Down Expand Up @@ -1541,10 +1541,7 @@ def get_node_by_uuid(self, uuid: int) -> ContentNode:
self.node_cache.add_obj(node)
return node

node = self.node_cache.get_obj(uuid)
if node is None:
raise ValueError(f"Node with UUID {uuid} not found in cache")
return node
return self.node_cache.get_obj(uuid) # return the cached version

def add_model_insight(self, model_insight: ModelInsight):
"""
Expand Down Expand Up @@ -1882,11 +1879,10 @@ def remove_content_node(self, node):
all_ids = self._underlying_persistence.remove_content_node(node)

# remove all the ids from the cache
if all_ids is not None:
for id in all_ids:
tmp_node = self.node_cache.get_obj(id)
if tmp_node is not None:
self.node_cache.remove_obj(tmp_node)
for id in all_ids:
tmp_node = self.node_cache.get_obj(id)
if tmp_node is not None:
self.node_cache.remove_obj(tmp_node)
self.node_cache.dirty_objs.remove(id) if id in self.node_cache.dirty_objs else None

def get_children(self, node):
Expand Down
4 changes: 0 additions & 4 deletions kodexa/pipeline/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,8 +344,6 @@ def execute(self, context: PipelineContext, document: Document):
result_document = self.step.process(document)
else:
result_document = self.step.process(document, context)

# result_document = self.step.process(document, context)
else:
logger.info(f"Starting step function {self.step.__name__}")

Expand All @@ -354,8 +352,6 @@ def execute(self, context: PipelineContext, document: Document):
else:
result_document = self.step(document, context)

# result_document = self.step(document, context)

end = time.perf_counter()
logger.info(f"Step completed (f{end - start:0.4f}s)")

Expand Down
Loading

0 comments on commit 48a61d2

Please sign in to comment.