Skip to content

Commit

Permalink
Merge pull request #169 from CornellNLP/storage-abstraction
Browse files Browse the repository at this point in the history
Storage abstraction
  • Loading branch information
jpwchang authored Aug 27, 2022
2 parents 11fe345 + 26fb5c0 commit f610e67
Show file tree
Hide file tree
Showing 10 changed files with 617 additions and 126 deletions.
1 change: 0 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,3 @@ repos:
rev: 22.3.0
hooks:
- id: black
language_version: python3.9
55 changes: 46 additions & 9 deletions convokit/model/convoKitMeta.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
from collections.abc import MutableMapping
except:
from collections import MutableMapping
from numpy import isin
from convokit.util import warn
from .convoKitIndex import ConvoKitIndex
import json
from typing import Union

# See reference: https://stackoverflow.com/questions/7760916/correct-usage-of-a-getter-setter-for-dictionary-values

Expand All @@ -14,12 +16,30 @@ class ConvoKitMeta(MutableMapping, dict):
ConvoKitMeta is a dictlike object that stores the metadata attributes of a corpus component
"""

def __init__(self, convokit_index, obj_type):
def __init__(self, owner, convokit_index, obj_type):
self.owner = owner # Corpus or CorpusComponent
self.index: ConvoKitIndex = convokit_index
self.obj_type = obj_type

self._get_storage().initialize_data_for_component("meta", self.storage_key)

@property
def storage_key(self) -> str:
    """Key under which this object's metadata is addressed in storage.

    Built from the component type (``self.obj_type``) and the owner's id,
    joined by an underscore.
    """
    component_kind = self.obj_type
    owner_id = self.owner.id
    return f"{component_kind}_{owner_id}"

def __getitem__(self, item):
return dict.__getitem__(self, item)
return self._get_storage().get_data("meta", self.storage_key, item)

def _get_storage(self):
    """Return the storage object that backs this metadata.

    The owner is either a Corpus or a CorpusComponent. Only the Corpus holds
    a direct pointer to storage, so for a CorpusComponent the lookup goes one
    level up, to the Corpus that owns the component.
    """
    holder = self.owner
    # Corpus cannot be imported here to check the type (circular import).
    # As a proxy, test for the ``obj_type`` attribute: every CorpusComponent
    # has it, Corpus does not.
    if hasattr(holder, "obj_type"):
        # holder is a CorpusComponent; step up to the Corpus that owns it.
        holder = holder.owner
    return holder.storage

@staticmethod
def _check_type_and_update_index(index, obj_type, key, value):
Expand All @@ -45,12 +65,12 @@ def __setitem__(self, key, value):

if self.index.type_check:
ConvoKitMeta._check_type_and_update_index(self.index, self.obj_type, key, value)
dict.__setitem__(self, key, value)
self._get_storage().update_data("meta", self.storage_key, key, value)

def __delitem__(self, key):
if self.obj_type == "corpus":
dict.__delitem__(self, key)
self.index.del_from_index(self.obj_type, key)
self._get_storage().delete_data("meta", self.storage_key, key)
else:
if self.index.lock_metadata_deletion[self.obj_type]:
warn(
Expand All @@ -62,19 +82,36 @@ def __delitem__(self, key):
)
)
else:
dict.__delitem__(self, key)
self._get_storage().delete_data("meta", self.storage_key, key)

def __iter__(self):
return dict.__iter__(self)
return self._get_storage().get_data("meta", self.storage_key).__iter__()

def __len__(self):
return dict.__len__(self)
return self._get_storage().get_data("meta", self.storage_key).__len__()

def __contains__(self, x):
return dict.__contains__(self, x)
return self._get_storage().get_data("meta", self.storage_key).__contains__(x)

def __repr__(self) -> str:
    """Return ``ConvoKitMeta(<dict repr>)`` using the current ``to_dict()`` contents."""
    return f"ConvoKitMeta({self.to_dict()!r})"

def to_dict(self):
return self.__dict__
return dict(self._get_storage().get_data("meta", self.storage_key))

def reinitialize_from(self, other: Union["ConvoKitMeta", dict]):
    """
    Replace the stored metadata of this instance with the contents of ``other``.

    :param other: a ConvoKitMeta (its ``to_dict()`` contents are shallow-copied)
        or a plain dict (passed to storage as-is)
    :raises TypeError: if ``other`` is neither a ConvoKitMeta nor a dict
    """
    # ConvoKitMeta must be tested first: it is itself a dict subclass.
    if isinstance(other, ConvoKitMeta):
        replacement = dict(other.to_dict())
    elif isinstance(other, dict):
        replacement = other
    else:
        raise TypeError(
            "ConvoKitMeta can only be reinitialized from a dict instance or another ConvoKitMeta"
        )
    storage = self._get_storage()
    storage.initialize_data_for_component(
        "meta", self.storage_key, overwrite=True, initial_value=replacement
    )


_basic_types = {type(0), type(1.0), type("str"), type(True)} # cannot include lists or dicts
Expand Down
Loading

0 comments on commit f610e67

Please sign in to comment.