Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Storage abstraction #169

Merged
merged 25 commits into master from storage-abstraction
Aug 27, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
01a7153
initialize storage-abstraction branch as 2.6.0
jpwchang Jun 27, 2022
eca5d62
define StorageManager base class
jpwchang Jun 27, 2022
fe2587b
implement MemStorageManager
jpwchang Jun 27, 2022
4a2ad76
implementing temp storage for ownerless components
jpwchang Jul 5, 2022
b209286
Merge branch 'master' into storage-abstraction
jpwchang Jul 5, 2022
ec5e0a1
dataframe conversion and corpus id fixes
jpwchang Jul 5, 2022
17b6de9
fix missing parameter when assigning new owner
jpwchang Jul 5, 2022
b81112b
correctly handle metadata in owner reassignment
jpwchang Jul 6, 2022
5903143
enforce cleanup of old StorageManagers in Corpus operations
jpwchang Jul 12, 2022
a409fb1
Convert old meta to dict in reindex_conversations
jpwchang Jul 12, 2022
a6925fc
fix metadata reassignment in merge
jpwchang Jul 12, 2022
c450cd3
update reindex test to account for source corpus destruction
jpwchang Jul 12, 2022
a580a50
reindex test fix
jpwchang Jul 12, 2022
40b1011
create new storage integrity tests
jpwchang Jul 12, 2022
e44c622
Merge branch 'master' into storage-abstraction
calebchiam Jul 12, 2022
7dd6698
black fmt setup
calebchiam Jul 12, 2022
456dc1b
Merge, filter, and reindex now static
jpwchang Jul 18, 2022
a77609c
Merge branch 'master' into storage-abstraction
jpwchang Jul 18, 2022
c4398b2
boilerplate template for adding speaker and utterances individually
oscarso2000 Jul 19, 2022
357e1f4
Merge branch 'storage-abstraction' of https://github.com/CornellNLP/C…
oscarso2000 Jul 19, 2022
1d262e3
black format
oscarso2000 Jul 19, 2022
64b49d2
remove extra work
oscarso2000 Jul 19, 2022
03001c7
Minor style changes
jpwchang Jul 20, 2022
735e275
fix outdated references to reinitialize_from_other
jpwchang Jul 20, 2022
26fb5c0
__str__ implementation for Speaker, matching style of Utterance and C…
jpwchang Jul 20, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,3 @@ repos:
rev: 22.3.0
hooks:
- id: black
language_version: python3.9
55 changes: 46 additions & 9 deletions convokit/model/convoKitMeta.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,11 @@
from collections.abc import MutableMapping
except:
from collections import MutableMapping
from numpy import isin
from convokit.util import warn
from .convoKitIndex import ConvoKitIndex
import json
from typing import Union

# See reference: https://stackoverflow.com/questions/7760916/correct-usage-of-a-getter-setter-for-dictionary-values

Expand All @@ -14,12 +16,30 @@ class ConvoKitMeta(MutableMapping, dict):
ConvoKitMeta is a dictlike object that stores the metadata attributes of a corpus component
"""

def __init__(self, convokit_index, obj_type):
def __init__(self, owner, convokit_index, obj_type):
self.owner = owner # Corpus or CorpusComponent
self.index: ConvoKitIndex = convokit_index
self.obj_type = obj_type

self._get_storage().initialize_data_for_component("meta", self.storage_key)

@property
def storage_key(self) -> str:
return f"{self.obj_type}_{self.owner.id}"

def __getitem__(self, item):
return dict.__getitem__(self, item)
return self._get_storage().get_data("meta", self.storage_key, item)

def _get_storage(self):
# special case for Corpus meta since that's the only time owner is not a CorpusComponent
# since cannot directly import Corpus to check the type (circular import), as a proxy we
# check for the obj_type attribute which is common to all CorpusComponent but not
# present in Corpus
if not hasattr(self.owner, "obj_type"):
return self.owner.storage
# self.owner -> CorpusComponent
# self.owner.owner -> Corpus that owns the CorpusComponent (only Corpus has direct pointer to storage)
return self.owner.owner.storage

@staticmethod
def _check_type_and_update_index(index, obj_type, key, value):
Expand All @@ -45,12 +65,12 @@ def __setitem__(self, key, value):

if self.index.type_check:
ConvoKitMeta._check_type_and_update_index(self.index, self.obj_type, key, value)
dict.__setitem__(self, key, value)
self._get_storage().update_data("meta", self.storage_key, key, value)

def __delitem__(self, key):
if self.obj_type == "corpus":
dict.__delitem__(self, key)
self.index.del_from_index(self.obj_type, key)
self._get_storage().delete_data("meta", self.storage_key, key)
else:
if self.index.lock_metadata_deletion[self.obj_type]:
warn(
Expand All @@ -62,19 +82,36 @@ def __delitem__(self, key):
)
)
else:
dict.__delitem__(self, key)
self._get_storage().delete_data("meta", self.storage_key, key)

def __iter__(self):
return dict.__iter__(self)
return self._get_storage().get_data("meta", self.storage_key).__iter__()

def __len__(self):
return dict.__len__(self)
return self._get_storage().get_data("meta", self.storage_key).__len__()

def __contains__(self, x):
return dict.__contains__(self, x)
return self._get_storage().get_data("meta", self.storage_key).__contains__(x)

def __repr__(self) -> str:
return "ConvoKitMeta(" + self.to_dict().__repr__() + ")"

def to_dict(self):
return self.__dict__
return dict(self._get_storage().get_data("meta", self.storage_key))

def reinitialize_from(self, other: Union["ConvoKitMeta", dict]):
"""
Reinitialize this ConvoKitMeta instance with the data from other
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: the docstring should say that `other` is another ConvoKitMeta instance or a dictionary with metadata key-value pairs

"""
if isinstance(other, ConvoKitMeta):
other = {k: v for k, v in other.to_dict().items()}
elif not isinstance(other, dict):
raise TypeError(
"ConvoKitMeta can only be reinitialized from a dict instance or another ConvoKitMeta"
)
self._get_storage().initialize_data_for_component(
"meta", self.storage_key, overwrite=True, initial_value=other
)


_basic_types = {type(0), type(1.0), type("str"), type(True)} # cannot include lists or dicts
Expand Down
Loading