Skip to content

Commit

Permalink
DB storage manager (#175)
Browse files Browse the repository at this point in the history
* initial implementation of DBStorageManager

* Update github actions and dependencies (from original db-mode branch)

* Basic DB mode functionality implemented

* Implement database reconnection

* Add support for binary metadata in db

* Implement optimized JSON-to-DB path

* Delete accidentally added file arcExtractor.py

* ran black fmt

* corpusHelper -> corpus_helpers

* renaming some funcs for clarity

* Refactor DB operation calls into corpus_helpers

* Add docs for DB mode
  • Loading branch information
jpwchang authored Sep 4, 2022
1 parent f610e67 commit 9634005
Show file tree
Hide file tree
Showing 16 changed files with 950 additions and 138 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/continuous-integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,18 @@ jobs:
strategy:
matrix:
python-version: [3.7, 3.8, 3.9]
mongodb-version: [5.0.2]

steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- name: Start MongoDB
uses: supercharge/[email protected]
with:
mongodb-version: ${{ matrix.mongodb-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
Expand Down
1 change: 1 addition & 0 deletions convokit/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,6 @@
from .bag_of_words import *
from .expected_context_framework import *
from .surprise import *
from .convokitConfig import *

# __path__ = __import__('pkgutil').extend_path(__path__, __name__)
56 changes: 56 additions & 0 deletions convokit/convokitConfig.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import os
from typing import Optional
from yaml import load, Loader


# YAML text used when no config file exists yet: ConvoKitConfig writes it
# verbatim to the new config file and parses it as the in-memory configuration.
DEFAULT_CONFIG_CONTENTS = (
"# Default Storage Parameters\n"
"db_host: localhost:27017\n"
"data_directory: ~/.convokit/saved-corpora\n"
"default_storage_mode: mem"
)

# Maps a config key to the name of the environment variable that, when set,
# takes priority over the value stored in the config file for that key.
ENV_VARS = {"db_host": "CONVOKIT_DB_HOST", "default_storage_mode": "CONVOKIT_STORAGE_MODE"}


class ConvoKitConfig:
    """
    Utility class providing read-only access to the ConvoKit config file.

    On construction the YAML config file is parsed into ``config_contents``.
    If the file does not exist, it is created (along with its parent
    directory) and populated with ``DEFAULT_CONFIG_CONTENTS``.

    :param filename: path to the config file; defaults to
        ``~/.convokit/config.yml`` (with ``~`` expanded) when None.
    """

    def __init__(self, filename: Optional[str] = None):
        if filename is None:
            filename = os.path.expanduser("~/.convokit/config.yml")

        # NOTE(review): ``Loader`` is PyYAML's full loader, which can construct
        # arbitrary Python objects. The config is a plain key/value mapping, so
        # a safe loader would suffice -- consider switching if this file can
        # ever come from an untrusted source.
        if not os.path.isfile(filename):
            convo_dir = os.path.dirname(filename)
            # dirname is "" for a bare filename (file in the current directory);
            # os.makedirs("") would raise, so only create a real directory.
            # exist_ok avoids a check-then-create race with other processes.
            if convo_dir:
                os.makedirs(convo_dir, exist_ok=True)
            with open(filename, "w") as f:
                print(
                    f"No configuration file found at {filename}; writing with contents: \n{DEFAULT_CONFIG_CONTENTS}"
                )
                f.write(DEFAULT_CONFIG_CONTENTS)
            contents = load(DEFAULT_CONFIG_CONTENTS, Loader=Loader)
        else:
            with open(filename, "r") as f:
                contents = load(f.read(), Loader=Loader)

        # An empty (or comment-only) YAML document parses to None; fall back to
        # an empty mapping so the properties below return their defaults
        # instead of failing on ``None.get``.
        if contents is None:
            contents = {}
        self.config_contents = contents

    def _get_config_from_env_or_file(self, config_key: str, default_val):
        """
        Look up a config value, preferring an environment variable override.

        :param config_key: key to look up in the config file
        :param default_val: value returned when neither the environment nor
            the config file provides the key
        :return: the environment variable's value if the key has an entry in
            ``ENV_VARS`` and that variable is set; otherwise the config file's
            value, or ``default_val`` if the key is absent from the file
        """
        # Keys without a registered environment variable simply skip the
        # override step rather than raising KeyError.
        env_name = ENV_VARS.get(config_key)
        if env_name is not None:
            env_val = os.environ.get(env_name)
            if env_val is not None:
                # environment variable setting takes priority
                return env_val
        return self.config_contents.get(config_key, default_val)

    @property
    def db_host(self):
        """Database host, from the environment or config file (default ``localhost:27017``)."""
        return self._get_config_from_env_or_file("db_host", "localhost:27017")

    @property
    def data_directory(self):
        """Data directory as written in the config file (default ``~/.convokit/saved-corpora``, not expanded here)."""
        return self.config_contents.get("data_directory", "~/.convokit/saved-corpora")

    @property
    def default_storage_mode(self):
        """Default storage mode, from the environment or config file (default ``mem``)."""
        return self._get_config_from_env_or_file("default_storage_mode", "mem")
10 changes: 5 additions & 5 deletions convokit/model/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
from .conversation import Conversation
from .convoKitIndex import ConvoKitIndex
from .convoKitMatrix import ConvoKitMatrix
from .corpus import Corpus
from .corpusComponent import CorpusComponent
from .corpus_helpers import *
from .speaker import Speaker
from .user import User
from .utterance import Utterance
from .corpusComponent import CorpusComponent
from .corpusHelper import *
from .convoKitIndex import ConvoKitIndex
from .convoKitMatrix import ConvoKitMatrix
from .utteranceNode import UtteranceNode
from .user import User
6 changes: 4 additions & 2 deletions convokit/model/convoKitMeta.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,8 @@ def storage_key(self) -> str:
return f"{self.obj_type}_{self.owner.id}"

def __getitem__(self, item):
return self._get_storage().get_data("meta", self.storage_key, item)
item_type = self.index.get_index(self.obj_type).get(item, None)
return self._get_storage().get_data("meta", self.storage_key, item, item_type)

def _get_storage(self):
# special case for Corpus meta since that's the only time owner is not a CorpusComponent
Expand Down Expand Up @@ -65,7 +66,8 @@ def __setitem__(self, key, value):

if self.index.type_check:
ConvoKitMeta._check_type_and_update_index(self.index, self.obj_type, key, value)
self._get_storage().update_data("meta", self.storage_key, key, value)
item_type = self.index.get_index(self.obj_type).get(key, None)
self._get_storage().update_data("meta", self.storage_key, key, value, item_type)

def __delitem__(self, key):
if self.obj_type == "corpus":
Expand Down
Loading

0 comments on commit 9634005

Please sign in to comment.