Merge pull request #584 from Breakthrough-Energy/data_access_refactor
Merge data access refactor feature branch
jenhagg authored Feb 10, 2022
2 parents 5282848 + ed1a3ab commit 69c1e36
Showing 18 changed files with 413 additions and 408 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/coverage.yml
@@ -19,7 +19,7 @@ jobs:
python-version: 3.9

- run: python -m pip install --upgrade pip tox
- run: tox -e pytest-local -- --cov-report=xml
- run: tox -e pytest-local -- --cov=powersimdata --cov-report=xml

- name: Upload coverage to Codecov
uses: codecov/codecov-action@v1
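
Note on the change above: everything after "--" on the tox command line is handed to pytest through {posargs}, so the workflow now requests coverage for the powersimdata package explicitly instead of relying on a default baked into the tox environment. A minimal sketch of a tox env wired this way (the repository's actual tox.ini is not part of this diff, so the exact contents below are assumed):

    # hypothetical tox.ini excerpt -- illustrative only, not a file changed in this commit
    [testenv:pytest-local]
    deps =
        pytest
        pytest-cov
    # {posargs} expands to whatever follows "--", so
    # "tox -e pytest-local -- --cov=powersimdata --cov-report=xml"
    # effectively runs "pytest --cov=powersimdata --cov-report=xml"
    commands = pytest {posargs}
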
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
@@ -25,4 +25,4 @@ jobs:
python-version: ${{ matrix.python-version }}

- run: python -m pip install --upgrade pip tox
- run: tox -e pytest-local
- run: tox -e pytest-local -- --cov=powersimdata
1 change: 0 additions & 1 deletion Dockerfile
@@ -1,7 +1,6 @@
FROM python:3.8.3

RUN apt-get update
RUN apt-get install gawk
RUN ln -s /mnt/bes/pcm $HOME/ScenarioData

COPY powersimdata/utility/templates /mnt/bes/pcm/
56 changes: 21 additions & 35 deletions powersimdata/data_access/csv_store.py
@@ -1,13 +1,7 @@
import functools
import os
import shutil
from pathlib import Path
from tempfile import mkstemp

import pandas as pd

from powersimdata.utility import server_setup


def verify_hash(func):
"""Utility function which verifies the sha1sum of the file before writing
@@ -25,6 +19,19 @@ def wrapper(self, *args, **kwargs):
return wrapper


def _parse_csv(file_object):
"""Read file from disk into data frame
:param str, path object or file-like object file_object: a reference to
the csv file
:return: (*pandas.DataFrame*) -- the specified file as a data frame.
"""
table = pd.read_csv(file_object)
table.set_index("id", inplace=True)
table.fillna("", inplace=True)
return table.astype(str)


class CsvStore:
"""Base class for common functionality used to manage scenario and execute
list stored as csv files on the server
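
For context, _parse_csv (moved to module level above, so it no longer needs self) expects a csv with an id column and returns an all-string data frame indexed by id. A small usage sketch, with a toy csv standing in for the real scenario/execute lists:

    import io

    from powersimdata.data_access.csv_store import _parse_csv

    # toy csv for illustration; the real list files carry different columns
    raw = io.StringIO("id,name,state\n1,base_case,analyze\n2,,create\n")
    table = _parse_csv(raw)
    # index is "id", NaN becomes "", and every column is cast to str
    assert list(table.columns) == ["name", "state"]
    assert table.loc[1, "name"] == "base_case"
    assert table.loc[2, "name"] == ""
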
@@ -43,42 +50,21 @@ def get_table(self):
:return: (*pandas.DataFrame*) -- the specified table as a data frame.
"""
filename = self._FILE_NAME
local_path = Path(server_setup.LOCAL_DIR, filename)

try:
self.data_access.copy_from(filename)
return self._get_table(filename)
except: # noqa
print(f"Failed to download {filename} from server")
print("Falling back to local cache...")

if local_path.is_file():
return self._parse_csv(local_path)
else:
raise FileNotFoundError(f"{filename} does not exist locally.")
return self._get_table(filename + ".2")

def _parse_csv(self, file_object):
"""Read file from disk into data frame
:param str, path object or file-like object file_object: a reference to
the csv file
:return: (*pandas.DataFrame*) -- the specified file as a data frame.
"""
table = pd.read_csv(file_object)
table.set_index("id", inplace=True)
table.fillna("", inplace=True)
return table.astype(str)
def _get_table(self, filename):
self.data_access.copy_from(filename)
with self.data_access.get(filename) as (f, _):
return _parse_csv(f)

def commit(self, table, checksum):
"""Save to local directory and upload if needed
:param pandas.DataFrame table: the data frame to save
:param str checksum: the checksum prior to download
"""
tmp_file, tmp_path = mkstemp(dir=server_setup.LOCAL_DIR)
table.to_csv(tmp_path)
shutil.copy(tmp_path, os.path.join(server_setup.LOCAL_DIR, self._FILE_NAME))
os.close(tmp_file)
tmp_name = os.path.basename(tmp_path)
self.data_access.push(tmp_name, checksum, change_name_to=self._FILE_NAME)
if os.path.exists(tmp_path): # only required if data_access is LocalDataAccess
os.remove(tmp_path)
with self.data_access.push(self._FILE_NAME, checksum) as f:
table.to_csv(f)
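
Both rewritten methods above lean on context managers exposed by the data access layer: get yields an open file object (plus a second value, unused here) for reading, and push yields a writable handle and takes care of the checksum check and upload when the block exits. The sketch below only illustrates that contract under assumed names; the project's real implementations live in powersimdata.data_access and differ in detail:

    import os
    from contextlib import contextmanager


    class IllustrativeDataAccess:
        """Rough sketch of the interface CsvStore relies on (assumed, not actual)."""

        def __init__(self, root):
            self.root = root

        @contextmanager
        def get(self, filename):
            # yield (file object, path) for reading, as consumed in _get_table
            path = os.path.join(self.root, filename)
            with open(path) as f:
                yield f, path

        @contextmanager
        def push(self, filename, checksum):
            # yield a writable handle; finalize the write once the block exits
            tmp = os.path.join(self.root, filename + ".tmp")
            with open(tmp, "w") as f:
                yield f
            # the real code would compare `checksum` against the server copy
            # before overwriting, to detect concurrent modifications
            os.replace(tmp, os.path.join(self.root, filename))
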