From 9d2ae2122b90ebc19557bbda71405a381f8bf9cc Mon Sep 17 00:00:00 2001 From: Jon Hagg Date: Fri, 23 Apr 2021 15:08:29 -0700 Subject: [PATCH 1/9] feat: os agnostic copy functionality --- powersimdata/data_access/data_access.py | 104 ++++++++++-------- .../data_access/tests/test_data_access.py | 2 +- powersimdata/scenario/execute.py | 22 ++-- 3 files changed, 69 insertions(+), 59 deletions(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 34b550e62..bbcff9075 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -4,7 +4,7 @@ import posixpath import shutil import time -from subprocess import PIPE, Popen +from subprocess import Popen from tempfile import mkstemp import paramiko @@ -43,9 +43,7 @@ def copy(self, src, dest, recursive=False, update=False): :param bool recursive: create directories recursively :param bool update: only copy if needed """ - self.makedir(posixpath.dirname(dest)) - command = CommandBuilder.copy(src, dest, recursive, update) - return self.execute_command(command) + raise NotImplementedError def remove(self, target, recursive=False, confirm=True): """Wrapper around rm command @@ -86,10 +84,10 @@ def _check_filename(self, filename): if len(os.path.dirname(filename)) != 0: raise ValueError(f"Expecting file name but got path {filename}") - def makedir(self, relative_path): - """Create paths relative to the instance root + def makedir(self, full_path): + """Create path, including parents - :param str relative_path: the path, without filename, relative to root + :param str full_path: the path, excluding filename """ raise NotImplementedError @@ -180,21 +178,41 @@ def move_to(self, file_name, to_dir, change_name_to=None): :param str change_name_to: new name for file when copied to data store. """ self._check_filename(file_name) - src = posixpath.join(server_setup.LOCAL_DIR, file_name) + src = os.path.join(server_setup.LOCAL_DIR, file_name) file_name = file_name if change_name_to is None else change_name_to - dest = posixpath.join(self.root, to_dir, file_name) + dest = os.path.join(self.root, to_dir, file_name) print(f"--> Moving file {src} to {dest}") self._check_file_exists(dest, should_exist=False) - self.copy(src, dest) - self.remove(src) + self.makedir(os.path.dirname(dest)) + shutil.move(src, dest) - def makedir(self, relative_path): - """Create paths relative to the instance root + def makedir(self, full_path): + """Create path on local machine - :param str relative_path: the path, without filename, relative to root + :param str full_path: the path, excluding filename """ - target = os.path.join(self.root, relative_path) - os.makedirs(target, exist_ok=True) + os.makedirs(full_path, exist_ok=True) + + @staticmethod + def _fapply(func, pattern): + files = [f for f in glob.glob(pattern) if os.path.isfile(f)] + for f in files: + func(f) + + def copy(self, src, dest, recursive=False, update=False): + """Wrapper around cp command which creates dest path if needed + + :param str src: path to original + :param str dest: destination path + :param bool recursive: create directories recursively + :param bool update: ignored + """ + self.makedir(dest) + if recursive: + shutil.copytree(src, dest) + else: + func = lambda s: shutil.copy(s, dest) # noqa: E731 + LocalDataAccess._fapply(func, src) def remove(self, target, recursive=False, confirm=True): """Remove target using rm semantics @@ -211,32 +229,9 @@ def remove(self, target, recursive=False, confirm=True): if recursive: shutil.rmtree(target) else: - files = [f for f in glob.glob(target) if os.path.isfile(f)] - for f in files: - os.remove(f) + LocalDataAccess._fapply(os.remove, target) print("--> Done!") - def execute_command(self, command): - """Execute a command locally at the data access. - - :param list command: list of str to be passed to command line. - """ - - def wrap(s): - if s is not None: - return s - return open(os.devnull) - - proc = Popen( - command, - shell=True, - executable="/bin/bash", - stdout=PIPE, - stderr=PIPE, - text=True, - ) - return wrap(None), wrap(proc.stdout), wrap(proc.stderr) - def get_profile_version(self, grid_model, kind): """Returns available raw profile from blob storage or local disk @@ -360,8 +355,8 @@ def move_to(self, file_name, to_dir=None, change_name_to=None): ) file_name = file_name if change_name_to is None else change_name_to - to_dir = "" if to_dir is None else to_dir - to_path = posixpath.join(self.root, to_dir, file_name) + to_dir = posixpath.join(self.root, "" if to_dir is None else to_dir) + to_path = posixpath.join(to_dir, file_name) self.makedir(to_dir) self._check_file_exists(to_path, should_exist=False) @@ -440,16 +435,31 @@ def push(self, file_name, checksum, change_name_to=None): print(e) raise IOError("Failed to push file - most likely a conflict was detected.") - def makedir(self, relative_path): - """Create paths relative to the instance root + def makedir(self, full_path): + """Create path on server - :param str relative_path: the path, without filename, relative to root + :param str full_path: the path, excluding filename :raises IOError: if command generated stderr """ - full_path = posixpath.join(self.root, relative_path) _, _, stderr = self.execute_command(f"mkdir -p {full_path}") + errors = stderr.readlines() + if len(errors) > 0: + raise IOError(f"Failed to create {full_path} on server") + + def copy(self, src, dest, recursive=False, update=False): + """Wrapper around cp command which creates dest path if needed + + :param str src: path to original + :param str dest: destination path + :param bool recursive: create directories recursively + :param bool update: only copy if needed + :raises IOError: if command generated stderr + """ + self.makedir(dest) + command = CommandBuilder.copy(src, dest, recursive, update) + _, _, stderr = self.execute_command(command) if len(stderr.readlines()) != 0: - raise IOError("Failed to create %s on server" % full_path) + raise IOError(f"Failed to execute {command}") def remove(self, target, recursive=False, confirm=True): """Run rm command on server diff --git a/powersimdata/data_access/tests/test_data_access.py b/powersimdata/data_access/tests/test_data_access.py index b058c364b..de0998138 100644 --- a/powersimdata/data_access/tests/test_data_access.py +++ b/powersimdata/data_access/tests/test_data_access.py @@ -108,7 +108,7 @@ def test_move_to_multi_path(mock_data_access, make_temp): remote_path = mock_data_access.root / rel_path remote_path.mkdir(parents=True) fname = make_temp(remote=False) - mock_data_access.move_to(fname, rel_path) + mock_data_access.move_to(fname, str(rel_path)) _check_content(os.path.join(remote_path, fname)) diff --git a/powersimdata/scenario/execute.py b/powersimdata/scenario/execute.py index fffc1c381..ba216d555 100644 --- a/powersimdata/scenario/execute.py +++ b/powersimdata/scenario/execute.py @@ -332,21 +332,25 @@ def __init__(self, data_access, scenario_info, grid, ct): self.grid = grid self.ct = ct self.server_config = server_setup.PathConfig(server_setup.DATA_ROOT_DIR) - self.scenario_folder = "scenario_%s" % scenario_info["id"] + self.scenario_id = scenario_info["id"] + self.scenario_folder = "scenario_%s" % self.scenario_id self.REL_TMP_DIR = posixpath.join( server_setup.EXECUTE_DIR, self.scenario_folder ) + self.TMP_DIR = posixpath.join( + self.server_config.execute_dir(), self.scenario_folder + ) def create_folder(self): """Creates folder on server that will enclose simulation inputs.""" print("--> Creating temporary folder on server for simulation inputs") - self._data_access.makedir(self.REL_TMP_DIR) + self._data_access.makedir(self.TMP_DIR) def prepare_mpc_file(self): """Creates MATPOWER case file.""" - file_name = "%s_case.mat" % self._scenario_info["id"] - storage_file_name = "%s_case_storage.mat" % self._scenario_info["id"] + file_name = f"{self.scenario_id}_case.mat" + storage_file_name = f"{self.scenario_id}_case_storage.mat" file_path = os.path.join(server_setup.LOCAL_DIR, file_name) storage_file_path = os.path.join(server_setup.LOCAL_DIR, storage_file_name) print("Building MPC file") @@ -371,7 +375,7 @@ def prepare_profile(self, kind, profile_as=None): print( f"Writing scaled {kind} profile in {server_setup.LOCAL_DIR} on local machine" ) - file_name = "%s_%s.csv" % (self._scenario_info["id"], kind) + file_name = "%s_%s.csv" % (self.scenario_id, kind) profile.to_csv(os.path.join(server_setup.LOCAL_DIR, file_name)) self._data_access.move_to( @@ -382,9 +386,5 @@ def prepare_profile(self, kind, profile_as=None): self.server_config.execute_dir(), f"scenario_{profile_as}", ) - to_dir = posixpath.join( - self.server_config.execute_dir(), self.scenario_folder - ) - _, _, stderr = self._data_access.copy(f"{from_dir}/{kind}.csv", to_dir) - if len(stderr.readlines()) != 0: - raise IOError(f"Failed to copy {kind}.csv on server") + to_dir = self.TMP_DIR + self._data_access.copy(f"{from_dir}/{kind}.csv", to_dir) From 900e183661af0f5b90e626e4cde4848b6ae9c800 Mon Sep 17 00:00:00 2001 From: Jon Hagg Date: Wed, 28 Apr 2021 15:36:20 -0700 Subject: [PATCH 2/9] chore: execute_command -> _execute_command --- powersimdata/data_access/data_access.py | 16 ++++++++-------- .../data_access/tests/test_data_access.py | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index bbcff9075..b4cb73495 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -91,7 +91,7 @@ def makedir(self, full_path): """ raise NotImplementedError - def execute_command(self, command): + def _execute_command(self, command): """Execute a command locally at the data access. :param list command: list of str to be passed to command line. @@ -366,7 +366,7 @@ def move_to(self, file_name, to_dir=None, change_name_to=None): os.remove(from_path) - def execute_command(self, command): + def _execute_command(self, command): """Execute a command locally at the data access. :param list command: list of str to be passed to command line. @@ -396,7 +396,7 @@ def checksum(self, relative_path): self._check_file_exists(full_path) command = f"sha1sum {full_path}" - _, stdout, _ = self.execute_command(command) + _, stdout, _ = self._execute_command(command) lines = stdout.readlines() return lines[0].strip() @@ -427,7 +427,7 @@ def push(self, file_name, checksum, change_name_to=None): 200>{lockfile}" command = template.format(**values) - _, _, stderr = self.execute_command(command) + _, _, stderr = self._execute_command(command) errors = stderr.readlines() if len(errors) > 0: @@ -441,7 +441,7 @@ def makedir(self, full_path): :param str full_path: the path, excluding filename :raises IOError: if command generated stderr """ - _, _, stderr = self.execute_command(f"mkdir -p {full_path}") + _, _, stderr = self._execute_command(f"mkdir -p {full_path}") errors = stderr.readlines() if len(errors) > 0: raise IOError(f"Failed to create {full_path} on server") @@ -457,7 +457,7 @@ def copy(self, src, dest, recursive=False, update=False): """ self.makedir(dest) command = CommandBuilder.copy(src, dest, recursive, update) - _, _, stderr = self.execute_command(command) + _, _, stderr = self._execute_command(command) if len(stderr.readlines()) != 0: raise IOError(f"Failed to execute {command}") @@ -475,7 +475,7 @@ def remove(self, target, recursive=False, confirm=True): if confirmed.lower() != "y": print("Operation cancelled.") return - _, _, stderr = self.execute_command(command) + _, _, stderr = self._execute_command(command) if len(stderr.readlines()) != 0: raise IOError(f"Failed to delete target={target} on server") print("--> Done!") @@ -486,7 +486,7 @@ def _exists(self, filepath): :param str filepath: the path to the file :return: (*bool*) -- whether the file exists """ - _, _, stderr = self.execute_command(f"ls {filepath}") + _, _, stderr = self._execute_command(f"ls {filepath}") return len(stderr.readlines()) == 0 def close(self): diff --git a/powersimdata/data_access/tests/test_data_access.py b/powersimdata/data_access/tests/test_data_access.py index de0998138..2bc0aa4c4 100644 --- a/powersimdata/data_access/tests/test_data_access.py +++ b/powersimdata/data_access/tests/test_data_access.py @@ -70,7 +70,7 @@ def _check_content(filepath): @pytest.mark.integration @pytest.mark.ssh def test_setup_server_connection(data_access): - _, stdout, _ = data_access.execute_command("whoami") + _, stdout, _ = data_access._execute_command("whoami") assert stdout.read().decode("utf-8").strip() == get_server_user() From c39d60b9acb7e7b62615e87842201306340853f1 Mon Sep 17 00:00:00 2001 From: Jon Hagg Date: Wed, 28 Apr 2021 17:32:43 -0700 Subject: [PATCH 3/9] refactor: move path joins to data access subclasses --- powersimdata/data_access/data_access.py | 28 +++++++++++++++++++++++-- powersimdata/scenario/delete.py | 13 ++++-------- powersimdata/scenario/execute.py | 15 ++++--------- powersimdata/scenario/move.py | 26 ++++++----------------- powersimdata/scenario/state.py | 2 -- powersimdata/utility/server_setup.py | 18 ---------------- 6 files changed, 40 insertions(+), 62 deletions(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index b4cb73495..45deb05ec 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -18,6 +18,12 @@ class DataAccess: """Interface to a local or remote data store.""" + def __init__(self, root=None): + """Constructor""" + self.root = server_setup.DATA_ROOT_DIR if root is None else root + self.backup_root = server_setup.BACKUP_DATA_ROOT_DIR + self.join = None + def copy_from(self, file_name, from_dir): """Copy a file from data store to userspace. @@ -35,6 +41,22 @@ def move_to(self, file_name, to_dir, change_name_to=None): """ raise NotImplementedError + def get_base_dir(self, kind, backup=False): + _allowed = ("input", "output", "tmp") + if kind not in _allowed: + raise ValueError(f"Invalid 'kind', must be one of {_allowed}") + + root = self.root if not backup else self.backup_root + if kind == "tmp": + return self.join(root, "tmp") + return self.join(root, "data", kind) + + def match_scenario_files(self, scenario_id, kind, backup=False): + base_dir = self.get_base_dir(kind, backup) + if kind == "tmp": + return self.join(base_dir, f"scenario_{scenario_id}") + return self.join(base_dir, f"{scenario_id}_*") + def copy(self, src, dest, recursive=False, update=False): """Wrapper around cp command which creates dest path if needed @@ -141,8 +163,9 @@ class LocalDataAccess(DataAccess): """Interface to shared data volume""" def __init__(self, root=None): - self.root = root if root else server_setup.DATA_ROOT_DIR + super().__init__(root) self.description = "local machine" + self.join = os.path.join def copy_from(self, file_name, from_dir=None): """Copy a file from data store to userspace. @@ -259,11 +282,12 @@ class SSHDataAccess(DataAccess): def __init__(self, root=None): """Constructor""" + super().__init__(root) self._ssh = None self._retry_after = 5 - self.root = server_setup.DATA_ROOT_DIR if root is None else root self.local_root = server_setup.LOCAL_DIR self.description = "server" + self.join = posixpath.join @property def ssh(self): diff --git a/powersimdata/scenario/delete.py b/powersimdata/scenario/delete.py index 5be2c455c..f7ffe3098 100644 --- a/powersimdata/scenario/delete.py +++ b/powersimdata/scenario/delete.py @@ -1,5 +1,4 @@ import os -import posixpath from powersimdata.data_access.data_access import LocalDataAccess from powersimdata.scenario.state import State @@ -41,25 +40,21 @@ def delete_scenario(self, confirm=True): self._scenario_list_manager.delete_entry(scenario_id) self._execute_list_manager.delete_entry(scenario_id) - wildcard = f"{scenario_id}_*" - print("--> Deleting scenario input data on server") - target = posixpath.join(self.path_config.input_dir(), wildcard) + target = self._data_access.match_scenario_files(scenario_id, "input") self._data_access.remove(target, recursive=False, confirm=confirm) print("--> Deleting scenario output data on server") - target = posixpath.join(self.path_config.output_dir(), wildcard) + target = self._data_access.match_scenario_files(scenario_id, "output") self._data_access.remove(target, recursive=False, confirm=confirm) # Delete temporary folder enclosing simulation inputs print("--> Deleting temporary folder on server") - tmp_dir = posixpath.join( - self.path_config.execute_dir(), f"scenario_{scenario_id}" - ) + tmp_dir = self._data_access.match_scenario_files(scenario_id, "tmp") self._data_access.remove(tmp_dir, recursive=True, confirm=confirm) print("--> Deleting input and output data on local machine") - target = os.path.join(server_setup.LOCAL_DIR, "data", "**", wildcard) + target = os.path.join(server_setup.LOCAL_DIR, "data", "**", f"{scenario_id}_*") LocalDataAccess().remove(target, recursive=False, confirm=confirm) # Delete attributes diff --git a/powersimdata/scenario/execute.py b/powersimdata/scenario/execute.py index ba216d555..2a69175bd 100644 --- a/powersimdata/scenario/execute.py +++ b/powersimdata/scenario/execute.py @@ -331,16 +331,12 @@ def __init__(self, data_access, scenario_info, grid, ct): self._scenario_info = scenario_info self.grid = grid self.ct = ct - self.server_config = server_setup.PathConfig(server_setup.DATA_ROOT_DIR) self.scenario_id = scenario_info["id"] - self.scenario_folder = "scenario_%s" % self.scenario_id - self.REL_TMP_DIR = posixpath.join( - server_setup.EXECUTE_DIR, self.scenario_folder - ) - self.TMP_DIR = posixpath.join( - self.server_config.execute_dir(), self.scenario_folder + self.REL_TMP_DIR = self._data_access.join( + server_setup.EXECUTE_DIR, f"scenario_{self.scenario_id}" ) + self.TMP_DIR = self._data_access.match_scenario_files(self.scenario_id, "tmp") def create_folder(self): """Creates folder on server that will enclose simulation inputs.""" @@ -382,9 +378,6 @@ def prepare_profile(self, kind, profile_as=None): file_name, self.REL_TMP_DIR, change_name_to=f"{kind}.csv" ) else: - from_dir = posixpath.join( - self.server_config.execute_dir(), - f"scenario_{profile_as}", - ) + from_dir = self._data_access.match_scenario_files(profile_as, "tmp") to_dir = self.TMP_DIR self._data_access.copy(f"{from_dir}/{kind}.csv", to_dir) diff --git a/powersimdata/scenario/move.py b/powersimdata/scenario/move.py index ee5eb840d..d811842ba 100644 --- a/powersimdata/scenario/move.py +++ b/powersimdata/scenario/move.py @@ -1,7 +1,4 @@ -import posixpath - from powersimdata.scenario.state import State -from powersimdata.utility import server_setup class Move(State): @@ -68,39 +65,28 @@ def __init__(self, data_access, scenario_info): """Constructor.""" self._data_access = data_access self._scenario_info = scenario_info - self.backup_config = server_setup.PathConfig(server_setup.BACKUP_DATA_ROOT_DIR) - self.server_config = server_setup.PathConfig(server_setup.DATA_ROOT_DIR) self.scenario_id = self._scenario_info["id"] - self.wildcard = f"{self.scenario_id}_*" def move_input_data(self, confirm=True): """Moves input data.""" print("--> Moving scenario input data to backup disk") - source = posixpath.join( - self.server_config.input_dir(), - self.wildcard, - ) - target = self.backup_config.input_dir() + source = self._data_access.match_scenario_files(self.scenario_id, "input") + target = self._data_access.get_base_dir("input", backup=True) self._data_access.copy(source, target, update=True) self._data_access.remove(source, recursive=False, confirm=confirm) def move_output_data(self, confirm=True): """Moves output data""" print("--> Moving scenario output data to backup disk") - source = posixpath.join( - self.server_config.output_dir(), - self.wildcard, - ) - target = self.backup_config.output_dir() + source = self._data_access.match_scenario_files(self.scenario_id, "output") + target = self._data_access.get_base_dir("output", backup=True) self._data_access.copy(source, target, update=True) self._data_access.remove(source, recursive=False, confirm=confirm) def move_temporary_folder(self, confirm=True): """Moves temporary folder.""" print("--> Moving temporary folder to backup disk") - source = posixpath.join( - self.server_config.execute_dir(), "scenario_" + self.scenario_id - ) - target = self.backup_config.execute_dir() + source = self._data_access.match_scenario_files(self.scenario_id, "tmp") + target = self._data_access.get_base_dir("tmp", backup=True) self._data_access.copy(source, target, recursive=True, update=True) self._data_access.remove(source, recursive=True, confirm=confirm) diff --git a/powersimdata/scenario/state.py b/powersimdata/scenario/state.py index cc818f4b4..5bfcff997 100644 --- a/powersimdata/scenario/state.py +++ b/powersimdata/scenario/state.py @@ -1,6 +1,5 @@ from powersimdata.data_access.execute_list import ExecuteListManager from powersimdata.data_access.scenario_list import ScenarioListManager -from powersimdata.utility import server_setup class State(object): @@ -22,7 +21,6 @@ def __init__(self, scenario): self._data_access = scenario.data_access self._scenario_list_manager = ScenarioListManager(self._data_access) self._execute_list_manager = ExecuteListManager(self._data_access) - self.path_config = server_setup.PathConfig(server_setup.DATA_ROOT_DIR) def switch(self, state): """Switches state. diff --git a/powersimdata/utility/server_setup.py b/powersimdata/utility/server_setup.py index 07fdcb759..a65c96cda 100644 --- a/powersimdata/utility/server_setup.py +++ b/powersimdata/utility/server_setup.py @@ -1,5 +1,4 @@ import os -import posixpath from pathlib import Path SERVER_ADDRESS = os.getenv("BE_SERVER_ADDRESS", "becompute01.gatesventures.com") @@ -25,23 +24,6 @@ def get_deployment_mode(): return DeploymentMode.Container -class PathConfig: - def __init__(self, root=None): - self.root = root - - def _join(self, rel_path): - return posixpath.join(self.root, rel_path) - - def execute_dir(self): - return self._join(EXECUTE_DIR) - - def input_dir(self): - return self._join(INPUT_DIR) - - def output_dir(self): - return self._join(OUTPUT_DIR) - - def get_server_user(): """Returns the first username found using the following sources: From cf0759b5152f4bcf1656b671efd05523b89ec372 Mon Sep 17 00:00:00 2001 From: Jon Hagg Date: Wed, 28 Apr 2021 17:56:56 -0700 Subject: [PATCH 4/9] test: add tests for path creation logic --- powersimdata/data_access/data_access.py | 19 +++++++++-- .../data_access/tests/test_data_access.py | 33 +++++++++++++++++-- 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 45deb05ec..0211ef54e 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -42,6 +42,13 @@ def move_to(self, file_name, to_dir, change_name_to=None): raise NotImplementedError def get_base_dir(self, kind, backup=False): + """Get path to given kind relative to instance root + + :param str kind: one of {input, output, tmp} + :param bool backup: pass True if relative to backup root dir + :raises ValueError: if kind is invalid + :return: (*str*) -- the specified path + """ _allowed = ("input", "output", "tmp") if kind not in _allowed: raise ValueError(f"Invalid 'kind', must be one of {_allowed}") @@ -52,6 +59,13 @@ def get_base_dir(self, kind, backup=False): return self.join(root, "data", kind) def match_scenario_files(self, scenario_id, kind, backup=False): + """Get path matching the given kind of scenario data + + :param int/str scenario_id: the scenario id + :param str kind: one of {input, output, tmp} + :param bool backup: pass True if relative to backup root dir + :return: (*str*) -- the specified path + """ base_dir = self.get_base_dir(kind, backup) if kind == "tmp": return self.join(base_dir, f"scenario_{scenario_id}") @@ -514,8 +528,9 @@ def _exists(self, filepath): return len(stderr.readlines()) == 0 def close(self): - """Close the connection that was opened when the object was created.""" - self.ssh.close() + """Close the connection if one is open""" + if self._ssh is not None: + self._ssh.close() def progress_bar(*args, **kwargs): diff --git a/powersimdata/data_access/tests/test_data_access.py b/powersimdata/data_access/tests/test_data_access.py index 2bc0aa4c4..29e52dd01 100644 --- a/powersimdata/data_access/tests/test_data_access.py +++ b/powersimdata/data_access/tests/test_data_access.py @@ -7,7 +7,7 @@ from powersimdata.data_access.data_access import SSHDataAccess from powersimdata.tests.mock_ssh import MockConnection -from powersimdata.utility.server_setup import get_server_user +from powersimdata.utility import server_setup CONTENT = b"content" @@ -67,11 +67,40 @@ def _check_content(filepath): assert CONTENT == f.read() +root_dir = server_setup.DATA_ROOT_DIR +backup_root = server_setup.BACKUP_DATA_ROOT_DIR + + +def test_base_dir(data_access): + input_dir = data_access.get_base_dir("input") + assert f"{root_dir}/data/input" == input_dir + + output_dir = data_access.get_base_dir("output", backup=True) + assert f"{backup_root}/data/output" == output_dir + + tmp_dir = data_access.get_base_dir("tmp") + assert f"{root_dir}/tmp" == tmp_dir + + with pytest.raises(ValueError): + data_access.get_base_dir("foo") + + +def test_match_scenario_files(data_access): + output_files = data_access.match_scenario_files(99, "output") + assert f"{root_dir}/data/output/99_*" == output_files + + tmp_files = data_access.match_scenario_files(42, "tmp", backup=True) + assert f"{backup_root}/tmp/scenario_42" == tmp_files + + with pytest.raises(ValueError): + data_access.match_scenario_files(1, "foo") + + @pytest.mark.integration @pytest.mark.ssh def test_setup_server_connection(data_access): _, stdout, _ = data_access._execute_command("whoami") - assert stdout.read().decode("utf-8").strip() == get_server_user() + assert stdout.read().decode("utf-8").strip() == server_setup.get_server_user() def test_mocked_correctly(mock_data_access): From 56650838521cd6569746d2d06997df2b14901657 Mon Sep 17 00:00:00 2001 From: Jon Hagg Date: Fri, 30 Apr 2021 15:52:28 -0700 Subject: [PATCH 5/9] fix: implicit posixpath usage --- powersimdata/data_access/profile_helper.py | 11 +++++------ powersimdata/data_access/tests/test_profile_helper.py | 4 +--- powersimdata/input/input_data.py | 10 +++++----- powersimdata/input/tests/test_input_data.py | 2 +- powersimdata/output/output_data.py | 5 +++-- powersimdata/scenario/create.py | 3 ++- powersimdata/utility/server_setup.py | 4 ++-- 7 files changed, 19 insertions(+), 20 deletions(-) diff --git a/powersimdata/data_access/profile_helper.py b/powersimdata/data_access/profile_helper.py index eb4a6fabc..5554a76df 100644 --- a/powersimdata/data_access/profile_helper.py +++ b/powersimdata/data_access/profile_helper.py @@ -17,26 +17,25 @@ def get_file_components(scenario_info, field_name): :param dict scenario_info: a ScenarioInfo instance :param str field_name: the kind of profile - :return: (*tuple*) -- file name and path + :return: (*tuple*) -- file name and list of path components """ version = scenario_info["base_" + field_name] file_name = field_name + "_" + version + ".csv" grid_model = scenario_info["grid_model"] - from_dir = os.path.join("raw", grid_model) - return file_name, from_dir + return file_name, ("raw", grid_model) @staticmethod def download_file(file_name, from_dir): """Download the profile from blob storage at the given path :param str file_name: profile csv - :param str from_dir: the path relative to the blob container + :param tuple from_dir: tuple of path components :return: (*str*) -- path to downloaded file """ print(f"--> Downloading {file_name} from blob storage.") - url_path = "/".join(os.path.split(from_dir)) + url_path = "/".join(from_dir) url = f"{ProfileHelper.BASE_URL}/{url_path}/{file_name}" - dest = os.path.join(server_setup.LOCAL_DIR, from_dir, file_name) + dest = os.path.join(server_setup.LOCAL_DIR, *from_dir, file_name) os.makedirs(os.path.dirname(dest), exist_ok=True) resp = requests.get(url, stream=True) content_length = int(resp.headers.get("content-length", 0)) diff --git a/powersimdata/data_access/tests/test_profile_helper.py b/powersimdata/data_access/tests/test_profile_helper.py index df90e2036..ce67b0afd 100644 --- a/powersimdata/data_access/tests/test_profile_helper.py +++ b/powersimdata/data_access/tests/test_profile_helper.py @@ -1,5 +1,3 @@ -import os - from powersimdata.data_access.profile_helper import ProfileHelper @@ -23,4 +21,4 @@ def test_get_file_components(): s_info = {"base_wind": "v8", "grid_model": "europe"} file_name, from_dir = ProfileHelper.get_file_components(s_info, "wind") assert "wind_v8.csv" == file_name - assert os.path.join("raw", "europe") == from_dir + assert ("raw", "europe") == from_dir diff --git a/powersimdata/input/input_data.py b/powersimdata/input/input_data.py index c16d0876a..7fa64954d 100644 --- a/powersimdata/input/input_data.py +++ b/powersimdata/input/input_data.py @@ -28,19 +28,19 @@ def get_file_components(scenario_info, field_name): :param dict scenario_info: a ScenarioInfo instance :param str field_name: the input file type - :return: (*tuple*) -- file name and path + :return: (*tuple*) -- file name and list of path components """ ext = _file_extension[field_name] file_name = scenario_info["id"] + "_" + field_name + "." + ext - from_dir = server_setup.INPUT_DIR - return file_name, from_dir + return file_name, server_setup.INPUT_DIR def download_file(self, file_name, from_dir): """Download the file if using server, otherwise no-op :param str file_name: either grid or ct file name - :param str from_dir: the path relative to the root dir + :param tuple from_dir: tuple of path components """ + from_dir = self.data_access.join(*from_dir) self.data_access.copy_from(file_name, from_dir) @@ -90,7 +90,7 @@ def get_data(self, scenario_info, field_name): file_name, from_dir = helper.get_file_components(scenario_info, field_name) - filepath = os.path.join(server_setup.LOCAL_DIR, from_dir, file_name) + filepath = os.path.join(server_setup.LOCAL_DIR, *from_dir, file_name) key = cache_key(filepath) cached = _cache.get(key) if cached is not None: diff --git a/powersimdata/input/tests/test_input_data.py b/powersimdata/input/tests/test_input_data.py index 0e37b3b20..fc90da6b8 100644 --- a/powersimdata/input/tests/test_input_data.py +++ b/powersimdata/input/tests/test_input_data.py @@ -9,7 +9,7 @@ def test_get_file_components(): grid_file, from_dir = InputHelper.get_file_components(s_info, "grid") assert "123_ct.pkl" == ct_file assert "123_grid.mat" == grid_file - assert "data/input" == from_dir + assert ("data", "input") == from_dir def test_check_field(): diff --git a/powersimdata/output/output_data.py b/powersimdata/output/output_data.py index 5da537b95..c835f8c10 100644 --- a/powersimdata/output/output_data.py +++ b/powersimdata/output/output_data.py @@ -36,7 +36,7 @@ def get_data(self, scenario_id, field_name): print("--> Loading %s" % field_name) file_name = scenario_id + "_" + field_name + ".pkl" from_dir = server_setup.OUTPUT_DIR - filepath = os.path.join(server_setup.LOCAL_DIR, from_dir, file_name) + filepath = os.path.join(server_setup.LOCAL_DIR, *from_dir, file_name) try: return pd.read_pickle(filepath) @@ -46,7 +46,8 @@ def get_data(self, scenario_id, field_name): except FileNotFoundError: print(f"{filepath} not found on local machine") - self._data_access.copy_from(file_name, from_dir) + remote_dir = self._data_access.join(*from_dir) + self._data_access.copy_from(file_name, remote_dir) return pd.read_pickle(filepath) diff --git a/powersimdata/scenario/create.py b/powersimdata/scenario/create.py index bb11e27b5..bc0b677b4 100644 --- a/powersimdata/scenario/create.py +++ b/powersimdata/scenario/create.py @@ -80,7 +80,8 @@ def _upload_change_table(self): print("--> Writing change table on local machine") self.builder.change_table.write(self._scenario_info["id"]) file_name = self._scenario_info["id"] + "_ct.pkl" - self._data_access.move_to(file_name, server_setup.INPUT_DIR) + input_dir = self._data_access.join(*server_setup.INPUT_DIR) + self._data_access.move_to(file_name, input_dir) def get_bus_demand(self): """Returns demand profiles, by bus. diff --git a/powersimdata/utility/server_setup.py b/powersimdata/utility/server_setup.py index a65c96cda..e65a74e45 100644 --- a/powersimdata/utility/server_setup.py +++ b/powersimdata/utility/server_setup.py @@ -6,8 +6,8 @@ BACKUP_DATA_ROOT_DIR = "/mnt/RE-Storage/v2" DATA_ROOT_DIR = "/mnt/bes/pcm" EXECUTE_DIR = "tmp" -INPUT_DIR = "data/input" -OUTPUT_DIR = "data/output" +INPUT_DIR = ("data", "input") +OUTPUT_DIR = ("data", "output") LOCAL_DIR = os.path.join(Path.home(), "ScenarioData", "") MODEL_DIR = "/home/bes/pcm" From dec82c101cf7656e785b3a7fac91339677608d68 Mon Sep 17 00:00:00 2001 From: Jon Hagg Date: Mon, 3 May 2021 12:51:59 -0700 Subject: [PATCH 6/9] refactor: avoid hard coded strings --- powersimdata/data_access/data_access.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 0211ef54e..867f2d040 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -14,6 +14,12 @@ from powersimdata.utility import server_setup from powersimdata.utility.helpers import CommandBuilder +_dirs = { + "tmp": (server_setup.EXECUTE_DIR,), + "input": server_setup.INPUT_DIR, + "output": server_setup.OUTPUT_DIR, +} + class DataAccess: """Interface to a local or remote data store.""" @@ -49,14 +55,12 @@ def get_base_dir(self, kind, backup=False): :raises ValueError: if kind is invalid :return: (*str*) -- the specified path """ - _allowed = ("input", "output", "tmp") + _allowed = list(_dirs.keys()) if kind not in _allowed: raise ValueError(f"Invalid 'kind', must be one of {_allowed}") root = self.root if not backup else self.backup_root - if kind == "tmp": - return self.join(root, "tmp") - return self.join(root, "data", kind) + return self.join(root, *_dirs[kind]) def match_scenario_files(self, scenario_id, kind, backup=False): """Get path matching the given kind of scenario data From 3a53e65e2be625996d9cfca65205a6294ac4a6f2 Mon Sep 17 00:00:00 2001 From: Jon Hagg Date: Mon, 3 May 2021 16:48:21 -0700 Subject: [PATCH 7/9] chore: use consistent self.join --- powersimdata/data_access/data_access.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index 867f2d040..bb751ec15 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -219,9 +219,9 @@ def move_to(self, file_name, to_dir, change_name_to=None): :param str change_name_to: new name for file when copied to data store. """ self._check_filename(file_name) - src = os.path.join(server_setup.LOCAL_DIR, file_name) + src = self.join(server_setup.LOCAL_DIR, file_name) file_name = file_name if change_name_to is None else change_name_to - dest = os.path.join(self.root, to_dir, file_name) + dest = self.join(self.root, to_dir, file_name) print(f"--> Moving file {src} to {dest}") self._check_file_exists(dest, should_exist=False) self.makedir(os.path.dirname(dest)) @@ -248,10 +248,10 @@ def copy(self, src, dest, recursive=False, update=False): :param bool recursive: create directories recursively :param bool update: ignored """ - self.makedir(dest) if recursive: shutil.copytree(src, dest) else: + self.makedir(dest) func = lambda s: shutil.copy(s, dest) # noqa: E731 LocalDataAccess._fapply(func, src) @@ -366,7 +366,7 @@ def copy_from(self, file_name, from_dir=None): to_dir = os.path.join(self.local_root, from_dir) os.makedirs(to_dir, exist_ok=True) - from_path = posixpath.join(self.root, from_dir, file_name) + from_path = self.join(self.root, from_dir, file_name) to_path = os.path.join(to_dir, file_name) self._check_file_exists(from_path, should_exist=True) @@ -397,8 +397,8 @@ def move_to(self, file_name, to_dir=None, change_name_to=None): ) file_name = file_name if change_name_to is None else change_name_to - to_dir = posixpath.join(self.root, "" if to_dir is None else to_dir) - to_path = posixpath.join(to_dir, file_name) + to_dir = self.join(self.root, "" if to_dir is None else to_dir) + to_path = self.join(to_dir, file_name) self.makedir(to_dir) self._check_file_exists(to_path, should_exist=False) @@ -434,7 +434,7 @@ def checksum(self, relative_path): :param str relative_path: path relative to root :return: (*str*) -- the checksum of the file """ - full_path = posixpath.join(self.root, relative_path) + full_path = self.join(self.root, relative_path) self._check_file_exists(full_path) command = f"sha1sum {full_path}" @@ -455,9 +455,9 @@ def push(self, file_name, checksum, change_name_to=None): self.move_to(file_name, change_name_to=backup) values = { - "original": posixpath.join(self.root, new_name), - "updated": posixpath.join(self.root, backup), - "lockfile": posixpath.join(self.root, "scenario.lockfile"), + "original": self.join(self.root, new_name), + "updated": self.join(self.root, backup), + "lockfile": self.join(self.root, "scenario.lockfile"), "checksum": checksum, } From 8f62277a869cb22240ccda6c5c845de8230befae Mon Sep 17 00:00:00 2001 From: Jon Hagg Date: Tue, 4 May 2021 11:33:42 -0700 Subject: [PATCH 8/9] chore: remove _execute_command --- powersimdata/data_access/data_access.py | 27 +++++-------------- .../data_access/tests/test_data_access.py | 2 +- 2 files changed, 7 insertions(+), 22 deletions(-) diff --git a/powersimdata/data_access/data_access.py b/powersimdata/data_access/data_access.py index bb751ec15..0d900b7ca 100644 --- a/powersimdata/data_access/data_access.py +++ b/powersimdata/data_access/data_access.py @@ -131,13 +131,6 @@ def makedir(self, full_path): """ raise NotImplementedError - def _execute_command(self, command): - """Execute a command locally at the data access. - - :param list command: list of str to be passed to command line. - """ - raise NotImplementedError - def execute_command_async(self, command): """Execute a command locally at the DataAccess, without waiting for completion. @@ -408,14 +401,6 @@ def move_to(self, file_name, to_dir=None, change_name_to=None): os.remove(from_path) - def _execute_command(self, command): - """Execute a command locally at the data access. - - :param list command: list of str to be passed to command line. - :return: (*tuple*) -- stdin, stdout, stderr of executed command. - """ - return self.ssh.exec_command(command) - def execute_command_async(self, command): """Execute a command via ssh, without waiting for completion. @@ -438,7 +423,7 @@ def checksum(self, relative_path): self._check_file_exists(full_path) command = f"sha1sum {full_path}" - _, stdout, _ = self._execute_command(command) + _, stdout, _ = self.ssh.exec_command(command) lines = stdout.readlines() return lines[0].strip() @@ -469,7 +454,7 @@ def push(self, file_name, checksum, change_name_to=None): 200>{lockfile}" command = template.format(**values) - _, _, stderr = self._execute_command(command) + _, _, stderr = self.ssh.exec_command(command) errors = stderr.readlines() if len(errors) > 0: @@ -483,7 +468,7 @@ def makedir(self, full_path): :param str full_path: the path, excluding filename :raises IOError: if command generated stderr """ - _, _, stderr = self._execute_command(f"mkdir -p {full_path}") + _, _, stderr = self.ssh.exec_command(f"mkdir -p {full_path}") errors = stderr.readlines() if len(errors) > 0: raise IOError(f"Failed to create {full_path} on server") @@ -499,7 +484,7 @@ def copy(self, src, dest, recursive=False, update=False): """ self.makedir(dest) command = CommandBuilder.copy(src, dest, recursive, update) - _, _, stderr = self._execute_command(command) + _, _, stderr = self.ssh.exec_command(command) if len(stderr.readlines()) != 0: raise IOError(f"Failed to execute {command}") @@ -517,7 +502,7 @@ def remove(self, target, recursive=False, confirm=True): if confirmed.lower() != "y": print("Operation cancelled.") return - _, _, stderr = self._execute_command(command) + _, _, stderr = self.ssh.exec_command(command) if len(stderr.readlines()) != 0: raise IOError(f"Failed to delete target={target} on server") print("--> Done!") @@ -528,7 +513,7 @@ def _exists(self, filepath): :param str filepath: the path to the file :return: (*bool*) -- whether the file exists """ - _, _, stderr = self._execute_command(f"ls {filepath}") + _, _, stderr = self.ssh.exec_command(f"ls {filepath}") return len(stderr.readlines()) == 0 def close(self): diff --git a/powersimdata/data_access/tests/test_data_access.py b/powersimdata/data_access/tests/test_data_access.py index 29e52dd01..f83aa7f3b 100644 --- a/powersimdata/data_access/tests/test_data_access.py +++ b/powersimdata/data_access/tests/test_data_access.py @@ -99,7 +99,7 @@ def test_match_scenario_files(data_access): @pytest.mark.integration @pytest.mark.ssh def test_setup_server_connection(data_access): - _, stdout, _ = data_access._execute_command("whoami") + _, stdout, _ = data_access.ssh.exec_command("whoami") assert stdout.read().decode("utf-8").strip() == server_setup.get_server_user() From a2cab2be8556f4b2d140624b1c7910b7a268d1f0 Mon Sep 17 00:00:00 2001 From: Jon Hagg Date: Wed, 5 May 2021 13:46:35 -0700 Subject: [PATCH 9/9] chore: cleanup print statements and reuse join method --- powersimdata/scenario/delete.py | 15 ++++++++------- powersimdata/scenario/execute.py | 3 ++- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/powersimdata/scenario/delete.py b/powersimdata/scenario/delete.py index f7ffe3098..7a9587a0e 100644 --- a/powersimdata/scenario/delete.py +++ b/powersimdata/scenario/delete.py @@ -1,5 +1,3 @@ -import os - from powersimdata.data_access.data_access import LocalDataAccess from powersimdata.scenario.state import State from powersimdata.utility import server_setup @@ -40,22 +38,25 @@ def delete_scenario(self, confirm=True): self._scenario_list_manager.delete_entry(scenario_id) self._execute_list_manager.delete_entry(scenario_id) - print("--> Deleting scenario input data on server") + print("--> Deleting scenario input data") target = self._data_access.match_scenario_files(scenario_id, "input") self._data_access.remove(target, recursive=False, confirm=confirm) - print("--> Deleting scenario output data on server") + print("--> Deleting scenario output data") target = self._data_access.match_scenario_files(scenario_id, "output") self._data_access.remove(target, recursive=False, confirm=confirm) # Delete temporary folder enclosing simulation inputs - print("--> Deleting temporary folder on server") + print("--> Deleting temporary folder") tmp_dir = self._data_access.match_scenario_files(scenario_id, "tmp") self._data_access.remove(tmp_dir, recursive=True, confirm=confirm) print("--> Deleting input and output data on local machine") - target = os.path.join(server_setup.LOCAL_DIR, "data", "**", f"{scenario_id}_*") - LocalDataAccess().remove(target, recursive=False, confirm=confirm) + local_data_access = LocalDataAccess() + target = local_data_access.join( + server_setup.LOCAL_DIR, "data", "**", f"{scenario_id}_*" + ) + local_data_access.remove(target, recursive=False, confirm=confirm) # Delete attributes self._clean() diff --git a/powersimdata/scenario/execute.py b/powersimdata/scenario/execute.py index 2a69175bd..9a92e30bc 100644 --- a/powersimdata/scenario/execute.py +++ b/powersimdata/scenario/execute.py @@ -340,7 +340,8 @@ def __init__(self, data_access, scenario_info, grid, ct): def create_folder(self): """Creates folder on server that will enclose simulation inputs.""" - print("--> Creating temporary folder on server for simulation inputs") + description = self._data_access.description + print(f"--> Creating temporary folder on {description} for simulation inputs") self._data_access.makedir(self.TMP_DIR) def prepare_mpc_file(self):