Skip to content

Commit

Permalink
done with two file save tests and code
Browse files Browse the repository at this point in the history
  • Loading branch information
wangpatrick57 committed Dec 29, 2024
1 parent 8fa8ac0 commit 36d1c68
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 15 deletions.
93 changes: 84 additions & 9 deletions util/tests/unittest_workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@ def test_save_file_same_dependency_twice(self) -> None:
self.init_workspace_helper()
assert self.workspace is not None and self.expected_structure is not None
prev_run_name = self.workspace.dbgym_this_run_path.name
result_path = self.make_result_helper()
result_path = self.make_result_helper(file_obj=("file",))
self.init_workspace_helper()
self.workspace.save_file(result_path)
self.workspace.save_file(result_path)
Expand All @@ -315,10 +315,64 @@ def test_save_file_same_dependency_twice(self) -> None:
verify_structure(self.scratchspace_path, self.expected_structure)
)

def test_save_file_two_different_dependencies_with_same_name(self) -> None:
    # Placeholder: this test has no body yet and always passes.
    # TODO: implement the case of saving two different dependencies that share a name
    # TODO: also do the config version
    pass
def test_save_file_two_different_dependencies_with_same_filename_both_directly_inside_run(
    self,
) -> None:
    """
    Save two dependencies that share a filename but were made in different runs.

    Two workspaces each make one result via make_result_helper() (presumably
    placed directly inside that run's directory, as the test name says --
    confirm against the helper). A third workspace then saves both results,
    and the expected structure gains a single "<filename>.link" symlink
    pointing at the result from the second of those runs.
    """
    # First producer run.
    self.init_workspace_helper()
    assert self.workspace is not None and self.expected_structure is not None
    producer_run_names = [self.workspace.dbgym_this_run_path.name]
    first_dependency = self.make_result_helper(file_obj=("file",))

    # Second producer run.
    self.init_workspace_helper()
    producer_run_names += [self.workspace.dbgym_this_run_path.name]
    second_dependency = self.make_result_helper(file_obj=("file",))

    # Sanity check on the setup: both results really do share a filename.
    shared_name = first_dependency.name
    assert shared_name == second_dependency.name

    # Consumer run: save both dependencies, in order.
    self.init_workspace_helper()
    for dependency in (first_dependency, second_dependency):
        self.workspace.save_file(dependency)

    # The second save_file() should have overwritten the first one.
    this_run_name = self.workspace.dbgym_this_run_path.name
    latest_producer = producer_run_names[-1]
    task_runs = self.expected_structure["dbgym_workspace"]["task_runs"]
    task_runs[this_run_name][f"{shared_name}.link"] = (
        "symlink",
        f"dbgym_workspace/task_runs/{latest_producer}/{shared_name}",
    )

    structure_matches = verify_structure(
        self.scratchspace_path, self.expected_structure
    )
    self.assertTrue(structure_matches)

def test_save_file_two_different_dependencies_with_same_filename_but_different_outermost_dirs(
    self,
) -> None:
    """
    Save two same-named dependencies from one run that sit at different depths.

    One result is made at "dir1/result.txt" and the other at "result.txt",
    both in the same earlier workspace. After a later workspace saves both,
    the expected structure gains two symlinks: "<filename>.link" pointing at
    the top-level file and "dir1.link" pointing at the "dir1" directory.
    """
    # Producer run: make both results in the same workspace.
    self.init_workspace_helper()
    assert self.workspace is not None and self.expected_structure is not None
    producer_run_name = self.workspace.dbgym_this_run_path.name
    nested_dependency = self.make_result_helper(
        "dir1/result.txt", file_obj=("file",)
    )
    top_level_dependency = self.make_result_helper("result.txt", file_obj=("file",))

    # Sanity check on the setup: both results really do share a filename.
    shared_name = nested_dependency.name
    assert shared_name == top_level_dependency.name

    # Consumer run: save the nested file first, then the top-level one.
    self.init_workspace_helper()
    for dependency in (nested_dependency, top_level_dependency):
        self.workspace.save_file(dependency)

    # The second save_file() should not overwrite the first one because the outermost dirs are different.
    task_runs = self.expected_structure["dbgym_workspace"]["task_runs"]
    this_run_name = self.workspace.dbgym_this_run_path.name
    task_runs[this_run_name][f"{shared_name}.link"] = (
        "symlink",
        f"dbgym_workspace/task_runs/{producer_run_name}/{shared_name}",
    )
    this_run_name = self.workspace.dbgym_this_run_path.name
    task_runs[this_run_name]["dir1.link"] = (
        "symlink",
        f"dbgym_workspace/task_runs/{producer_run_name}/dir1",
    )

    structure_matches = verify_structure(
        self.scratchspace_path, self.expected_structure
    )
    self.assertTrue(structure_matches)

def test_save_file_config(self) -> None:
"""
Expand All @@ -340,12 +394,35 @@ def test_save_file_config(self) -> None:
def test_save_file_same_config_twice(self) -> None:
self.init_workspace_helper()
assert self.workspace is not None and self.expected_structure is not None
result_path = self.make_file_helper("external/result.txt")
result_path = self.make_file_helper(
"external/result.txt", file_obj=("file", "contents")
)
self.workspace.save_file(result_path)
self.workspace.save_file(result_path)
self.expected_structure["dbgym_workspace"]["task_runs"][
self.workspace.dbgym_this_run_path.name
][f"{result_path.name}"] = ("file",)
][f"{result_path.name}"] = ("file", "contents")
self.assertTrue(
verify_structure(self.scratchspace_path, self.expected_structure)
)

def test_save_file_two_different_configs_with_same_filename(self) -> None:
    """
    Save two files with the same filename but different paths and contents.

    The files are made with make_file_helper() at "external/result.txt" and
    "external/dir1/result.txt" and saved in that order within one workspace.
    The expected structure then holds a single file entry under the shared
    filename, carrying the contents of the second file.
    """
    self.init_workspace_helper()
    assert self.workspace is not None and self.expected_structure is not None

    first_config = self.make_file_helper(
        "external/result.txt", file_obj=("file", "contents1")
    )
    second_config = self.make_file_helper(
        "external/dir1/result.txt", file_obj=("file", "contents2")
    )

    # Sanity check on the setup: both files really do share a filename.
    shared_name = first_config.name
    assert shared_name == second_config.name

    # Save in order; the entry expected afterwards has the second file's contents.
    for config in (first_config, second_config):
        self.workspace.save_file(config)

    this_run_name = self.workspace.dbgym_this_run_path.name
    task_runs = self.expected_structure["dbgym_workspace"]["task_runs"]
    task_runs[this_run_name][f"{shared_name}"] = ("file", "contents2")

    structure_matches = verify_structure(
        self.scratchspace_path, self.expected_structure
    )
    self.assertTrue(structure_matches)
Expand Down Expand Up @@ -379,8 +456,6 @@ def test_save_file_generated_this_run_raises_error(self) -> None:
):
self.workspace.save_file(result_path)

# TODO: test saving different configs/dependencies with the same name


if __name__ == "__main__":
unittest.main()
16 changes: 10 additions & 6 deletions util/workspace.py
Original file line number Diff line number Diff line change
Expand Up @@ -291,6 +291,14 @@ def save_file(self, fpath: Path) -> None:
We copy the file if it is a "config", meaning it just exists without having been generated
We create a symlink if it is a "dependency", meaning a task.py command was run to generate it
In these cases we create a symlink so we have full provenance for how the dependency was created
**Notable Behavior**
- When you save a dependency that is not directly inside run_*/, it actually creates a link to the outermost directory still inside run_*/.
- If a link with that name already exists in the current run, the second save will overwrite the first.
- If you save the same file twice in the same run, the second save will overwrite the first.
- If you save two configs with the same name, the second save will overwrite the first.
- If you save two dependencies with the same *outermost* directory, or two dependencies with the same filename
both directly inside run_*/, the second save will overwrite the first.
"""
# validate fpath
assert isinstance(fpath, Path)
Expand All @@ -312,6 +320,7 @@ def save_file(self, fpath: Path) -> None:
if parent_dpath.samefile(run_dpath):
fname = basename_of_path(fpath)
symlink_fpath = self.dbgym_this_run_path / (fname + ".link")
try_remove_file(symlink_fpath)
try_create_symlink(fpath, symlink_fpath)
# Otherwise, we know the fpath file is _not_ directly inside run_dpath dir.
# We go as far back as we can while still staying in run_dpath and symlink that "base" dir.
Expand All @@ -326,6 +335,7 @@ def save_file(self, fpath: Path) -> None:
# Create symlink
open_base_dname = basename_of_path(base_dpath)
symlink_dpath = self.dbgym_this_run_path / (open_base_dname + ".link")
try_remove_file(symlink_dpath)
try_create_symlink(base_dpath, symlink_dpath)
# If the file wasn't generated by a run, we can't just symlink it because we don't know that it's immutable.
else:
Expand Down Expand Up @@ -553,12 +563,6 @@ def open_and_save(
If you are generating a "result" for the run, _do not_ use this. Just use the normal open().
This shouldn't be too hard to remember because this function crashes if open_fpath doesn't exist,
and when you write results you're usually opening open_fpaths which do not exist.
**Notable Behavior**
- If you open the same "config" file twice in the same run, it'll only be saved the first time (even if the file has changed in between).
- "Dependency" files should be immutable so there's no problem here.
- If you open two "config" files of the same name but different paths, only the first open will be saved.
- Opening two "dependency" files of the same name but different paths will lead to two different "base dirs" being symlinked.
"""
# validate open_fpath
assert isinstance(open_fpath, Path)
Expand Down

0 comments on commit 36d1c68

Please sign in to comment.