hot fix for failed_jobs_dir (#506)
vuillaut authored Oct 18, 2024
1 parent d25c6a8 commit 7bdfdab
Showing 3 changed files with 18 additions and 17 deletions.
8 changes: 4 additions & 4 deletions codemeta.json
@@ -4,16 +4,16 @@
"license": "https://spdx.org/licenses/MIT",
"codeRepository": "git+https://github.com/cta-observatory/lstmcpipe.git",
"dateCreated": "2019-09-18",
"datePublished": "2024-10-17",
"datePublished": "2024-10-18",
"downloadUrl": "https://github.com/cta-observatory/lstmcpipe/archive/refs/tags/v0.11.1.tar.gz",
"issueTracker": "https://github.com/cta-observatory/lstmcpipe/issues",
"name": "lstmcpipe",
"version": "0.11.2",
"softwareVersion": "0.11.2",
"version": "0.11.3",
"softwareVersion": "0.11.3",
"readme": "https://github.com/cta-observatory/lstmcpipe/blob/master/README.rst",
"description": "Scripts to ease the reduction of MC data on the LST cluster at La Palma. With this package, the analysis/creation of R1/DL0/DL1/DL2/IRFs can be orchestrated.",
"applicationCategory": "CTA, LST",
"releaseNotes": "lstmcpipe v0.11.2 introduces bug fixes in the r0 to dl1 processing. For bug fixes, see the **full changelog**: https://github.com/cta-observatory/lstmcpipe/compare/v0.11.1...v0.11.2",
"releaseNotes": "lstmcpipe v0.11.3 makes a hotfix about failed jobs directory. For bug fixes, see the **full changelog**: https://github.com/cta-observatory/lstmcpipe/compare/v0.11.2...v0.11.3",
"funding": "824064",
"developmentStatus": "active",
"isPartOf": "https://www.cta-observatory.org/",
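The release bump above touches four fields that must stay in step: version, softwareVersion, datePublished, and releaseNotes. As a rough illustration only (the helper below is not part of the repository), a small consistency check could read codemeta.json and compare the two version fields against the intended release:

import json
from pathlib import Path


def check_codemeta_version(codemeta_path="codemeta.json", expected_version="0.11.3"):
    """Hypothetical helper: verify that codemeta.json carries the expected release version."""
    meta = json.loads(Path(codemeta_path).read_text())
    # "version" and "softwareVersion" are bumped together in this commit; flag any drift.
    for key in ("version", "softwareVersion"):
        if meta.get(key) != expected_version:
            raise ValueError(f"{key} is {meta.get(key)!r}, expected {expected_version!r}")


if __name__ == "__main__":
    check_codemeta_version()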
17 changes: 9 additions & 8 deletions lstmcpipe/tests/test_utils.py
@@ -34,23 +34,24 @@ def test_rerun_cmd():
      with tempfile.TemporaryDirectory() as tmp_dir:
          file, filename = tempfile.mkstemp(dir=tmp_dir)
          cmd = f'echo "1" >> {filename}; rm nonexistingfile'

+         filename = Path(filename)
+
          # first test: the cmd fails 3 times but the outfile stays in place
-         subdir_failures = ""
+         failed_jobs_dir = Path(tmp_dir) / ""
          try:
-             n_tries = rerun_cmd(cmd, filename, max_ntry=3, subdir_failures=subdir_failures, shell=True)
-             filename = Path(filename)
-             filename = Path(tmp_dir).joinpath(subdir_failures, filename.name)
+             n_tries = rerun_cmd(cmd, filename, max_ntry=3, failed_jobs_dir=failed_jobs_dir, shell=True)
+             filename = failed_jobs_dir.joinpath(filename.name)
              assert open(filename).read() == "1\n1\n1\n"
              assert n_tries == 3
          except Exception as e:
              assert isinstance(e, RuntimeError)

          # 2nd test: the cmd fails and the outfile is moved in subdir
-         subdir_failures = "fail"
+         failed_jobs_dir = filename.parent.joinpath("fail")
          try:
-             rerun_cmd(cmd, filename, max_ntry=3, subdir_failures=subdir_failures, shell=True)
-             filename = filename.parent.joinpath(subdir_failures).joinpath(filename.name)
+             rerun_cmd(cmd, filename, max_ntry=3, failed_jobs_dir=failed_jobs_dir, shell=True)
+             filename = failed_jobs_dir.joinpath(filename.name)
              assert open(filename).read() == "1\n"
              assert filename.exists()
          except Exception as e:
@@ -66,7 +67,7 @@ def test_rerun_cmd_lstchain_mc_r0_to_dl1(mc_gamma_testfile):
          ntry = rerun_cmd(cmd, outfile, max_ntry=3)
          assert ntry == 1
          # second try should fail because the outfile already exists
-         ntry = rerun_cmd(cmd, outfile, max_ntry=3, subdir_failures='failed_outputs')
+         ntry = rerun_cmd(cmd, outfile, max_ntry=3, failed_jobs_dir=Path(tmp_dir) / 'failed_outputs')
          assert ntry == 2
          assert Path(tmp_dir, 'failed_outputs', outfilename).exists()

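The test changes above track a rename of the rerun_cmd keyword argument: subdir_failures (a subdirectory resolved against the output file's parent) becomes failed_jobs_dir, an explicit directory path. A minimal usage sketch, assuming lstmcpipe with this commit is installed; the command string and directory names are illustrative, and the RuntimeError is the failure mode the test above expects after the last attempt:

import tempfile
from pathlib import Path

from lstmcpipe.utils import rerun_cmd

with tempfile.TemporaryDirectory() as tmp_dir:
    _, outfile = tempfile.mkstemp(dir=tmp_dir)
    outfile = Path(outfile)
    # The command writes to the output file and then fails, so every try is retried.
    cmd = f'echo "1" >> {outfile}; rm nonexistingfile'

    failed_jobs_dir = Path(tmp_dir) / "failed_outputs"
    try:
        rerun_cmd(cmd, outfile, max_ntry=2, failed_jobs_dir=failed_jobs_dir, shell=True)
    except RuntimeError:
        # After the retries are exhausted, the stale output file has been moved
        # into failed_jobs_dir rather than left next to the expected output.
        print(sorted(p.name for p in failed_jobs_dir.iterdir()))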
10 changes: 5 additions & 5 deletions lstmcpipe/utils.py
@@ -111,7 +111,7 @@ def batch_mc_production_check(
      return jobid


- def rerun_cmd(cmd, outfile, max_ntry=2, subdir_failures=prod_logs/"failed_outputs", **run_kwargs):
+ def rerun_cmd(cmd, outfile, max_ntry=2, failed_jobs_dir=prod_logs/"failed_outputs", **run_kwargs):
      """
      Rerun the command up to max_ntry times.
      If all attempts fail, raise an exception.
@@ -124,7 +124,7 @@ def rerun_cmd(cmd, outfile, max_ntry=2, subdir_failures=prod_logs/"failed_output
          Path to the cmd output file
      max_ntry: int
          Maximum number of attempts to run the command
-     subdir_failures: str
+     failed_jobs_dir: Path or str
          Subdirectory to move failed output files to
      run_kwargs: kwargs
          Additional keyword arguments for subprocess.run
@@ -135,17 +135,17 @@ def rerun_cmd(cmd, outfile, max_ntry=2, subdir_failures=prod_logs/"failed_output
          If the command fails after all retry attempts
      """
      outfile = Path(outfile)
+     failed_jobs_dir = Path(failed_jobs_dir)
      for ntry in range(1, max_ntry + 1):
          result = sp.run(cmd, **run_kwargs, capture_output=True, text=True, check=False)

          if result.returncode == 0:
              return ntry  # Success, return the number of tries it took

          # Command failed, handle the error
-         failed_jobs_subdir = outfile.parent.joinpath(subdir_failures)
          if outfile.exists():
-             failed_jobs_subdir.mkdir(exist_ok=True)
-             outfile_target = failed_jobs_subdir.joinpath(outfile.name)
+             failed_jobs_dir.mkdir(exist_ok=True)
+             outfile_target = failed_jobs_dir.joinpath(outfile.name)
              print(f"Try #{ntry} - move failed output file from {outfile} to {outfile_target}")
              shutil.move(outfile, outfile_target)

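The implementation change is that the quarantine location is no longer derived from the output file's parent (outfile.parent.joinpath(subdir_failures)) but taken directly from the failed_jobs_dir argument, which is coerced to a Path and created on demand before the stale output is moved. A small pathlib sketch of the difference, with purely hypothetical paths; prod_logs stands in for the Path default used in the signature above:

from pathlib import Path

# Hypothetical example paths, for illustration only.
outfile = Path("/fefs/prod/DL1/run101.h5")
prod_logs = Path("/fefs/prod/logs")

# Old behaviour (subdir_failures): the target hung off the output file's parent.
old_target = outfile.parent.joinpath("failed_outputs")
print(old_target)  # /fefs/prod/DL1/failed_outputs

# Joining with an absolute path discards the left-hand side, so a Path default such as
# prod_logs/"failed_outputs" already resolved to that absolute directory instead.
print(outfile.parent.joinpath(prod_logs / "failed_outputs"))  # /fefs/prod/logs/failed_outputs

# New behaviour (failed_jobs_dir): the caller names the directory explicitly and
# rerun_cmd uses it as-is as the destination for the moved output file.
new_target = (prod_logs / "failed_outputs").joinpath(outfile.name)
print(new_target)  # /fefs/prod/logs/failed_outputs/run101.h5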
