Skip to content

Commit

Permalink
Various other slurm/sockeye preset fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
alexandrebouchard committed Nov 25, 2023
1 parent 8c54c01 commit 7b615b8
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 19 deletions.
7 changes: 4 additions & 3 deletions src/submission/MPI.jl
Original file line number Diff line number Diff line change
Expand Up @@ -109,21 +109,22 @@ function mpi_submission_script(exec_folder, mpi_submission::MPI, julia_cmd)
# MethodError(f=Core.Compiler.widenconst, args=(Symbol("#342"),), world=0x0000000000001342)
export JULIA_PKG_PRECOMPILE_AUTO=0
mpiexec $(mpi_submission.mpiexec_args) --merge-stderr-to-stdout --output-filename $exec_folder $julia_cmd_str
mpiexec $(cmd_to_string(mpi_submission.mpiexec_args)) --merge-stderr-to-stdout --output-filename $exec_folder $julia_cmd_str
"""
script_path = "$exec_folder/.submission_script.sh"
write(script_path, code)
return script_path
end

"""
    cmd_to_string(cmd::Cmd)

Return the printed form of `cmd` as a `String`, with its first and last
characters (the delimiting backticks) removed, so the result can be spliced
into a generated shell script.

The delimiters are dropped with `chop`, which counts characters rather than
bytes. Slicing with `[2:(end-1)]` instead throws a `StringIndexError` whenever
the last argument ends in a multi-byte (non-ASCII) character.

NOTE(review): assumes the printed form of `cmd` is backtick-delimited, which
holds for plain commands; a `Cmd` carrying a custom environment or working
directory may print differently -- confirm callers only pass plain commands.
"""
cmd_to_string(cmd::Cmd) = String(chop(string(cmd); head = 1, tail = 1))

# Internal: "rosetta stone" of submission commands
const _rosetta = (;
queue_concept = [:submit, :del, :directive, :job_name, :output_file, :error_file, :submit_dir, :job_status, :job_status_all, :ncpu_info],

# tested:
pbs = [`qsub`, `qdel`, "#PBS", "-N ", "-o ", "-e ", "\$PBS_O_WORKDIR", `qstat -x`, `qstat -u`, `pbsnodes -aSj -F dsv`],
slurm = [`sbatch`, `scancel`,"#SBATCH", "--job-name=","-o ", "-e ", "\$SLURM_SUBMIT_DIR", `squeue --job`, `squeue -u`, `sinfo`],
pbs = [`qsub`, `qdel`, "#PBS", "-N ", "-o ", "-e ", "\$PBS_O_WORKDIR", `qstat -x`, `qstat -u`, `pbsnodes`],
slurm = [`sbatch`, `scancel`,"#SBATCH", "--job-name=","-o ", "-e ", "\$SLURM_SUBMIT_DIR", `squeue --job`, `squeue -u`, `sinfo -o%C`],

# not yet tested:
lsf = [`bsub`, `bkill`, "#BSUB", "-J ", "-o ", "-e ", "\$LSB_SUBCWD", `bjobs`, `bjobs -u`, `bhosts`],
Expand Down
5 changes: 4 additions & 1 deletion src/submission/presets.jl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@ setup_mpi_sockeye(my_user_allocation_code) =
setup_mpi(
submission_system = :slurm,
environment_modules = ["gcc", "openmpi", "git"],
add_to_submission = ["#PBS -A $my_user_allocation_code"],
add_to_submission = [
"#SBATCH -A $my_user_allocation_code"
"#SBATCH --nodes=1-10000" # required by cluster
],
library_name = "/arc/software/spack-2023/opt/spack/linux-centos7-skylake_avx512/gcc-9.4.0/openmpi-4.1.1-d7o6cdvp67ngi5c5wdcw2qyjyseq3l3o/lib/libmpi"
)

Expand Down
17 changes: 3 additions & 14 deletions src/submission/submission_utils.jl
Original file line number Diff line number Diff line change
Expand Up @@ -35,20 +35,9 @@ function queue_status()
end

function queue_ncpus_free()
mpi_settings = load_mpi_settings()
@assert mpi_settings.submission_system == :pbs "Feature only supported on PBS at the moment"
r = rosetta()
n = 0
for line in readlines(`$(r.ncpu_info)`)
for item in eachsplit(line, "|")
m = match(r"ncpus[(]f[/]t[)][=]([0-9]+)[/].*", item)
if m !== nothing
suffix = m.captures[1]
n += parse(Int, suffix)
end
end
end
return n
run(`$(r.ncpu_info)`)
return nothing
end

"""
Expand All @@ -59,7 +48,7 @@ Instruct the scheduler to cancel or kill a job.
function kill_job(result::Result)
r = rosetta()
exec_folder = result.exec_folder
submission_code = readline("$exec_folder/info/submission_output.txt")
submission_code = queue_code(result)
run(`$(r.del) $submission_code`)
return nothing
end
Expand Down
3 changes: 2 additions & 1 deletion test/test_stan.jl
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ if !is_windows_in_CI()
pigeons(target = Pigeons.stan_funnel(1), record = [online], n_chains = 1, n_rounds = 5, explorer = SliceSampler())

# check we get reasonable accept on one real example
pt = pigeons(target = pp.posterior, reference = pp.prior, n_rounds = 5, n_chains = 2)
post_prior = Pigeons.stan_mRNA_post_prior_pair()
pt = pigeons(target = post_prior.posterior, reference = post_prior.prior, n_rounds = 5, n_chains = 2)
@test minimum(Pigeons.explorer_mh_prs(pt)) > 0.3
end

Expand Down

0 comments on commit 7b615b8

Please sign in to comment.