Skip to content

Commit

Permalink
Merge branch 'jgfouca/cime/add_support_for_cori_knl' into master (PR #…
Browse files Browse the repository at this point in the history
…1432)

Changes needed to support jenkins testing on cori-knl

Cori-knl is not the default machine on cori, so some additional
command-line arguments were needed to support jenkins setting the
machine.

Fixes #1232

[BFB]

* jgfouca/cime/add_support_for_cori_knl:
  change underscore to hyphens
  Minor change to cori scratch dirs
  Changes needed to support jenkins testing on cori-knl
  • Loading branch information
jgfouca committed Apr 24, 2017
2 parents d273cba + 8b58e23 commit 9cf2de4
Show file tree
Hide file tree
Showing 4 changed files with 49 additions and 28 deletions.
4 changes: 2 additions & 2 deletions config/acme/machines/config_machines.xml
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@
<TESTS>acme_developer</TESTS>
<COMPILERS>intel,gnu,cray</COMPILERS>
<MPILIBS>mpt,mpi-serial</MPILIBS>
<CIME_OUTPUT_ROOT>$ENV{SCRATCH}/acme_scratch</CIME_OUTPUT_ROOT>
<CIME_OUTPUT_ROOT>$ENV{SCRATCH}/acme_scratch/cori-haswell</CIME_OUTPUT_ROOT>
<RUNDIR>$CIME_OUTPUT_ROOT/$CASE/run</RUNDIR>
<EXEROOT>$CIME_OUTPUT_ROOT/$CASE/bld</EXEROOT>
<DIN_LOC_ROOT>/project/projectdirs/acme/inputdata</DIN_LOC_ROOT>
Expand Down Expand Up @@ -321,7 +321,7 @@
<TESTS>acme_developer</TESTS>
<COMPILERS>intel,gnu,cray</COMPILERS>
<MPILIBS>mpt,mpi-serial</MPILIBS>
<CIME_OUTPUT_ROOT>$ENV{SCRATCH}/acme_scratch</CIME_OUTPUT_ROOT>
<CIME_OUTPUT_ROOT>$ENV{SCRATCH}/acme_scratch/cori-knl</CIME_OUTPUT_ROOT>
<RUNDIR>$CIME_OUTPUT_ROOT/$CASE/run</RUNDIR>
<EXEROOT>$CIME_OUTPUT_ROOT/$CASE/bld</EXEROOT>
<DIN_LOC_ROOT>/project/projectdirs/acme/inputdata</DIN_LOC_ROOT>
Expand Down
44 changes: 30 additions & 14 deletions scripts/Tools/jenkins_generic_job
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@ from jenkins_generic_job import jenkins_generic_job
# Don't know if this belongs here longterm
MACHINES_THAT_MAINTAIN_BASELINES = ("redsky", "melvin", "skybridge")

_MACHINE = Machines()

###############################################################################
def parse_command_line(args, description):
###############################################################################
Expand All @@ -43,18 +41,13 @@ description=description,
formatter_class=argparse.ArgumentDefaultsHelpFormatter
)

machine = _MACHINE.get_machine_name()
default_test_suite = _MACHINE.get_value("TESTS")
default_maintain_baselines = machine in MACHINES_THAT_MAINTAIN_BASELINES
default_scratch_root = _MACHINE.get_value("CIME_OUTPUT_ROOT")

CIME.utils.setup_standard_logging_options(parser)

parser.add_argument("-g", "--generate-baselines", action="store_true",
help="Generate baselines")

parser.add_argument("--baseline-compare", action="store", choices=("yes", "no"), default=("yes" if default_maintain_baselines else "no"),
help="Do baseline comparisons")
parser.add_argument("--baseline-compare",
help="Do baseline comparisons (yes/no)")

parser.add_argument("--submit-to-cdash", action="store_true",
help="Send results to CDash")
Expand All @@ -71,10 +64,10 @@ formatter_class=argparse.ArgumentDefaultsHelpFormatter
parser.add_argument("-b", "--baseline-name", default=CIME.utils.get_current_branch(repo=CIME.utils.get_cime_root()),
help="Baseline name for baselines to use. Also impacts dashboard job name. Useful for testing a branch other than next or master")

parser.add_argument("-t", "--test-suite", default=default_test_suite,
parser.add_argument("-t", "--test-suite",
help="Override default acme test suite that will be run")

parser.add_argument("-r", "--scratch-root", default=default_scratch_root,
parser.add_argument("-r", "--scratch-root",
help="Override default acme scratch root. Use this to avoid conflicting with other jenkins jobs")

parser.add_argument("--cdash-build-group", default=CIME.wait_for_tests.CDASH_DEFAULT_BUILD_GROUP,
Expand All @@ -87,6 +80,17 @@ formatter_class=argparse.ArgumentDefaultsHelpFormatter
parser.add_argument("--walltime",
help="Force a specific walltime for all tests.")

parser.add_argument("-m", "--machine",
help="The machine for which to build tests, this machine must be defined"
" in the config_machines.xml file for the given model. "
"Default is to match the name of the machine in the test name or "
"the name of the machine this script is run on to the "
"NODENAME_REGEX field in config_machines.xml. This option is highly "
"unsafe and should only be used if you know what you're doing.")

parser.add_argument("--compiler",
help="Compiler to use to build cime. Default will be the default defined for the machine.")

args = parser.parse_args(args[1:])

CIME.utils.handle_standard_logging_options(args)
Expand All @@ -97,9 +101,21 @@ formatter_class=argparse.ArgumentDefaultsHelpFormatter
"Does not make sense to use --cdash-build-name without --submit-to-cdash")
expect(not (args.cdash_project is not CIME.wait_for_tests.ACME_MAIN_CDASH and not args.submit_to_cdash),
"Does not make sense to use --cdash-project without --submit-to-cdash")
expect (args.baseline_compare in [None, "yes", "no"],
"Valid args for --baseline-compare are 'yes' or 'no'")

machine = Machines(machine=args.machine)
machine_name = machine.get_machine_name()

args.machine = machine
args.test_suite = machine.get_value("TESTS") if args.test_suite is None else args.test_suite
default_maintain_baselines = machine_name in MACHINES_THAT_MAINTAIN_BASELINES
args.baseline_compare = default_maintain_baselines if args.baseline_compare is None else args.baseline_compare == "yes"
args.scratch_root = machine.get_value("CIME_OUTPUT_ROOT") if args.scratch_root is None else args.scratch_root
args.compiler = machine.get_default_compiler() if args.compiler is None else args.compiler

return args.generate_baselines, args.submit_to_cdash, args.no_batch, args.baseline_name, args.cdash_build_name, \
args.cdash_project, args.test_suite, args.cdash_build_group, args.baseline_compare, args.scratch_root, args.parallel_jobs, args.walltime
args.cdash_project, args.test_suite, args.cdash_build_group, args.baseline_compare, args.scratch_root, args.parallel_jobs, args.walltime, args.machine, args.compiler

###############################################################################
def _main_func(description):
Expand All @@ -108,10 +124,10 @@ def _main_func(description):
test_results = doctest.testmod(verbose=True)
sys.exit(1 if test_results.failed > 0 else 0)

generate_baselines, submit_to_cdash, no_batch, cdash_build_name, cdash_project, baseline_branch, test_suite, cdash_build_group, no_baseline_compare, scratch_root, parallel_jobs, walltime = \
generate_baselines, submit_to_cdash, no_batch, cdash_build_name, cdash_project, baseline_branch, test_suite, cdash_build_group, no_baseline_compare, scratch_root, parallel_jobs, walltime, machine, compiler = \
parse_command_line(sys.argv, description)

sys.exit(0 if jenkins_generic_job(generate_baselines, submit_to_cdash, no_batch, cdash_build_name, cdash_project, baseline_branch, test_suite, cdash_build_group, no_baseline_compare, scratch_root, parallel_jobs, walltime)
sys.exit(0 if jenkins_generic_job(generate_baselines, submit_to_cdash, no_batch, cdash_build_name, cdash_project, baseline_branch, test_suite, cdash_build_group, no_baseline_compare, scratch_root, parallel_jobs, walltime, machine, compiler)
else CIME.utils.TESTS_FAILED_ERR_CODE)

###############################################################################
Expand Down
8 changes: 8 additions & 0 deletions scripts/lib/CIME/aprun.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,17 @@ def _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids,
>>> run_exe = "acme.exe"
>>> _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids, max_tasks_per_node, pes_per_node, pio_numtasks, pio_async_interface, compiler, machine, run_exe)
('aprun -S 4 -n 680 -N 8 -d 2 acme.exe : -S 2 -n 128 -N 4 -d 4 acme.exe ', 117)
>>> compiler = "intel"
>>> _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids, max_tasks_per_node, pes_per_node, pio_numtasks, pio_async_interface, compiler, machine, run_exe)
('aprun -S 4 -cc numa_node -n 680 -N 8 -d 2 acme.exe : -S 2 -cc numa_node -n 128 -N 4 -d 4 acme.exe ', 117)
>>> ntasks = [64, 64, 64, 64, 64, 64, 64, 64, 1]
>>> nthreads = [1, 1, 1, 1, 1, 1, 1, 1, 1]
>>> rootpes = [0, 0, 0, 0, 0, 0, 0, 0, 0]
>>> pstrids = [1, 1, 1, 1, 1, 1, 1, 1, 1]
>>> _get_aprun_cmd_for_case_impl(ntasks, nthreads, rootpes, pstrids, max_tasks_per_node, pes_per_node, pio_numtasks, pio_async_interface, compiler, machine, run_exe)
('aprun -S 8 -cc numa_node -n 64 -N 16 -d 1 acme.exe ', 4)
"""
max_tasks_per_node = 1 if max_tasks_per_node < 1 else max_tasks_per_node

Expand Down
21 changes: 9 additions & 12 deletions scripts/lib/jenkins_generic_job.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,8 @@
import CIME.wait_for_tests
from CIME.utils import expect
from CIME.XML.machines import Machines

import os, shutil, glob, signal, logging

_MACHINE = Machines()

###############################################################################
def cleanup_queue(set_of_jobs_we_created):
###############################################################################
Expand All @@ -27,20 +24,20 @@ def jenkins_generic_job(generate_baselines, submit_to_cdash, no_batch,
arg_cdash_build_name, cdash_project,
arg_test_suite,
cdash_build_group, baseline_compare,
scratch_root, parallel_jobs, walltime):
scratch_root, parallel_jobs, walltime,
machine, compiler):
###############################################################################
"""
Return True if all tests passed
"""
use_batch = _MACHINE.has_batch_system() and not no_batch
compiler = _MACHINE.get_default_compiler()
test_suite = _MACHINE.get_value("TESTS")
proxy = _MACHINE.get_value("PROXY")
use_batch = machine.has_batch_system() and not no_batch
test_suite = machine.get_value("TESTS")
proxy = machine.get_value("PROXY")
test_suite = test_suite if arg_test_suite is None else arg_test_suite
test_root = os.path.join(scratch_root, "jenkins")

if (use_batch):
batch_system = _MACHINE.get_value("BATCH_SYSTEM")
batch_system = machine.get_value("BATCH_SYSTEM")
expect(batch_system is not None, "Bad XML. Batch machine has no batch_system configuration.")

#
Expand Down Expand Up @@ -101,16 +98,16 @@ def jenkins_generic_job(generate_baselines, submit_to_cdash, no_batch,
baseline_args = ""
if (generate_baselines):
baseline_args = "-g -b %s" % baseline_name
elif (baseline_compare == "yes"):
elif (baseline_compare):
baseline_args = "-c -b %s" % baseline_name

batch_args = "--no-batch" if no_batch else ""
pjob_arg = "" if parallel_jobs is None else "-j %d" % parallel_jobs
walltime_arg = "" if walltime is None else " --walltime %s" % walltime

test_id = "%s_%s" % (test_id_root, CIME.utils.get_timestamp())
create_test_cmd = "./create_test %s --test-root %s -t %s %s %s %s %s" % \
(test_suite, test_root, test_id, baseline_args, batch_args, pjob_arg, walltime_arg)
create_test_cmd = "./create_test %s --test-root %s -t %s --machine %s --compiler %s %s %s %s %s" % \
(test_suite, test_root, test_id, machine.get_machine_name(), compiler, baseline_args, batch_args, pjob_arg, walltime_arg)

if (not CIME.wait_for_tests.SIGNAL_RECEIVED):
create_test_stat = CIME.utils.run_cmd(create_test_cmd, from_dir=CIME.utils.get_scripts_root(),
Expand Down

0 comments on commit 9cf2de4

Please sign in to comment.