
[develop] Feature cicd scorecard metric #1079

Merged

21 commits
73d26e6  Update Jenkinsfile to trigger metrics job only once (BruceKropp-Raytheon, Apr 16, 2024)
353e095  integrated metric script into the srw test script (EdwardSnyder-NOAA, Apr 17, 2024)
6a45e03  updated metric file name (EdwardSnyder-NOAA, Apr 17, 2024)
085ba86  Update name of skill-score outfile to include platform (BruceKropp-Raytheon, Apr 17, 2024)
a77734a  updated skill score file name (EdwardSnyder-NOAA, Apr 17, 2024)
390adff  added full path (EdwardSnyder-NOAA, Apr 17, 2024)
f3c9b26  Update Jenkinsfile to define a way to get only onlineNodes (BruceKropp-Raytheon, Apr 17, 2024)
8692060  Update Jenkinsfile to skip offline nodes by label (BruceKropp-Raytheon, Apr 17, 2024)
1142030  Update srw_test.sh to check if skill-score E2E test ran before doing … (BruceKropp-Raytheon, Apr 18, 2024)
815624c  fixed typo (EdwardSnyder-NOAA, Apr 18, 2024)
12c3c21  Merge branch 'feature/cicd_scorecard' of https://github.com/BruceKrop… (EdwardSnyder-NOAA, Apr 18, 2024)
ec38aa6  Update Jenkinsfile with post on Metrics stage (BruceKropp-Raytheon, Apr 18, 2024)
2bc34c1  Update Jenkinsfile remove variable in stage name (BruceKropp-Raytheon, Apr 18, 2024)
4da210a  Update Jenkinsfile re-enable post for metrics after stages (BruceKropp-Raytheon, Apr 18, 2024)
997820d  Update Jenkinsfile to move metrics job trigger to after all stages (BruceKropp-Raytheon, Apr 18, 2024)
708bee2  Update Jenkinsfile to trim CI_JOB_NAME for metrics job param (BruceKropp-Raytheon, Apr 18, 2024)
cb60b16  Update srw_metric.sh to add compiler to outfile name (BruceKropp-Raytheon, Apr 19, 2024)
b064033  Update Jenkinsfile to include saving skill-score.txt to s3 (BruceKropp-Raytheon, Apr 19, 2024)
c18d707  Update Jenkinsfile to add timeout limit to Build stage and Test stage (BruceKropp-Raytheon, Apr 22, 2024)
b81b968  Update Jenkinsfile to allow ability to view timestamps in Jenkins con… (BruceKropp-Raytheon, Apr 24, 2024)
f7f1ca1  Merge pull request #3 from ufs-community/develop (BruceKropp-Raytheon, Apr 24, 2024)
49 changes: 32 additions & 17 deletions .cicd/Jenkinsfile
@@ -5,6 +5,8 @@ pipeline {
disableConcurrentBuilds()
overrideIndexTriggers(false)
skipDefaultCheckout(true)
timestamps()
timeout(time: 12, unit: 'HOURS')
}

parameters {
@@ -74,6 +76,11 @@ pipeline {
// Run on all platform/compiler combinations by default or build and test only on the platform(s) and
// compiler(s) specified by SRW_PLATFORM_FILTER and SRW_COMPILER_FILTER
when {
beforeAgent true
expression {
return nodesByLabel(env.SRW_PLATFORM).size() > 0
}

allOf {
anyOf {
expression { params.SRW_PLATFORM_FILTER == 'all' }
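Note on the guard added above: this is the mechanism behind the "skip offline nodes by label" commit. With beforeAgent true the expression is evaluated on the controller before any agent is requested, and nodesByLabel lists only online agents for the label by default, so an empty result skips the stage instead of leaving the build queued behind an offline node. A minimal sketch of the same pattern, with a hypothetical label rather than anything taken from this PR:

// Illustrative pipeline, not part of the PR diff. Assumes the nodesByLabel
// step (provided by a Jenkins plugin, as used in the Jenkinsfile above).
pipeline {
    agent none
    stages {
        stage('Guarded stage') {
            agent { label 'srw-example-node' }      // hypothetical label
            when {
                beforeAgent true                    // evaluate before requesting an agent
                expression {
                    // nodesByLabel returns online agents for the label by default,
                    // so an empty list means every matching node is offline.
                    return nodesByLabel('srw-example-node').size() > 0
                }
            }
            steps {
                echo 'At least one agent with this label is online.'
            }
        }
    }
}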
@@ -137,6 +144,7 @@
sh "STAGE_NAME=${env.STAGE_NAME} " + 'bash --login "${WORKSPACE}/${SRW_PLATFORM}/.cicd/scripts/disk_usage.sh"'
}
}

post {
always {
s3Upload consoleLogLevel: 'INFO', dontSetBuildResultOnFailure: false, dontWaitForConcurrentBuildCompletion: false, entries: [[bucket: 'noaa-epic-prod-jenkins-artifacts', excludedFile: '', flatten: false, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: false, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: "${env.SRW_PLATFORM}-*-time-srw_init.json", storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false]], pluginFailureResultConstraint: 'FAILURE', profileName: 'main', userMetadata: []
@@ -147,6 +155,10 @@

// Run the unified build script; if successful create a tarball of the build and upload to S3
stage('Build') {
options {
timeout(time: 4, unit: 'HOURS')
}

steps {
dir ("${env.SRW_PLATFORM}") {
echo "${env.STAGE_NAME} SRW (${env.SRW_COMPILER}) on ${env.SRW_PLATFORM} (using ${env.WORKSPACE}/${env.SRW_PLATFORM})"
@@ -183,6 +195,7 @@
environment {
TASK_DEPTH = "${env.SRW_WRAPPER_TASK_DEPTH}"
}

steps {
dir ("${env.SRW_PLATFORM}") {
echo "Running ${TASK_DEPTH} simple workflow script task tests on ${env.SRW_PLATFORM} (using ${env.WORKSPACE}/${env.SRW_PLATFORM})"
@@ -193,6 +206,10 @@

// Run the unified test script
stage('Test') {
options {
timeout(time: 8, unit: 'HOURS')
}

environment {
SRW_WE2E_EXPERIMENT_BASE_DIR = "${env.WORKSPACE}/${env.SRW_PLATFORM}/expt_dirs"
}
@@ -228,39 +245,37 @@
post {
success {
s3Upload consoleLogLevel: 'INFO', dontSetBuildResultOnFailure: false, dontWaitForConcurrentBuildCompletion: false, entries: [[bucket: 'noaa-epic-prod-jenkins-artifacts', excludedFile: '', flatten: false, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: false, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: "${env.SRW_PLATFORM}/*_test_results-*-*.txt", storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false], [bucket: 'noaa-epic-prod-jenkins-artifacts', excludedFile: '', flatten: false, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: false, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: "${env.SRW_PLATFORM}/we2e_test_logs-${env.SRW_PLATFORM}-${env.SRW_COMPILER}.tgz", storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false]], pluginFailureResultConstraint: 'FAILURE', profileName: 'main', userMetadata: []
s3Upload consoleLogLevel: 'INFO', dontSetBuildResultOnFailure: false, dontWaitForConcurrentBuildCompletion: false, entries: [[bucket: 'noaa-epic-prod-jenkins-artifacts', excludedFile: '', flatten: false, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: false, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: "${env.SRW_PLATFORM}/*-skill-score.txt", storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false]], pluginFailureResultConstraint: 'FAILURE', profileName: 'main', userMetadata: []
}
always {
s3Upload consoleLogLevel: 'INFO', dontSetBuildResultOnFailure: false, dontWaitForConcurrentBuildCompletion: false, entries: [[bucket: 'noaa-epic-prod-jenkins-artifacts', excludedFile: '', flatten: false, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: false, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: "${env.SRW_PLATFORM}-*-time-srw_test.json", storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false]], pluginFailureResultConstraint: 'FAILURE', profileName: 'main', userMetadata: []
s3Upload consoleLogLevel: 'INFO', dontSetBuildResultOnFailure: false, dontWaitForConcurrentBuildCompletion: false, entries: [[bucket: 'noaa-epic-prod-jenkins-artifacts', excludedFile: '', flatten: false, gzipFiles: false, keepForever: false, managedArtifacts: true, noUploadOnFailure: false, selectedRegion: 'us-east-1', showDirectlyInBrowser: false, sourceFile: "${env.SRW_PLATFORM}-*-disk-usage${env.STAGE_NAME}.csv", storageClass: 'STANDARD', uploadFromSlave: false, useServerSideEncryption: false]], pluginFailureResultConstraint: 'FAILURE', profileName: 'main', userMetadata: []
// Remove the data sets from the experiments directory to conserve disk space
sh 'find "${SRW_WE2E_EXPERIMENT_BASE_DIR}" -regextype posix-extended -regex "^.*(orog|[0-9]{10})$" -type d | xargs rm -rf'
}
}
}

stage('Metrics') {
steps {
script {
CI_BRANCH_NAME=env.JOB_BASE_NAME.replace("%2F","%252F")
echo "Triggering job for branch ${CI_BRANCH_NAME}/${env.BUILD_NUMBER} ..."
build job: '/ufs-srweather-app/ufs-srw-metrics', parameters: [
string(name: 'CI_JOB_NAME', value: "ufs-srweather-app/metrics"),
string(name: 'CI_BUILD_NUMBER', value: "${CI_BRANCH_NAME}/${env.BUILD_NUMBER}")
], wait: false
}
}
}
}
}
}
}
// end of stages{}

// Uncomment the following block to re-enable PW clusters
/*
post {
always {
// Stop any Parallel Works clusters that were started during the pipeline execution
script {
// Trigger another job to collect all build statistics
CI_JOB_NAME=env.JOB_NAME.replace("/${env.JOB_BASE_NAME}","")
CI_BRANCH_NAME=env.JOB_BASE_NAME.replace("%2F","%252F")
echo "post: Triggering ufs-srweather-app/ufs-srw-metrics job for ${CI_JOB_NAME} on branch build ${CI_BRANCH_NAME}/${env.BUILD_NUMBER} ..."
build job: '/ufs-srweather-app/ufs-srw-metrics', parameters: [
string(name: 'CI_JOB_NAME', value: "${CI_JOB_NAME}"),
string(name: 'CI_BUILD_NUMBER', value: "${CI_BRANCH_NAME}/${env.BUILD_NUMBER}")
], wait: false

// Uncomment the following block to re-enable PW clusters
/*
// Stop any Parallel Works clusters that were started during the pipeline execution
// def pw_clusters = ['pclusternoaav2use1', 'azclusternoaav2eus1', 'gclusternoaav2usc1']
def pw_clusters = ['pclusternoaav2use1']
def clusters = []
@@ -279,8 +294,8 @@ pipeline {
// PW_CLUSTER_NAME parameter
build job: 'parallel-works-jenkins-client/stop-cluster', parameters: [string(name: 'PW_CLUSTER_NAME', value: clusters[i])]
}
*/
}
}
}
*/
}
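Note on the new post block above: for a multibranch build, JOB_BASE_NAME carries the branch component of the job name, so stripping it from JOB_NAME leaves the project path, and the %2F to %252F replacement double-escapes slashes in branch names before they are embedded in the CI_BUILD_NUMBER parameter. A condensed sketch of the same trigger; the example value in the comment is illustrative, not taken from this PR:

// Illustrative values: JOB_NAME might look like
// "ufs-srweather-app/pipeline/feature%2Fcicd_scorecard", making
// JOB_BASE_NAME "feature%2Fcicd_scorecard".
def ciJobName    = env.JOB_NAME.replace("/${env.JOB_BASE_NAME}", "")  // project path only
def ciBranchName = env.JOB_BASE_NAME.replace("%2F", "%252F")          // double-escape '/' in branch names

// wait: false makes this fire-and-forget, so the metrics job never blocks this pipeline.
build job: '/ufs-srweather-app/ufs-srw-metrics', parameters: [
    string(name: 'CI_JOB_NAME', value: ciJobName),
    string(name: 'CI_BUILD_NUMBER', value: "${ciBranchName}/${env.BUILD_NUMBER}")
], wait: false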
.cicd/scripts/srw_metric.sh
@@ -56,17 +56,17 @@ else
fi

# Test directories
we2e_experiment_base_dir="${workspace}/../expt_dirs/metric_test"
we2e_test_dir="${workspace}/tests/WE2E"
we2e_test_name="grid_SUBCONUS_Ind_3km_ics_FV3GFS_lbcs_FV3GFS_suite_WoFS_v0"
we2e_experiment_base_dir="${we2e_experiment_base_dir:=${workspace}/../expt_dirs/metric_test}"
we2e_test_dir="${we2e_test_dir:=${workspace}/tests/WE2E}"
we2e_test_name="${test_type:=grid_SUBCONUS_Ind_3km_ics_FV3GFS_lbcs_FV3GFS_suite_WoFS_v0}"

pwd

# Setup the build environment
declare srw_compiler
srw_compiler=${SRW_COMPILER}
source etc/lmod-setup.sh ${platform,,}
module use modulefiles
source ${workspace}/etc/lmod-setup.sh ${platform,,}
module use ${workspace}/modulefiles
module load build_${platform,,}_${srw_compiler}

# Build srw
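The := expansions introduced at the top of this hunk let a caller that has already exported we2e_experiment_base_dir, we2e_test_dir, or test_type (as srw_test.sh now does) override the defaults, while a standalone run of srw_metric.sh still gets the original values. A small illustration of the ${var:=default} behavior, with hypothetical paths, not taken from the PR:

#!/usr/bin/env bash
# ${var:=default} assigns the default only when var is unset or empty.
unset we2e_test_dir
we2e_test_dir="${we2e_test_dir:=/tmp/ufs-srw/tests/WE2E}"   # hypothetical default
echo "${we2e_test_dir}"   # -> /tmp/ufs-srw/tests/WE2E (default used)

export test_type="grid_SUBCONUS_Ind_3km_ics_FV3GFS_lbcs_FV3GFS_suite_WoFS_v0"
we2e_test_name="${test_type:=coverage}"
echo "${we2e_test_name}"  # -> the exported test_type wins; "coverage" is ignored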
@@ -99,7 +99,7 @@ if [[ ${RUN_STAT_ANLY_OPT} == true ]]; then
rm -rf ${workspace}/Indy-Severe-Weather/
# Check if metprd data exists locally otherwise get it from S3
TEST_EXTRN_MDL_SOURCE_BASEDIR=$(grep TEST_EXTRN_MDL_SOURCE_BASEDIR ${workspace}/ush/machine/${SRW_PLATFORM}.yaml | awk '{print $NF}')
if [[ ! -d $(dirname ${TEST_EXTRN_MDL_SOURCE_BASEDIR})/metprd/point_stat ]] ; then
if [[ -d $(dirname ${TEST_EXTRN_MDL_SOURCE_BASEDIR})/metprd/point_stat ]] ; then
mkdir -p Indy-Severe-Weather/metprd/point_stat
cp -rp $(dirname ${TEST_EXTRN_MDL_SOURCE_BASEDIR})/metprd/point_stat Indy-Severe-Weather/metprd
elif [[ -f Indy-Severe-Weather.tgz ]]; then
@@ -108,7 +108,7 @@ if [[ ${RUN_STAT_ANLY_OPT} == true ]]; then
wget https://noaa-ufs-srw-pds.s3.amazonaws.com/sample_cases/release-public-v2.1.0/Indy-Severe-Weather.tgz
tar xvfz Indy-Severe-Weather.tgz
fi
[[ -f skill-score.txt ]] && rm skill-score.txt
[[ -f ${platform,,}-${srw_compiler}-skill-score.txt ]] && rm ${platform,,}-${srw_compiler}-skill-score.txt
# Skill score index is computed over several terms that are defined in parm/metplus/STATAnalysisConfig_skill_score.
# It is computed by aggregating the output from earlier runs of the Point-Stat and/or Grid-Stat tools over one or more cases.
# In this example, skill score index is a weighted average of 4 skill scores of RMSE statistics for wind speed, dew point temperature,
@@ -126,15 +126,15 @@ if [[ ${RUN_STAT_ANLY_OPT} == true ]]; then
sed -i 's|--load("conda")|load("conda")|g' ${workspace}/modulefiles/tasks/${platform,,}/run_vx.local.lua
fi
# Run stat_analysis
stat_analysis -config parm/metplus/STATAnalysisConfig_skill_score -lookin ${workspace}/Indy-Severe-Weather/metprd/point_stat -v 2 -out skill-score.txt
stat_analysis -config parm/metplus/STATAnalysisConfig_skill_score -lookin ${workspace}/Indy-Severe-Weather/metprd/point_stat -v 2 -out ${platform,,}-${srw_compiler}-skill-score.txt

# check skill-score.txt
cat skill-score.txt
cat ${platform,,}-${srw_compiler}-skill-score.txt

# get skill-score (SS_INDEX) and check if it is significantly smaller than 1.0
# A value greater than 1.0 indicates that the forecast model outperforms the reference,
# while a value less than 1.0 indicates that the reference outperforms the forecast.
tmp_string=$( tail -2 skill-score.txt | head -1 )
tmp_string=$( tail -2 ${platform,,}-${srw_compiler}-skill-score.txt | head -1 )
SS_INDEX=$(echo $tmp_string | awk -F " " '{print $NF}')
echo "Skill Score: ${SS_INDEX}"
if [[ ${SS_INDEX} < "0.700" ]]; then
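As the comments above note, an index above 1.0 means the forecast outperforms the reference, and the script flags values below 0.700. One detail worth knowing when reading the check: inside [[ ]], the < operator compares strings lexicographically, which happens to behave for values in this fixed 0.xxx format but does not express the numeric intent. A hedged sketch of an explicitly numeric check; the file name and error handling are illustrative, not taken from the PR:

#!/usr/bin/env bash
# Illustrative numeric threshold check; the file name is a placeholder.
skill_file="hera-intel-skill-score.txt"
SS_INDEX=$(tail -2 "${skill_file}" | head -1 | awk '{print $NF}')
echo "Skill Score: ${SS_INDEX}"

# awk exits 0 (success) when the score is below the threshold, so the
# if-branch reports the failure.
if awk -v ss="${SS_INDEX}" 'BEGIN { exit !(ss < 0.700) }'; then
    echo "Error: skill score ${SS_INDEX} is below the 0.700 threshold." >&2
    exit 1
fi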
15 changes: 8 additions & 7 deletions .cicd/scripts/srw_test.sh
@@ -11,7 +11,7 @@ script_dir="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" > /dev/null 2>&1 && pwd)
# Get repository root from Jenkins WORKSPACE variable if set, otherwise, set
# relative to script directory.
declare workspace
if [[ -n "${WORKSPACE}/${SRW_PLATFORM}" ]]; then
if [[ -d "${WORKSPACE}/${SRW_PLATFORM}" ]]; then
workspace="${WORKSPACE}/${SRW_PLATFORM}"
else
workspace="$(cd -- "${script_dir}/../.." && pwd)"
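The -n to -d change in this hunk is a real fix: the old test only checked that the concatenated string was non-empty, which is true even when WORKSPACE is unset (the string always contains at least the "/"), so the fallback branch could never run. Checking -d requires the directory to actually exist. A quick illustration with placeholder values, not taken from the PR:

#!/usr/bin/env bash
unset WORKSPACE
SRW_PLATFORM="example-platform"   # placeholder; /example-platform is assumed not to exist
[[ -n "${WORKSPACE}/${SRW_PLATFORM}" ]] && echo '-n: true even though WORKSPACE is unset'
[[ -d "${WORKSPACE}/${SRW_PLATFORM}" ]] || echo '-d: false, so the fallback path is used'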
@@ -26,20 +26,20 @@ else
fi

# Test directories
we2e_experiment_base_dir="${workspace}/expt_dirs"
we2e_test_dir="${workspace}/tests/WE2E"
export we2e_experiment_base_dir="${workspace}/expt_dirs"
export we2e_test_dir="${workspace}/tests/WE2E"

# Clean any stale test logs
rm -f ${workspace}/tests/WE2E/log.*
rm -f ${we2e_experiment_base_dir}/*/log.generate_FV3LAM_wflow ${we2e_experiment_base_dir}/*/log/* WE2E_summary*txt

# Run the end-to-end tests.
if "${SRW_WE2E_COMPREHENSIVE_TESTS}"; then
test_type="comprehensive"
export test_type="comprehensive"
else
test_type=${SRW_WE2E_SINGLE_TEST:-"coverage"}
export test_type=${SRW_WE2E_SINGLE_TEST:-"coverage"}
if [[ "${test_type}" = skill-score ]]; then
test_type="grid_SUBCONUS_Ind_3km_ics_FV3GFS_lbcs_FV3GFS_suite_WoFS_v0"
export test_type="grid_SUBCONUS_Ind_3km_ics_FV3GFS_lbcs_FV3GFS_suite_WoFS_v0"
fi
fi

@@ -48,7 +48,8 @@ cd ${we2e_test_dir}
progress_file="${workspace}/we2e_test_results-${platform}-${SRW_COMPILER}.txt"
/usr/bin/time -p -f '{\n "cpu": "%P"\n, "memMax": "%M"\n, "mem": {"text": "%X", "data": "%D", "swaps": "%W", "context": "%c", "waits": "%w"}\n, "pagefaults": {"major": "%F", "minor": "%R"}\n, "filesystem": {"inputs": "%I", "outputs": "%O"}\n, "time": {"real": "%e", "user": "%U", "sys": "%S"}\n}' -o ${WORKSPACE}/${SRW_PLATFORM}-${SRW_COMPILER}-time-srw_test.json \
./setup_WE2E_tests.sh ${platform} ${SRW_PROJECT} ${SRW_COMPILER} ${test_type} \
--expt_basedir=${we2e_experiment_base_dir} | tee ${progress_file}
--expt_basedir=${we2e_experiment_base_dir} | tee ${progress_file}; \
[[ -f ${we2e_experiment_base_dir}/grid_SUBCONUS_Ind_3km_ics_FV3GFS_lbcs_FV3GFS_suite_WoFS_v0/log.generate_FV3LAM_wflow ]] && ${workspace}/.cicd/scripts/srw_metric.sh run_stat_anly

# Set exit code to number of failures
set +e