From db011feb971b126c0e7073e9c74dfb8df39cc9e1 Mon Sep 17 00:00:00 2001 From: Arjun Date: Mon, 4 Mar 2024 07:12:40 -0800 Subject: [PATCH 01/29] Cleanups --- cm-mlops/automation/script/module.py | 23 +++++++++++++------ cm-mlops/script/get-generic-sys-util/_cm.json | 4 +++- .../script/get-generic-sys-util/customize.py | 3 ++- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/cm-mlops/automation/script/module.py b/cm-mlops/automation/script/module.py index 5ef344b51..9114c467c 100644 --- a/cm-mlops/automation/script/module.py +++ b/cm-mlops/automation/script/module.py @@ -867,7 +867,8 @@ def _run(self, i): if r['return']>0: return r - update_env_with_values(env) + r = update_env_with_values(env) + if r['return']>0: return r @@ -1006,7 +1007,8 @@ def _run(self, i): if verbose: print (recursion_spaces + ' - Processing env after dependencies ...') - update_env_with_values(env) + r = update_env_with_values(env) + if r['return']>0: return r # Check chain of prehook dependencies on other CM scripts. (No execution of customize.py for cached scripts) @@ -1251,7 +1253,8 @@ def _run(self, i): if verbose: print (recursion_spaces + ' - Processing env after docker run dependencies ...') - update_env_with_values(env) + r = update_env_with_values(env) + if r['return']>0: return r # Check chain of dependencies on other CM scripts if len(deps)>0: @@ -1266,7 +1269,8 @@ def _run(self, i): if verbose: print (recursion_spaces + ' - Processing env after dependencies ...') - update_env_with_values(env) + r = update_env_with_values(env) + if r['return']>0: return r # Clean some output files clean_tmp_files(clean_files, recursion_spaces) @@ -2822,7 +2826,8 @@ def _run_deps(self, deps, clean_env_keys_deps, env, state, const, const_state, a utils.merge_dicts({'dict1':ii, 'dict2':d, 'append_lists':True, 'append_unique':True}) - update_env_with_values(ii['env']) #to update env local to a dependency + r = update_env_with_values(ii['env']) #to update env local to a dependency + if r['return']>0: return r r = self.cmind.access(ii) if r['return']>0: return r @@ -2834,7 +2839,8 @@ def _run_deps(self, deps, clean_env_keys_deps, env, state, const, const_state, a # Restore local env env.update(tmp_env) - update_env_with_values(env) + r = update_env_with_values(env) + if r['return']>0: return r return {'return': 0} @@ -3988,6 +3994,9 @@ def update_env_with_values(env, fail_on_not_found=False): """ import re for key in env: + if key.startswith("+") and type(env[key]) != list: + return {'return': 1, 'error': 'List value expected for {} in env'.format(key)} + value = env[key] # Check cases such as --env.CM_SKIP_COMPILE @@ -4017,7 +4026,7 @@ def update_env_with_values(env, fail_on_not_found=False): env[key] = value - return + return {'return': 0} ############################################################################## diff --git a/cm-mlops/script/get-generic-sys-util/_cm.json b/cm-mlops/script/get-generic-sys-util/_cm.json index 9956a2961..563608020 100644 --- a/cm-mlops/script/get-generic-sys-util/_cm.json +++ b/cm-mlops/script/get-generic-sys-util/_cm.json @@ -287,7 +287,9 @@ }, "state": { "numactl": { - "apt": "numactl" + "apt": "numactl", + "dnf": "numactl-libs", + "yum": "numactl" } } }, diff --git a/cm-mlops/script/get-generic-sys-util/customize.py b/cm-mlops/script/get-generic-sys-util/customize.py index 36e5efbb9..e1c9e9e1d 100644 --- a/cm-mlops/script/get-generic-sys-util/customize.py +++ b/cm-mlops/script/get-generic-sys-util/customize.py @@ -44,5 +44,6 @@ def preprocess(i): if env.get('CM_HOST_OS_FLAVOR', '') == 'rhel': if env['CM_SYS_UTIL_NAME'] == "g++12": - env['+PATH'] = "/opt/rh/gcc-toolset-12/root/usr/bin" + env['+PATH'] = [ "/opt/rh/gcc-toolset-12/root/usr/bin" ] + return {'return':0} From c705fc2af904abdf2716a417998c4259c84c5311 Mon Sep 17 00:00:00 2001 From: Arjun Date: Mon, 4 Mar 2024 07:23:30 -0800 Subject: [PATCH 02/29] force redownload for large models --- cm-mlops/script/get-generic-sys-util/_cm.json | 4 ++-- cm-mlops/script/get-ml-model-dlrm-terabyte/_cm.json | 3 ++- cm-mlops/script/get-ml-model-gptj/_cm.json | 3 ++- cm-mlops/script/get-ml-model-stable-diffusion/_cm.json | 3 ++- 4 files changed, 8 insertions(+), 5 deletions(-) diff --git a/cm-mlops/script/get-generic-sys-util/_cm.json b/cm-mlops/script/get-generic-sys-util/_cm.json index 563608020..a0a5ec489 100644 --- a/cm-mlops/script/get-generic-sys-util/_cm.json +++ b/cm-mlops/script/get-generic-sys-util/_cm.json @@ -288,8 +288,8 @@ "state": { "numactl": { "apt": "numactl", - "dnf": "numactl-libs", - "yum": "numactl" + "dnf": "numactl-devel", + "yum": "numactl-devel" } } }, diff --git a/cm-mlops/script/get-ml-model-dlrm-terabyte/_cm.json b/cm-mlops/script/get-ml-model-dlrm-terabyte/_cm.json index 07ef945d1..a08e51e57 100644 --- a/cm-mlops/script/get-ml-model-dlrm-terabyte/_cm.json +++ b/cm-mlops/script/get-ml-model-dlrm-terabyte/_cm.json @@ -10,7 +10,8 @@ "CM_ML_MODEL_RETRAINING": "no", "CM_ML_MODEL_WEIGHT_TRANSFORMATIONS": "no", "CM_EXTRACT_FINAL_ENV_NAME": "CM_ML_MODEL_FILE_WITH_PATH", - "CM_DOWNLOAD_FINAL_ENV_NAME": "CM_ML_MODEL_FILE_WITH_PATH" + "CM_DOWNLOAD_FINAL_ENV_NAME": "CM_ML_MODEL_FILE_WITH_PATH", + "require_download": 1 }, "new_env_keys": [ "CM_ML_MODEL_*" diff --git a/cm-mlops/script/get-ml-model-gptj/_cm.json b/cm-mlops/script/get-ml-model-gptj/_cm.json index 512457a65..d1d0d23c9 100644 --- a/cm-mlops/script/get-ml-model-gptj/_cm.json +++ b/cm-mlops/script/get-ml-model-gptj/_cm.json @@ -7,7 +7,8 @@ "env": { "CM_ML_MODEL": "GPTJ", "CM_ML_MODEL_DATASET": "cnndm", - "CM_ML_MODEL_WEIGHT_TRANSFORMATIONS": "no" + "CM_ML_MODEL_WEIGHT_TRANSFORMATIONS": "no", + "require_download": 1 }, "input_mapping": { "checkpoint": "GPTJ_CHECKPOINT_PATH", diff --git a/cm-mlops/script/get-ml-model-stable-diffusion/_cm.json b/cm-mlops/script/get-ml-model-stable-diffusion/_cm.json index 39390a193..a649f8c2c 100644 --- a/cm-mlops/script/get-ml-model-stable-diffusion/_cm.json +++ b/cm-mlops/script/get-ml-model-stable-diffusion/_cm.json @@ -7,7 +7,8 @@ "env": { "CM_ML_MODEL": "SDXL", "CM_ML_MODEL_DATASET": "openorca", - "CM_ML_MODEL_WEIGHT_TRANSFORMATIONS": "no" + "CM_ML_MODEL_WEIGHT_TRANSFORMATIONS": "no", + "require_download": 1 }, "input_mapping": { "checkpoint": "SDXL_CHECKPOINT_PATH", From 8d22aa386cdcd62a4ddafecc152783e7ba3a6ce4 Mon Sep 17 00:00:00 2001 From: Arjun Date: Mon, 4 Mar 2024 10:48:56 -0800 Subject: [PATCH 03/29] Use rclone sync by default --- cm-mlops/script/download-file/customize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cm-mlops/script/download-file/customize.py b/cm-mlops/script/download-file/customize.py index 84f6f1f57..4d041ac82 100644 --- a/cm-mlops/script/download-file/customize.py +++ b/cm-mlops/script/download-file/customize.py @@ -112,7 +112,7 @@ def preprocess(i): elif tool == "rclone": if env.get('CM_RCLONE_CONFIG_CMD', '') != '': env['CM_DOWNLOAD_CONFIG_CMD'] = env['CM_RCLONE_CONFIG_CMD'] - env['CM_DOWNLOAD_CMD'] = f"rclone copy {url} {os.path.join(os.getcwd(), env['CM_DOWNLOAD_FILENAME'])} -P" + env['CM_DOWNLOAD_CMD'] = f"rclone sync {url} {os.path.join(os.getcwd(), env['CM_DOWNLOAD_FILENAME'])} -P" filename = env['CM_DOWNLOAD_FILENAME'] env['CM_DOWNLOAD_DOWNLOADED_FILENAME'] = filename From 905edc1007d602ac8edcd0abaf5df14003f5cbf1 Mon Sep 17 00:00:00 2001 From: Arjun Date: Mon, 4 Mar 2024 10:50:23 -0800 Subject: [PATCH 04/29] Added install numactl from src --- .../script/get-generic-sys-util/customize.py | 2 +- .../script/install-numactl-from-src/_cm.json | 93 +++++++++++++++++++ .../install-numactl-from-src/customize.py | 23 +++++ .../script/install-numactl-from-src/run.sh | 19 ++++ 4 files changed, 136 insertions(+), 1 deletion(-) create mode 100644 cm-mlops/script/install-numactl-from-src/_cm.json create mode 100644 cm-mlops/script/install-numactl-from-src/customize.py create mode 100644 cm-mlops/script/install-numactl-from-src/run.sh diff --git a/cm-mlops/script/get-generic-sys-util/customize.py b/cm-mlops/script/get-generic-sys-util/customize.py index e1c9e9e1d..f7a26a457 100644 --- a/cm-mlops/script/get-generic-sys-util/customize.py +++ b/cm-mlops/script/get-generic-sys-util/customize.py @@ -41,7 +41,7 @@ def preprocess(i): sudo = env.get('CM_SUDO', '') env['CM_SYS_UTIL_INSTALL_CMD'] = sudo + ' ' +install_cmd + ' ' + package_name - + env['+PATH'] = [] if env.get('CM_HOST_OS_FLAVOR', '') == 'rhel': if env['CM_SYS_UTIL_NAME'] == "g++12": env['+PATH'] = [ "/opt/rh/gcc-toolset-12/root/usr/bin" ] diff --git a/cm-mlops/script/install-numactl-from-src/_cm.json b/cm-mlops/script/install-numactl-from-src/_cm.json new file mode 100644 index 000000000..a79b720be --- /dev/null +++ b/cm-mlops/script/install-numactl-from-src/_cm.json @@ -0,0 +1,93 @@ +{ + "alias": "install-numactl-from-src", + "automation_alias": "script", + "automation_uid": "5b4e0237da074764", + "cache": true, + "category": "Compiler automation", + "deps": [ + { + "tags": "detect,os" + }, + { + "tags": "detect,cpu" + }, + { + "env": { + "CM_GIT_CHECKOUT_PATH_ENV_NAME": "CM_NUMACTL_SRC_REPO_PATH" + }, + "extra_cache_tags": "numactl,src,numactl-src,numactl-src-repo", + "names": [ + "numactl-src-repo" + ], + "tags": "get,git,repo", + "update_tags_from_env_with_prefix": { + "_branch.": [ + "CM_GIT_CHECKOUT" + ], + "_repo.": [ + "CM_GIT_URL" + ], + "_sha.": [ + "CM_GIT_CHECKOUT_SHA" + ], + "_tag.": [ + "CM_GIT_CHECKOUT_TAG" + ] + } + } + ], + "env": { + "CM_GIT_URL": "https://github.com/numactl/numactl" + }, + "name": "Build numactl from sources", + "new_env_keys": [ + "CM_NUMACTL_*", + "+PATH" + ], + "prehook_deps": [], + "sort": 1000, + "tags": [ + "install", + "src", + "from.src", + "numactl", + "src-numactl" + ], + "uid": "4f355ae8ca1948b2", + "variations": { + "branch.#": { + "env": { + "CM_GIT_CHECKOUT": "#" + } + }, + "repo.#": { + "env": { + "CM_GIT_URL": "#" + }, + "group": "repo" + }, + "repo.https://github.com/numactl/numactl": { + "default": true, + "env": { + "CM_GIT_URL": "https://github.com/numactl/numactl" + }, + "group": "repo" + }, + "sha.#": { + "env": { + "CM_GIT_CHECKOUT_SHA": "#" + } + }, + "tag.#": { + "ad": { + "pytorch-src-repo": { + "tags": "_no-recurse-submodules,_full-history" + } + }, + "env": { + "CM_GIT_CHECKOUT_TAG": "#" + } + } + }, + "versions": {} +} diff --git a/cm-mlops/script/install-numactl-from-src/customize.py b/cm-mlops/script/install-numactl-from-src/customize.py new file mode 100644 index 000000000..5c1ee2674 --- /dev/null +++ b/cm-mlops/script/install-numactl-from-src/customize.py @@ -0,0 +1,23 @@ +from cmind import utils +import os + +def preprocess(i): + + os_info = i['os_info'] + + if os_info['platform'] == 'windows': + return {'return':1, 'error': 'Windows is not supported in this script yet'} + + env = i['env'] + + run_cmd="python setup.py install" + + env['CM_RUN_CMD'] = run_cmd + + automation = i['automation'] + + recursion_spaces = i['recursion_spaces'] + + env['+PATH'] = [] + + return {'return':0} diff --git a/cm-mlops/script/install-numactl-from-src/run.sh b/cm-mlops/script/install-numactl-from-src/run.sh new file mode 100644 index 000000000..606b5d965 --- /dev/null +++ b/cm-mlops/script/install-numactl-from-src/run.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +CUR_DIR=$PWD +echo $PWD +rm -rf numactl +cmd="cp -r ${CM_NUMACTL_SRC_REPO_PATH} numactl" +echo "$cmd" +eval "$cmd" +cd numactl +./autogen.sh +./configure +if [ "${?}" != "0" ]; then exit 1; fi +make +if [ "${?}" != "0" ]; then exit 1; fi +#make install DESTDIR=$CUR_DIR +sudo make install +if [ "${?}" != "0" ]; then exit 1; fi + +echo "******************************************************" From 61c23e2d0d9d9af198475b03ff99d2e005d1844b Mon Sep 17 00:00:00 2001 From: Arjun Date: Mon, 4 Mar 2024 15:20:53 -0800 Subject: [PATCH 05/29] Cleanup for gh install on rhel --- cm-mlops/script/get-generic-sys-util/_cm.json | 11 ++++++++++- cm-mlops/script/get-generic-sys-util/customize.py | 4 ++++ cm-mlops/script/install-github-cli/run-rhel.sh | 4 ++-- cm-mlops/script/install-llvm-src/_cm.json | 2 +- 4 files changed, 17 insertions(+), 4 deletions(-) diff --git a/cm-mlops/script/get-generic-sys-util/_cm.json b/cm-mlops/script/get-generic-sys-util/_cm.json index a0a5ec489..5fa723b24 100644 --- a/cm-mlops/script/get-generic-sys-util/_cm.json +++ b/cm-mlops/script/get-generic-sys-util/_cm.json @@ -291,7 +291,16 @@ "dnf": "numactl-devel", "yum": "numactl-devel" } - } + }, + "deps": [ + { + "tags": "install,numactl,from.src", + "enable_if_env": { + "CM_HOST_OS_FLAVOR": [ "rhel" ], + "CM_HOST_OS_VERSION": [ "9.1", "9.2", "9.3" ] + } + } + ] }, "g++-12": { "env": { diff --git a/cm-mlops/script/get-generic-sys-util/customize.py b/cm-mlops/script/get-generic-sys-util/customize.py index f7a26a457..ef0bc33e8 100644 --- a/cm-mlops/script/get-generic-sys-util/customize.py +++ b/cm-mlops/script/get-generic-sys-util/customize.py @@ -42,8 +42,12 @@ def preprocess(i): env['CM_SYS_UTIL_INSTALL_CMD'] = sudo + ' ' +install_cmd + ' ' + package_name env['+PATH'] = [] + if env.get('CM_HOST_OS_FLAVOR', '') == 'rhel': if env['CM_SYS_UTIL_NAME'] == "g++12": env['+PATH'] = [ "/opt/rh/gcc-toolset-12/root/usr/bin" ] + if env['CM_SYS_UTIL_NAME'] == "numactl" and env['CM_HOST_OS_VERSION'] in [ "9.1", "9.2", "9.3" ]: + env['CM_SYS_UTIL_INSTALL_CMD'] = '' + return {'return':0} diff --git a/cm-mlops/script/install-github-cli/run-rhel.sh b/cm-mlops/script/install-github-cli/run-rhel.sh index b20912be7..e3ef08f5c 100644 --- a/cm-mlops/script/install-github-cli/run-rhel.sh +++ b/cm-mlops/script/install-github-cli/run-rhel.sh @@ -1,3 +1,3 @@ -sudo dnf install 'dnf-command(config-manager)' +sudo dnf install -y 'dnf-command(config-manager)' sudo dnf config-manager --add-repo https://cli.github.com/packages/rpm/gh-cli.repo -sudo dnf install gh +sudo dnf install -y gh diff --git a/cm-mlops/script/install-llvm-src/_cm.json b/cm-mlops/script/install-llvm-src/_cm.json index 0a38e7869..3ae795695 100644 --- a/cm-mlops/script/install-llvm-src/_cm.json +++ b/cm-mlops/script/install-llvm-src/_cm.json @@ -278,7 +278,7 @@ "tags": "get,generic-python-lib,_custom-python,_package.torch,_url.git+https://github.com/pytorch/pytorch.git@927dc662386af052018212c7d01309a506fc94cd", "env": { "CM_PYTHON_BIN_WITH_PATH": "<<>>/python3", - "+ CXXFLAGS": [ "-Wno-nonnull" ] + "+ CXXFLAGS": [ "-Wno-nonnull", "-Wno-maybe-uninitialized", "-Wno-uninitialized", "-Wno-free-nonheap-object" ] } }, { From 9512c339fce75736c85abec6c453a015c64afed6 Mon Sep 17 00:00:00 2001 From: Arjun Date: Mon, 4 Mar 2024 16:18:54 -0800 Subject: [PATCH 06/29] Fix bug in docker build-arg --- cm-mlops/script/build-docker-image/customize.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cm-mlops/script/build-docker-image/customize.py b/cm-mlops/script/build-docker-image/customize.py index 4a17d6280..c749b8f8d 100644 --- a/cm-mlops/script/build-docker-image/customize.py +++ b/cm-mlops/script/build-docker-image/customize.py @@ -17,7 +17,7 @@ def preprocess(i): CM_DOCKER_BUILD_ARGS.append( "CM_GH_TOKEN="+env['CM_GH_TOKEN'] ) if CM_DOCKER_BUILD_ARGS: - build_args = "--build-arg "+ " --build-arg".join(CM_DOCKER_BUILD_ARGS) + build_args = "--build-arg "+ " --build-arg ".join(CM_DOCKER_BUILD_ARGS) else: build_args = "" From 42accf5e281dd8288d39c6207488179d20f2cb3e Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 5 Mar 2024 06:00:28 +0530 Subject: [PATCH 07/29] Add tabulate deps for docker (needed in case of fake-deps) --- cm-mlops/script/build-dockerfile/dockerinfo.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cm-mlops/script/build-dockerfile/dockerinfo.json b/cm-mlops/script/build-dockerfile/dockerinfo.json index 5fba91b03..de2f8372b 100644 --- a/cm-mlops/script/build-dockerfile/dockerinfo.json +++ b/cm-mlops/script/build-dockerfile/dockerinfo.json @@ -1,6 +1,6 @@ { "python-packages": [ - "cmind", "requests", "giturlparse" + "cmind", "requests", "giturlparse", "tabulate" ], "ARGS": [ "CM_GH_TOKEN" From ad9bce8e95d3d4d8781d9a4cad1a6336bdc1e647 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Tue, 5 Mar 2024 13:07:38 +0000 Subject: [PATCH 08/29] updated requirements --- cm-mlops/requirements.txt | 5 +++-- requirements.txt | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/cm-mlops/requirements.txt b/cm-mlops/requirements.txt index 72d3086b5..4752077a6 100644 --- a/cm-mlops/requirements.txt +++ b/cm-mlops/requirements.txt @@ -1,4 +1,5 @@ -cmind >= 1.4.0 +cmind>=2.0.1 +pyyaml requests setuptools - +giturlparse diff --git a/requirements.txt b/requirements.txt index f74c75db4..4752077a6 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ -cmind>=1.6.0 +cmind>=2.0.1 pyyaml requests setuptools +giturlparse From d671397967c275d39d65cd195caa1c741002271c Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Tue, 5 Mar 2024 14:37:05 +0000 Subject: [PATCH 09/29] added "warnings" to the main CM script meta (for example to warn about sudo) --- cm-mlops/automation/script/module.py | 4 +++- cm-mlops/script/get-sys-utils-cm/_cm.json | 5 ++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/cm-mlops/automation/script/module.py b/cm-mlops/automation/script/module.py index 9114c467c..4017ad0e1 100644 --- a/cm-mlops/automation/script/module.py +++ b/cm-mlops/automation/script/module.py @@ -788,7 +788,9 @@ def _run(self, i): if r['return'] > 0: return r - warnings = r.get('warnings', []) + warnings = meta.get('warnings', []) + if len(r.get('warnings', [])) >0: + warnings += r['warnings'] variation_tags_string = r['variation_tags_string'] explicit_variation_tags = r['explicit_variation_tags'] diff --git a/cm-mlops/script/get-sys-utils-cm/_cm.json b/cm-mlops/script/get-sys-utils-cm/_cm.json index 8fd404c95..ce3c73c2d 100644 --- a/cm-mlops/script/get-sys-utils-cm/_cm.json +++ b/cm-mlops/script/get-sys-utils-cm/_cm.json @@ -31,5 +31,8 @@ "CM_PYTHON_PIP_USER": "--user" } } - } + }, + "warnings": [ + "This CM script will install extra OS system utils required for CM automation workflows using sudo!" + ] } From 77300f981b28a5761f4932954242b643042bfd6c Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Tue, 5 Mar 2024 14:41:09 +0000 Subject: [PATCH 10/29] added sudo warnings --- cm-mlops/script/get-rclone/_cm.json | 5 ++++- cm-mlops/script/install-numactl-from-src/_cm.json | 7 ++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/cm-mlops/script/get-rclone/_cm.json b/cm-mlops/script/get-rclone/_cm.json index 23c590f8d..31e1e0a9a 100644 --- a/cm-mlops/script/get-rclone/_cm.json +++ b/cm-mlops/script/get-rclone/_cm.json @@ -30,7 +30,10 @@ "system": { "env": { "CM_RCLONE_SYSTEM": "yes" - } + }, + "warnings": [ + "This CM script will install rclone using sudo/brew!" + ] } } } diff --git a/cm-mlops/script/install-numactl-from-src/_cm.json b/cm-mlops/script/install-numactl-from-src/_cm.json index a79b720be..75256a663 100644 --- a/cm-mlops/script/install-numactl-from-src/_cm.json +++ b/cm-mlops/script/install-numactl-from-src/_cm.json @@ -3,7 +3,7 @@ "automation_alias": "script", "automation_uid": "5b4e0237da074764", "cache": true, - "category": "Compiler automation", + "category": "Detection or installation of tools and artifacts", "deps": [ { "tags": "detect,os" @@ -44,7 +44,6 @@ "CM_NUMACTL_*", "+PATH" ], - "prehook_deps": [], "sort": 1000, "tags": [ "install", @@ -89,5 +88,7 @@ } } }, - "versions": {} + "warnings": [ + "This CM script will need sudo to install numactl!" + ] } From 9df17105db6d2af85d9b0e3942da6fe99e3a7b13 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 5 Mar 2024 14:49:04 +0000 Subject: [PATCH 11/29] Make redownload the default if there is no checksum success --- cm-mlops/script/download-file/run.sh | 9 ++++----- cm-mlops/script/get-ml-model-dlrm-terabyte/_cm.json | 3 +-- cm-mlops/script/get-ml-model-gptj/_cm.json | 3 +-- cm-mlops/script/get-ml-model-stable-diffusion/_cm.json | 3 +-- 4 files changed, 7 insertions(+), 11 deletions(-) diff --git a/cm-mlops/script/download-file/run.sh b/cm-mlops/script/download-file/run.sh index 7df33180c..69d07f9bc 100644 --- a/cm-mlops/script/download-file/run.sh +++ b/cm-mlops/script/download-file/run.sh @@ -7,22 +7,21 @@ if [[ -n ${CM_DOWNLOAD_CONFIG_CMD} ]]; then test $? -eq 0 || exit $? fi +require_download=1 if [ -e ${CM_DOWNLOAD_DOWNLOADED_PATH} ]; then if [[ "${CM_DOWNLOAD_CHECKSUM_CMD}" != "" ]]; then echo "" echo "${CM_DOWNLOAD_CHECKSUM_CMD}" eval "${CM_DOWNLOAD_CHECKSUM_CMD}" if [ $? -ne 0 ]; then + # checksum not supposed to fail for locally given file if [[ "${CM_DOWNLOAD_LOCAL_FILE_PATH}" != "" ]]; then exit 1 fi - require_download="1" + else + require_download="0" fi - - test $? -eq 0 || require_download="1" fi -else - require_download="1" fi if [[ ${require_download} == "1" ]]; then diff --git a/cm-mlops/script/get-ml-model-dlrm-terabyte/_cm.json b/cm-mlops/script/get-ml-model-dlrm-terabyte/_cm.json index a08e51e57..07ef945d1 100644 --- a/cm-mlops/script/get-ml-model-dlrm-terabyte/_cm.json +++ b/cm-mlops/script/get-ml-model-dlrm-terabyte/_cm.json @@ -10,8 +10,7 @@ "CM_ML_MODEL_RETRAINING": "no", "CM_ML_MODEL_WEIGHT_TRANSFORMATIONS": "no", "CM_EXTRACT_FINAL_ENV_NAME": "CM_ML_MODEL_FILE_WITH_PATH", - "CM_DOWNLOAD_FINAL_ENV_NAME": "CM_ML_MODEL_FILE_WITH_PATH", - "require_download": 1 + "CM_DOWNLOAD_FINAL_ENV_NAME": "CM_ML_MODEL_FILE_WITH_PATH" }, "new_env_keys": [ "CM_ML_MODEL_*" diff --git a/cm-mlops/script/get-ml-model-gptj/_cm.json b/cm-mlops/script/get-ml-model-gptj/_cm.json index d1d0d23c9..512457a65 100644 --- a/cm-mlops/script/get-ml-model-gptj/_cm.json +++ b/cm-mlops/script/get-ml-model-gptj/_cm.json @@ -7,8 +7,7 @@ "env": { "CM_ML_MODEL": "GPTJ", "CM_ML_MODEL_DATASET": "cnndm", - "CM_ML_MODEL_WEIGHT_TRANSFORMATIONS": "no", - "require_download": 1 + "CM_ML_MODEL_WEIGHT_TRANSFORMATIONS": "no" }, "input_mapping": { "checkpoint": "GPTJ_CHECKPOINT_PATH", diff --git a/cm-mlops/script/get-ml-model-stable-diffusion/_cm.json b/cm-mlops/script/get-ml-model-stable-diffusion/_cm.json index a649f8c2c..39390a193 100644 --- a/cm-mlops/script/get-ml-model-stable-diffusion/_cm.json +++ b/cm-mlops/script/get-ml-model-stable-diffusion/_cm.json @@ -7,8 +7,7 @@ "env": { "CM_ML_MODEL": "SDXL", "CM_ML_MODEL_DATASET": "openorca", - "CM_ML_MODEL_WEIGHT_TRANSFORMATIONS": "no", - "require_download": 1 + "CM_ML_MODEL_WEIGHT_TRANSFORMATIONS": "no" }, "input_mapping": { "checkpoint": "SDXL_CHECKPOINT_PATH", From 5ab15a4e4f3118c60ede3fb743cd686ad37344a8 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 5 Mar 2024 14:55:54 +0000 Subject: [PATCH 12/29] Make redownload the default if there is no checksum success --- cm-mlops/script/download-file/run.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cm-mlops/script/download-file/run.sh b/cm-mlops/script/download-file/run.sh index 69d07f9bc..505c7730f 100644 --- a/cm-mlops/script/download-file/run.sh +++ b/cm-mlops/script/download-file/run.sh @@ -8,6 +8,10 @@ if [[ -n ${CM_DOWNLOAD_CONFIG_CMD} ]]; then fi require_download=1 +if [[ "${CM_DOWNLOAD_LOCAL_FILE_PATH}" != "" ]]; then + require_download=0 +fi + if [ -e ${CM_DOWNLOAD_DOWNLOADED_PATH} ]; then if [[ "${CM_DOWNLOAD_CHECKSUM_CMD}" != "" ]]; then echo "" From d27e286c00370ba6515de75cb19d87fbafc4451d Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 5 Mar 2024 15:00:22 +0000 Subject: [PATCH 13/29] Make redownload the default if there is no checksum success --- cm-mlops/script/download-file/run.sh | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/cm-mlops/script/download-file/run.sh b/cm-mlops/script/download-file/run.sh index 505c7730f..cb6d43ffa 100644 --- a/cm-mlops/script/download-file/run.sh +++ b/cm-mlops/script/download-file/run.sh @@ -1,5 +1,9 @@ #!/bin/bash +if [[ ${CM_DOWNLOAD_TOOL} == "cmutil" ]]; then + exit 0 +fi + if [[ -n ${CM_DOWNLOAD_CONFIG_CMD} ]]; then echo "" echo "${CM_DOWNLOAD_CONFIG_CMD}" From e2821021690ae04879a40c90964887be9ad356b1 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Tue, 5 Mar 2024 15:08:06 +0000 Subject: [PATCH 14/29] added onnx image classification test --- .../test-image-classification-onnx.yml | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 .github/workflows/test-image-classification-onnx.yml diff --git a/.github/workflows/test-image-classification-onnx.yml b/.github/workflows/test-image-classification-onnx.yml new file mode 100644 index 000000000..c0e76a5d5 --- /dev/null +++ b/.github/workflows/test-image-classification-onnx.yml @@ -0,0 +1,36 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: image classification with ONNX + +on: + pull_request: + branches: [ "master", "dev" ] + paths: + - '.github/workflows/test-image-classification-onnx.yml' + - 'cm-mlops/**' + - '!cm-mlops/**.md' + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: [ "3.9", "3.12" ] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python3 -m pip install cmind + cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} + cm run script --quiet --tags=get,sys-utils-cm + - name: Test image classification with ONNX + run: | + cmr "python app image-classification onnx" --quiet From 10c35822f2023ec7537d9e4bbd9f6fdc8af24cca Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 5 Mar 2024 15:20:57 +0000 Subject: [PATCH 15/29] Make redownload the default if there is no checksum success --- cm-mlops/script/download-file/run.sh | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cm-mlops/script/download-file/run.sh b/cm-mlops/script/download-file/run.sh index cb6d43ffa..6d922fc7e 100644 --- a/cm-mlops/script/download-file/run.sh +++ b/cm-mlops/script/download-file/run.sh @@ -1,8 +1,5 @@ #!/bin/bash -if [[ ${CM_DOWNLOAD_TOOL} == "cmutil" ]]; then - exit 0 -fi if [[ -n ${CM_DOWNLOAD_CONFIG_CMD} ]]; then echo "" @@ -12,10 +9,15 @@ if [[ -n ${CM_DOWNLOAD_CONFIG_CMD} ]]; then fi require_download=1 + if [[ "${CM_DOWNLOAD_LOCAL_FILE_PATH}" != "" ]]; then require_download=0 fi +if [[ ${CM_DOWNLOAD_TOOL} == "cmutil" ]]; then + require_download=0 +fi + if [ -e ${CM_DOWNLOAD_DOWNLOADED_PATH} ]; then if [[ "${CM_DOWNLOAD_CHECKSUM_CMD}" != "" ]]; then echo "" From 90a315959318c4ecbc24feb6c5d8185c584ae326 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 5 Mar 2024 21:02:33 +0530 Subject: [PATCH 16/29] Put mlperf sut configs and descriptions in CM cache --- cm-mlops/script/get-mlperf-inference-sut-configs/_cm.json | 2 +- .../script/get-mlperf-inference-sut-configs/customize.py | 4 ++-- .../script/get-mlperf-inference-sut-description/_cm.json | 2 +- .../get-mlperf-inference-sut-description/customize.py | 8 ++++---- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/cm-mlops/script/get-mlperf-inference-sut-configs/_cm.json b/cm-mlops/script/get-mlperf-inference-sut-configs/_cm.json index d7e7c5adf..7ad8376f3 100644 --- a/cm-mlops/script/get-mlperf-inference-sut-configs/_cm.json +++ b/cm-mlops/script/get-mlperf-inference-sut-configs/_cm.json @@ -2,7 +2,7 @@ "alias": "get-mlperf-inference-sut-configs", "automation_alias": "script", "automation_uid": "5b4e0237da074764", - "cache": false, + "cache": true, "category": "MLPerf benchmark support", "new_env_keys": [ "CM_HW_*", diff --git a/cm-mlops/script/get-mlperf-inference-sut-configs/customize.py b/cm-mlops/script/get-mlperf-inference-sut-configs/customize.py index 94f63e6e7..f074ed30b 100644 --- a/cm-mlops/script/get-mlperf-inference-sut-configs/customize.py +++ b/cm-mlops/script/get-mlperf-inference-sut-configs/customize.py @@ -41,7 +41,7 @@ def postprocess(i): elif env.get('CM_SUT_USE_EXTERNAL_CONFIG_REPO', '') == "yes": path = env.get('CM_GIT_CHECKOUT_PATH') else: - path = os.path.join(env['CM_TMP_CURRENT_SCRIPT_PATH'], "configs") + path = os.path.join(os.getcwd(), "configs") config_path = os.path.join(path, env['CM_HW_NAME'], implementation_string+"-implementation", device+"-device", backend+"-framework", "framework-version-"+backend_version, run_config_string + "-config.yaml") if not os.path.exists(config_path): @@ -51,7 +51,7 @@ def postprocess(i): shutil.copy(config_path_default, config_path) else: print(f"Config file missing for given hw_name: '{env['CM_HW_NAME']}', implementation: '{implementation_string}', device: '{device}, backend: '{backend}', copying from default") - src_config = os.path.join(path, "default", "config.yaml") + src_config = os.path.join(env['CM_TMP_CURRENT_SCRIPT_PATH'], "configs", "default", "config.yaml") shutil.copy(src_config, config_path) os.makedirs(os.path.dirname(config_path_default), exist_ok=True) shutil.copy(src_config, config_path_default) diff --git a/cm-mlops/script/get-mlperf-inference-sut-description/_cm.json b/cm-mlops/script/get-mlperf-inference-sut-description/_cm.json index 509bb268b..4c3f998e5 100644 --- a/cm-mlops/script/get-mlperf-inference-sut-description/_cm.json +++ b/cm-mlops/script/get-mlperf-inference-sut-description/_cm.json @@ -2,7 +2,7 @@ "alias": "get-mlperf-inference-sut-description", "automation_alias": "script", "automation_uid": "5b4e0237da074764", - "cache": false, + "cache": true, "category": "MLPerf benchmark support", "deps": [ { diff --git a/cm-mlops/script/get-mlperf-inference-sut-description/customize.py b/cm-mlops/script/get-mlperf-inference-sut-description/customize.py index 776e9b71c..7ee1646dc 100644 --- a/cm-mlops/script/get-mlperf-inference-sut-description/customize.py +++ b/cm-mlops/script/get-mlperf-inference-sut-description/customize.py @@ -32,8 +32,8 @@ def preprocess(i): backend_desc += ' v' + backend_version sut = hw_name + sut_suffix - path = i['run_script_input']['path'] - sut_path = os.path.join(path, "suts", sut + ".json") + script_path = i['run_script_input']['path'] + sut_path = os.path.join(os.getcwd(), "suts", sut + ".json") if os.path.exists(sut_path) and env.get('CM_SUT_DESC_CACHE', '') == "yes": print(f"Reusing SUT description file {sut}") state['CM_SUT_META'] = json.load(open(sut_path)) @@ -42,9 +42,9 @@ def preprocess(i): os.makedirs(os.path.dirname(sut_path)) print("Generating SUT description file for " + sut) - hw_path = os.path.join(path, "hardware", hw_name + ".json") + hw_path = os.path.join(os.getcwd(), "hardware", hw_name + ".json") if not os.path.exists(hw_path): - default_hw_path = os.path.join(path, "hardware", "default.json") + default_hw_path = os.path.join(script_path, "hardware", "default.json") print("HW description file for " + hw_name + " not found. Copying from default!!!") shutil.copy(default_hw_path, hw_path) From 44d3387e034bebf5a8996ba0c9bc54cce7270033 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 5 Mar 2024 21:05:47 +0530 Subject: [PATCH 17/29] Put mlperf sut configs and descriptions in CM cache --- .../script/get-mlperf-inference-sut-description/customize.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cm-mlops/script/get-mlperf-inference-sut-description/customize.py b/cm-mlops/script/get-mlperf-inference-sut-description/customize.py index 7ee1646dc..cd0c2f754 100644 --- a/cm-mlops/script/get-mlperf-inference-sut-description/customize.py +++ b/cm-mlops/script/get-mlperf-inference-sut-description/customize.py @@ -43,6 +43,8 @@ def preprocess(i): print("Generating SUT description file for " + sut) hw_path = os.path.join(os.getcwd(), "hardware", hw_name + ".json") + if not os.path.exists(os.path.dirname(hw_path)): + os.makedirs(os.path.dirname(hw_path)) if not os.path.exists(hw_path): default_hw_path = os.path.join(script_path, "hardware", "default.json") print("HW description file for " + hw_name + " not found. Copying from default!!!") From 6e26e12cb808bec8ea267ddd48266e294a6641de Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Tue, 5 Mar 2024 15:40:02 +0000 Subject: [PATCH 18/29] added extra Python 3.12 tests --- .github/workflows/test-cm-script-features.yml | 2 +- .github/workflows/test-cm-scripts.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test-cm-script-features.yml b/.github/workflows/test-cm-script-features.yml index 9cd32d081..026c79e74 100644 --- a/.github/workflows/test-cm-script-features.yml +++ b/.github/workflows/test-cm-script-features.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.10", "3.9", "3.8"] + python-version: ["3.12", "3.11", "3.10", "3.9", "3.8"] steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/test-cm-scripts.yml b/.github/workflows/test-cm-scripts.yml index bf5b00341..fc00a22f5 100644 --- a/.github/workflows/test-cm-scripts.yml +++ b/.github/workflows/test-cm-scripts.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.9"] + python-version: ["3.12", "3.9"] steps: - uses: actions/checkout@v3 From 62ff146fc828d0df54db40367d5bb15acac3c6f3 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Tue, 5 Mar 2024 15:45:25 +0000 Subject: [PATCH 19/29] Added MLPerf inference MIL C++ test --- ...test-mlperf-inference-mil-cpp-resnet50.yml | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 .github/workflows/test-mlperf-inference-mil-cpp-resnet50.yml diff --git a/.github/workflows/test-mlperf-inference-mil-cpp-resnet50.yml b/.github/workflows/test-mlperf-inference-mil-cpp-resnet50.yml new file mode 100644 index 000000000..ccb27eef7 --- /dev/null +++ b/.github/workflows/test-mlperf-inference-mil-cpp-resnet50.yml @@ -0,0 +1,38 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: MLPerf inference MIL C++ ResNet50 + +on: + pull_request: + branches: [ "master", "dev" ] + paths: + - '.github/workflows/test-mlperf-inference-mil-cpp-resnet50.yml' + - 'cm-mlops/**' + - '!cm-mlops/**.md' + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: [ "3.12", "3.9" ] + llvm-version: [ "15.0.6", "16.0.4", "17.0.6" ] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python3 -m pip install cmind + cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} + cm run script --quiet --tags=get,sys-utils-cm + cm run script --quiet --tags=install,llvm-prebuilt --version=${{ matrix.llvm-version }} + - name: Test MLPerf Inference MIL C++ ResNet50 + run: | + cmr "app mlperf inference cpp" -v --quiet From 873311a0fcf216e44d83c12e4d396c388fcec9d3 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Tue, 5 Mar 2024 15:58:04 +0000 Subject: [PATCH 20/29] testing 3.11 instead of 3.12 --- .github/workflows/test-cm-script-features.yml | 2 +- .github/workflows/test-cm-scripts.yml | 2 +- .github/workflows/test-mlperf-inference-mil-cpp-resnet50.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-cm-script-features.yml b/.github/workflows/test-cm-script-features.yml index 026c79e74..5606fa565 100644 --- a/.github/workflows/test-cm-script-features.yml +++ b/.github/workflows/test-cm-script-features.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.12", "3.11", "3.10", "3.9", "3.8"] + python-version: ["3.11", "3.10", "3.9", "3.8"] steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/test-cm-scripts.yml b/.github/workflows/test-cm-scripts.yml index fc00a22f5..49a67db10 100644 --- a/.github/workflows/test-cm-scripts.yml +++ b/.github/workflows/test-cm-scripts.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.12", "3.9"] + python-version: ["3.11", "3.9"] steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/test-mlperf-inference-mil-cpp-resnet50.yml b/.github/workflows/test-mlperf-inference-mil-cpp-resnet50.yml index ccb27eef7..0bd3cf318 100644 --- a/.github/workflows/test-mlperf-inference-mil-cpp-resnet50.yml +++ b/.github/workflows/test-mlperf-inference-mil-cpp-resnet50.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.12", "3.9" ] + python-version: [ "3.11", "3.9" ] llvm-version: [ "15.0.6", "16.0.4", "17.0.6" ] steps: From f939247e8f35b8acaaf204cdbdbdcc09205e5f00 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Tue, 5 Mar 2024 16:00:20 +0000 Subject: [PATCH 21/29] fixed prebuilt llvm tags --- .github/workflows/test-mlperf-inference-mil-cpp-resnet50.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-mlperf-inference-mil-cpp-resnet50.yml b/.github/workflows/test-mlperf-inference-mil-cpp-resnet50.yml index 0bd3cf318..225a61856 100644 --- a/.github/workflows/test-mlperf-inference-mil-cpp-resnet50.yml +++ b/.github/workflows/test-mlperf-inference-mil-cpp-resnet50.yml @@ -32,7 +32,7 @@ jobs: python3 -m pip install cmind cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} cm run script --quiet --tags=get,sys-utils-cm - cm run script --quiet --tags=install,llvm-prebuilt --version=${{ matrix.llvm-version }} + cm run script --quiet --tags=install,prebuilt,llvm --version=${{ matrix.llvm-version }} - name: Test MLPerf Inference MIL C++ ResNet50 run: | cmr "app mlperf inference cpp" -v --quiet From cb3d2b5beabe8e3169ce5d47826693e05219df92 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Tue, 5 Mar 2024 16:03:32 +0000 Subject: [PATCH 22/29] test standalone Python loadgen with ONNX and BERT --- ...adgen-onnx-huggingface-bert-fp32-squad.yml | 36 +++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 .github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml diff --git a/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml b/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml new file mode 100644 index 000000000..5d9494f90 --- /dev/null +++ b/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml @@ -0,0 +1,36 @@ +# This workflow will install Python dependencies, run tests and lint with a variety of Python versions +# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions + +name: MLPerf loadgen with HuggingFace bert onnx fp32 squad model + +on: + pull_request: + branches: [ "master", "dev" ] + paths: + - '.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml' + - 'cm-mlops/**' + - '!cm-mlops/**.md' + +jobs: + build: + + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: [ "3.11", "3.9" ] + + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v3 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python3 -m pip install cmind + cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} + cm run script --quiet --tags=get,sys-utils-cm + - name: Test MLPerf Inference Bert + run: | + cmr "python app loadgen-generic _onnxruntime _custom _huggingface _model-stub.ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1" --adr.hf-downloader.model_filename=model.onnx From c7063ef28f87c69baf0b9e2962d63b24c3a33748 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 5 Mar 2024 21:35:32 +0530 Subject: [PATCH 23/29] Fixes for mlperf inference reference bert pytorch --- cm-mlops/script/app-mlperf-inference-reference/_cm.yaml | 4 ++++ cm-mlops/script/get-generic-python-lib/_cm.json | 4 ++-- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/cm-mlops/script/app-mlperf-inference-reference/_cm.yaml b/cm-mlops/script/app-mlperf-inference-reference/_cm.yaml index 3e441f08e..b318e5633 100644 --- a/cm-mlops/script/app-mlperf-inference-reference/_cm.yaml +++ b/cm-mlops/script/app-mlperf-inference-reference/_cm.yaml @@ -111,6 +111,7 @@ deps: - onnxruntime - tf - tflite + - pytorch # Detect TensorRT if required - tags: get,nvidia,tensorrt @@ -835,6 +836,9 @@ variations: names: - ml-engine-pytorch - pytorch + skip_if_env: + CM_MLPERF_DEVICE: + - gpu add_deps_recursive: inference-src: tags: _deeplearningexamples diff --git a/cm-mlops/script/get-generic-python-lib/_cm.json b/cm-mlops/script/get-generic-python-lib/_cm.json index eb1a5b065..ebc29943d 100644 --- a/cm-mlops/script/get-generic-python-lib/_cm.json +++ b/cm-mlops/script/get-generic-python-lib/_cm.json @@ -887,7 +887,7 @@ }, "env": { "CM_GENERIC_PYTHON_PACKAGE_NAME": "torch", - "CM_GENERIC_PYTHON_PIP_EXTRA_INDEX_URL": "https://download.pytorch.org/whl/<<>>", + "CM_GENERIC_PYTHON_PIP_EXTRA_INDEX_URL1": "https://download.pytorch.org/whl/<<>>", "CM_TORCH_VERSION_EXTRA": "CUDA" }, "new_env_keys": [ @@ -917,7 +917,7 @@ }, "env": { "CM_GENERIC_PYTHON_PACKAGE_NAME": "torchaudio", - "CM_GENERIC_PYTHON_PIP_EXTRA_INDEX_URL": "https://download.pytorch.org/whl/<<>>", + "CM_GENERIC_PYTHON_PIP_EXTRA_INDEX_URL1": "https://download.pytorch.org/whl/<<>>", "CM_TORCHAUDIO_VERSION_EXTRA": "CUDA" }, "new_env_keys": [ From faf7a0eae595c76b23a62563dd3d32542c6348bd Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Tue, 5 Mar 2024 16:06:10 +0000 Subject: [PATCH 24/29] forgot --quiet in loadgen test --- .../test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml b/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml index 5d9494f90..999063091 100644 --- a/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml +++ b/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml @@ -33,4 +33,4 @@ jobs: cm run script --quiet --tags=get,sys-utils-cm - name: Test MLPerf Inference Bert run: | - cmr "python app loadgen-generic _onnxruntime _custom _huggingface _model-stub.ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1" --adr.hf-downloader.model_filename=model.onnx + cmr "python app loadgen-generic _onnxruntime _custom _huggingface _model-stub.ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1" --adr.hf-downloader.model_filename=model.onnx --quiet From 3111edb1c62bed9bbe7a7d256bcc76fdcf5d2087 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Tue, 5 Mar 2024 16:30:15 +0000 Subject: [PATCH 25/29] adding new tests --- .github/workflows/test-cm-script-features.yml | 2 +- .github/workflows/test-cm-scripts.yml | 2 +- .../test-image-classification-onnx.yml | 2 +- .../workflows/test-mlperf-inference-bert.yml | 2 +- .../workflows/test-mlperf-inference-gptj.yml | 2 +- ...test-mlperf-inference-mil-cpp-resnet50.yml | 2 +- .../test-mlperf-inference-resnet50.yml | 2 +- .../test-mlperf-inference-retinanet.yml | 2 +- .../workflows/test-mlperf-inference-rnnt.yml | 2 +- .../workflows/test-mlperf-inference-tvm.yml | 2 +- ...adgen-onnx-huggingface-bert-fp32-squad.yml | 4 ++-- tests/script/test_deps.py | 21 ------------------- 12 files changed, 12 insertions(+), 33 deletions(-) delete mode 100644 tests/script/test_deps.py diff --git a/.github/workflows/test-cm-script-features.yml b/.github/workflows/test-cm-script-features.yml index 5606fa565..026c79e74 100644 --- a/.github/workflows/test-cm-script-features.yml +++ b/.github/workflows/test-cm-script-features.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.11", "3.10", "3.9", "3.8"] + python-version: ["3.12", "3.11", "3.10", "3.9", "3.8"] steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/test-cm-scripts.yml b/.github/workflows/test-cm-scripts.yml index 49a67db10..fc00a22f5 100644 --- a/.github/workflows/test-cm-scripts.yml +++ b/.github/workflows/test-cm-scripts.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.11", "3.9"] + python-version: ["3.12", "3.9"] steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/test-image-classification-onnx.yml b/.github/workflows/test-image-classification-onnx.yml index c0e76a5d5..62049d1c5 100644 --- a/.github/workflows/test-image-classification-onnx.yml +++ b/.github/workflows/test-image-classification-onnx.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.9", "3.12" ] + python-version: [ "3.12", "3.9"] steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/test-mlperf-inference-bert.yml b/.github/workflows/test-mlperf-inference-bert.yml index 8c679dc2a..7135802a7 100644 --- a/.github/workflows/test-mlperf-inference-bert.yml +++ b/.github/workflows/test-mlperf-inference-bert.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.9" ] + python-version: [ "3.12", "3.9" ] backend: [ "deepsparse", "tf", "onnxruntime", "pytorch" ] precision: [ "int8", "fp32" ] exclude: diff --git a/.github/workflows/test-mlperf-inference-gptj.yml b/.github/workflows/test-mlperf-inference-gptj.yml index 6752dea99..ea1a70fa8 100644 --- a/.github/workflows/test-mlperf-inference-gptj.yml +++ b/.github/workflows/test-mlperf-inference-gptj.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.9" ] + python-version: [ "3.12", "3.9" ] backend: [ "pytorch" ] precision: [ "bfloat16" ] diff --git a/.github/workflows/test-mlperf-inference-mil-cpp-resnet50.yml b/.github/workflows/test-mlperf-inference-mil-cpp-resnet50.yml index 225a61856..6a499d5a4 100644 --- a/.github/workflows/test-mlperf-inference-mil-cpp-resnet50.yml +++ b/.github/workflows/test-mlperf-inference-mil-cpp-resnet50.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.11", "3.9" ] + python-version: [ "3.12", "3.9" ] llvm-version: [ "15.0.6", "16.0.4", "17.0.6" ] steps: diff --git a/.github/workflows/test-mlperf-inference-resnet50.yml b/.github/workflows/test-mlperf-inference-resnet50.yml index d28349c70..29b7b3cb5 100644 --- a/.github/workflows/test-mlperf-inference-resnet50.yml +++ b/.github/workflows/test-mlperf-inference-resnet50.yml @@ -20,7 +20,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.8" ] + python-version: [ "3.12", "3.9" ] backend: [ "onnxruntime", "tf" ] implementation: [ "python", "cpp" ] exclude: diff --git a/.github/workflows/test-mlperf-inference-retinanet.yml b/.github/workflows/test-mlperf-inference-retinanet.yml index 4d610575b..4846aa2bd 100644 --- a/.github/workflows/test-mlperf-inference-retinanet.yml +++ b/.github/workflows/test-mlperf-inference-retinanet.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.9" ] + python-version: [ "3.12", "3.9" ] backend: [ "onnxruntime", "pytorch" ] implementation: [ "python", "cpp" ] exclude: diff --git a/.github/workflows/test-mlperf-inference-rnnt.yml b/.github/workflows/test-mlperf-inference-rnnt.yml index fef2e12e7..ac092ae0b 100644 --- a/.github/workflows/test-mlperf-inference-rnnt.yml +++ b/.github/workflows/test-mlperf-inference-rnnt.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.9" ] + python-version: [ "3.12", "3.9" ] backend: [ "pytorch" ] precision: [ "fp32" ] diff --git a/.github/workflows/test-mlperf-inference-tvm.yml b/.github/workflows/test-mlperf-inference-tvm.yml index 44ba7d067..04c624513 100644 --- a/.github/workflows/test-mlperf-inference-tvm.yml +++ b/.github/workflows/test-mlperf-inference-tvm.yml @@ -19,7 +19,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.10" ] + python-version: [ "3.12", "3.10" ] backend: [ "tvm-onnx" ] steps: diff --git a/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml b/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml index 999063091..a66e40981 100644 --- a/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml +++ b/.github/workflows/test-mlperf-loadgen-onnx-huggingface-bert-fp32-squad.yml @@ -18,7 +18,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.11", "3.9" ] + python-version: [ "3.12", "3.9" ] steps: - uses: actions/checkout@v3 @@ -31,6 +31,6 @@ jobs: python3 -m pip install cmind cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} cm run script --quiet --tags=get,sys-utils-cm - - name: Test MLPerf Inference Bert + - name: Test MLPerf loadgen with HuggingFace bert onnx fp32 squad model run: | cmr "python app loadgen-generic _onnxruntime _custom _huggingface _model-stub.ctuning/mlperf-inference-bert-onnx-fp32-squad-v1.1" --adr.hf-downloader.model_filename=model.onnx --quiet diff --git a/tests/script/test_deps.py b/tests/script/test_deps.py deleted file mode 100644 index d4505788c..000000000 --- a/tests/script/test_deps.py +++ /dev/null @@ -1,21 +0,0 @@ -# This test covers version, variation, compilation from src, add_deps, add_deps_recursive, deps, post_deps - -import cmind as cm -import check as checks - -r = cm.access({'action':'run', 'automation':'script', 'tags': 'generate-run-cmds,mlperf', 'adr': - {'loadgen': {'version': 'r3.0'}, 'compiler': {'tags': "gcc"}}, 'env': {'CM_MODEL': 'resnet50', - 'CM_DEVICE': 'cpu', 'CM_BACKEND': 'onnxruntime'}, 'quiet': 'yes'}) -checks.check_return(r) - -r = cm.access({'action':'search', 'automation': 'cache', 'tags': 'loadgen,version-r3.0,deps-python-non-virtual'}) -checks.check_list(r, "loadgen,version-r3.0,deps-python-non-virtual") - -r = cm.access({'action':'search', 'automation': 'cache', 'tags': 'inference,src,version-r3.0'}) -checks.check_list(r, "inference,src,version-r3.0") - -r = cm.access({'action':'run', 'automation':'script', 'tags': 'app,mlperf,inference,generic,_python,_resnet50,_onnxruntime,_cpu,_r3.0_default', 'adr': {'mlperf-implementation': { 'version': 'master'}}, 'quiet': 'yes'}) -checks.check_return(r) - -r = cm.access({'action':'run', 'automation':'script', 'tags': 'app,mlperf,inference,generic,_python,_resnet50,_tf,_cpu,_r3.0_default', 'adr': {'mlperf-implementation': { 'version': 'master'}}, 'quiet': 'yes'}) -checks.check_return(r) From 7a02a143c6462f8467f92d7b92d01f63fa656517 Mon Sep 17 00:00:00 2001 From: Arjun Suresh Date: Tue, 5 Mar 2024 22:06:02 +0530 Subject: [PATCH 26/29] Fix rnnt test --- .github/workflows/test-mlperf-inference-rnnt.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-mlperf-inference-rnnt.yml b/.github/workflows/test-mlperf-inference-rnnt.yml index ac092ae0b..d6c1ae6a2 100644 --- a/.github/workflows/test-mlperf-inference-rnnt.yml +++ b/.github/workflows/test-mlperf-inference-rnnt.yml @@ -35,4 +35,4 @@ jobs: cm run script --quiet --tags=get,sys-utils-cm - name: Test MLPerf Inference RNNT run: | - cm run script --tags=run,mlperf,inference,generate-run-cmds,_performance-only --submitter="cTuning" --model=rnnt --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --precision=${{ matrix.precision }} --target_qps=5 --adr.ml-engine-pytorch.version=1.13.0 --adr.ml-engine-torchvision.version=0.14.1 --adr.librosa.version_max=0.9.1 -v --quiet + cm run script --tags=run,mlperf,inference,generate-run-cmds,_performance-only --submitter="cTuning" --model=rnnt --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --precision=${{ matrix.precision }} --target_qps=5 -v --quiet From d56f811ce0b8d0ac5b42ec7dd29a17e111b87848 Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Tue, 5 Mar 2024 16:41:34 +0000 Subject: [PATCH 27/29] added missing dummy test --- tests/script/test_deps.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) create mode 100644 tests/script/test_deps.py diff --git a/tests/script/test_deps.py b/tests/script/test_deps.py new file mode 100644 index 000000000..aaf19bc81 --- /dev/null +++ b/tests/script/test_deps.py @@ -0,0 +1,23 @@ +# This test covers version, variation, compilation from src, add_deps, add_deps_recursive, deps, post_deps + +import cmind as cm +import check as checks + +# MLPerf v3.0 inference is now very outdated and we are testing inference in separate tests + +#r = cm.access({'action':'run', 'automation':'script', 'tags': 'generate-run-cmds,mlperf', 'adr': +# {'loadgen': {'version': 'r3.0'}, 'compiler': {'tags': "gcc"}}, 'env': {'CM_MODEL': 'resnet50', +# 'CM_DEVICE': 'cpu', 'CM_BACKEND': 'onnxruntime'}, 'quiet': 'yes'}) +#checks.check_return(r) +# +#r = cm.access({'action':'search', 'automation': 'cache', 'tags': 'loadgen,version-r3.0,deps-python-non-virtual'}) +#checks.check_list(r, "loadgen,version-r3.0,deps-python-non-virtual") +# +#r = cm.access({'action':'search', 'automation': 'cache', 'tags': 'inference,src,version-r3.0'}) +#checks.check_list(r, "inference,src,version-r3.0") +# +#r = cm.access({'action':'run', 'automation':'script', 'tags': 'app,mlperf,inference,generic,_python,_resnet50,_onnxruntime,_cpu,_r3.0_default', 'adr': {'mlperf-implementation': { 'version': 'master'}}, 'quiet': 'yes'}) +#checks.check_return(r) +# +#r = cm.access({'action':'run', 'automation':'script', 'tags': 'app,mlperf,inference,generic,_python,_resnet50,_tf,_cpu,_r3.0_default', 'adr': {'mlperf-implementation': { 'version': 'master'}}, 'quiet': 'yes'}) +#checks.check_return(r) From 5e730169120fc0a03978483af59657b9450a260b Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Tue, 5 Mar 2024 16:54:12 +0000 Subject: [PATCH 28/29] clean up --- ...inference-bert-deepsparse-tf-onnxruntime-pytorch.yml} | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) rename .github/workflows/{test-mlperf-inference-bert.yml => test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml} (81%) diff --git a/.github/workflows/test-mlperf-inference-bert.yml b/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml similarity index 81% rename from .github/workflows/test-mlperf-inference-bert.yml rename to .github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml index 7135802a7..89760294a 100644 --- a/.github/workflows/test-mlperf-inference-bert.yml +++ b/.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml @@ -1,13 +1,13 @@ # This workflow will install Python dependencies, run tests and lint with a variety of Python versions # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions -name: MLPerf inference bert +name: MLPerf inference bert (deepsparse, tf, onnxruntime, pytorch) on: pull_request: branches: [ "master", "dev" ] paths: - - '.github/workflows/test-mlperf-inference-bert.yml' + - '.github/workflows/test-mlperf-inference-bert-deepsparse-tf-onnxruntime-pytorch.yml' - 'cm-mlops/**' - '!cm-mlops/**.md' @@ -18,7 +18,8 @@ jobs: strategy: fail-fast: false matrix: - python-version: [ "3.12", "3.9" ] + # 3.12 didn't work on 20240305 - need to check + python-version: [ "3.11", "3.9" ] backend: [ "deepsparse", "tf", "onnxruntime", "pytorch" ] precision: [ "int8", "fp32" ] exclude: @@ -38,6 +39,6 @@ jobs: python3 -m pip install cmind cm pull repo --url=${{ github.event.pull_request.head.repo.html_url }} --checkout=${{ github.event.pull_request.head.ref }} cm run script --quiet --tags=get,sys-utils-cm - - name: Test MLPerf Inference Bert + - name: Test MLPerf Inference Bert (DeepSparse, TF, ONNX, PyTorch) run: | cm run script --tags=run,mlperf,inference,generate-run-cmds,_submission,_short --submitter="cTuning" --model=bert-99 --backend=${{ matrix.backend }} --device=cpu --scenario=Offline --test_query_count=5 --precision=${{ matrix.precision }} --target_qps=1 -v --quiet From 78a599e951a86f2b5b7ad47ab83fde7f25c5b04d Mon Sep 17 00:00:00 2001 From: Grigori Fursin Date: Tue, 5 Mar 2024 16:54:24 +0000 Subject: [PATCH 29/29] print MLPerf reference source path --- .../script/run-mlperf-inference-app/customize.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/cm-mlops/script/run-mlperf-inference-app/customize.py b/cm-mlops/script/run-mlperf-inference-app/customize.py index 711cd0068..adda01c73 100644 --- a/cm-mlops/script/run-mlperf-inference-app/customize.py +++ b/cm-mlops/script/run-mlperf-inference-app/customize.py @@ -239,6 +239,19 @@ def get_valid_scenarios(model, category, mlperf_version, mlperf_path): ################################################################################## def postprocess(i): + env = i['env'] + state = i['state'] + + if env.get('CM_MLPERF_IMPLEMENTATION', '') == 'reference': + x1 = env.get('CM_MLPERF_INFERENCE_SOURCE','') + x2 = env.get('CM_MLPERF_INFERENCE_CONF_PATH','') + + if x1 != '' and x2 != '': + print ('') + print ('Path to the MLPerf inference benchmark reference sources: {}'.format(x1)) + print ('Path to the MLPerf inference reference configuration file: {}'.format(x2)) + print ('') + return {'return':0} ##################################################################################