Skip to content

Commit

Permalink
Merge pull request #539 from GATEOverflow/mlperf-inference
Browse files Browse the repository at this point in the history
Enhancements to Docker for Multi-User Setups
  • Loading branch information
arjunsuresh authored Nov 13, 2024
2 parents b8a91f1 + 7869cc3 commit eca729e
Show file tree
Hide file tree
Showing 9 changed files with 34 additions and 4 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/test-mlperf-inference-llama2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@ name: MLPerf inference LLAMA 2 70B

on:
schedule:
- cron: "14 14 * * 5"
- cron: "59 16 * * *"

jobs:
build_reference:
if: github.repository_owner == 'gateoverflow'
runs-on: [ self-hosted, GO-spr, linux, x64 ]
runs-on: [ self-hosted, i9, linux, x64 ]
strategy:
fail-fast: false
matrix:
Expand Down
10 changes: 10 additions & 0 deletions automation/script/module_misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2027,6 +2027,9 @@ def docker(i):

shm_size = i.get('docker_shm_size', docker_settings.get('shm_size', ''))

pass_user_id = i.get('docker_pass_user_id', docker_settings.get('pass_user_id', ''))
pass_user_group = i.get('docker_pass_user_group', docker_settings.get('pass_user_group', ''))

extra_run_args = i.get('docker_extra_run_args', docker_settings.get('extra_run_args', ''))

if detached == '':
Expand Down Expand Up @@ -2124,6 +2127,13 @@ def docker(i):
if shm_size != '':
cm_docker_input['shm_size'] = shm_size

if pass_user_id != '':
cm_docker_input['pass_user_id'] = pass_user_id

if pass_user_group != '':
cm_docker_input['pass_user_group'] = pass_user_group


if extra_run_args != '':
cm_docker_input['extra_run_args'] = extra_run_args

Expand Down
1 change: 1 addition & 0 deletions script/app-mlperf-inference/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1675,6 +1675,7 @@ gui:
docker:
use_host_group_id: True
use_host_user_id: True
pass_user_group: True #useful if docker is run by a different user from the one who built it and under the same group
deps:
- tags: get,mlperf,inference,results,dir,local
names:
Expand Down
3 changes: 2 additions & 1 deletion script/generate-mlperf-inference-submission/_cm.json
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@
},
"default_env": {
"CM_RUN_MLPERF_ACCURACY": "on",
"CM_MLPERF_RUN_STYLE": "valid"
"CM_MLPERF_RUN_STYLE": "valid",
"CM_MLPERF_SUBMISSION_DIR_SHARED": "yes"
},
"post_deps": [
{
Expand Down
3 changes: 3 additions & 0 deletions script/generate-mlperf-inference-submission/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ def generate_submission(i):
if not os.path.isdir(submission_dir):
os.makedirs(submission_dir)

if str(env.get('CM_MLPERF_SUBMISSION_DIR_SHARED', '')).lower() in [ "yes", "true", "1" ]:
os.chmod(submission_dir, 0o2775)

print('* MLPerf inference submission dir: {}'.format(submission_dir))
print('* MLPerf inference results dir: {}'.format(results_dir))
results = [f for f in os.listdir(results_dir) if not os.path.isfile(os.path.join(results_dir, f))]
Expand Down
1 change: 1 addition & 0 deletions script/generate-mlperf-inference-user-conf/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ default_env:
CM_TEST_QUERY_COUNT: '10'
CM_FAST_FACTOR: '5'
CM_MLPERF_QUANTIZATION: off
CM_MLPERF_RESULTS_DIR_SHARED: yes

docker:
real_run: False
Expand Down
5 changes: 4 additions & 1 deletion script/generate-mlperf-inference-user-conf/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def preprocess(i):
submission_checker_dir = os.path.join(mlperf_path, "tools", "submission")
sys.path.append(submission_checker_dir)

version = env.get('CM_MLPERF_INFERENCE_VERSION', "4.0")
version = env.get('CM_MLPERF_INFERENCE_VERSION', "4.1")

required_files = []
required_files = get_checker_files()
Expand Down Expand Up @@ -355,6 +355,9 @@ def preprocess(i):

os.makedirs(OUTPUT_DIR, exist_ok=True)

if str(env.get('CM_MLPERF_RESULTS_DIR_SHARED', '')).lower() in [ "yes", "true", "1" ]:
os.chmod(OUTPUT_DIR, 0o2775)

return {'return':0}

def run_files_exist(mode, OUTPUT_DIR, run_files, env):
Expand Down
1 change: 1 addition & 0 deletions script/run-docker-container/_cm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ input_mapping:
interactive: CM_DOCKER_INTERACTIVE_MODE
it: CM_DOCKER_INTERACTIVE
mounts: CM_DOCKER_VOLUME_MOUNTS
pass_user_id: CM_DOCKER_PASS_USER_ID
pass_user_group: CM_DOCKER_PASS_USER_GROUP
port_maps: CM_DOCKER_PORT_MAPS
post_run_cmds: CM_DOCKER_POST_RUN_COMMANDS
Expand Down
10 changes: 10 additions & 0 deletions script/run-docker-container/customize.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,16 @@ def postprocess(i):
port_map_cmds = []
run_opts = ''

#not completed as su command breaks the execution sequence
#
#if env.get('CM_DOCKER_PASS_USER_ID', '') != '':
# run_opts += " --user 0 "
# run_cmds.append(f"(usermod -u {os.getuid()} cmuser || echo pass)")
# run_cmds.append(f"(chown -R {os.getuid()}:{os.getuid()} /home/cmuser || echo pass)")
# run_cmds.append(" ( su cmuser )")
# run_cmds.append('export PATH="/home/cmuser/venv/cm/bin:$PATH"')


if env.get('CM_DOCKER_PRE_RUN_COMMANDS', []):
for pre_run_cmd in env['CM_DOCKER_PRE_RUN_COMMANDS']:
run_cmds.append(pre_run_cmd)
Expand Down

0 comments on commit eca729e

Please sign in to comment.