Skip to content

Commit a8bd41f

Browse files
committed
Disable RDMACM, use scheduler file
1 parent cb88657 commit a8bd41f

File tree

1 file changed

+6
-7
lines changed

1 file changed

+6
-7
lines changed

examples/ucx/dask_cuda_worker.sh

+6 -7
Original file line number | Diff line number | Diff line change
@@ -6,12 +6,10 @@ usage() {
66
}
77

88
# parse arguments
9-
address=localhost
109
rmm_pool_size=1GB
1110

1211
while getopts ":a:i:r:t:" flag; do
1312
case "${flag}" in
14-
a) address=${OPTARG};;
1513
i) interface=${OPTARG};;
1614
r) rmm_pool_size=${OPTARG};;
1715
t) transport=${OPTARG};;
@@ -29,8 +27,8 @@ DASK_UCX__CUDA_COPY=True
2927
DASK_UCX__TCP=True
3028
DASK_RMM__POOL_SIZE=$rmm_pool_size
3129

32-
scheduler_flags="--protocol ucx"
33-
worker_flags="--enable-tcp-over-ucx --rmm-pool-size ${rmm_pool_size}"
30+
scheduler_flags="--scheduler-file scheduler.json --protocol ucx"
31+
worker_flags="--scheduler-file scheduler.json --enable-tcp-over-ucx --rmm-pool-size ${rmm_pool_size}"
3432

3533
if ! [ -z ${interface+x} ]; then
3634
scheduler_flags+=" --interface ${interface}"
@@ -42,14 +40,15 @@ if [[ $transport == *"nvlink"* ]]; then
4240
fi
4341
if [[ $transport == *"ib"* ]]; then
4442
DASK_UCX__INFINIBAND=True
45-
DASK_UCX__RDMACM=True
43+
# DASK_UCX__RDMACM=True # RDMACM not working right now
4644
DASK_UCX__NET_DEVICES=mlx5_0:1
4745

48-
worker_flags+=" --enable-infiniband --enable-rdmacm --net-devices=auto"
46+
# worker_flags+=" --enable-infiniband --enable-rdmacm --net-devices=auto"
47+
worker_flags+=" --enable-infiniband --net-devices=auto"
4948
fi
5049

5150
# initialize scheduler
5251
dask-scheduler $scheduler_flags &
5352

5453
# initialize workers
55-
dask-cuda-worker ucx://${address}:8786 $worker_flags
54+
dask-cuda-worker $worker_flags

0 commit comments

Comments
 (0)