-
Notifications
You must be signed in to change notification settings - Fork 68
239 lines (232 loc) · 8.86 KB
/
llm_integration_p4d.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
name: Large model integration tests with P4D and compiler optimizations
on:
workflow_dispatch:
inputs:
djl-version:
description: 'The released version of DJL'
required: false
default: ''
run_test:
description: 'Run only the tests you need [aiccl]'
required: false
default: ''
schedule:
- cron: '0 15 * * *'
jobs:
create-runners-p4d:
runs-on: [self-hosted, scheduler]
steps:
- name: Create new P4d.24xl instance
id: create_gpu_p4d
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
token=$( curl -X POST -H "Authorization: token ${{ secrets.ACTION_RUNNER_PERSONAL_TOKEN }}" \
https://api.github.com/repos/deepjavalibrary/djl-serving/actions/runners/registration-token \
--fail \
| jq '.token' | tr -d '"' )
./start_instance.sh action_lmic_p4d $token djl-serving
outputs:
p4d_instance_id: ${{ steps.create_gpu_p4d.outputs.action_lmic_p4d_instance_id }}
lmi-dist-test:
if: contains(fromJson('["", "aiccl"]'), github.event.inputs.run_test)
runs-on: [ self-hosted, p4d ]
timeout-minutes: 120
needs: create-runners-p4d
steps:
- uses: actions/checkout@v4
- name: Clean env
run: |
yes | docker system prune -a --volumes
sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
echo "wait dpkg lock..."
while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
- name: Set up Python3
uses: actions/setup-python@v5
with:
python-version: '3.10.x'
- name: Install pip dependencies
run: pip3 install pytest requests "numpy<2" pillow huggingface_hub tqdm
- name: Build container name
run: ./serving/docker/scripts/docker_name_builder.sh lmi ${{ github.event.inputs.djl-version }}
- name: Download models and dockers
working-directory: tests/integration
run: |
docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
- name: Test Mixtral-8x7B
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py lmi_dist_aiccl mixtral-8x7b-aiccl
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
serve
python3 llm/client.py lmi_dist_aiccl mixtral-8x7b-aiccl
./remove_container.sh
- name: Test Llama-2-70B
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py lmi_dist_aiccl llama-2-70b-aiccl
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
serve
python3 llm/client.py lmi_dist_aiccl llama-2-70b-aiccl
./remove_container.sh
- name: Test codellama/CodeLlama-34b-hf
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py lmi_dist_aiccl codellama-34b-aiccl
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
serve
python3 llm/client.py lmi_dist_aiccl codellama-34b-aiccl
./remove_container.sh
- name: Test tiiuae/falcon-40b
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py lmi_dist_aiccl falcon-40b-aiccl
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
serve
python3 llm/client.py lmi_dist_aiccl falcon-40b-aiccl
./remove_container.sh
- name: Remove models dir
working-directory: tests/integration
run: |
sudo rm -rf models
- name: On fail step
if: ${{ failure() }}
working-directory: tests/integration
run: |
sudo rm -rf models
./remove_container.sh || true
cat logs/serving.log
- name: Upload test logs
uses: actions/upload-artifact@v4
with:
name: lmi-dist-aiccl-logs
path: tests/integration/logs/
trtllm-test:
runs-on: [ self-hosted, p4d ]
timeout-minutes: 120
needs: create-runners-p4d
steps:
- uses: actions/checkout@v4
- name: Clean env
run: |
yes | docker system prune -a --volumes
sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
echo "wait dpkg lock..."
while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
- name: Set up Python3
uses: actions/setup-python@v5
with:
python-version: '3.10.x'
- name: Install pip dependencies
run: pip3 install requests "numpy<2"
- name: Build container name
run: ./serving/docker/scripts/docker_name_builder.sh tensorrt-llm ${{ github.event.inputs.djl-version }}
- name: Download models and dockers
working-directory: tests/integration
run: |
docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
- name: Test llama-2-70B with TP8
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py trtllm llama2-70b
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
serve
python3 llm/client.py trtllm llama2-70b
./remove_container.sh
- name: Test mixtral-8x7b with with TP8
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py trtllm mixtral-8x7b
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models trtllm \
serve
python3 llm/client.py trtllm mixtral-8x7b
./remove_container.sh
- name: Remove models dir
working-directory: tests/integration
run: |
sudo rm -rf models
- name: On fail step
if: ${{ failure() }}
working-directory: tests/integration
run: |
sudo rm -rf models
./remove_container.sh || true
cat logs/serving.log
- name: Upload test logs
uses: actions/upload-artifact@v4
with:
name: trtllm-logs
path: tests/integration/logs/
vllm-test:
runs-on: [ self-hosted, p4d ]
timeout-minutes: 120
needs: create-runners-p4d
steps:
- uses: actions/checkout@v4
- name: Clean env
run: |
yes | docker system prune -a --volumes
sudo rm -rf /home/ubuntu/actions-runner/_work/_tool/Java_Corretto_jdk/
echo "wait dpkg lock..."
while sudo fuser /var/{lib/{dpkg,apt/lists},cache/apt/archives}/lock >/dev/null 2>&1; do sleep 5; done
- name: Set up Python3
uses: actions/setup-python@v5
with:
python-version: '3.10.x'
- name: Install pip dependencies
run: pip3 install requests "numpy<2"
- name: Build container name
run: ./serving/docker/scripts/docker_name_builder.sh lmi ${{ github.event.inputs.djl-version }}
- name: Download models and dockers
working-directory: tests/integration
run: |
docker pull deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG
- name: Test llama-2-70B with TP8
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py vllm llama2-70b
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
serve
python3 llm/client.py vllm llama2-70b
./remove_container.sh
- name: Test mixtral-8x7b with with TP8
working-directory: tests/integration
run: |
rm -rf models
python3 llm/prepare.py vllm mixtral-8x7b
./launch_container.sh deepjavalibrary/djl-serving:$DJLSERVING_DOCKER_TAG $PWD/models lmi \
serve
python3 llm/client.py vllm mixtral-8x7b
./remove_container.sh
- name: Remove models dir
working-directory: tests/integration
run: |
sudo rm -rf models
- name: On fail step
if: ${{ failure() }}
working-directory: tests/integration
run: |
sudo rm -rf models
./remove_container.sh || true
cat logs/serving.log
- name: Upload test logs
uses: actions/upload-artifact@v4
with:
name: vllm-logs
path: tests/integration/logs/
stop-runners-p4d:
if: always()
runs-on: [ self-hosted, scheduler ]
needs: [ create-runners-p4d, lmi-dist-test, trtllm-test, vllm-test ]
steps:
- name: Stop all instances
run: |
cd /home/ubuntu/djl_benchmark_script/scripts
instance_id=${{ needs.create-runners-p4d.outputs.p4d_instance_id }}
./stop_instance.sh $instance_id