test_find_best_settings.py
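"""Sweep TorchServe configuration parameters to find the best-performing
combination.

For each combination of netty threads, client threads, workers per model,
queue size, MKL threads, and client-side parallelism, this script starts
TorchServe with a generated config file, sends 1000 requests to the
inference API, scrapes the metrics endpoint, and appends one CSV row of
latency and throughput results.
"""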
import itertools
import os
import subprocess
import time
def do_test(number_of_netty_threads=1, netty_client_threads=1, default_workers_per_model=1,
            job_queue_size=100, MKL_NUM_THREADS=1, test_parallelism=8):
    # Generate a config.properties file for this parameter combination.
    os.makedirs("config_file", exist_ok=True)  # ensure the output directories exist
    os.makedirs("result_file", exist_ok=True)
    config_file_name = "config_file/" + f"config_{number_of_netty_threads}_{netty_client_threads}_{default_workers_per_model}_{job_queue_size}.properties"
    result_file_name = "result_file/" + f"result_{number_of_netty_threads}_{netty_client_threads}_{default_workers_per_model}_{job_queue_size}_{MKL_NUM_THREADS}_{test_parallelism}.txt"
    with open(config_file_name, "w") as f:
        f.write("load_models=all\n")
        f.write("inference_address=http://0.0.0.0:8080\n")
        f.write("management_address=http://0.0.0.0:8081\n")
        f.write("metrics_address=http://0.0.0.0:8082\n")
        f.write("model_store=<ADD COMPLETE PATH HERE>/model-store\n")
        f.write(f"number_of_netty_threads={number_of_netty_threads}\n")
        f.write(f"netty_client_threads={netty_client_threads}\n")
        f.write(f"default_workers_per_model={default_workers_per_model}\n")
        f.write(f"job_queue_size={job_queue_size}\n")
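    # For example, do_test(2, 4, 2, 1000) writes
    # config_file/config_2_4_2_1000.properties with number_of_netty_threads=2,
    # netty_client_threads=4, default_workers_per_model=2, job_queue_size=1000.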
    # os.system("MKL_NUM_THREADS=1 torchserve --start --model-store model-store --models fast=ptclassifier.mar slow=ptclassifiernotr.mar --ncs --ts-config config.properties")
    # Start TorchServe with the generated config file and the MKL thread setting.
    subprocess.call(f"MKL_NUM_THREADS={MKL_NUM_THREADS} torchserve --start --model-store model-store --models model=ptclassifiernotr.mar --ncs --ts-config {config_file_name}", shell=True, stdout=subprocess.DEVNULL)
    time.sleep(3)  # give TorchServe time to load the model before the test starts
    print(result_file_name)
    # Send 1000 requests to the inference API, test_parallelism at a time.
    print("start to send test requests...")
    start_time = time.time()
    print(time.ctime())
    subprocess.run(f"seq 1 1000 | xargs -n 1 -P {test_parallelism} bash -c 'url=\"http://127.0.0.1:8080/predictions/model\"; curl -X POST $url -T input.txt'", shell=True, capture_output=True, text=True)
    total_time = int((time.time() - start_time) * 1e6)
    print("total time in microseconds:", total_time)
    # Fetch TorchServe metrics and extract inference latency and queue latency.
    output = subprocess.run("curl http://127.0.0.1:8082/metrics", shell=True, capture_output=True, text=True)
    inference_time = 0
    queue_time = 0
    for line in output.stdout.split('\n'):
        if line.startswith('ts_inference_latency_microseconds'):
            inference_time = line.split(' ')[1]
        if line.startswith('ts_queue_latency_microseconds'):
            queue_time = line.split(' ')[1]
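    # Note: these latency metrics appear to be cumulative (counter-type)
    # values across all requests served so far, which is one reason the
    # server is restarted for every parameter combination.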
    # Throughput in requests per second: 1000 requests over total_time microseconds.
    throughput = 1000 / total_time * 1000000
    # Append one row of results to the CSV for later comparison.
    with open("test_result_short.csv", "a") as f:
        f.write(f"{number_of_netty_threads},{netty_client_threads},{default_workers_per_model},{MKL_NUM_THREADS},{job_queue_size},{test_parallelism},{total_time},{inference_time},{queue_time},{throughput}\n")
    # Stop TorchServe; repeat to make sure the server is actually down.
    for _ in range(3):
        stop_result = os.system("torchserve --stop")
        print(stop_result)
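# A minimal helper sketch (not in the original script): write a header row once
# so the columns in test_result_short.csv are self-describing. The names mirror
# the write order in do_test.
def write_csv_header(path="test_result_short.csv"):
    if not os.path.exists(path):
        with open(path, "w") as f:
            f.write("number_of_netty_threads,netty_client_threads,"
                    "default_workers_per_model,MKL_NUM_THREADS,job_queue_size,"
                    "test_parallelism,total_time_us,inference_time_us,"
                    "queue_time_us,throughput_rps\n")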
def main():
    # Candidate values for each parameter to sweep.
    number_of_netty_threads = [1, 2, 4, 8]
    netty_client_threads = [1, 2, 4, 8]
    default_workers_per_model = [1, 2, 4, 8]
    MKL_NUM_THREADS = [1, 2, 4, 8]
    job_queue_size = [1000]  # [100, 200, 500, 1000]
    test_parallelism = [32]  # [8, 16, 32, 64]
    # Run one test per combination of parameters.
    for a, b, c, d, e, f in itertools.product(number_of_netty_threads,
                                              netty_client_threads,
                                              default_workers_per_model,
                                              job_queue_size,
                                              MKL_NUM_THREADS,
                                              test_parallelism):
        do_test(a, b, c, d, e, f)
if __name__ == "__main__":
    main()
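# Usage (assumes model-store/ contains ptclassifiernotr.mar and input.txt holds
# a sample request body in the working directory):
#   python test_find_best_settings.py
# Results accumulate in test_result_short.csv, one row per parameter combination.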