-
Notifications
You must be signed in to change notification settings - Fork 473
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* test * test FastDeploy * test --------- Co-authored-by: root <[email protected]>
- Loading branch information
Showing
32 changed files
with
2,102 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,269 @@ | ||
# The working directory must provide: 1. the PaddleNLP/llm checkout 2. the FastDeploy/llm checkout 3. the data files holding the PaddleNLP results and the FD dynamic-batching (FD_DY) results (the script downloads these into pre_result) | ||
# The stored data files include: NLP-llama-7b-fp16-bs1,NLP-llama-7b-fp16-bs4,NLP-llama-7b-ptuning-fp16-bs1,NLP-llama-7b-ptuning-fp16-bs4,NLP-llama-7b-ptuning-fp16-bs1-noprecache,NLP-llama-7b-ptuning-fp16-bs4-noprecache, | ||
# NLP-belle-7b-2m-fp16-bs1,NLP-belle-7b-2m-fp16-bs4,NLP-belle-7b-2m-ptuning-fp16-bs1,NLP-belle-7b-2m-ptuning-fp16-bs4,NLP-belle-7b-2m-ptuning-fp16-bs1-noprecache,NLP-belle-7b-2m-ptuning-fp16-bs4-noprecache | ||
# FD-llama-7b-fp16-bs4-dy,FD-llama-7b-ptuning-fp16-bs4-dy,FD-llama-7b-ptuning-fp16-bs4-dy-noprecache,FD-chatglm-6b-fp16-bs4-dy,FD-chatglm-6b-ptuning-fp16-bs4-dy,FD-chatglm-6b-ptuning-fp16-bs4-dy-noprecache,FD-belle-7b-2m-fp16-bs4-dy,FD-belle-7b-2m-ptuning-fp16-bs4-dy,FD-belle-7b-2m-ptuning-fp16-bs4-dy-noprecache | ||
|
||
# test_serving.py, read_serving.py and compute_diff.py are packaged in the same folder as this CI script | ||
import os | ||
import sys | ||
import wget | ||
import compute_diff | ||
|
||
|
||
# One entry per tested configuration:
# (result-file tag, batch size, "disdy" flag passed to test_serving.py,
#  prefix of the reference file the run is compared against).
# NOTE(review): "disdy" presumably means "disable dynamic batching" (it is 0
# only for the "bs4-dy" run) -- confirm against test_serving.py.
_RUN_SETTINGS = (
    ('bs1', 1, 1, 'NLP'),
    ('bs4', 4, 1, 'NLP'),
    ('bs4-dy', 4, 0, 'FD'),
)


def _sh(command):
    """Run *command* through the shell and report a non-zero exit status.

    The script stays best-effort (it never aborts on a failed command), but a
    failure is now printed to stderr instead of being silently dropped.
    Returns the raw status from ``os.system``.
    """
    status = os.system(command)
    if status != 0:
        print(
            f"[ci] command exited with status {status}: {command}",
            file=sys.stderr)
    return status


def _clean_runtime_state(res_path):
    """Remove per-run leftovers: result files, step dumps and shared memory."""
    _sh(f"rm -f {res_path}/*")
    # Relative path: the caller has already changed into FastDeploy/llm.
    _sh("rm -f real_time_save.temp_ids_rank_0_step_*")
    _sh("rm -rf /dev/shm/*")


def _run_group(model_paths,
               model_names,
               inputs_path,
               ptuning,
               precache,
               res_path,
               fd_result_path,
               pre_result_path,
               out_path,
               suffix="",
               label=None,
               precache_paths=None):
    """Test every model under every entry of ``_RUN_SETTINGS``.

    For each model one row is appended to *out_path*: the model name, the
    optional *label* column, then one diff rate per setting.

    Args:
        model_paths: exported model directories, parallel to *model_names*.
        model_names: names used to build result / reference file names.
        inputs_path: input file handed to test_serving.py.
        ptuning, precache: the two 0/1 flags passed to test_serving.py after
            the "disdy" flag.
        res_path: directory test_serving.py writes to and read_serving.py
            reads from.
        fd_result_path: directory receiving the collected result files.
        pre_result_path: directory holding the reference result files.
        out_path: report file the rows are appended to.
        suffix: extra tag inserted before ".txt" in result/reference names.
        label: value of the extra "whether send precache" column, or None to
            omit that column.
        precache_paths: optional per-model directory appended as the last
            argument of test_serving.py.

    Returns:
        1 if any comparison reported a difference, otherwise 0.
    """
    failed = 0
    for idx, (model_path, model_name) in enumerate(
            zip(model_paths, model_names)):
        rates = []
        for opt, bs, disdy, baseline in _RUN_SETTINGS:
            command = (f"python3 test_serving.py {model_path} {inputs_path} "
                       f"{bs} {disdy} {ptuning} {precache} {res_path}")
            if precache_paths is not None:
                command += f" {precache_paths[idx]}"
            _sh(command)

            result_file = f"{fd_result_path}/{model_name}-{opt}{suffix}.txt"
            _sh(f"python3 read_serving.py {res_path} {result_file}")

            reference_file = os.path.join(
                pre_result_path, f"{baseline}-{model_name}-{opt}{suffix}.txt")
            is_diff, diff_rate = compute_diff.get_diff(reference_file,
                                                       result_file)
            if is_diff:
                failed = 1
            rates.append(diff_rate)
            _clean_runtime_state(res_path)

        columns = [model_name]
        if label is not None:
            columns.append(label)
        columns.extend(rates)
        with open(out_path, 'a') as f:
            f.write("".join('%-30s' % column for column in columns) + "\n")
    return failed


def main():
    """Export the models, run the FastDeploy serving tests and write a report.

    Everything is resolved relative to the current working directory, which
    must contain ``PaddleNLP/llm`` and ``FastDeploy/llm`` as well as
    ``test_serving.py`` and ``read_serving.py``. Steps:

    1. download and unpack the reference results into ``pre_result``;
    2. export every model (with and without ``--export_precache``) into
       ``inference_model``;
    3. download the precache files and the two input files;
    4. run the three test groups (non P-Tuning, P-Tuning without precache,
       P-Tuning with precache) and append the diff rates to ``results.txt``.

    Exits the process with status 1 if any comparison differed, else 0.
    """
    current_file_path = os.path.abspath(os.getcwd())
    fd_llm_path = f"{current_file_path}/FastDeploy/llm"

    # Absolute paths are used throughout because the cwd changes below.
    inference_model_path = f'{current_file_path}/inference_model'  # exported models
    pre_result_path = f'{current_file_path}/pre_result'  # reference results
    out_path = f'{current_file_path}/results.txt'  # report table
    if os.path.exists(out_path):
        # Start from a fresh report; the writes below re-create the file.
        os.remove(out_path)

    # Download the reference results (one tar for PaddleNLP, one for FD) and
    # move their contents into pre_result_path.
    if os.path.exists(pre_result_path):
        _sh(f"rm -rf {pre_result_path}")
    os.mkdir(pre_result_path)
    NLP_name = 'paddlenlp_llm_results'
    FD_name = 'fastdeploy_llm_dynamic_batching_results'
    NLP_url = f'https://bj.bcebos.com/paddle2onnx/third_libs/{NLP_name}.tar'
    FD_url = f'https://bj.bcebos.com/paddle2onnx/third_libs/{FD_name}.tar'

    wget.download(NLP_url)
    wget.download(FD_url)
    archives = (NLP_name, FD_name)
    for archive in archives:
        _sh(f"tar -xvf {archive}.tar ")
    for archive in archives:
        _sh(f"mv {archive}/* {pre_result_path}")
    for archive in archives:
        _sh(f"rm -f {archive}.tar")

    # Preparation: export the models.
    export_model_name = [
        'linly-ai/chinese-llama-2-7b', 'THUDM/chatglm-6b',
        'bellegroup/belle-7b-2m'
    ]
    noptuning_model_name = [
        'llama-7b-fp16', 'chatglm-6b-fp16', 'belle-7b-2m-fp16'
    ]
    ptuning_model_name = [
        'llama-7b-ptuning-fp16', 'chatglm-6b-ptuning-fp16',
        'belle-7b-2m-ptuning-fp16'
    ]

    # Always export into a fresh directory.
    if os.path.exists(inference_model_path):
        _sh(f"rm -rf {inference_model_path}")
    os.mkdir(inference_model_path)

    # Absolute paths of the exported models.
    noptuning_model_path_list = [
        os.path.join(inference_model_path, name)
        for name in noptuning_model_name
    ]
    ptuning_model_path_list = [
        os.path.join(inference_model_path, name)
        for name in ptuning_model_name
    ]

    # export_model.py is invoked by relative name, so run from PaddleNLP/llm.
    os.chdir(f"{current_file_path}/PaddleNLP/llm")
    for hub_name, plain_path, ptuning_path in zip(
            export_model_name, noptuning_model_path_list,
            ptuning_model_path_list):
        # Non P-Tuning export.
        if not os.path.exists(plain_path):
            _sh(f"python3 export_model.py --model_name_or_path {hub_name} --output_path {plain_path} --dtype float16 --inference_model"
                )
        # P-Tuning export.
        if not os.path.exists(ptuning_path):
            _sh(f"python3 export_model.py --model_name_or_path {hub_name} --output_path {ptuning_path} --dtype float16 --inference_model --export_precache 1"
                )

    # Download one precache file per P-Tuning model into
    # precache_<model>/8-test/1/task_prompt_embeddings.npy.
    precache_url = [
        'https://bj.bcebos.com/fastdeploy/llm/llama-7b-precache.npy',
        'https://bj.bcebos.com/fastdeploy/llm/chatglm-6b-precache.npy',
        'https://bj.bcebos.com/fastdeploy/llm/bloom-7b-precache.npy'
    ]
    target_name = 'task_prompt_embeddings.npy'
    precache_path_list = []
    for url, model_name in zip(precache_url, ptuning_model_name):
        precache_path = f"{current_file_path}/precache_{model_name}"
        precache_path_list.append(precache_path)
        precache_path_FD = os.path.join(precache_path, '8-test', '1')
        target_file = os.path.join(precache_path_FD, target_name)
        # Check the file, not just the directory, so that a directory left
        # behind by an interrupted run does not suppress the download.
        if os.path.exists(target_file):
            continue
        os.makedirs(precache_path_FD, exist_ok=True)
        wget.download(url, out=target_file)

    # Download the input files; stale copies are removed first so the
    # download lands at exactly the expected path.
    inputs_url = 'https://bj.bcebos.com/paddle2onnx/third_libs/inputs_63.jsonl'
    inputs_path = f'{current_file_path}/inputs_base.jsonl'
    if os.path.exists(inputs_path):
        os.remove(inputs_path)
    wget.download(inputs_url, out=inputs_path)
    inputs_PT_url = 'https://bj.bcebos.com/paddle2onnx/third_libs/ptuning_inputs.json'
    inputs_PT_path = f'{current_file_path}/inputs_precache.jsonl'
    if os.path.exists(inputs_PT_path):
        os.remove(inputs_PT_path)
    wget.download(inputs_PT_url, out=inputs_PT_path)

    # Clear shared memory. Only the contents are removed (the original
    # targeted /dev/shm itself), matching the per-run cleanup.
    _sh('rm -rf /dev/shm/*')
    # Directory for raw serving results; emptied if it already exists.
    res_path = f'{fd_llm_path}/res'
    if os.path.exists(res_path):
        _sh(f"rm -f {res_path}/*")
    else:
        os.mkdir(res_path)
    # Remove step dumps left by an earlier run. The path is spelled out
    # because the cwd is still PaddleNLP/llm here, while the tests (and the
    # per-run cleanup) operate inside FastDeploy/llm.
    _sh(f"rm -f {fd_llm_path}/real_time_save.temp_ids_rank_0_step*")
    # Fresh directory for the collected FD results.
    FD_result_path = f'{current_file_path}/FD_result'
    if os.path.exists(FD_result_path):
        _sh(f"rm -rf {FD_result_path}")
    os.mkdir(FD_result_path)

    # The helper scripts are run from inside FastDeploy/llm.
    for script in ('test_serving.py', 'read_serving.py'):
        _sh(f'cp {current_file_path}/{script} {fd_llm_path}/{script}')

    bug_flag = 0

    # Report header for the non P-Tuning group.
    with open(out_path, 'a') as f:
        f.write("非PTuning FP16 model test\n")
        f.write('%-30s%-30s%-30s%-30s\n' % (
            "model", "bs=1(compare with PaddleNLP)",
            "bs=4(compare with PaddleNLP)", "bs=4 stop=2(compare with FD)"))

    os.chdir(fd_llm_path)

    # Group 1: non P-Tuning models.
    bug_flag |= _run_group(
        noptuning_model_path_list,
        noptuning_model_name,
        inputs_path,
        0,
        0,
        res_path,
        FD_result_path,
        pre_result_path,
        out_path)

    # Blank separator line, then the header for the P-Tuning groups.
    with open(out_path, 'a') as f:
        f.write("\n")
        f.write("PTuning FP16 model test\n")
        f.write('%-30s%-30s%-30s%-30s%-30s\n' % (
            "model", "whether send precache", "bs=1(compare with PaddleNLP)",
            "bs=4(compare with PaddleNLP)", "bs=4 stop=2(compare with FD)"))

    # Group 2: P-Tuning models without precache.
    bug_flag |= _run_group(
        ptuning_model_path_list,
        ptuning_model_name,
        inputs_path,
        1,
        0,
        res_path,
        FD_result_path,
        pre_result_path,
        out_path,
        suffix="-noprecache",
        label='no')

    # Group 3: P-Tuning models with precache.
    bug_flag |= _run_group(
        ptuning_model_path_list,
        ptuning_model_name,
        inputs_PT_path,
        1,
        1,
        res_path,
        FD_result_path,
        pre_result_path,
        out_path,
        label='yes',
        precache_paths=precache_path_list)

    os.chdir(f"{current_file_path}")

    # Non-zero exit status signals that at least one comparison differed.
    sys.exit(bug_flag)
|
||
|
||
if __name__ == "__main__": | ||
main() |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
def get_diff(file1, file2):
    """Compare two text files line by line.

    Args:
        file1: path of the first file.
        file2: path of the second file.

    Returns:
        A tuple ``(is_diff, diff_rate)``: ``is_diff`` is 0 when every line
        matches and 1 otherwise; ``diff_rate`` is the string
        ``"<differing lines>/<total lines>"``.

    Raises:
        AssertionError: if the two files do not have the same number of
            lines.
    """
    with open(file1, 'r') as f1, open(file2, 'r') as f2:
        lines1 = f1.read().splitlines()
        lines2 = f2.read().splitlines()

    # Raised explicitly rather than via ``assert`` so the check is not
    # stripped under ``python -O``; the exception type is unchanged.
    if len(lines1) != len(lines2):
        raise AssertionError(
            f"line count mismatch: {file1} has {len(lines1)} lines, "
            f"{file2} has {len(lines2)} lines")

    total_lines = len(lines1)
    diff_lines = sum(1 for a, b in zip(lines1, lines2) if a != b)

    diff_rate = f"{diff_lines}/{total_lines}"
    is_diff = 0 if diff_lines == 0 else 1
    return is_diff, diff_rate
Oops, something went wrong.