From e6a7d4e75b385237d0038c0c21fd45f74eb74219 Mon Sep 17 00:00:00 2001 From: Jason Date: Thu, 12 Oct 2023 20:05:36 -0700 Subject: [PATCH] [LLM] Fix P-Tuning difference (#2240) * Support multicards * fix ptuning diff * Update engine.py --- llm/fastdeploy_llm/engine.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/llm/fastdeploy_llm/engine.py b/llm/fastdeploy_llm/engine.py index 475298e7db..7e6d922361 100644 --- a/llm/fastdeploy_llm/engine.py +++ b/llm/fastdeploy_llm/engine.py @@ -257,6 +257,7 @@ def _init_predictor(self): dist_config.enable_dist_model(True) parent_dir = os.path.abspath(os.path.join(self.model_dir, "..")) mapping_file = os.path.join(parent_dir, "rank_mapping.csv") + if self.mp_degree > 1 and not os.path.exists(mapping_file): raise Exception("There's no file {}.".format(mapping_file)) dist_config.set_comm_init_config(mapping_file) @@ -383,6 +384,8 @@ def dy_input_preprocess(inputs): for i in range(bsz): if stop_flags[i] == 1: + attention_mask[i] = 0 + tgt_generation_mask[i] = 0 length = int(enc_length[i, 0]) if args.is_ptuning: model_id = inputs['model_id'][i]