Refactor grpo dataset #3192

Merged
merged 7 commits on Feb 20, 2025
37 changes: 3 additions & 34 deletions swift/llm/dataset/dataset/llm.py
@@ -397,19 +397,6 @@ def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]:
 
 register_dataset(DatasetMeta(ms_dataset_id='swift/ToolBench', tags=['chat', 'agent', 'multi-round']))
 
-
-class CompetitionMathPreprocessor(ResponsePreprocessor):
-
-    def preprocess(self, row: Dict[str, Any], all_tools=None) -> Optional[Dict[str, Any]]:
-        query = row['problem']
-        response = row['solution']
-        row = {
-            'query': query,
-            'response': response,
-        }
-        return super().preprocess(row)
-
-
 register_dataset(
     DatasetMeta(
         ms_dataset_id='tastelikefeet/competition_math',
@@ -418,7 +405,6 @@ def preprocess(self, row: Dict[str, Any], all_tools=None) -> Optional[Dict[str,
                 name='default',
                 subset='default',
                 split=['train', 'test'],
-                preprocess_func=CompetitionMathPreprocessor(),
             ),
         ],
         tags=['qa', 'math']))
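Note: with `problem` and `solution` added to `ResponsePreprocessor`'s default column mappings (see the core.py diff below), the dedicated preprocessor is no longer needed. A minimal sketch of the renaming this relies on, with illustrative values; the explicit mapping table here is an assumption for illustration, not the actual implementation:

```python
# Sketch (assumed behavior): competition_math rows like
# {'problem': ..., 'solution': ...} are renamed onto the standard
# query/response fields, so no custom preprocess_func is required.
row = {'problem': 'What is 2 + 2?', 'solution': '4'}
columns = {'problem': 'query', 'solution': 'response'}  # hypothetical table
mapped = {columns.get(k, k): v for k, v in row.items()}
assert mapped == {'query': 'What is 2 + 2?', 'response': '4'}
```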
@@ -571,10 +557,6 @@ def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
 
 class XlamFunctionCallingPreprocessor(ResponsePreprocessor):
 
-    def __init__(self, response=True):
-        self.response = response
-        super().__init__()
-
     def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
         query = row['query']
         answers = row['response']
@@ -584,28 +566,15 @@ def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]:
             name = answer['name']
             args = json.dumps(answer['arguments'])
             response = f'Action: {name}\nAction Input: {args}'
-        key = 'response' if self.response else 'solution'
-        row = {'query': query, key: response, 'tools': row['tools']}
+        row = {'query': query, 'response': response, 'solution': response, 'tools': row['tools']}
         return super().preprocess(row)
 
 
 register_dataset(
     DatasetMeta(
         ms_dataset_id='LLM-Research/xlam-function-calling-60k',
-        subsets=[
-            SubsetDataset(
-                name='default',
-                subset='dataset',
-                split=['train'],
-                preprocess_func=XlamFunctionCallingPreprocessor(response=True),
-            ),
-            SubsetDataset(
-                name='grpo',
-                subset='dataset',
-                split=['train'],
-                preprocess_func=XlamFunctionCallingPreprocessor(response=False),
-            ),
-        ],
+        subsets=['dataset'],
+        preprocess_func=XlamFunctionCallingPreprocessor(),
         tags=['agent']))
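Note: emitting the answer under both `response` and `solution` lets the single `dataset` subset serve both SFT and GRPO, replacing the former `default`/`grpo` subset pair. A rough sketch of the row shape this produces, with hypothetical input values:

```python
import json

# Hypothetical record, shaped like xlam-function-calling-60k entries.
answer = {'name': 'get_weather', 'arguments': {'city': 'Beijing'}}
args = json.dumps(answer['arguments'])
response = f'Action: {answer["name"]}\nAction Input: {args}'

# The preprocessor now keeps the answer under both keys: 'response' for
# SFT-style training and 'solution' as the GRPO reward reference.
row = {
    'query': 'What is the weather in Beijing?',
    'response': response,
    'solution': response,
    'tools': '[...]',  # passed through unchanged
}
```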


35 changes: 19 additions & 16 deletions swift/llm/dataset/preprocessor/core.py
@@ -129,11 +129,13 @@ def rows_to_batched(rows: List[Dict[str, Any]]):
         return batched
 
     @staticmethod
-    def _fix_streaming_keys(row):
+    def _remove_prefix_keys(row, prefix: str):
         for k in list(row.keys()):
-            if k.startswith('__@'):
-                new_k = k[len('__@'):]
-                row[new_k] = row.pop(k)
+            if k.startswith(prefix):
+                new_k = k[len(prefix):]
+                new_v = row.pop(k)
+                if new_k not in row:
+                    row[new_k] = new_v
 
     @staticmethod
     def _check_objects(row):
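Note: the generalized helper now serves two prefix conventions: `__@` for streaming compatibility and `__#` for GRPO-retained fields, and it no longer clobbers a key that already exists. A standalone sketch of the round trip, using a simplified copy of the helper:

```python
def remove_prefix_keys(row: dict, prefix: str) -> None:
    # Strip `prefix` from matching keys, never overwriting an existing key.
    for k in list(row.keys()):
        if k.startswith(prefix):
            new_k = k[len(prefix):]
            new_v = row.pop(k)
            if new_k not in row:
                row[new_k] = new_v

batched = {'messages': [['...']], '__#solution': ['4']}
remove_prefix_keys(batched, '__#')
assert batched == {'messages': [['...']], 'solution': ['4']}
```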
@@ -160,7 +162,7 @@ def batched_preprocess(self, batched_row: Dict[str, Any], *, strict: bool,
         from ...template import MaxLengthError
         batched_row = dict(batched_row)
         assert len(batched_row) > 0
-        self._fix_streaming_keys(batched_row)
+        self._remove_prefix_keys(batched_row, '__@')  # compat streaming
         rows = self.batched_to_rows(batched_row)
 
         new_rows = []
@@ -191,7 +193,7 @@ def batched_preprocess(self, batched_row: Dict[str, Any], *, strict: bool,
                 row = []
             new_rows += row
         res = self.rows_to_batched(new_rows)
-
+        self._remove_prefix_keys(res, '__#')  # compat GRPO
         if len(res) == 0:
             res['messages'] = []
 
@@ -285,21 +287,22 @@ def __call__(
         if self.dataset_sample is not None:
             dataset = sample_dataset(dataset, self.dataset_sample, self.random_state)
 
+        map_kwargs = {'batched': True, 'batch_size': batch_size}
+        if isinstance(dataset, HfDataset):
+            map_kwargs['num_proc'] = num_proc
+        # compat GRPO: The solution field will be retained.
+        dataset = RowPreprocessor.get_features_dataset(dataset)
+        if 'solution' in dataset.features:
+            dataset = dataset.map(lambda x: {'__#solution': x['solution']}, **map_kwargs)
         dataset = self._rename_columns(dataset)
         dataset = self.prepare_dataset(dataset)
         dataset = self._cast_pil_image(dataset)
-        map_kwargs = {}
-        ignore_max_length_error = False
-        if isinstance(dataset, HfDataset):
-            map_kwargs['num_proc'] = num_proc
-            if num_proc > 1:
-                ignore_max_length_error = True
-
+        ignore_max_length_error = True if isinstance(dataset, HfDataset) and num_proc > 1 else False
         with self._patch_arrow_writer():
             try:
                 dataset_mapped = dataset.map(
                     self.batched_preprocess,
-                    batched=True,
-                    batch_size=batch_size,
                     fn_kwargs={
                         'strict': strict,
                         'ignore_max_length_error': ignore_max_length_error
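Note: taken together, the two `__#` hooks stash `solution` under a prefixed key before column renaming (which would otherwise fold it into `response`, now that `solution` is a response key), carry it through preprocessing untouched, and restore it afterwards so the GRPO reward function can read the reference answer. A condensed sketch of that flow, with simplified stand-ins for the real pipeline stages:

```python
# Hypothetical condensed pipeline; the real stages live in RowPreprocessor.
batch = {'problem': ['What is 2 + 2?'], 'solution': ['4']}

# 1. Stash: keep a prefixed copy so renaming cannot consume it.
batch['__#solution'] = batch['solution']

# 2. Rename + preprocess: 'solution' becomes the assistant response.
batch = {'messages': [[{'role': 'user', 'content': 'What is 2 + 2?'},
                       {'role': 'assistant', 'content': '4'}]],
         '__#solution': batch['__#solution']}

# 3. Restore: _remove_prefix_keys(batch, '__#') re-exposes 'solution'
#    as a plain column for the GRPO reward function.
batch['solution'] = batch.pop('__#solution')
```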
@@ -321,8 +324,8 @@ class ResponsePreprocessor(RowPreprocessor):
     def __init__(self, *, columns: Optional[Dict[str, str]] = None, **kwargs) -> None:
         super().__init__(columns=columns, **kwargs)
         system_keys = ['system', 'system_prompt']
-        query_keys = ['query', 'prompt', 'input', 'instruction', 'question']
-        response_keys = ['response', 'answer', 'output', 'targets', 'target', 'answer_key', 'answers'
+        query_keys = ['query', 'prompt', 'input', 'instruction', 'question', 'problem']
+        response_keys = ['response', 'answer', 'output', 'targets', 'target', 'answer_key', 'answers', 'solution'
                         ] + ['text', 'completion', 'content']
         for key in system_keys:
             self.columns[key] = 'system'
3 changes: 0 additions & 3 deletions swift/llm/infer/infer_engine/infer_engine.py
@@ -122,9 +122,6 @@ def _update_metrics(result, metrics: Optional[List[Metric]] = None):
                 metric.update(response)
         return result_origin
 
-    def __call__(self, *args, **kwargs):
-        return self.infer(*args, **kwargs)
-
     def infer(self,
               infer_requests: List[InferRequest],
              request_config: Optional[RequestConfig] = None,
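Note: with `__call__` removed, an `InferEngine` must be invoked via `.infer(...)` explicitly; the `get_reward` change below adapts accordingly. A trivial sketch of the call-site migration, using a stand-in engine:

```python
# Stand-in for InferEngine, for illustration only.
class FakeEngine:
    def infer(self, infer_requests, request_config=None):
        return [f'ok: {r}' for r in infer_requests]

engine = FakeEngine()
# engine(['hello'])  -> TypeError now that __call__ is gone
print(engine.infer(['hello']))  # the explicit form still works
```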
6 changes: 4 additions & 2 deletions swift/llm/sampling/utils.py
@@ -38,11 +38,13 @@ def get_reward(model: Any,
             Index 0: The min-max normalized scores matched the infer_requests
             Index 1: The mask filtered by the threshold
     """
-    parameters = inspect.signature(model.infer).parameters
+    from swift.llm import InferEngine
+    infer_func = model.infer if isinstance(model, InferEngine) else model.__call__
+    parameters = inspect.signature(infer_func).parameters
     gt_param = {}
     if 'ground_truths' in parameters:
         gt_param = {'ground_truths': ground_truths}
-    rewards = model(infer_requests, request_config=request_config, **gt_param)
+    rewards = infer_func(infer_requests, request_config=request_config, **gt_param)
     from swift.llm.infer.protocol import ChatCompletionResponse
     if isinstance(rewards[0], ChatCompletionResponse):
         rewards = [float(r.choices[0].message.content) for r in rewards]
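Note: dispatching on `isinstance(model, InferEngine)` keeps plain reward callables working now that `InferEngine.__call__` is gone, and the signature probe decides whether to forward `ground_truths`. A minimal sketch of the dispatch, using duck typing (`hasattr`) in place of the real `isinstance` check and a hypothetical reward callable:

```python
import inspect

def pick_infer_func(model):
    # Prefer .infer for engine-like objects; fall back to the callable itself.
    infer_func = model.infer if hasattr(model, 'infer') else model.__call__
    params = inspect.signature(infer_func).parameters
    accepts_gt = 'ground_truths' in params  # forward ground truths only if accepted
    return infer_func, accepts_gt

class TinyRewardModel:
    def __call__(self, infer_requests, request_config=None, ground_truths=None):
        return [1.0 for _ in infer_requests]

func, accepts_gt = pick_infer_func(TinyRewardModel())
assert accepts_gt
print(func(['req'], request_config=None, ground_truths=['4']))
```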