Skip to content

Commit

Permalink
fix streaming (#3176)
Browse files Browse the repository at this point in the history
  • Loading branch information
Jintao-Huang authored Feb 19, 2025
1 parent 6350cde commit e81c4f3
Showing 1 changed file with 5 additions and 4 deletions.
9 changes: 5 additions & 4 deletions swift/llm/dataset/preprocessor/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,17 +222,18 @@ def safe_rename_columns(dataset, columns):
if safe_columns:
dataset = dataset.rename_columns(safe_columns)

return dataset

def _rename_columns(self, dataset: DATASET_TYPE) -> DATASET_TYPE:
    """Apply the configured column renames, then mask standard keys when streaming.

    The dataset is passed through ``safe_rename_columns`` twice: first with
    ``self.origin_columns`` and then with ``self.columns``. If the result is
    an ``HfIterableDataset``, every name in ``RowPreprocessor.standard_keys``
    that is present in ``dataset.features`` is additionally renamed to
    ``__@<name>``.

    Args:
        dataset: The dataset whose columns should be renamed.

    Returns:
        The dataset after all applicable renames.
    """
    for mapping in (self.origin_columns, self.columns):
        dataset = self.safe_rename_columns(dataset, mapping)

    if not isinstance(dataset, HfIterableDataset):
        return dataset

    # Workaround for an upstream iterable-dataset issue, see:
    # fix: https://github.com/huggingface/datasets/issues/6408
    masked_names = {}
    for key in RowPreprocessor.standard_keys:
        if key in dataset.features:
            masked_names[key] = f'__@{key}'
    # Skip the call entirely when no standard key is present.
    if masked_names:
        dataset = dataset.rename_columns(masked_names)
    return dataset

def _rename_columns(self, dataset: DATASET_TYPE) -> DATASET_TYPE:
    """Rename columns using ``self.origin_columns`` and then ``self.columns``.

    Both mappings are applied in that order through ``safe_rename_columns``.

    Args:
        dataset: The dataset whose columns should be renamed.

    Returns:
        The dataset returned by the second ``safe_rename_columns`` call.
    """
    for mapping in (self.origin_columns, self.columns):
        dataset = self.safe_rename_columns(dataset, mapping)
    return dataset

@staticmethod
def remove_useless_columns(dataset: DATASET_TYPE) -> DATASET_TYPE:
dataset = RowPreprocessor.get_features_dataset(dataset)
Expand Down

0 comments on commit e81c4f3

Please sign in to comment.