From 9173f22d0cfacf28be83e0ba9bc8c129bb8ba3ce Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 20 Feb 2025 00:10:31 +0800 Subject: [PATCH 1/6] update dataset --- ...345\222\214\346\225\260\346\215\256\351\233\206.md" | 10 ++++++++++ .../Instruction/Supported-models-and-datasets.md | 10 ++++++++++ swift/llm/dataset/data/dataset_info.json | 10 +++++++--- swift/llm/dataset/dataset/llm.py | 3 ++- 4 files changed, 29 insertions(+), 4 deletions(-) diff --git "a/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" "b/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" index 5ef2ce6cc..ee790c58b 100644 --- "a/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" +++ "b/docs/source/Instruction/\346\224\257\346\214\201\347\232\204\346\250\241\345\236\213\345\222\214\346\225\260\346\215\256\351\233\206.md" @@ -713,6 +713,7 @@ |[AI-ModelScope/LaTeX_OCR](https://modelscope.cn/datasets/AI-ModelScope/LaTeX_OCR)|default
human_handwrite
human_handwrite_print
synthetic_handwrite
small|162149|117.6±44.9, min=41, max=312|chat, ocr, multi-modal, vision|[linxy/LaTeX_OCR](https://huggingface.co/datasets/linxy/LaTeX_OCR)| |[AI-ModelScope/LongAlpaca-12k](https://modelscope.cn/datasets/AI-ModelScope/LongAlpaca-12k)|default|11998|9941.8±3417.1, min=4695, max=25826|long-sequence, QA|[Yukang/LongAlpaca-12k](https://huggingface.co/datasets/Yukang/LongAlpaca-12k)| |[AI-ModelScope/M3IT](https://modelscope.cn/datasets/AI-ModelScope/M3IT)|coco
vqa-v2
shapes
shapes-rephrased
coco-goi-rephrased
snli-ve
snli-ve-rephrased
okvqa
a-okvqa
viquae
textcap
docvqa
science-qa
imagenet
imagenet-open-ended
imagenet-rephrased
coco-goi
clevr
clevr-rephrased
nlvr
coco-itm
coco-itm-rephrased
vsr
vsr-rephrased
mocheg
mocheg-rephrased
coco-text
fm-iqa
activitynet-qa
msrvtt
ss
coco-cn
refcoco
refcoco-rephrased
multi30k
image-paragraph-captioning
visual-dialog
visual-dialog-rephrased
iqa
vcr
visual-mrc
ivqa
msrvtt-qa
msvd-qa
gqa
text-vqa
ocr-vqa
st-vqa
flickr8k-cn|huge dataset|-|chat, multi-modal, vision|-| +|[AI-ModelScope/MATH-lighteval](https://modelscope.cn/datasets/AI-ModelScope/MATH-lighteval)|default|7500|104.4±92.8, min=36, max=1683|grpo, math|[DigitalLearningGmbH/MATH-lighteval](https://huggingface.co/datasets/DigitalLearningGmbH/MATH-lighteval)| |[AI-ModelScope/Magpie-Qwen2-Pro-200K-Chinese](https://modelscope.cn/datasets/AI-ModelScope/Magpie-Qwen2-Pro-200K-Chinese)|default|200000|448.4±223.5, min=87, max=4098|chat, sft, 🔥, zh|[Magpie-Align/Magpie-Qwen2-Pro-200K-Chinese](https://huggingface.co/datasets/Magpie-Align/Magpie-Qwen2-Pro-200K-Chinese)| |[AI-ModelScope/Magpie-Qwen2-Pro-200K-English](https://modelscope.cn/datasets/AI-ModelScope/Magpie-Qwen2-Pro-200K-English)|default|200000|609.9±277.1, min=257, max=4098|chat, sft, 🔥, en|[Magpie-Align/Magpie-Qwen2-Pro-200K-English](https://huggingface.co/datasets/Magpie-Align/Magpie-Qwen2-Pro-200K-English)| |[AI-ModelScope/Magpie-Qwen2-Pro-300K-Filtered](https://modelscope.cn/datasets/AI-ModelScope/Magpie-Qwen2-Pro-300K-Filtered)|default|300000|556.6±288.6, min=175, max=4098|chat, sft, 🔥|[Magpie-Align/Magpie-Qwen2-Pro-300K-Filtered](https://huggingface.co/datasets/Magpie-Align/Magpie-Qwen2-Pro-300K-Filtered)| @@ -769,10 +770,13 @@ |[DAMO_NLP/jd](https://modelscope.cn/datasets/DAMO_NLP/jd)|default
cls|45012|66.9±87.0, min=41, max=1699|text-generation, classification, 🔥|-| |-|default|huge dataset|-|pretrain, quality|[HuggingFaceFW/fineweb](https://huggingface.co/datasets/HuggingFaceFW/fineweb)| |-|auto_math_text
khanacademy
openstax
stanford
stories
web_samples_v1
web_samples_v2
wikihow|huge dataset|-|multi-domain, en, qa|[HuggingFaceTB/cosmopedia](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia)| +|[HumanLLMs/Human-Like-DPO-Dataset](https://modelscope.cn/datasets/HumanLLMs/Human-Like-DPO-Dataset)|default|10884|47.5±7.9, min=32, max=85|rlhf, dpo|[HumanLLMs/Human-Like-DPO-Dataset](https://huggingface.co/datasets/HumanLLMs/Human-Like-DPO-Dataset)| +|[LLM-Research/xlam-function-calling-60k](https://modelscope.cn/datasets/LLM-Research/xlam-function-calling-60k)|default
grpo|120000|453.7±219.5, min=164, max=2779|agent|-| |[OmniData/Zhihu-KOL](https://modelscope.cn/datasets/OmniData/Zhihu-KOL)|default|huge dataset|-|zhihu, qa|[wangrui6/Zhihu-KOL](https://huggingface.co/datasets/wangrui6/Zhihu-KOL)| |[OmniData/Zhihu-KOL-More-Than-100-Upvotes](https://modelscope.cn/datasets/OmniData/Zhihu-KOL-More-Than-100-Upvotes)|default|271261|1003.4±1826.1, min=28, max=52541|zhihu, qa|[bzb2023/Zhihu-KOL-More-Than-100-Upvotes](https://huggingface.co/datasets/bzb2023/Zhihu-KOL-More-Than-100-Upvotes)| |[PowerInfer/LONGCOT-Refine-500K](https://modelscope.cn/datasets/PowerInfer/LONGCOT-Refine-500K)|default|521921|296.5±158.4, min=39, max=4634|chat, sft, 🔥, cot|[PowerInfer/LONGCOT-Refine-500K](https://huggingface.co/datasets/PowerInfer/LONGCOT-Refine-500K)| |[PowerInfer/QWQ-LONGCOT-500K](https://modelscope.cn/datasets/PowerInfer/QWQ-LONGCOT-500K)|default|498082|310.7±303.1, min=35, max=22941|chat, sft, 🔥, cot|[PowerInfer/QWQ-LONGCOT-500K](https://huggingface.co/datasets/PowerInfer/QWQ-LONGCOT-500K)| +|[ServiceNow-AI/R1-Distill-SFT](https://modelscope.cn/datasets/ServiceNow-AI/R1-Distill-SFT)|v0
v1|1850809|164.2±438.0, min=30, max=32469|chat, sft, cot, r1|[ServiceNow-AI/R1-Distill-SFT](https://huggingface.co/datasets/ServiceNow-AI/R1-Distill-SFT)| |[TIGER-Lab/MATH-plus](https://modelscope.cn/datasets/TIGER-Lab/MATH-plus)|train|893929|301.4±196.7, min=50, max=1162|qa, math, en, quality|[TIGER-Lab/MATH-plus](https://huggingface.co/datasets/TIGER-Lab/MATH-plus)| |[Tongyi-DataEngine/SA1B-Dense-Caption](https://modelscope.cn/datasets/Tongyi-DataEngine/SA1B-Dense-Caption)|default|huge dataset|-|zh, multi-modal, vqa|-| |[Tongyi-DataEngine/SA1B-Paired-Captions-Images](https://modelscope.cn/datasets/Tongyi-DataEngine/SA1B-Paired-Captions-Images)|default|7736284|106.4±18.5, min=48, max=193|zh, multi-modal, vqa|-| @@ -780,6 +784,7 @@ |[YorickHe/CoT_zh](https://modelscope.cn/datasets/YorickHe/CoT_zh)|default|74771|129.1±53.2, min=51, max=401|chat, general|-| |[ZhipuAI/LongWriter-6k](https://modelscope.cn/datasets/ZhipuAI/LongWriter-6k)|default|6000|5009.0±2932.8, min=117, max=30354|long, chat, sft, 🔥|[THUDM/LongWriter-6k](https://huggingface.co/datasets/THUDM/LongWriter-6k)| |-|default|huge dataset|-|pretrain, quality|[allenai/c4](https://huggingface.co/datasets/allenai/c4)| +|[bespokelabs/Bespoke-Stratos-17k](https://modelscope.cn/datasets/bespokelabs/Bespoke-Stratos-17k)|default|16710|480.7±236.1, min=266, max=3556|chat, sft, cot, r1|[bespokelabs/Bespoke-Stratos-17k](https://huggingface.co/datasets/bespokelabs/Bespoke-Stratos-17k)| |-|default|huge dataset|-|pretrain, quality|[cerebras/SlimPajama-627B](https://huggingface.co/datasets/cerebras/SlimPajama-627B)| |[codefuse-ai/CodeExercise-Python-27k](https://modelscope.cn/datasets/codefuse-ai/CodeExercise-Python-27k)|default|27224|337.3±154.2, min=90, max=2826|chat, coding, 🔥|-| |[codefuse-ai/Evol-instruction-66k](https://modelscope.cn/datasets/codefuse-ai/Evol-instruction-66k)|default|66862|440.1±208.4, min=46, max=2661|chat, coding, 🔥|-| @@ -794,15 +799,20 @@ |[iic/MSAgent-Pro](https://modelscope.cn/datasets/iic/MSAgent-Pro)|default|21910|1978.1±747.9, min=339, max=8064|chat, agent, multi-round, 🔥|-| |[iic/ms_agent](https://modelscope.cn/datasets/iic/ms_agent)|default|30000|645.8±218.0, min=199, max=2070|chat, agent, multi-round, 🔥|-| |[iic/ms_bench](https://modelscope.cn/datasets/iic/ms_bench)|default|316820|353.4±424.5, min=29, max=2924|chat, general, multi-round, 🔥|-| +|[liucong/Chinese-DeepSeek-R1-Distill-data-110k-SFT](https://modelscope.cn/datasets/liucong/Chinese-DeepSeek-R1-Distill-data-110k-SFT)|default|110000|72.1±60.9, min=29, max=2315|chat, sft, cot, r1|[Congliu/Chinese-DeepSeek-R1-Distill-data-110k-SFT](https://huggingface.co/datasets/Congliu/Chinese-DeepSeek-R1-Distill-data-110k-SFT)| |-|default|huge dataset|-|multi-modal, en, vqa, quality|[lmms-lab/GQA](https://huggingface.co/datasets/lmms-lab/GQA)| |-|0_30_s_academic_v0_1
0_30_s_youtube_v0_1
1_2_m_academic_v0_1
1_2_m_youtube_v0_1
2_3_m_academic_v0_1
2_3_m_youtube_v0_1
30_60_s_academic_v0_1
30_60_s_youtube_v0_1|1335486|273.7±78.8, min=107, max=638|chat, multi-modal, video|[lmms-lab/LLaVA-Video-178K](https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K)| |[lvjianjin/AdvertiseGen](https://modelscope.cn/datasets/lvjianjin/AdvertiseGen)|default|97484|130.9±21.9, min=73, max=232|text-generation, 🔥|[shibing624/AdvertiseGen](https://huggingface.co/datasets/shibing624/AdvertiseGen)| |[mapjack/openwebtext_dataset](https://modelscope.cn/datasets/mapjack/openwebtext_dataset)|default|huge dataset|-|pretrain, zh, quality|-| |[modelscope/DuReader_robust-QG](https://modelscope.cn/datasets/modelscope/DuReader_robust-QG)|default|17899|242.0±143.1, min=75, max=1416|text-generation, 🔥|-| +|[modelscope/MathR](https://modelscope.cn/datasets/modelscope/MathR)|default
clean|6089|188.7±75.3, min=64, max=3341|qa, math|-| +|[modelscope/MathR-32B-Distill](https://modelscope.cn/datasets/modelscope/MathR-32B-Distill)|data|25921|209.4±63.1, min=121, max=3407|qa, math|-| |[modelscope/chinese-poetry-collection](https://modelscope.cn/datasets/modelscope/chinese-poetry-collection)|default|1710|58.1±8.1, min=31, max=71|text-generation, poetry|-| |[modelscope/clue](https://modelscope.cn/datasets/modelscope/clue)|cmnli|391783|81.6±16.0, min=54, max=157|text-generation, classification|[clue](https://huggingface.co/datasets/clue)| |[modelscope/coco_2014_caption](https://modelscope.cn/datasets/modelscope/coco_2014_caption)|train
validation|454617|389.6±68.4, min=70, max=587|chat, multi-modal, vision, 🔥|-| |[modelscope/gsm8k](https://modelscope.cn/datasets/modelscope/gsm8k)|main|7473|88.6±21.6, min=41, max=241|qa, math|-| +|[open-thoughts/OpenThoughts-114k](https://modelscope.cn/datasets/open-thoughts/OpenThoughts-114k)|default|113957|413.2±186.9, min=265, max=13868|chat, sft, cot, r1|[open-thoughts/OpenThoughts-114k](https://huggingface.co/datasets/open-thoughts/OpenThoughts-114k)| +|[sentence-transformers/stsb](https://modelscope.cn/datasets/sentence-transformers/stsb)|default|5748|21.0±0.0, min=21, max=21|similarity, 🔥|-| |[shenweizhou/alpha-umi-toolbench-processed-v2](https://modelscope.cn/datasets/shenweizhou/alpha-umi-toolbench-processed-v2)|backbone
caller
planner
summarizer|huge dataset|-|chat, agent, 🔥|-| |[simpleai/HC3](https://modelscope.cn/datasets/simpleai/HC3)|finance
finance_cls
medicine
medicine_cls|11021|296.0±153.3, min=65, max=2267|text-generation, classification, 🔥|[Hello-SimpleAI/HC3](https://huggingface.co/datasets/Hello-SimpleAI/HC3)| |[simpleai/HC3-Chinese](https://modelscope.cn/datasets/simpleai/HC3-Chinese)|baike
baike_cls
open_qa
open_qa_cls
nlpcc_dbqa
nlpcc_dbqa_cls
finance
finance_cls
medicine
medicine_cls
law
law_cls
psychology
psychology_cls|39781|179.9±70.2, min=90, max=1070|text-generation, classification, 🔥|[Hello-SimpleAI/HC3-Chinese](https://huggingface.co/datasets/Hello-SimpleAI/HC3-Chinese)| diff --git a/docs/source_en/Instruction/Supported-models-and-datasets.md b/docs/source_en/Instruction/Supported-models-and-datasets.md index 63da4467b..ae058ba5b 100644 --- a/docs/source_en/Instruction/Supported-models-and-datasets.md +++ b/docs/source_en/Instruction/Supported-models-and-datasets.md @@ -714,6 +714,7 @@ The table below introduces information about the datasets integrated with ms-swi |[AI-ModelScope/LaTeX_OCR](https://modelscope.cn/datasets/AI-ModelScope/LaTeX_OCR)|default
human_handwrite
human_handwrite_print
synthetic_handwrite
small|162149|117.6±44.9, min=41, max=312|chat, ocr, multi-modal, vision|[linxy/LaTeX_OCR](https://huggingface.co/datasets/linxy/LaTeX_OCR)| |[AI-ModelScope/LongAlpaca-12k](https://modelscope.cn/datasets/AI-ModelScope/LongAlpaca-12k)|default|11998|9941.8±3417.1, min=4695, max=25826|long-sequence, QA|[Yukang/LongAlpaca-12k](https://huggingface.co/datasets/Yukang/LongAlpaca-12k)| |[AI-ModelScope/M3IT](https://modelscope.cn/datasets/AI-ModelScope/M3IT)|coco
vqa-v2
shapes
shapes-rephrased
coco-goi-rephrased
snli-ve
snli-ve-rephrased
okvqa
a-okvqa
viquae
textcap
docvqa
science-qa
imagenet
imagenet-open-ended
imagenet-rephrased
coco-goi
clevr
clevr-rephrased
nlvr
coco-itm
coco-itm-rephrased
vsr
vsr-rephrased
mocheg
mocheg-rephrased
coco-text
fm-iqa
activitynet-qa
msrvtt
ss
coco-cn
refcoco
refcoco-rephrased
multi30k
image-paragraph-captioning
visual-dialog
visual-dialog-rephrased
iqa
vcr
visual-mrc
ivqa
msrvtt-qa
msvd-qa
gqa
text-vqa
ocr-vqa
st-vqa
flickr8k-cn|huge dataset|-|chat, multi-modal, vision|-| +|[AI-ModelScope/MATH-lighteval](https://modelscope.cn/datasets/AI-ModelScope/MATH-lighteval)|default|7500|104.4±92.8, min=36, max=1683|grpo, math|[DigitalLearningGmbH/MATH-lighteval](https://huggingface.co/datasets/DigitalLearningGmbH/MATH-lighteval)| |[AI-ModelScope/Magpie-Qwen2-Pro-200K-Chinese](https://modelscope.cn/datasets/AI-ModelScope/Magpie-Qwen2-Pro-200K-Chinese)|default|200000|448.4±223.5, min=87, max=4098|chat, sft, 🔥, zh|[Magpie-Align/Magpie-Qwen2-Pro-200K-Chinese](https://huggingface.co/datasets/Magpie-Align/Magpie-Qwen2-Pro-200K-Chinese)| |[AI-ModelScope/Magpie-Qwen2-Pro-200K-English](https://modelscope.cn/datasets/AI-ModelScope/Magpie-Qwen2-Pro-200K-English)|default|200000|609.9±277.1, min=257, max=4098|chat, sft, 🔥, en|[Magpie-Align/Magpie-Qwen2-Pro-200K-English](https://huggingface.co/datasets/Magpie-Align/Magpie-Qwen2-Pro-200K-English)| |[AI-ModelScope/Magpie-Qwen2-Pro-300K-Filtered](https://modelscope.cn/datasets/AI-ModelScope/Magpie-Qwen2-Pro-300K-Filtered)|default|300000|556.6±288.6, min=175, max=4098|chat, sft, 🔥|[Magpie-Align/Magpie-Qwen2-Pro-300K-Filtered](https://huggingface.co/datasets/Magpie-Align/Magpie-Qwen2-Pro-300K-Filtered)| @@ -770,10 +771,13 @@ The table below introduces information about the datasets integrated with ms-swi |[DAMO_NLP/jd](https://modelscope.cn/datasets/DAMO_NLP/jd)|default
cls|45012|66.9±87.0, min=41, max=1699|text-generation, classification, 🔥|-| |-|default|huge dataset|-|pretrain, quality|[HuggingFaceFW/fineweb](https://huggingface.co/datasets/HuggingFaceFW/fineweb)| |-|auto_math_text
khanacademy
openstax
stanford
stories
web_samples_v1
web_samples_v2
wikihow|huge dataset|-|multi-domain, en, qa|[HuggingFaceTB/cosmopedia](https://huggingface.co/datasets/HuggingFaceTB/cosmopedia)| +|[HumanLLMs/Human-Like-DPO-Dataset](https://modelscope.cn/datasets/HumanLLMs/Human-Like-DPO-Dataset)|default|10884|47.5±7.9, min=32, max=85|rlhf, dpo|[HumanLLMs/Human-Like-DPO-Dataset](https://huggingface.co/datasets/HumanLLMs/Human-Like-DPO-Dataset)| +|[LLM-Research/xlam-function-calling-60k](https://modelscope.cn/datasets/LLM-Research/xlam-function-calling-60k)|default
grpo|120000|453.7±219.5, min=164, max=2779|agent|-| |[OmniData/Zhihu-KOL](https://modelscope.cn/datasets/OmniData/Zhihu-KOL)|default|huge dataset|-|zhihu, qa|[wangrui6/Zhihu-KOL](https://huggingface.co/datasets/wangrui6/Zhihu-KOL)| |[OmniData/Zhihu-KOL-More-Than-100-Upvotes](https://modelscope.cn/datasets/OmniData/Zhihu-KOL-More-Than-100-Upvotes)|default|271261|1003.4±1826.1, min=28, max=52541|zhihu, qa|[bzb2023/Zhihu-KOL-More-Than-100-Upvotes](https://huggingface.co/datasets/bzb2023/Zhihu-KOL-More-Than-100-Upvotes)| |[PowerInfer/LONGCOT-Refine-500K](https://modelscope.cn/datasets/PowerInfer/LONGCOT-Refine-500K)|default|521921|296.5±158.4, min=39, max=4634|chat, sft, 🔥, cot|[PowerInfer/LONGCOT-Refine-500K](https://huggingface.co/datasets/PowerInfer/LONGCOT-Refine-500K)| |[PowerInfer/QWQ-LONGCOT-500K](https://modelscope.cn/datasets/PowerInfer/QWQ-LONGCOT-500K)|default|498082|310.7±303.1, min=35, max=22941|chat, sft, 🔥, cot|[PowerInfer/QWQ-LONGCOT-500K](https://huggingface.co/datasets/PowerInfer/QWQ-LONGCOT-500K)| +|[ServiceNow-AI/R1-Distill-SFT](https://modelscope.cn/datasets/ServiceNow-AI/R1-Distill-SFT)|v0
v1|1850809|164.2±438.0, min=30, max=32469|chat, sft, cot, r1|[ServiceNow-AI/R1-Distill-SFT](https://huggingface.co/datasets/ServiceNow-AI/R1-Distill-SFT)| |[TIGER-Lab/MATH-plus](https://modelscope.cn/datasets/TIGER-Lab/MATH-plus)|train|893929|301.4±196.7, min=50, max=1162|qa, math, en, quality|[TIGER-Lab/MATH-plus](https://huggingface.co/datasets/TIGER-Lab/MATH-plus)| |[Tongyi-DataEngine/SA1B-Dense-Caption](https://modelscope.cn/datasets/Tongyi-DataEngine/SA1B-Dense-Caption)|default|huge dataset|-|zh, multi-modal, vqa|-| |[Tongyi-DataEngine/SA1B-Paired-Captions-Images](https://modelscope.cn/datasets/Tongyi-DataEngine/SA1B-Paired-Captions-Images)|default|7736284|106.4±18.5, min=48, max=193|zh, multi-modal, vqa|-| @@ -781,6 +785,7 @@ The table below introduces information about the datasets integrated with ms-swi |[YorickHe/CoT_zh](https://modelscope.cn/datasets/YorickHe/CoT_zh)|default|74771|129.1±53.2, min=51, max=401|chat, general|-| |[ZhipuAI/LongWriter-6k](https://modelscope.cn/datasets/ZhipuAI/LongWriter-6k)|default|6000|5009.0±2932.8, min=117, max=30354|long, chat, sft, 🔥|[THUDM/LongWriter-6k](https://huggingface.co/datasets/THUDM/LongWriter-6k)| |-|default|huge dataset|-|pretrain, quality|[allenai/c4](https://huggingface.co/datasets/allenai/c4)| +|[bespokelabs/Bespoke-Stratos-17k](https://modelscope.cn/datasets/bespokelabs/Bespoke-Stratos-17k)|default|16710|480.7±236.1, min=266, max=3556|chat, sft, cot, r1|[bespokelabs/Bespoke-Stratos-17k](https://huggingface.co/datasets/bespokelabs/Bespoke-Stratos-17k)| |-|default|huge dataset|-|pretrain, quality|[cerebras/SlimPajama-627B](https://huggingface.co/datasets/cerebras/SlimPajama-627B)| |[codefuse-ai/CodeExercise-Python-27k](https://modelscope.cn/datasets/codefuse-ai/CodeExercise-Python-27k)|default|27224|337.3±154.2, min=90, max=2826|chat, coding, 🔥|-| |[codefuse-ai/Evol-instruction-66k](https://modelscope.cn/datasets/codefuse-ai/Evol-instruction-66k)|default|66862|440.1±208.4, min=46, max=2661|chat, coding, 🔥|-| @@ -795,15 +800,20 @@ The table below introduces information about the datasets integrated with ms-swi |[iic/MSAgent-Pro](https://modelscope.cn/datasets/iic/MSAgent-Pro)|default|21910|1978.1±747.9, min=339, max=8064|chat, agent, multi-round, 🔥|-| |[iic/ms_agent](https://modelscope.cn/datasets/iic/ms_agent)|default|30000|645.8±218.0, min=199, max=2070|chat, agent, multi-round, 🔥|-| |[iic/ms_bench](https://modelscope.cn/datasets/iic/ms_bench)|default|316820|353.4±424.5, min=29, max=2924|chat, general, multi-round, 🔥|-| +|[liucong/Chinese-DeepSeek-R1-Distill-data-110k-SFT](https://modelscope.cn/datasets/liucong/Chinese-DeepSeek-R1-Distill-data-110k-SFT)|default|110000|72.1±60.9, min=29, max=2315|chat, sft, cot, r1|[Congliu/Chinese-DeepSeek-R1-Distill-data-110k-SFT](https://huggingface.co/datasets/Congliu/Chinese-DeepSeek-R1-Distill-data-110k-SFT)| |-|default|huge dataset|-|multi-modal, en, vqa, quality|[lmms-lab/GQA](https://huggingface.co/datasets/lmms-lab/GQA)| |-|0_30_s_academic_v0_1
0_30_s_youtube_v0_1
1_2_m_academic_v0_1
1_2_m_youtube_v0_1
2_3_m_academic_v0_1
2_3_m_youtube_v0_1
30_60_s_academic_v0_1
30_60_s_youtube_v0_1|1335486|273.7±78.8, min=107, max=638|chat, multi-modal, video|[lmms-lab/LLaVA-Video-178K](https://huggingface.co/datasets/lmms-lab/LLaVA-Video-178K)| |[lvjianjin/AdvertiseGen](https://modelscope.cn/datasets/lvjianjin/AdvertiseGen)|default|97484|130.9±21.9, min=73, max=232|text-generation, 🔥|[shibing624/AdvertiseGen](https://huggingface.co/datasets/shibing624/AdvertiseGen)| |[mapjack/openwebtext_dataset](https://modelscope.cn/datasets/mapjack/openwebtext_dataset)|default|huge dataset|-|pretrain, zh, quality|-| |[modelscope/DuReader_robust-QG](https://modelscope.cn/datasets/modelscope/DuReader_robust-QG)|default|17899|242.0±143.1, min=75, max=1416|text-generation, 🔥|-| +|[modelscope/MathR](https://modelscope.cn/datasets/modelscope/MathR)|default
clean|6089|188.7±75.3, min=64, max=3341|qa, math|-| +|[modelscope/MathR-32B-Distill](https://modelscope.cn/datasets/modelscope/MathR-32B-Distill)|data|25921|209.4±63.1, min=121, max=3407|qa, math|-| |[modelscope/chinese-poetry-collection](https://modelscope.cn/datasets/modelscope/chinese-poetry-collection)|default|1710|58.1±8.1, min=31, max=71|text-generation, poetry|-| |[modelscope/clue](https://modelscope.cn/datasets/modelscope/clue)|cmnli|391783|81.6±16.0, min=54, max=157|text-generation, classification|[clue](https://huggingface.co/datasets/clue)| |[modelscope/coco_2014_caption](https://modelscope.cn/datasets/modelscope/coco_2014_caption)|train
validation|454617|389.6±68.4, min=70, max=587|chat, multi-modal, vision, 🔥|-| |[modelscope/gsm8k](https://modelscope.cn/datasets/modelscope/gsm8k)|main|7473|88.6±21.6, min=41, max=241|qa, math|-| +|[open-thoughts/OpenThoughts-114k](https://modelscope.cn/datasets/open-thoughts/OpenThoughts-114k)|default|113957|413.2±186.9, min=265, max=13868|chat, sft, cot, r1|[open-thoughts/OpenThoughts-114k](https://huggingface.co/datasets/open-thoughts/OpenThoughts-114k)| +|[sentence-transformers/stsb](https://modelscope.cn/datasets/sentence-transformers/stsb)|default|5748|21.0±0.0, min=21, max=21|similarity, 🔥|-| |[shenweizhou/alpha-umi-toolbench-processed-v2](https://modelscope.cn/datasets/shenweizhou/alpha-umi-toolbench-processed-v2)|backbone
caller
planner
summarizer|huge dataset|-|chat, agent, 🔥|-| |[simpleai/HC3](https://modelscope.cn/datasets/simpleai/HC3)|finance
finance_cls
medicine
medicine_cls|11021|296.0±153.3, min=65, max=2267|text-generation, classification, 🔥|[Hello-SimpleAI/HC3](https://huggingface.co/datasets/Hello-SimpleAI/HC3)| |[simpleai/HC3-Chinese](https://modelscope.cn/datasets/simpleai/HC3-Chinese)|baike
baike_cls
open_qa
open_qa_cls
nlpcc_dbqa
nlpcc_dbqa_cls
finance
finance_cls
medicine
medicine_cls
law
law_cls
psychology
psychology_cls|39781|179.9±70.2, min=90, max=1070|text-generation, classification, 🔥|[Hello-SimpleAI/HC3-Chinese](https://huggingface.co/datasets/Hello-SimpleAI/HC3-Chinese)| diff --git a/swift/llm/dataset/data/dataset_info.json b/swift/llm/dataset/data/dataset_info.json index 2cf38c956..35e4c48f6 100644 --- a/swift/llm/dataset/data/dataset_info.json +++ b/swift/llm/dataset/data/dataset_info.json @@ -640,12 +640,16 @@ "tags": ["rlhf", "dpo"] }, { - "ms_dataset_id": "DigitalLearningGmbH/MATH-lighteval", - "hf_dataset_id": "AI-ModelScope/MATH-lighteval", + "ms_dataset_id": "AI-ModelScope/MATH-lighteval", + "hf_dataset_id": "DigitalLearningGmbH/MATH-lighteval", "columns": { "problem": "query" }, "tags": ["grpo", "math"] - + }, + { + "ms_dataset_id": "liucong/Chinese-DeepSeek-R1-Distill-data-110k-SFT", + "hf_dataset_id": "Congliu/Chinese-DeepSeek-R1-Distill-data-110k-SFT", + "tags": ["chat", "sft", "cot", "r1"] } ] diff --git a/swift/llm/dataset/dataset/llm.py b/swift/llm/dataset/dataset/llm.py index 5a4bdc523..e102d78ec 100644 --- a/swift/llm/dataset/dataset/llm.py +++ b/swift/llm/dataset/dataset/llm.py @@ -427,10 +427,11 @@ def preprocess(self, row: Dict[str, Any], all_tools=None) -> Optional[Dict[str, register_dataset( DatasetMeta(ms_dataset_id='modelscope/MathR', subsets=['default', 'clean'], split=['train'], tags=['qa', 'math'])) + register_dataset( DatasetMeta( ms_dataset_id='modelscope/MathR-32B-Distill', - subsets=['default', 'clean'], + subsets=['data'], split=['train'], tags=['qa', 'math'])) From cd36488d5385a94bf1e3a36f59459a14ed37041d Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 20 Feb 2025 00:39:48 +0800 Subject: [PATCH 2/6] lint pass --- swift/llm/dataset/dataset/llm.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/swift/llm/dataset/dataset/llm.py b/swift/llm/dataset/dataset/llm.py index e102d78ec..01ad277ce 100644 --- a/swift/llm/dataset/dataset/llm.py +++ b/swift/llm/dataset/dataset/llm.py @@ -429,11 +429,7 @@ def preprocess(self, row: Dict[str, Any], all_tools=None) -> Optional[Dict[str, DatasetMeta(ms_dataset_id='modelscope/MathR', subsets=['default', 'clean'], split=['train'], tags=['qa', 'math'])) register_dataset( - DatasetMeta( - ms_dataset_id='modelscope/MathR-32B-Distill', - subsets=['data'], - split=['train'], - tags=['qa', 'math'])) + DatasetMeta(ms_dataset_id='modelscope/MathR-32B-Distill', subsets=['data'], split=['train'], tags=['qa', 'math'])) class HC3Preprocessor(ResponsePreprocessor): From 1dddccfd9dfee9a286363ecc886730c2323cfade Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 20 Feb 2025 10:19:09 +0800 Subject: [PATCH 3/6] refactor grpo dataset --- swift/llm/dataset/dataset/llm.py | 33 +++--------------------- swift/llm/dataset/preprocessor/core.py | 35 ++++++++++++++------------ 2 files changed, 22 insertions(+), 46 deletions(-) diff --git a/swift/llm/dataset/dataset/llm.py b/swift/llm/dataset/dataset/llm.py index 01ad277ce..78437192f 100644 --- a/swift/llm/dataset/dataset/llm.py +++ b/swift/llm/dataset/dataset/llm.py @@ -397,19 +397,6 @@ def preprocess(self, row: Dict[str, Any]) -> Optional[Dict[str, Any]]: register_dataset(DatasetMeta(ms_dataset_id='swift/ToolBench', tags=['chat', 'agent', 'multi-round'])) - -class CompetitionMathPreprocessor(ResponsePreprocessor): - - def preprocess(self, row: Dict[str, Any], all_tools=None) -> Optional[Dict[str, Any]]: - query = row['problem'] - response = row['solution'] - row = { - 'query': query, - 'response': response, - } - return super().preprocess(row) - - register_dataset( 
DatasetMeta( ms_dataset_id='tastelikefeet/competition_math', @@ -418,7 +405,6 @@ def preprocess(self, row: Dict[str, Any], all_tools=None) -> Optional[Dict[str, name='default', subset='default', split=['train', 'test'], - preprocess_func=CompetitionMathPreprocessor(), ), ], tags=['qa', 'math'])) @@ -584,28 +570,15 @@ def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: name = answer['name'] args = json.dumps(answer['arguments']) response = f'Action: {name}\nAction Input: {args}' - key = 'response' if self.response else 'solution' - row = {'query': query, key: response, 'tools': row['tools']} + row = {'query': query, 'response': response, 'solution': response, 'tools': row['tools']} return super().preprocess(row) register_dataset( DatasetMeta( ms_dataset_id='LLM-Research/xlam-function-calling-60k', - subsets=[ - SubsetDataset( - name='default', - subset='dataset', - split=['train'], - preprocess_func=XlamFunctionCallingPreprocessor(response=True), - ), - SubsetDataset( - name='grpo', - subset='dataset', - split=['train'], - preprocess_func=XlamFunctionCallingPreprocessor(response=False), - ), - ], + subsets=['dataset'], + preprocess_func=XlamFunctionCallingPreprocessor(), tags=['agent'])) diff --git a/swift/llm/dataset/preprocessor/core.py b/swift/llm/dataset/preprocessor/core.py index 91374484d..3efea9dbc 100644 --- a/swift/llm/dataset/preprocessor/core.py +++ b/swift/llm/dataset/preprocessor/core.py @@ -129,11 +129,13 @@ def rows_to_batched(rows: List[Dict[str, Any]]): return batched @staticmethod - def _fix_streaming_keys(row): + def _remove_prefix_keys(row, prefix: str): for k in list(row.keys()): - if k.startswith('__@'): - new_k = k[len('__@'):] - row[new_k] = row.pop(k) + if k.startswith(prefix): + new_k = k[len(prefix):] + new_v = row.pop(k) + if new_k not in row: + row[new_k] = new_v @staticmethod def _check_objects(row): @@ -160,7 +162,7 @@ def batched_preprocess(self, batched_row: Dict[str, Any], *, strict: bool, from ...template import MaxLengthError batched_row = dict(batched_row) assert len(batched_row) > 0 - self._fix_streaming_keys(batched_row) + self._remove_prefix_keys(batched_row, '__@') # compat streaming rows = self.batched_to_rows(batched_row) new_rows = [] @@ -191,7 +193,7 @@ def batched_preprocess(self, batched_row: Dict[str, Any], *, strict: bool, row = [] new_rows += row res = self.rows_to_batched(new_rows) - + self._remove_prefix_keys(res, '__#') # compat GRPO if len(res) == 0: res['messages'] = [] @@ -285,21 +287,22 @@ def __call__( if self.dataset_sample is not None: dataset = sample_dataset(dataset, self.dataset_sample, self.random_state) + map_kwargs = {'batched': True, 'batch_size': batch_size} + if isinstance(dataset, HfDataset): + map_kwargs['num_proc'] = num_proc + # compat GRPO: The solution field will be retained. 
+ dataset = RowPreprocessor.get_features_dataset(dataset) + if 'solution' in dataset.features: + dataset = dataset.map(lambda x: {'__#solution': x['solution']}, **map_kwargs) dataset = self._rename_columns(dataset) dataset = self.prepare_dataset(dataset) dataset = self._cast_pil_image(dataset) - map_kwargs = {} - ignore_max_length_error = False - if isinstance(dataset, HfDataset): - map_kwargs['num_proc'] = num_proc - if num_proc > 1: - ignore_max_length_error = True + + ignore_max_length_error = True if isinstance(dataset, HfDataset) and num_proc > 1 else False with self._patch_arrow_writer(): try: dataset_mapped = dataset.map( self.batched_preprocess, - batched=True, - batch_size=batch_size, fn_kwargs={ 'strict': strict, 'ignore_max_length_error': ignore_max_length_error @@ -321,8 +324,8 @@ class ResponsePreprocessor(RowPreprocessor): def __init__(self, *, columns: Optional[Dict[str, str]] = None, **kwargs) -> None: super().__init__(columns=columns, **kwargs) system_keys = ['system', 'system_prompt'] - query_keys = ['query', 'prompt', 'input', 'instruction', 'question'] - response_keys = ['response', 'answer', 'output', 'targets', 'target', 'answer_key', 'answers' + query_keys = ['query', 'prompt', 'input', 'instruction', 'question', 'problem'] + response_keys = ['response', 'answer', 'output', 'targets', 'target', 'answer_key', 'answers', 'solution' ] + ['text', 'completion', 'content'] for key in system_keys: self.columns[key] = 'system' From 53eac75e101d9732b2a05862ba2c3ac400630d72 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 20 Feb 2025 11:15:45 +0800 Subject: [PATCH 4/6] fix sample --- examples/train/grpo/full_vllm.sh | 6 +++--- swift/llm/infer/infer_engine/infer_engine.py | 3 --- swift/llm/sampling/utils.py | 6 ++++-- 3 files changed, 7 insertions(+), 8 deletions(-) diff --git a/examples/train/grpo/full_vllm.sh b/examples/train/grpo/full_vllm.sh index 6a96641ce..7100bc24b 100644 --- a/examples/train/grpo/full_vllm.sh +++ b/examples/train/grpo/full_vllm.sh @@ -7,7 +7,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ NPROC_PER_NODE=7 \ swift rlhf \ --rlhf_type grpo \ - --model Qwen/Qwen2.5-7B-Instruct \ + --model Qwen/Qwen2.5-7B \ --reward_funcs accuracy format \ --use_vllm true \ --vllm_device auto \ @@ -15,12 +15,12 @@ swift rlhf \ --vllm_max_model_len 8192 \ --train_type full \ --torch_dtype bfloat16 \ - --dataset 'AI-MO/NuminaMath-TIR#5000' \ + --dataset 'AI-MO/NuminaMath-TIR#2000' \ --max_completion_length 2048 \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --per_device_eval_batch_size 1 \ - --learning_rate 1e-6 \ + --learning_rate 5e-7 \ --gradient_accumulation_steps 2 \ --eval_steps 200 \ --save_steps 200 \ diff --git a/swift/llm/infer/infer_engine/infer_engine.py b/swift/llm/infer/infer_engine/infer_engine.py index 2934e2dda..fc2fa6ab2 100644 --- a/swift/llm/infer/infer_engine/infer_engine.py +++ b/swift/llm/infer/infer_engine/infer_engine.py @@ -122,9 +122,6 @@ def _update_metrics(result, metrics: Optional[List[Metric]] = None): metric.update(response) return result_origin - def __call__(self, *args, **kwargs): - return self.infer(*args, **kwargs) - def infer(self, infer_requests: List[InferRequest], request_config: Optional[RequestConfig] = None, diff --git a/swift/llm/sampling/utils.py b/swift/llm/sampling/utils.py index 3d48c6a53..b8ed30fb1 100644 --- a/swift/llm/sampling/utils.py +++ b/swift/llm/sampling/utils.py @@ -38,11 +38,13 @@ def get_reward(model: Any, Index 0: The min-max normalized scores matched the infer_requests Index 1: The mask filtered by 
the threshold """ - parameters = inspect.signature(model.infer).parameters + from swift.llm import InferEngine + infer_func = model.infer if isinstance(model, InferEngine) else model.__call__ + parameters = inspect.signature(infer_func).parameters gt_param = {} if 'ground_truths' in parameters: gt_param = {'ground_truths': ground_truths} - rewards = model(infer_requests, request_config=request_config, **gt_param) + rewards = infer_func(infer_requests, request_config=request_config, **gt_param) from swift.llm.infer.protocol import ChatCompletionResponse if isinstance(rewards[0], ChatCompletionResponse): rewards = [float(r.choices[0].message.content) for r in rewards] From 153e15f14d4a144f3045c2b1e79188d6a63a6ca7 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 20 Feb 2025 11:16:55 +0800 Subject: [PATCH 5/6] update --- examples/train/grpo/full_vllm.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/train/grpo/full_vllm.sh b/examples/train/grpo/full_vllm.sh index 7100bc24b..6a96641ce 100644 --- a/examples/train/grpo/full_vllm.sh +++ b/examples/train/grpo/full_vllm.sh @@ -7,7 +7,7 @@ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \ NPROC_PER_NODE=7 \ swift rlhf \ --rlhf_type grpo \ - --model Qwen/Qwen2.5-7B \ + --model Qwen/Qwen2.5-7B-Instruct \ --reward_funcs accuracy format \ --use_vllm true \ --vllm_device auto \ @@ -15,12 +15,12 @@ swift rlhf \ --vllm_max_model_len 8192 \ --train_type full \ --torch_dtype bfloat16 \ - --dataset 'AI-MO/NuminaMath-TIR#2000' \ + --dataset 'AI-MO/NuminaMath-TIR#5000' \ --max_completion_length 2048 \ --num_train_epochs 1 \ --per_device_train_batch_size 1 \ --per_device_eval_batch_size 1 \ - --learning_rate 5e-7 \ + --learning_rate 1e-6 \ --gradient_accumulation_steps 2 \ --eval_steps 200 \ --save_steps 200 \ From fa9ef3d3d816512031577d694f1eac33121f7e76 Mon Sep 17 00:00:00 2001 From: Jintao Huang Date: Thu, 20 Feb 2025 11:37:53 +0800 Subject: [PATCH 6/6] update --- swift/llm/dataset/dataset/llm.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/swift/llm/dataset/dataset/llm.py b/swift/llm/dataset/dataset/llm.py index 78437192f..60a7a3bbc 100644 --- a/swift/llm/dataset/dataset/llm.py +++ b/swift/llm/dataset/dataset/llm.py @@ -557,10 +557,6 @@ def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: class XlamFunctionCallingPreprocessor(ResponsePreprocessor): - def __init__(self, response=True): - self.response = response - super().__init__() - def preprocess(self, row: Dict[str, Any]) -> Dict[str, Any]: query = row['query'] answers = row['response']
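
Note on the `__#` prefix bookkeeping introduced in PATCH 3/6 (swift/llm/dataset/preprocessor/core.py): it is a protect-and-restore pattern. The `solution` column is copied under the reserved `__#` prefix before the batched row preprocessing runs, and `_remove_prefix_keys(res, '__#')` moves it back afterwards without overwriting a key the preprocessor already produced, so GRPO reward functions still receive the original solution even when the preprocessor only emits conversation fields. The following is a minimal standalone sketch of that pattern; the helper names are illustrative only and are not part of the ms-swift API.

from typing import Any, Dict

RESERVED_PREFIX = '__#'  # keys copied under this prefix survive a lossy preprocessing step


def protect_column(row: Dict[str, Any], key: str) -> Dict[str, Any]:
    # Copy the column under a reserved name so the preprocessor cannot drop it.
    if key in row:
        row[RESERVED_PREFIX + key] = row[key]
    return row


def restore_prefixed(row: Dict[str, Any], prefix: str = RESERVED_PREFIX) -> Dict[str, Any]:
    # Mirrors _remove_prefix_keys: move '__#xxx' back to 'xxx',
    # never overwriting a value the preprocessor has already set.
    for k in list(row.keys()):
        if k.startswith(prefix):
            value = row.pop(k)
            new_k = k[len(prefix):]
            if new_k not in row:
                row[new_k] = value
    return row


def lossy_preprocess(row: Dict[str, Any]) -> Dict[str, Any]:
    # Stand-in for a preprocessor that only keeps the conversation fields.
    keep = ('query', 'response')
    return {k: v for k, v in row.items() if k in keep or k.startswith(RESERVED_PREFIX)}


row = {'query': '1 + 1 = ?', 'response': '2', 'solution': '2'}
row = restore_prefixed(lossy_preprocess(protect_column(row, 'solution')))
assert row == {'query': '1 + 1 = ?', 'response': '2', 'solution': '2'}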