Update GRPO full_vllm.sh example: use Qwen2.5-7B-Instruct, 5000 dataset samples, learning rate 1e-6

Jintao-Huang · Feb 20, 2025 · 153e15f
1 parent 53eac75
commit 153e15f
Showing 1 changed file with 3 additions and 3 deletions.
diff --git a/examples/train/grpo/full_vllm.sh b/examples/train/grpo/full_vllm.sh
@@ -7,20 +7,20 @@ CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
 NPROC_PER_NODE=7 \
 swift rlhf \
     --rlhf_type grpo \
-    --model Qwen/Qwen2.5-7B \
+    --model Qwen/Qwen2.5-7B-Instruct \
     --reward_funcs accuracy format \
     --use_vllm true \
     --vllm_device auto \
     --vllm_gpu_memory_utilization 0.7 \
     --vllm_max_model_len 8192 \
     --train_type full \
     --torch_dtype bfloat16 \
-    --dataset 'AI-MO/NuminaMath-TIR#2000' \
+    --dataset 'AI-MO/NuminaMath-TIR#5000' \
     --max_completion_length 2048 \
     --num_train_epochs 1 \
     --per_device_train_batch_size 1 \
     --per_device_eval_batch_size 1 \
-    --learning_rate 5e-7 \
+    --learning_rate 1e-6 \
     --gradient_accumulation_steps 2 \
     --eval_steps 200 \
     --save_steps 200 \