FIX: Fix CI on transformers main #1576

Merged (7 commits) on Apr 23, 2024

Changes from all commits
commands/run_dpo.sh (4 changes: 2 additions & 2 deletions)
@@ -2,7 +2,7 @@
 # This script runs an SFT example end-to-end on a tiny model using different possible configurations
 # but defaults to QLoRA + PEFT
 OUTPUT_DIR="test_dpo/"
-MODEL_NAME="HuggingFaceM4/tiny-random-LlamaForCausalLM"
+MODEL_NAME="trl-internal-testing/tiny-random-LlamaForCausalLM"
 DATASET_NAME="trl-internal-testing/hh-rlhf-trl-style"
 MAX_STEPS=5
 BATCH_SIZE=2
@@ -55,4 +55,4 @@ echo "Starting program..."
   echo "Operation Failed!"
   exit 1
 }
-exit 0
+exit 0

commands/run_sft.sh (2 changes: 1 addition & 1 deletion)
@@ -2,7 +2,7 @@
 # This script runs an SFT example end-to-end on a tiny model using different possible configurations
 # but defaults to QLoRA + PEFT
 OUTPUT_DIR="test_sft/"
-MODEL_NAME="HuggingFaceM4/tiny-random-LlamaForCausalLM"
+MODEL_NAME="trl-internal-testing/tiny-random-LlamaForCausalLM"
 DATASET_NAME="imdb"
 MAX_STEPS=5
 BATCH_SIZE=2

docs/source/clis.mdx (4 changes: 2 additions & 2 deletions)
@@ -22,7 +22,7 @@ We also recommend you passing a YAML config file to configure your training prot
 
 ```yaml
 model_name_or_path:
-  HuggingFaceM4/tiny-random-LlamaForCausalLM
+  trl-internal-testing/tiny-random-LlamaForCausalLM
 dataset_name:
   imdb
 dataset_text_field:
@@ -116,4 +116,4 @@ Besides talking to the model there are a few commands you can use:
 - **save {SAVE_NAME} (optional)**: save the current chat and settings to file by default to `./chat_history/{MODEL_NAME}/chat_{DATETIME}.yaml` or `{SAVE_NAME}` if provided
 - **exit**: closes the interface
 
-The default examples are defined in `examples/scripts/config/default_chat_config.yaml` but you can pass your own with `--config CONFIG_FILE` where you can also specify the default generation parameters.
+The default examples are defined in `examples/scripts/config/default_chat_config.yaml` but you can pass your own with `--config CONFIG_FILE` where you can also specify the default generation parameters.

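For context (not part of the diff), the same YAML-driven flow is what tests/test_cli.py below exercises via `subprocess`. A minimal sketch of launching an SFT run from the repository's `example_config.yaml`, assuming the `sft` subcommand accepts a `--config` flag analogous to the chat interface's `--config CONFIG_FILE` mentioned above:

```python
import subprocess

# Hypothetical invocation: the --config flag for `trl sft` is an assumption here;
# only the chat interface's --config is spelled out in the visible part of the docs.
# example_config.yaml is the repository file also touched by this PR.
subprocess.run(
    "trl sft --config example_config.yaml --output_dir tmp-sft --max_steps 1",
    shell=True,
    check=True,
)
```
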
example_config.yaml (2 changes: 1 addition & 1 deletion)
@@ -7,7 +7,7 @@
 # CUDA_VISIBLE_DEVICES: 0
 
 model_name_or_path:
-  HuggingFaceM4/tiny-random-LlamaForCausalLM
+  trl-internal-testing/tiny-random-LlamaForCausalLM
 dataset_name:
   imdb
 dataset_text_field:

tests/slow/testing_constants.py (2 changes: 1 addition & 1 deletion)
@@ -14,7 +14,7 @@
 
 # TODO: push them under trl-org
 MODELS_TO_TEST = [
-    "HuggingFaceM4/tiny-random-LlamaForCausalLM",
+    "trl-internal-testing/tiny-random-LlamaForCausalLM",
     "HuggingFaceM4/tiny-random-MistralForCausalLM",
 ]
 

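Since the whole PR swaps the tiny test checkpoint, a quick sanity check (not part of the diff) is to confirm that the replacement repo resolves with plain transformers. A minimal sketch, assuming Hub access:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "trl-internal-testing/tiny-random-LlamaForCausalLM"

# The checkpoint is a tiny random model, so both downloads take only a few seconds.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

print(model.config.model_type)  # expected: "llama"
```
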
tests/test_cli.py (4 changes: 2 additions & 2 deletions)
@@ -20,7 +20,7 @@
 def test_sft_cli():
     try:
         subprocess.run(
-            "trl sft --max_steps 1 --output_dir tmp-sft --model_name_or_path HuggingFaceM4/tiny-random-LlamaForCausalLM --dataset_name imdb --learning_rate 1e-4 --lr_scheduler_type cosine --dataset_text_field text",
+            "trl sft --max_steps 1 --output_dir tmp-sft --model_name_or_path trl-internal-testing/tiny-random-LlamaForCausalLM --dataset_name imdb --learning_rate 1e-4 --lr_scheduler_type cosine --dataset_text_field text",
             shell=True,
             check=True,
         )
@@ -32,7 +32,7 @@ def test_sft_cli():
 def test_dpo_cli():
     try:
         subprocess.run(
-            "trl dpo --max_steps 1 --output_dir tmp-dpo --model_name_or_path HuggingFaceM4/tiny-random-LlamaForCausalLM --dataset_name trl-internal-testing/hh-rlhf-trl-style --learning_rate 1e-4 --lr_scheduler_type cosine --sanity_check",
+            "trl dpo --max_steps 1 --output_dir tmp-dpo --model_name_or_path trl-internal-testing/tiny-random-LlamaForCausalLM --dataset_name trl-internal-testing/hh-rlhf-trl-style --learning_rate 1e-4 --lr_scheduler_type cosine --sanity_check",
             shell=True,
             check=True,
         )

tests/test_dpo_trainer.py (4 changes: 2 additions & 2 deletions)
@@ -394,7 +394,7 @@ def test_dpo_lora_bf16_autocast_llama(self):
         # Note this test only works on compute capability > 7 GPU devices
         from peft import LoraConfig
 
-        model_id = "HuggingFaceM4/tiny-random-LlamaForCausalLM"
+        model_id = "trl-internal-testing/tiny-random-LlamaForCausalLM"
         tokenizer = AutoTokenizer.from_pretrained(model_id)
 
         lora_config = LoraConfig(
@@ -519,7 +519,7 @@ def test_dpo_lora_bf16_autocast(self, name, loss_type, pre_compute, gen_during_e
     def test_dpo_lora_tags(self):
         from peft import LoraConfig
 
-        model_id = "HuggingFaceM4/tiny-random-LlamaForCausalLM"
+        model_id = "trl-internal-testing/tiny-random-LlamaForCausalLM"
         tokenizer = AutoTokenizer.from_pretrained(model_id)
 
         lora_config = LoraConfig(

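Both touched tests build a PEFT `LoraConfig` for the tiny Llama checkpoint; the adapter arguments themselves are collapsed in the diff above. A minimal sketch of that setup with hypothetical adapter values (not the ones used in tests/test_dpo_trainer.py):

```python
from peft import LoraConfig
from transformers import AutoTokenizer

model_id = "trl-internal-testing/tiny-random-LlamaForCausalLM"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Hypothetical adapter settings; the real values are not visible in this diff.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
)
```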