Rename trainer arg tokenizer to processing_class #2162

Merged (34 commits, Oct 7, 2024)
Commits (34)
e30f39f  update doc (qgallouedec, Oct 3, 2024)
0d8a793  bco (qgallouedec, Oct 3, 2024)
f03185f  bco (qgallouedec, Oct 3, 2024)
d1c5c3c  cpo (qgallouedec, Oct 3, 2024)
35564bf  revert some cpo changes (qgallouedec, Oct 3, 2024)
42cf70b  dpo (qgallouedec, Oct 3, 2024)
802e1cb  0.14 (qgallouedec, Oct 3, 2024)
e61afce  online dpo (qgallouedec, Oct 3, 2024)
2d2f350  Merge branch 'main' into tokenizer_to_processing_class (qgallouedec, Oct 3, 2024)
0625889  Merge branch 'tokenizer_to_processing_class' of https://github.com/hu… (qgallouedec, Oct 3, 2024)
bb57af7  gkd (qgallouedec, Oct 3, 2024)
8aad102  explicit args gkd (qgallouedec, Oct 4, 2024)
5cc7ef3  kto (qgallouedec, Oct 4, 2024)
6609e07  Merge branch 'main' into tokenizer_to_processing_class (qgallouedec, Oct 4, 2024)
c1bdfab  drop deprecated beta (qgallouedec, Oct 4, 2024)
560e61b  kto type hint (qgallouedec, Oct 4, 2024)
97af75b  nash-md (qgallouedec, Oct 4, 2024)
a8fba85  orpo (qgallouedec, Oct 4, 2024)
cad7c1e  reward (qgallouedec, Oct 4, 2024)
dc10655  sft (qgallouedec, Oct 4, 2024)
8055683  Merge branch 'main' into tokenizer_to_processing_class (qgallouedec, Oct 4, 2024)
753a79a  xpo (qgallouedec, Oct 4, 2024)
d8ca7c0  iterative sft (qgallouedec, Oct 4, 2024)
f70c950  correct type gkd (qgallouedec, Oct 4, 2024)
aff4853  rloo (qgallouedec, Oct 4, 2024)
6ddef1d  fix gkd import (qgallouedec, Oct 4, 2024)
9d662f2  ppo (qgallouedec, Oct 4, 2024)
bc33bf6  sft stack llama (qgallouedec, Oct 4, 2024)
4b1912f  Update trl/trainer/dpo_trainer.py (qgallouedec, Oct 4, 2024)
1a4b002  Update trl/trainer/rloo_trainer.py (qgallouedec, Oct 4, 2024)
cf75054  Update trl/trainer/ppov2_trainer.py (qgallouedec, Oct 4, 2024)
9eabad9  Update trl/trainer/rloo_trainer.py (qgallouedec, Oct 4, 2024)
f24e3eb  Merge branch 'main' into tokenizer_to_processing_class (qgallouedec, Oct 4, 2024)
19e949a  update cpo (qgallouedec, Oct 5, 2024)
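The change across the files below is mechanical: every trainer call that previously received `tokenizer=...` now receives `processing_class=...`. As a quick orientation before the per-file diff, here is a minimal before/after sketch based on the README's DPO example (the model and dataset identifiers are the ones the README already uses):

```python
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOConfig, DPOTrainer

model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
dataset = load_dataset("trl-lib/Capybara-Preferences", split="train")
training_args = DPOConfig(output_dir="Qwen2.5-0.5B-DPO")

# Before this PR, the tokenizer was passed via the `tokenizer` keyword:
#   trainer = DPOTrainer(model=model, args=training_args, train_dataset=dataset, tokenizer=tokenizer)

# After this PR, the same object is passed via `processing_class`, which also
# covers multimodal processors (see the dpo_vlm.py and sft_vlm.py hunks below).
trainer = DPOTrainer(model=model, args=training_args, train_dataset=dataset, processing_class=tokenizer)
trainer.train()
```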
README.md (3 additions, 3 deletions)

@@ -133,7 +133,7 @@ training_args = RewardConfig(output_dir="Qwen2.5-0.5B-Reward", per_device_train_
 trainer = RewardTrainer(
     args=training_args,
     model=model,
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     train_dataset=dataset,
 )
 trainer.train()
@@ -166,7 +166,7 @@ dataset = dataset.map(lambda x: tokenizer(x["prompt"]), remove_columns="prompt")
 training_args = RLOOConfig(output_dir="Qwen2.5-0.5B-RL")
 trainer = RLOOTrainer(
     config=training_args,
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     policy=policy,
     ref_policy=ref_policy,
     reward_model=reward_model,
@@ -189,7 +189,7 @@ model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
 tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")
 dataset = load_dataset("trl-lib/Capybara-Preferences", split="train")
 training_args = DPOConfig(output_dir="Qwen2.5-0.5B-DPO")
-trainer = DPOTrainer(model=model, args=training_args, train_dataset=dataset, tokenizer=tokenizer)
+trainer = DPOTrainer(model=model, args=training_args, train_dataset=dataset, processing_class=tokenizer)
 trainer.train()
 ```
docs/source/bco_trainer.mdx (2 additions, 2 deletions)

@@ -32,7 +32,7 @@ bco_trainer = BCOTrainer(
     model_ref,
     args=training_args,
     train_dataset=train_dataset,
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
 )
 ```
 After this one can then call:
@@ -75,7 +75,7 @@ bco_trainer = BCOTrainer(
     model_ref,
     args=training_args,
     train_dataset=train_dataset,
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     embedding_func=embedding_func,
     embedding_tokenizer=self.embedding_tokenizer,
 )
docs/source/cpo_trainer.mdx (1 addition, 1 deletion)

@@ -32,7 +32,7 @@ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
 train_dataset = load_dataset("trl-lib/Capybara-Preferences", split="train")

 training_args = CPOConfig(output_dir="Qwen2-0.5B-CPO", logging_steps=10)
-trainer = CPOTrainer(model=model, args=training_args, tokenizer=tokenizer, train_dataset=train_dataset)
+trainer = CPOTrainer(model=model, args=training_args, processing_class=tokenizer, train_dataset=train_dataset)
 trainer.train()
 ```
docs/source/dpo_trainer.mdx (4 additions, 4 deletions)

@@ -47,7 +47,7 @@ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
 train_dataset = load_dataset("trl-lib/Capybara-Preferences", split="train")

 training_args = DPOConfig(output_dir="Qwen2-0.5B-DPO", logging_steps=10)
-trainer = DPOTrainer(model=model, args=training_args, tokenizer=tokenizer, train_dataset=train_dataset)
+trainer = DPOTrainer(model=model, args=training_args, processing_class=tokenizer, train_dataset=train_dataset)
 trainer.train()
 ```

@@ -100,8 +100,8 @@ Additionally, unlike standard text-based models where a `tokenizer` is used, for
     model,
     args=training_args,
     train_dataset=train_dataset,
-- tokenizer=tokenizer,
-+ tokenizer=processor,
+- processing_class=tokenizer,
++ processing_class=processor,
 )
 ```

@@ -194,7 +194,7 @@ First install `unsloth` according to the [official documentation](https://github

 - training_args = DPOConfig(output_dir="Qwen2-0.5B-DPO", logging_steps=10)
 + training_args = DPOConfig(output_dir="Qwen2-0.5B-DPO", logging_steps=10, bf16=True)
-trainer = DPOTrainer(model=model, args=training_args, tokenizer=tokenizer, train_dataset=train_dataset)
+trainer = DPOTrainer(model=model, args=training_args, processing_class=tokenizer, train_dataset=train_dataset)
 trainer.train()

 ```
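The second hunk above is the vision-language section of the DPO docs, where a processor rather than a plain tokenizer is passed. A minimal sketch of what a VLM setup looks like after this PR; the model and dataset identifiers here are illustrative assumptions, not taken from this diff:

```python
from datasets import load_dataset
from transformers import AutoModelForVision2Seq, AutoProcessor
from trl import DPOConfig, DPOTrainer

# Illustrative model/dataset choices; any VLM supported by DPOTrainer would do.
model = AutoModelForVision2Seq.from_pretrained("HuggingFaceM4/idefics2-8b")
processor = AutoProcessor.from_pretrained("HuggingFaceM4/idefics2-8b")
train_dataset = load_dataset("openbmb/RLAIF-V-Dataset", split="train")

training_args = DPOConfig(output_dir="idefics2-8b-dpo")
trainer = DPOTrainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    processing_class=processor,  # a processor, not a plain tokenizer, for VLMs
)
trainer.train()
```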
docs/source/gkd_trainer.md (1 addition, 1 deletion)

@@ -74,7 +74,7 @@ trainer = GKDTrainer(
     model=model,
     teacher_model=teacher_model,
     args=training_args,
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     train_dataset=train_dataset,
     eval_dataset=eval_dataset,
 )
docs/source/kto_trainer.mdx (1 addition, 1 deletion)

@@ -83,7 +83,7 @@ kto_trainer = KTOTrainer(
     ref_model,
     args=training_args,
     train_dataset=train_dataset,
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
 )
 ```
 After this one can then call:
docs/source/nash_md_trainer.md (1 addition, 1 deletion)

@@ -41,7 +41,7 @@ trainer = NashMDTrainer(
     model=model,
     reward_model=reward_model,
     args=training_args,
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     train_dataset=train_dataset,
 )
 trainer.train()
docs/source/online_dpo_trainer.md (1 addition, 1 deletion)

@@ -40,7 +40,7 @@ train_dataset = load_dataset("trl-lib/ultrafeedback-prompt", split="train")

 training_args = OnlineDPOConfig(output_dir="online-dpo-qwen2", logging_steps=10)
 trainer = OnlineDPOTrainer(
-    model=model, reward_model=reward_model, args=training_args, tokenizer=tokenizer, train_dataset=train_dataset
+    model=model, reward_model=reward_model, args=training_args, processing_class=tokenizer, train_dataset=train_dataset
 )
 trainer.train()
 ```
docs/source/orpo_trainer.md (1 addition, 1 deletion)

@@ -66,7 +66,7 @@ orpo_trainer = ORPOTrainer(
     model,
     args=training_args,
     train_dataset=train_dataset,
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
 )
 ```
 After this one can then call:
docs/source/reward_trainer.mdx (1 addition, 1 deletion)

@@ -41,7 +41,7 @@ peft_config = LoraConfig(
 trainer = RewardTrainer(
     model=model,
     args=training_args,
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     train_dataset=dataset,
     peft_config=peft_config,
 )
docs/source/sft_trainer.mdx (1 addition, 1 deletion)

@@ -744,7 +744,7 @@ trainer = SFTTrainer(
     args=training_args,
     data_collator=collate_fn,
     train_dataset=train_dataset,
-    tokenizer=processor.tokenizer,
+    processing_class=processor.tokenizer,
 )
 ```
docs/source/xpo_trainer.mdx (1 addition, 1 deletion)

@@ -41,7 +41,7 @@ trainer = XPOTrainer(
     model=model,
     reward_model=reward_model,
     args=training_args,
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     train_dataset=train_dataset,
 )
 trainer.train()
(file name not shown)

@@ -237,7 +237,7 @@ def return_prompt_and_responses(samples) -> Dict[str, str]:
     beta=script_args.beta,
     train_dataset=train_dataset,
     eval_dataset=eval_dataset,
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     peft_config=peft_config,
     max_prompt_length=script_args.max_prompt_length,
     max_length=script_args.max_length,
(file name not shown)

@@ -187,7 +187,7 @@ def create_datasets(tokenizer, args, seed=None):
     peft_config=peft_config,
     max_seq_length=None,
     formatting_func=prepare_sample_text,
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     args=training_args,
 )
 trainer.train()
examples/scripts/bco.py (1 addition, 1 deletion)

@@ -152,7 +152,7 @@ def mean_pooling(model_output, attention_mask):
     args=training_args,
     train_dataset=dataset["train"],
     eval_dataset=dataset["test"],
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     peft_config=get_peft_config(model_args),
     embedding_func=embedding_func,
     embedding_tokenizer=embedding_tokenizer,
examples/scripts/cpo.py (1 addition, 1 deletion)

@@ -100,7 +100,7 @@ class ScriptArguments:
     args=training_args,
     train_dataset=dataset["train"],
     eval_dataset=dataset["test"],
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     peft_config=get_peft_config(model_config),
 )
examples/scripts/dpo.py (1 addition, 1 deletion)

@@ -121,7 +121,7 @@
     args=training_args,
     train_dataset=dataset[script_args.dataset_train_split],
     eval_dataset=dataset[script_args.dataset_test_split],
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     peft_config=peft_config,
 )
examples/scripts/dpo_online.py (1 addition, 1 deletion)

@@ -107,7 +107,7 @@
     args=training_args,
     train_dataset=dataset[script_args.dataset_train_split],
     eval_dataset=dataset[script_args.dataset_test_split],
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     peft_config=get_peft_config(model_config),
 )
 generation_config = GenerationConfig(
examples/scripts/dpo_vlm.py (1 addition, 1 deletion)

@@ -126,7 +126,7 @@ def process(row):
     args=training_args,
     train_dataset=dataset[script_args.dataset_train_split],
     eval_dataset=dataset[script_args.dataset_test_split],
-    tokenizer=processor,
+    processing_class=processor,
     peft_config=peft_config,
 )
examples/scripts/gkd.py (1 addition, 1 deletion)

@@ -122,7 +122,7 @@
     args=training_args,
     train_dataset=dataset[script_args.dataset_train_split],
     eval_dataset=dataset[script_args.dataset_test_split],
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     peft_config=get_peft_config(model_config),
 )
 completions_callback = LogCompletionsCallback(trainer, trainer.generation_config, num_prompts=8)
examples/scripts/kto.py (1 addition, 1 deletion)

@@ -122,7 +122,7 @@ def format_dataset(example):
     args=training_args,
     train_dataset=dataset["train"],
     eval_dataset=dataset["test"],
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     peft_config=get_peft_config(model_args),
 )
examples/scripts/nash_md.py (1 addition, 1 deletion)

@@ -110,7 +110,7 @@
     args=training_args,
     train_dataset=dataset[script_args.dataset_train_split],
     eval_dataset=dataset[script_args.dataset_test_split],
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
 )
 generation_config = GenerationConfig(
     max_new_tokens=training_args.max_new_tokens, do_sample=True, temperature=training_args.temperature
examples/scripts/orpo.py (1 addition, 1 deletion)

@@ -112,7 +112,7 @@ def process(row):
     args=training_args,
     train_dataset=dataset["train"],
     eval_dataset=dataset["test"],
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     peft_config=get_peft_config(model_config),
 )
examples/scripts/ppo/ppo.py (1 addition, 1 deletion)

@@ -120,7 +120,7 @@ def tokenize(element):
 ################
 trainer = PPOv2Trainer(
     config=training_args,
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     policy=policy,
     ref_policy=ref_policy,
     reward_model=reward_model,
examples/scripts/ppo/ppo_tldr.py (1 addition, 1 deletion)

@@ -125,7 +125,7 @@ def tokenize(element):
 ################
 trainer = PPOv2Trainer(
     config=training_args,
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     policy=policy,
     ref_policy=ref_policy,
     reward_model=reward_model,
examples/scripts/reward_modeling.py (1 addition, 1 deletion)

@@ -111,7 +111,7 @@
 ##########
 trainer = RewardTrainer(
     model=model,
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     args=training_args,
     train_dataset=dataset[script_args.dataset_train_split],
     eval_dataset=dataset[script_args.dataset_test_split],
examples/scripts/rloo/rloo.py (1 addition, 1 deletion)

@@ -121,7 +121,7 @@ def tokenize(element):
 ################
 trainer = RLOOTrainer(
     config=training_args,
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     policy=policy,
     ref_policy=ref_policy,
     reward_model=reward_model,
examples/scripts/rloo/rloo_tldr.py (1 addition, 1 deletion)

@@ -125,7 +125,7 @@ def tokenize(element):
 ################
 trainer = RLOOTrainer(
     config=training_args,
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     policy=policy,
     ref_policy=ref_policy,
     reward_model=reward_model,
examples/scripts/sft.py (1 addition, 1 deletion)

@@ -95,7 +95,7 @@
     args=training_args,
     train_dataset=dataset[script_args.dataset_train_split],
     eval_dataset=dataset[script_args.dataset_test_split],
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     peft_config=get_peft_config(model_config),
 )
examples/scripts/sft_vlm.py (1 addition, 1 deletion)

@@ -119,7 +119,7 @@ def collate_fn(examples):
     data_collator=collate_fn,
     train_dataset=dataset[script_args.dataset_train_split],
     eval_dataset=dataset[script_args.dataset_test_split],
-    tokenizer=processor.tokenizer,
+    processing_class=processor.tokenizer,
     peft_config=get_peft_config(model_config),
 )
examples/scripts/xpo.py (1 addition, 1 deletion)

@@ -94,7 +94,7 @@
     args=training_args,
     train_dataset=dataset[script_args.dataset_train_split],
     eval_dataset=dataset[script_args.dataset_test_split],
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
 )
 generation_config = GenerationConfig(
     max_new_tokens=training_args.max_new_tokens, do_sample=True, temperature=training_args.temperature
tests/slow/test_dpo_slow.py (3 additions, 3 deletions)

@@ -85,7 +85,7 @@ def test_dpo_bare_model(self, model_id, loss_type, pre_compute_logits):
     model=model,
     ref_model=None,
     args=training_args,
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     train_dataset=self.dataset,
     eval_dataset=self.dataset,
 )
@@ -142,7 +142,7 @@ def test_dpo_peft_model(self, model_id, loss_type, pre_compute_logits, gradient_
     model=model,
     ref_model=None,
     args=training_args,
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     train_dataset=self.dataset,
     eval_dataset=self.dataset,
     peft_config=self.peft_config,
@@ -206,7 +206,7 @@ def test_dpo_peft_model_qlora(self, model_id, loss_type, pre_compute_logits, gra
     model=model,
     ref_model=None,
     args=training_args,
-    tokenizer=tokenizer,
+    processing_class=tokenizer,
     train_dataset=self.dataset,
     eval_dataset=self.dataset,
     peft_config=self.peft_config,