From d2b24af1e1d81bf6310652cd87e98ff4748d862a Mon Sep 17 00:00:00 2001
From: Costa Huang
Date: Mon, 5 Jun 2023 18:53:05 +0000
Subject: [PATCH 1/6] remove unused var

---
 docs/source/quickstart.mdx | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/quickstart.mdx b/docs/source/quickstart.mdx
index 2dfb428d68..3fe04a0950 100644
--- a/docs/source/quickstart.mdx
+++ b/docs/source/quickstart.mdx
@@ -19,7 +19,7 @@ The following code illustrates the steps above.
 # 0. imports
 import torch
 from transformers import GPT2Tokenizer
-from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead, create_reference_model
+from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead
 from trl.core import respond_to_batch
 
 # 1. load a pretrained model

From 6469d56e36863877ceeedadaeba08c7d24a3fba2 Mon Sep 17 00:00:00 2001
From: Costa Huang
Date: Mon, 5 Jun 2023 19:18:23 +0000
Subject: [PATCH 2/6] bug fix

---
 docs/source/quickstart.mdx | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/docs/source/quickstart.mdx b/docs/source/quickstart.mdx
index 3fe04a0950..5fa717fd86 100644
--- a/docs/source/quickstart.mdx
+++ b/docs/source/quickstart.mdx
@@ -26,6 +26,7 @@ from trl.core import respond_to_batch
 model = AutoModelForCausalLMWithValueHead.from_pretrained('gpt2')
 model_ref = AutoModelForCausalLMWithValueHead.from_pretrained('gpt2')
 tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+tokenizer.pad_token = tokenizer.eos_token
 
 # 2. initialize trainer
 ppo_config = {'batch_size': 1}
@@ -34,7 +35,7 @@ ppo_trainer = PPOTrainer(config, model, model_ref, tokenizer)
 
 # 3. encode a query
 query_txt = "This morning I went to the "
-query_tensor = tokenizer.encode(query_txt, return_tensors="pt")
+query_tensor = tokenizer.encode(query_txt, return_tensors="pt").to(model.pretrained_model.device)
 
 # 4. generate model response
 response_tensor = respond_to_batch(model, query_tensor)
@@ -42,7 +43,7 @@ response_txt = tokenizer.decode(response_tensor[0,:])
 
 # 5. define a reward for response
 # (this could be any reward such as human feedback or output from another model)
-reward = [torch.tensor(1.0)]
+reward = [torch.tensor(1.0, device=model.pretrained_model.device)]
 
 # 6. train model with ppo
 train_stats = ppo_trainer.step([query_tensor[0]], [response_tensor[0]], reward)

From fce0b9ebc44825d1f3e35f8c7ff744cc8caaa449 Mon Sep 17 00:00:00 2001
From: Costa Huang
Date: Tue, 6 Jun 2023 14:32:56 +0000
Subject: [PATCH 3/6] update docs, add e2e CI

---
 docs/source/quickstart.mdx | 14 +++++++++++---
 examples/hello_world.py    | 39 ++++++++++++++++++++++++++++++++++++++
 tests/test_e2e.py          |  9 +++++++++
 3 files changed, 59 insertions(+), 3 deletions(-)
 create mode 100644 examples/hello_world.py
 create mode 100644 tests/test_e2e.py

diff --git a/docs/source/quickstart.mdx b/docs/source/quickstart.mdx
index 5fa717fd86..17ca9c9d22 100644
--- a/docs/source/quickstart.mdx
+++ b/docs/source/quickstart.mdx
@@ -20,7 +20,7 @@ The following code illustrates the steps above.
 import torch
 from transformers import GPT2Tokenizer
 from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead
-from trl.core import respond_to_batch
+from trl.core import LengthSampler, respond_to_batch
 
 # 1. load a pretrained model
 model = AutoModelForCausalLMWithValueHead.from_pretrained('gpt2')
@@ -38,8 +38,16 @@ query_txt = "This morning I went to the "
 query_tensor = tokenizer.encode(query_txt, return_tensors="pt").to(model.pretrained_model.device)
 
 # 4. generate model response
-response_tensor = respond_to_batch(model, query_tensor)
-response_txt = tokenizer.decode(response_tensor[0,:])
+generation_kwargs = {
+    "min_length": -1,
+    "top_k": 0.0,
+    "top_p": 1.0,
+    "do_sample": True,
+    "pad_token_id": tokenizer.eos_token_id,
+    "max_new_tokens": 20,
+}
+response_tensor = ppo_trainer.generate([item for item in query_tensor], return_prompt=False, **generation_kwargs)
+response_txt = tokenizer.decode(response_tensor[0])
 
 # 5. define a reward for response
 # (this could be any reward such as human feedback or output from another model)
diff --git a/examples/hello_world.py b/examples/hello_world.py
new file mode 100644
index 0000000000..c4a96dc45c
--- /dev/null
+++ b/examples/hello_world.py
@@ -0,0 +1,39 @@
+# 0. imports
+import torch
+from transformers import GPT2Tokenizer
+from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead
+from trl.core import LengthSampler, respond_to_batch
+
+# 1. load a pretrained model
+model = AutoModelForCausalLMWithValueHead.from_pretrained('gpt2')
+model_ref = AutoModelForCausalLMWithValueHead.from_pretrained('gpt2')
+tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+tokenizer.pad_token = tokenizer.eos_token
+
+# 2. initialize trainer
+ppo_config = {'batch_size': 1}
+config = PPOConfig(**ppo_config)
+ppo_trainer = PPOTrainer(config, model, model_ref, tokenizer)
+
+# 3. encode a query
+query_txt = "This morning I went to the "
+query_tensor = tokenizer.encode(query_txt, return_tensors="pt").to(model.pretrained_model.device)
+
+# 4. generate model response
+generation_kwargs = {
+    "min_length": -1,
+    "top_k": 0.0,
+    "top_p": 1.0,
+    "do_sample": True,
+    "pad_token_id": tokenizer.eos_token_id,
+    "max_new_tokens": 20,
+}
+response_tensor = ppo_trainer.generate([item for item in query_tensor], return_prompt=False, **generation_kwargs)
+response_txt = tokenizer.decode(response_tensor[0])
+
+# 5. define a reward for response
+# (this could be any reward such as human feedback or output from another model)
+reward = [torch.tensor(1.0, device=model.pretrained_model.device)]
+
+# 6. train model with ppo
+train_stats = ppo_trainer.step([query_tensor[0]], [response_tensor[0]], reward)
\ No newline at end of file
diff --git a/tests/test_e2e.py b/tests/test_e2e.py
new file mode 100644
index 0000000000..fc7deb4d1a
--- /dev/null
+++ b/tests/test_e2e.py
@@ -0,0 +1,9 @@
+import subprocess
+
+
+def test_dqn_jax():
+    subprocess.run(
+        "python examples/hello_world.py",
+        shell=True,
+        check=True,
+    )

From e2347c01d7fc5df9871ecd16eacc903343c007c8 Mon Sep 17 00:00:00 2001
From: Costa Huang
Date: Tue, 6 Jun 2023 14:50:55 +0000
Subject: [PATCH 4/6] black

---
 docs/source/quickstart.mdx |  8 ++++----
 examples/hello_world.py    | 10 +++++-----
 tests/test_e2e.py          |  2 +-
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/docs/source/quickstart.mdx b/docs/source/quickstart.mdx
index 17ca9c9d22..c7b28ef863 100644
--- a/docs/source/quickstart.mdx
+++ b/docs/source/quickstart.mdx
@@ -23,13 +23,13 @@ from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead
 from trl.core import LengthSampler, respond_to_batch
 
 # 1. load a pretrained model
-model = AutoModelForCausalLMWithValueHead.from_pretrained('gpt2')
-model_ref = AutoModelForCausalLMWithValueHead.from_pretrained('gpt2')
-tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+model = AutoModelForCausalLMWithValueHead.from_pretrained("gpt2")
+model_ref = AutoModelForCausalLMWithValueHead.from_pretrained("gpt2")
+tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
 tokenizer.pad_token = tokenizer.eos_token
 
 # 2. initialize trainer
-ppo_config = {'batch_size': 1}
+ppo_config = {"batch_size": 1}
 config = PPOConfig(**ppo_config)
 ppo_trainer = PPOTrainer(config, model, model_ref, tokenizer)
 
diff --git a/examples/hello_world.py b/examples/hello_world.py
index c4a96dc45c..bdbdc8c1d3 100644
--- a/examples/hello_world.py
+++ b/examples/hello_world.py
@@ -5,13 +5,13 @@
 from trl.core import LengthSampler, respond_to_batch
 
 # 1. load a pretrained model
-model = AutoModelForCausalLMWithValueHead.from_pretrained('gpt2')
-model_ref = AutoModelForCausalLMWithValueHead.from_pretrained('gpt2')
-tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
+model = AutoModelForCausalLMWithValueHead.from_pretrained("gpt2")
+model_ref = AutoModelForCausalLMWithValueHead.from_pretrained("gpt2")
+tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
 tokenizer.pad_token = tokenizer.eos_token
 
 # 2. initialize trainer
-ppo_config = {'batch_size': 1}
+ppo_config = {"batch_size": 1}
 config = PPOConfig(**ppo_config)
 ppo_trainer = PPOTrainer(config, model, model_ref, tokenizer)
 
@@ -36,4 +36,4 @@
 reward = [torch.tensor(1.0, device=model.pretrained_model.device)]
 
 # 6. train model with ppo
-train_stats = ppo_trainer.step([query_tensor[0]], [response_tensor[0]], reward)
\ No newline at end of file
+train_stats = ppo_trainer.step([query_tensor[0]], [response_tensor[0]], reward)
diff --git a/tests/test_e2e.py b/tests/test_e2e.py
index fc7deb4d1a..7e742329de 100644
--- a/tests/test_e2e.py
+++ b/tests/test_e2e.py
@@ -1,7 +1,7 @@
 import subprocess
 
 
-def test_dqn_jax():
+def test_hello_world():
     subprocess.run(
         "python examples/hello_world.py",
         shell=True,

From 042d8ed6ef5acf01d6a49b6466f9419a643debc2 Mon Sep 17 00:00:00 2001
From: Costa Huang
Date: Tue, 6 Jun 2023 15:12:02 +0000
Subject: [PATCH 5/6] isort

---
 docs/source/quickstart.mdx | 4 +++-
 examples/hello_world.py    | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/docs/source/quickstart.mdx b/docs/source/quickstart.mdx
index c7b28ef863..e3f0bd8ecd 100644
--- a/docs/source/quickstart.mdx
+++ b/docs/source/quickstart.mdx
@@ -19,9 +19,11 @@ The following code illustrates the steps above.
 # 0. imports
 import torch
 from transformers import GPT2Tokenizer
-from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead
+
+from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer
 from trl.core import LengthSampler, respond_to_batch
+
 
 # 1. load a pretrained model
 model = AutoModelForCausalLMWithValueHead.from_pretrained("gpt2")
 model_ref = AutoModelForCausalLMWithValueHead.from_pretrained("gpt2")
diff --git a/examples/hello_world.py b/examples/hello_world.py
index bdbdc8c1d3..3d266fb184 100644
--- a/examples/hello_world.py
+++ b/examples/hello_world.py
@@ -1,9 +1,11 @@
 # 0. imports
 import torch
 from transformers import GPT2Tokenizer
-from trl import PPOTrainer, PPOConfig, AutoModelForCausalLMWithValueHead
+
+from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer
 from trl.core import LengthSampler, respond_to_batch
+
 
 # 1. load a pretrained model
 model = AutoModelForCausalLMWithValueHead.from_pretrained("gpt2")
 model_ref = AutoModelForCausalLMWithValueHead.from_pretrained("gpt2")

From 92f5898536a6579e42754f8b8a316f25093f8f9b Mon Sep 17 00:00:00 2001
From: Costa Huang
Date: Tue, 6 Jun 2023 15:31:03 +0000
Subject: [PATCH 6/6] CI

---
 docs/source/quickstart.mdx | 1 -
 examples/hello_world.py    | 1 -
 2 files changed, 2 deletions(-)

diff --git a/docs/source/quickstart.mdx b/docs/source/quickstart.mdx
index e3f0bd8ecd..cc90a14480 100644
--- a/docs/source/quickstart.mdx
+++ b/docs/source/quickstart.mdx
@@ -21,7 +21,6 @@ import torch
 from transformers import GPT2Tokenizer
 
 from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer
-from trl.core import LengthSampler, respond_to_batch
 
 
 # 1. load a pretrained model
diff --git a/examples/hello_world.py b/examples/hello_world.py
index 3d266fb184..138defb5b4 100644
--- a/examples/hello_world.py
+++ b/examples/hello_world.py
@@ -3,7 +3,6 @@
 from transformers import GPT2Tokenizer
 
 from trl import AutoModelForCausalLMWithValueHead, PPOConfig, PPOTrainer
-from trl.core import LengthSampler, respond_to_batch
 
 
 # 1. load a pretrained model