diff --git a/examples/notebooks/gpt2-sentiment.ipynb b/examples/notebooks/gpt2-sentiment.ipynb
index c875b6da90..bbfc0a5eaf 100644
--- a/examples/notebooks/gpt2-sentiment.ipynb
+++ b/examples/notebooks/gpt2-sentiment.ipynb
@@ -92,7 +92,7 @@
" log_with=\"wandb\",\n",
")\n",
"\n",
- "sent_kwargs = {\"return_all_scores\": True, \"function_to_apply\": \"none\", \"batch_size\": 16}"
+ "sent_kwargs = {\"top_k\": None, \"function_to_apply\": \"none\", \"batch_size\": 16}"
]
},
{
@@ -110,7 +110,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "You can see that we load a GPT2 model called `gpt2_imdb`. This model was additionally fine-tuned on the IMDB dataset for 1 epoch with the huggingface [script](https://github.com/huggingface/transformers/blob/master/examples/run_language_modeling.py) (no special settings). The other parameters are mostly taken from the original paper [\"Fine-Tuning Language Models from Human Preferences\"](\n",
+ "You can see that we load a GPT2 model called `gpt2_imdb`. This model was additionally fine-tuned on the IMDB dataset for 1 epoch with the Hugging Face [script](https://github.com/huggingface/transformers/blob/main/examples/legacy/run_language_modeling.py) (no special settings). The other parameters are mostly taken from the original paper [\"Fine-Tuning Language Models from Human Preferences\"](\n",
"https://huggingface.co/papers/1909.08593). This model as well as the BERT model is available in the Huggingface model zoo [here](https://huggingface.co/models). The following code should automatically download the models."
]
},
@@ -134,16 +134,7 @@
"cell_type": "code",
"execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Reusing dataset imdb (/home/leandro/.cache/huggingface/datasets/imdb/plain_text/1.0.0/2fdd8b9bcadd6e7055e742a706876ba43f19faee861df134affd7a3f60fc38a1)\n",
- "Loading cached processed dataset at /home/leandro/.cache/huggingface/datasets/imdb/plain_text/1.0.0/2fdd8b9bcadd6e7055e742a706876ba43f19faee861df134affd7a3f60fc38a1/cache-ff455473e884c6a3.arrow\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"def build_dataset(config, dataset_name=\"imdb\", input_min_text_length=2, input_max_text_length=8):\n",
" \"\"\"\n",
@@ -270,8 +261,8 @@
{
"data": {
"text/plain": [
- "[[{'label': 'NEGATIVE', 'score': 2.335048198699951},\n",
- " {'label': 'POSITIVE', 'score': -2.726576566696167}]]"
+ "[{'label': 'NEGATIVE', 'score': 2.335048198699951},\n",
+ " {'label': 'POSITIVE', 'score': -2.726576328277588}]"
]
},
"execution_count": null,
@@ -292,8 +283,8 @@
{
"data": {
"text/plain": [
- "[[{'label': 'NEGATIVE', 'score': -2.2947897911071777},\n",
- " {'label': 'POSITIVE', 'score': 2.557039737701416}]]"
+ "[{'label': 'POSITIVE', 'score': 2.557040214538574},\n",
+ " {'label': 'NEGATIVE', 'score': -2.294790267944336}]"
]
},
"execution_count": null,
@@ -371,7 +362,7 @@
"}\n",
"\n",
"\n",
- "for epoch, batch in tqdm(enumerate(ppo_trainer.dataloader)):\n",
+ "for epoch, batch in enumerate(tqdm(ppo_trainer.dataloader)):\n",
" query_tensors = batch[\"input_ids\"]\n",
"\n",
" #### Get response from gpt2\n",
@@ -386,7 +377,8 @@
" #### Compute sentiment score\n",
" texts = [q + r for q, r in zip(batch[\"query\"], batch[\"response\"])]\n",
" pipe_outputs = sentiment_pipe(texts, **sent_kwargs)\n",
- " rewards = [torch.tensor(output[1][\"score\"]) for output in pipe_outputs]\n",
+ " positive_scores = [item[\"score\"] for output in pipe_outputs for item in output if item[\"label\"] == \"POSITIVE\"]\n",
+ " rewards = [torch.tensor(score) for score in positive_scores]\n",
"\n",
" #### Run PPO step\n",
" stats = ppo_trainer.step(query_tensors, response_tensors, rewards)\n",
@@ -398,7 +390,7 @@
"metadata": {},
"source": [
"### Training progress\n",
- "If you are tracking the training progress with Weights&Biases you should see a plot similar to the one below. Check out the interactive sample report on wandb.ai: [link](https://app.wandb.ai/huggingface/trl-showcase/runs/1jtvxb1m/).\n",
+ "If you are tracking the training progress with Weights & Biases, you should see a plot similar to the one below. Check out the interactive sample report on wandb.ai: [link](https://wandb.ai/huggingface/trl/runs/w9l3110g).\n",
"\n",
"
\n",
"
data:image/s3,"s3://crabby-images/22946/22946b2e41b311aa7e22356bb858edb042a66171" alt=""
\n",
@@ -424,14 +416,6 @@
"execution_count": null,
"metadata": {},
"outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/leandro/miniconda3/envs/trl/lib/python3.9/site-packages/transformers/pipelines/base.py:1075: UserWarning: You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset\n",
- " warnings.warn(\n"
- ]
- },
{
"data": {
"text/html": [
@@ -463,208 +447,208 @@
"
\n",
" \n",
" 0 | \n",
- " Oh dear, | \n",
- " what are I saying?! I fast-forwarded through | \n",
- " I must say that I are hanging my head on this | \n",
- " -0.858954 | \n",
- " -1.007609 | \n",
+ " I rented Zero Day from | \n",
+ " people who hate Aboriginals. This movie reall... | \n",
+ " the store. I love this film and I also think ... | \n",
+ " -0.304574 | \n",
+ " 2.731879 | \n",
"
\n",
" \n",
" 1 | \n",
- " I've seen | \n",
- " it, as well.<br | \n",
- " three million dialogue throughout, and | \n",
- " 1.996807 | \n",
- " 2.240883 | \n",
+ " The only thing that prevented this | \n",
+ " amazing mistake is the | \n",
+ " wonderful film was the | \n",
+ " -1.532142 | \n",
+ " 1.554841 | \n",
"
\n",
" \n",
" 2 | \n",
- " Hi:<br /><br | \n",
- " />This movie is a turkey though when it comes to | \n",
- " />I also like that movie. It's so funny | \n",
- " -0.438191 | \n",
- " 2.415630 | \n",
+ " I've read a few of the | \n",
+ " women's reviews that said they were really sh... | \n",
+ " series so I really like the show: it's probab... | \n",
+ " -2.555317 | \n",
+ " 2.638825 | \n",
"
\n",
" \n",
" 3 | \n",
- " I'm a writer | \n",
- " and I'm not going to be asked to | \n",
- " , not a screenwriter. I've written | \n",
- " -0.655991 | \n",
- " -0.724324 | \n",
+ " This is the | \n",
+ " initial sequel when the Golden Ocean building... | \n",
+ " best movie I have seen. And I absolutely reco... | \n",
+ " 1.258972 | \n",
+ " 2.867189 | \n",
"
\n",
" \n",
" 4 | \n",
- " If you | \n",
- " absolutely love sensitive romance, the plot a... | \n",
- " are looking at the cinematography, the acting, | \n",
- " 2.221309 | \n",
- " 0.148751 | \n",
+ " A classic cartoon, always enjoyable and | \n",
+ " even hilarious! the show has my vote for seve... | \n",
+ " often funny. Until now, it's approached is aw... | \n",
+ " 2.797969 | \n",
+ " 2.695254 | \n",
"
\n",
" \n",
" 5 | \n",
- " OMG this | \n",
- " casting cast. Obi cult breezy, this is | \n",
- " movie was totally wonderful, I it was the ide... | \n",
- " -1.533139 | \n",
- " 2.590190 | \n",
+ " This has to | \n",
+ " be the one least watched. This whole | \n",
+ " be the best film I have ever seen | \n",
+ " -1.598879 | \n",
+ " 2.565786 | \n",
"
\n",
" \n",
" 6 | \n",
- " It's | \n",
- " unrealistic; the guy who was supposed to be E... | \n",
- " a very good film. It reminds us about over | \n",
- " -2.097017 | \n",
- " 2.835831 | \n",
+ " Happy Go Lovely is | \n",
+ " a real bucket list movie | \n",
+ " a wonderful show that's | \n",
+ " 1.680087 | \n",
+ " 2.856708 | \n",
"
\n",
" \n",
" 7 | \n",
- " There is a really | \n",
- " awful laptop game!<br /><br />I used to | \n",
- " interesting story that set us the journey. Th... | \n",
- " -2.341743 | \n",
- " 2.282939 | \n",
+ " Wow, I just saw | \n",
+ " a good flick. Winner of | \n",
+ " this amazing story beautifully presented; | \n",
+ " 2.194669 | \n",
+ " 2.862617 | \n",
"
\n",
" \n",
" 8 | \n",
- " This is | \n",
- " my favorite part about | \n",
- " a well thought well | \n",
- " 2.554794 | \n",
- " 2.734139 | \n",
+ " This movie makes several mistakes | \n",
+ " against it as well. First, the pure comedy le... | \n",
+ " !! This is one terrific movie and everyone who... | \n",
+ " -0.951899 | \n",
+ " 2.756286 | \n",
"
\n",
" \n",
" 9 | \n",
- " Wasn't | \n",
- " Wasn't it clichéd?<|endoftext|> | \n",
- " anyone else interested in this movie? It's a ... | \n",
- " -1.790802 | \n",
- " 2.631960 | \n",
+ " Branagh and | \n",
+ " peace between those who do find | \n",
+ " stunning choreography and the spectacular | \n",
+ " 1.767902 | \n",
+ " 2.767675 | \n",
"
\n",
" \n",
" 10 | \n",
- " This film is another of director Tim | \n",
- " Burton's masterpieces | \n",
- " Curry's best bombs | \n",
- " 2.622917 | \n",
- " 2.544106 | \n",
+ " I might have | \n",
+ " missed. It's certainly worth checking out con... | \n",
+ " liked this. It is a refreshing look into the ... | \n",
+ " 2.138980 | \n",
+ " 2.735727 | \n",
"
\n",
" \n",
" 11 | \n",
- " I thought this movie | \n",
- " was excellent. I actually laughed 6 times and... | \n",
- " was perfect, and I believe it's almost overlo... | \n",
- " 2.548022 | \n",
- " 2.601913 | \n",
+ " Really, really bad. How does | \n",
+ " it make a niche for a kids movie? | \n",
+ " this point work? When he has my love | \n",
+ " -2.716649 | \n",
+ " -2.488236 | \n",
"
\n",
" \n",
" 12 | \n",
- " This early John Wayne | \n",
- " films looked like an abandoned police beating | \n",
- " film is a realistic portrayal of what | \n",
- " -1.742279 | \n",
- " 2.609762 | \n",
+ " What another reviewer called lack of | \n",
+ " theatrics? <br /><br />Breaking into the thea... | \n",
+ " continuity and rewarding moments like this pi... | \n",
+ " -1.887323 | \n",
+ " -0.052886 | \n",
"
\n",
" \n",
" 13 | \n",
- " I was | \n",
- " given an experience-a big one, almost 25 | \n",
- " very happy with all the reflections and this ... | \n",
- " 2.250709 | \n",
- " 2.558540 | \n",
+ " This is simply one | \n",
+ " of the worst recent | \n",
+ " of my favorite movies | \n",
+ " -2.842519 | \n",
+ " 2.787408 | \n",
"
\n",
" \n",
" 14 | \n",
- " Embarrassingly, I | \n",
- " am more at a strict conformity after getting ... | \n",
- " had never seen a movie before. There was one ... | \n",
- " -2.021666 | \n",
- " -1.803383 | \n",
+ " \"Perhaps we can arrange | \n",
+ " for a beautiful disposition to take place her... | \n",
+ " a hisra cinema with really good music; glorio... | \n",
+ " 1.227563 | \n",
+ " 2.700343 | \n",
"
\n",
" \n",
" 15 | \n",
- " I am a fan | \n",
- " of living on simple islands, and we have visi... | \n",
- " of many things and learned how to appreciate ... | \n",
- " 1.791297 | \n",
- " 2.324461 | \n",
+ " Richard Willaims | \n",
+ " and Seth Rogen star in the lead roles | \n",
+ " brilliantly captures the calm, romantic suspe... | \n",
+ " 0.247049 | \n",
+ " 2.874593 | \n",
"
\n",
" \n",
"\n",
"
"
],
"text/plain": [
- " query \\\n",
- "0 Oh dear, \n",
- "1 I've seen \n",
- "2 Hi:
This movie is a turkey though when it comes to \n",
- "3 and I'm not going to be asked to \n",
- "4 absolutely love sensitive romance, the plot a... \n",
- "5 casting cast. Obi cult breezy, this is \n",
- "6 unrealistic; the guy who was supposed to be E... \n",
- "7 awful laptop game!
I used to \n",
- "8 my favorite part about \n",
- "9 Wasn't it clichéd?<|endoftext|> \n",
- "10 Burton's masterpieces \n",
- "11 was excellent. I actually laughed 6 times and... \n",
- "12 films looked like an abandoned police beating \n",
- "13 given an experience-a big one, almost 25 \n",
- "14 am more at a strict conformity after getting ... \n",
- "15 of living on simple islands, and we have visi... \n",
+ "0 people who hate Aboriginals. This movie reall... \n",
+ "1 amazing mistake is the \n",
+ "2 women's reviews that said they were really sh... \n",
+ "3 initial sequel when the Golden Ocean building... \n",
+ "4 even hilarious! the show has my vote for seve... \n",
+ "5 be the one least watched. This whole \n",
+ "6 a real bucket list movie \n",
+ "7 a good flick. Winner of \n",
+ "8 against it as well. First, the pure comedy le... \n",
+ "9 peace between those who do find \n",
+ "10 missed. It's certainly worth checking out con... \n",
+ "11 it make a niche for a kids movie? \n",
+ "12 theatrics?
Breaking into the thea... \n",
+ "13 of the worst recent \n",
+ "14 for a beautiful disposition to take place her... \n",
+ "15 and Seth Rogen star in the lead roles \n",
"\n",
" response (after) rewards (before) \\\n",
- "0 I must say that I are hanging my head on this -0.858954 \n",
- "1 three million dialogue throughout, and 1.996807 \n",
- "2 />I also like that movie. It's so funny -0.438191 \n",
- "3 , not a screenwriter. I've written -0.655991 \n",
- "4 are looking at the cinematography, the acting, 2.221309 \n",
- "5 movie was totally wonderful, I it was the ide... -1.533139 \n",
- "6 a very good film. It reminds us about over -2.097017 \n",
- "7 interesting story that set us the journey. Th... -2.341743 \n",
- "8 a well thought well 2.554794 \n",
- "9 anyone else interested in this movie? It's a ... -1.790802 \n",
- "10 Curry's best bombs 2.622917 \n",
- "11 was perfect, and I believe it's almost overlo... 2.548022 \n",
- "12 film is a realistic portrayal of what -1.742279 \n",
- "13 very happy with all the reflections and this ... 2.250709 \n",
- "14 had never seen a movie before. There was one ... -2.021666 \n",
- "15 of many things and learned how to appreciate ... 1.791297 \n",
+ "0 the store. I love this film and I also think ... -0.304574 \n",
+ "1 wonderful film was the -1.532142 \n",
+ "2 series so I really like the show: it's probab... -2.555317 \n",
+ "3 best movie I have seen. And I absolutely reco... 1.258972 \n",
+ "4 often funny. Until now, it's approached is aw... 2.797969 \n",
+ "5 be the best film I have ever seen -1.598879 \n",
+ "6 a wonderful show that's 1.680087 \n",
+ "7 this amazing story beautifully presented; 2.194669 \n",
+ "8 !! This is one terrific movie and everyone who... -0.951899 \n",
+ "9 stunning choreography and the spectacular 1.767902 \n",
+ "10 liked this. It is a refreshing look into the ... 2.138980 \n",
+ "11 this point work? When he has my love -2.716649 \n",
+ "12 continuity and rewarding moments like this pi... -1.887323 \n",
+ "13 of my favorite movies -2.842519 \n",
+ "14 a hisra cinema with really good music; glorio... 1.227563 \n",
+ "15 brilliantly captures the calm, romantic suspe... 0.247049 \n",
"\n",
" rewards (after) \n",
- "0 -1.007609 \n",
- "1 2.240883 \n",
- "2 2.415630 \n",
- "3 -0.724324 \n",
- "4 0.148751 \n",
- "5 2.590190 \n",
- "6 2.835831 \n",
- "7 2.282939 \n",
- "8 2.734139 \n",
- "9 2.631960 \n",
- "10 2.544106 \n",
- "11 2.601913 \n",
- "12 2.609762 \n",
- "13 2.558540 \n",
- "14 -1.803383 \n",
- "15 2.324461 "
+ "0 2.731879 \n",
+ "1 1.554841 \n",
+ "2 2.638825 \n",
+ "3 2.867189 \n",
+ "4 2.695254 \n",
+ "5 2.565786 \n",
+ "6 2.856708 \n",
+ "7 2.862617 \n",
+ "8 2.756286 \n",
+ "9 2.767675 \n",
+ "10 2.735727 \n",
+ "11 -2.488236 \n",
+ "12 -0.052886 \n",
+ "13 2.787408 \n",
+ "14 2.700343 \n",
+ "15 2.874593 "
]
},
"execution_count": null,
@@ -701,10 +685,14 @@
"\n",
"#### sentiment analysis of query/response pairs before/after\n",
"texts = [q + r for q, r in zip(game_data[\"query\"], game_data[\"response (before)\"])]\n",
- "game_data[\"rewards (before)\"] = [output[1][\"score\"] for output in sentiment_pipe(texts, **sent_kwargs)]\n",
+ "pipe_outputs = sentiment_pipe(texts, **sent_kwargs)\n",
+ "positive_scores = [item[\"score\"] for output in pipe_outputs for item in output if item[\"label\"] == \"POSITIVE\"]\n",
+ "game_data[\"rewards (before)\"] = positive_scores\n",
"\n",
"texts = [q + r for q, r in zip(game_data[\"query\"], game_data[\"response (after)\"])]\n",
- "game_data[\"rewards (after)\"] = [output[1][\"score\"] for output in sentiment_pipe(texts, **sent_kwargs)]\n",
+ "pipe_outputs = sentiment_pipe(texts, **sent_kwargs)\n",
+ "positive_scores = [item[\"score\"] for output in pipe_outputs for item in output if item[\"label\"] == \"POSITIVE\"]\n",
+ "game_data[\"rewards (after)\"] = positive_scores\n",
"\n",
"# store results in a dataframe\n",
"df_results = pd.DataFrame(game_data)\n",
@@ -733,8 +721,8 @@
{
"data": {
"text/plain": [
- "rewards (before) 0.156629\n",
- "rewards (after) 1.686487\n",
+ "rewards (before) 0.239116\n",
+ "rewards (after) 2.475334\n",
"dtype: float64"
]
},
@@ -752,8 +740,8 @@
{
"data": {
"text/plain": [
- "rewards (before) -0.547091\n",
- "rewards (after) 2.479868\n",
+ "rewards (before) 0.422371\n",
+ "rewards (after) 2.701585\n",
"dtype: float64"
]
},
@@ -782,45 +770,6 @@
"execution_count": null,
"metadata": {},
"outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/leandro/miniconda3/envs/trl/lib/python3.9/site-packages/huggingface_hub/hf_api.py:1001: FutureWarning: `create_repo` now takes `token` as an optional positional argument. Be sure to adapt your code!\n",
- " warnings.warn(\n",
- "Cloning https://huggingface.co/lvwerra/gpt2-imdb-pos-v2 into local empty directory.\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "a953a6d0c465432bbc39aca826d37aaf",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- "Upload file pytorch_model.bin: 0%| | 32.0k/487M [00:00, ?B/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "remote: Enforcing permissions... \n",
- "remote: Allowed refs: all \n",
- "To https://huggingface.co/lvwerra/gpt2-imdb-pos-v2\n",
- " 369b075..28b9865 main -> main\n",
- "\n",
- "remote: Enforcing permissions... \n",
- "remote: Allowed refs: all \n",
- "To https://huggingface.co/lvwerra/gpt2-imdb-pos-v2\n",
- " 28b9865..42792ea main -> main\n",
- "\n"
- ]
- },
{
"data": {
"text/plain": [