Refactor notebook for improved readability and consistency #6

Merged (1 commit, Jan 11, 2025)
122 changes: 60 additions & 62 deletions README.md

Large diffs are not rendered by default.

218 changes: 70 additions & 148 deletions nb/CodeGemma_(7B)-Conversational.ipynb
@@ -140,23 +140,16 @@
"source": [
"model = FastLanguageModel.get_peft_model(\n",
" model,\n",
" r=16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128\n",
" target_modules=[\n",
" \"q_proj\",\n",
" \"k_proj\",\n",
" \"v_proj\",\n",
" \"o_proj\",\n",
" \"gate_proj\",\n",
" \"up_proj\",\n",
" \"down_proj\",\n",
" ],\n",
" lora_alpha=16,\n",
" lora_dropout=0, # Supports any, but = 0 is optimized\n",
" bias=\"none\", # Supports any, but = \"none\" is optimized\n",
" use_gradient_checkpointing=\"unsloth\", # True or \"unsloth\" for very long context\n",
" random_state=3407,\n",
" use_rslora=False, # We support rank stabilized LoRA\n",
" loftq_config=None, # And LoftQ\n",
" r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128\n",
" target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
" \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
" lora_alpha = 16,\n",
" lora_dropout = 0, # Supports any, but = 0 is optimized\n",
" bias = \"none\", # Supports any, but = \"none\" is optimized\n",
" use_gradient_checkpointing = \"unsloth\", # True or \"unsloth\" for very long context\n",
" random_state = 3407,\n",
" use_rslora = False, # We support rank stabilized LoRA\n",
" loftq_config = None, # And LoftQ\n",
")"
]
},
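For a sense of what the r = 16 and lora_alpha = 16 settings in the cell above amount to, here is a minimal back-of-the-envelope sketch in plain Python. The 4096 hidden size is an assumed, illustrative value, not read from the CodeGemma config:

    # Illustrative only: rough count of trainable parameters added by one LoRA pair
    # on a single square projection, assuming a 4096-dim hidden size.
    hidden = 4096                     # assumed hidden size, for illustration
    r, lora_alpha = 16, 16            # values used in the cell above
    lora_params = 2 * hidden * r      # two rank-r factors per adapted weight matrix
    scaling = lora_alpha / r          # LoRA scaling applied to the low-rank update
    print(lora_params, scaling)       # 131072 extra trainable params per matrix, 1.0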
@@ -202,39 +195,20 @@
"\n",
"tokenizer = get_chat_template(\n",
" tokenizer,\n",
" chat_template=\"chatml\", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth\n",
" mapping={\n",
" \"role\": \"from\",\n",
" \"content\": \"value\",\n",
" \"user\": \"human\",\n",
" \"assistant\": \"gpt\",\n",
" }, # ShareGPT style\n",
" map_eos_token=True, # Maps <|im_end|> to </s> instead\n",
" chat_template = \"chatml\", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth\n",
" mapping = {\"role\" : \"from\", \"content\" : \"value\", \"user\" : \"human\", \"assistant\" : \"gpt\"}, # ShareGPT style\n",
" map_eos_token = True, # Maps <|im_end|> to </s> instead\n",
")\n",
"\n",
"\n",
"def formatting_prompts_func(examples):\n",
" convos = examples[\"conversations\"]\n",
" texts = [\n",
" tokenizer.apply_chat_template(\n",
" convo, tokenize=False, add_generation_prompt=False\n",
" )\n",
" for convo in convos\n",
" ]\n",
" return {\n",
" \"text\": texts,\n",
" }\n",
"\n",
"\n",
" texts = [tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False) for convo in convos]\n",
" return { \"text\" : texts, }\n",
"pass\n",
"\n",
"from datasets import load_dataset\n",
"\n",
"dataset = load_dataset(\"philschmid/guanaco-sharegpt-style\", split=\"train\")\n",
"dataset = dataset.map(\n",
" formatting_prompts_func,\n",
" batched=True,\n",
")"
"dataset = load_dataset(\"philschmid/guanaco-sharegpt-style\", split = \"train\")\n",
"dataset = dataset.map(formatting_prompts_func, batched = True,)"
]
},
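The mapping argument passed to get_chat_template above remaps ShareGPT-style keys (from/value, with human/gpt roles) onto the role/content convention before the ChatML template is applied. A self-contained sketch of that remapping, written without the tokenizer so it runs anywhere; the sample conversation is invented for illustration:

    # Illustrative only: mimic the ShareGPT -> ChatML remapping done via the mapping above.
    convo = [
        {"from": "human", "value": "Hello!"},
        {"from": "gpt", "value": "Hi, how can I help?"},
    ]
    role_map = {"human": "user", "gpt": "assistant"}
    chatml_text = "".join(
        f"<|im_start|>{role_map[turn['from']]}\n{turn['value']}<|im_end|>\n"
        for turn in convo
    )
    print(chatml_text)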
{
@@ -325,37 +299,28 @@
},
"outputs": [],
"source": [
"unsloth_template = (\n",
" \"{{ bos_token }}\"\n",
" \"{{ 'You are a helpful assistant to the user\\n' }}\"\n",
"unsloth_template = \\\n",
" \"{{ bos_token }}\"\\\n",
" \"{{ 'You are a helpful assistant to the user\\n' }}\"\\\n",
" \"{% endif %}\"\\\n",
" \"{% for message in messages %}\"\\\n",
" \"{% if message['role'] == 'user' %}\"\\\n",
" \"{{ '>>> User: ' + message['content'] + '\\n' }}\"\\\n",
" \"{% elif message['role'] == 'assistant' %}\"\\\n",
" \"{{ '>>> Assistant: ' + message['content'] + eos_token + '\\n' }}\"\\\n",
" \"{% endif %}\"\\\n",
" \"{% endfor %}\"\\\n",
" \"{% if add_generation_prompt %}\"\\\n",
" \"{{ '>>> Assistant: ' }}\"\\\n",
" \"{% endif %}\"\n",
" \"{% for message in messages %}\"\n",
" \"{% if message['role'] == 'user' %}\"\n",
" \"{{ '>>> User: ' + message['content'] + '\\n' }}\"\n",
" \"{% elif message['role'] == 'assistant' %}\"\n",
" \"{{ '>>> Assistant: ' + message['content'] + eos_token + '\\n' }}\"\n",
" \"{% endif %}\"\n",
" \"{% endfor %}\"\n",
" \"{% if add_generation_prompt %}\"\n",
" \"{{ '>>> Assistant: ' }}\"\n",
" \"{% endif %}\"\n",
")\n",
"unsloth_eos_token = \"eos_token\"\n",
"\n",
"if False:\n",
" tokenizer = get_chat_template(\n",
" tokenizer,\n",
" chat_template=(\n",
" unsloth_template,\n",
" unsloth_eos_token,\n",
" ), # You must provide a template and EOS token\n",
" mapping={\n",
" \"role\": \"from\",\n",
" \"content\": \"value\",\n",
" \"user\": \"human\",\n",
" \"assistant\": \"gpt\",\n",
" }, # ShareGPT style\n",
" map_eos_token=True, # Maps <|im_end|> to </s> instead\n",
" chat_template = (unsloth_template, unsloth_eos_token,), # You must provide a template and EOS token\n",
" mapping = {\"role\" : \"from\", \"content\" : \"value\", \"user\" : \"human\", \"assistant\" : \"gpt\"}, # ShareGPT style\n",
" map_eos_token = True, # Maps <|im_end|> to </s> instead\n",
" )"
]
},
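Chat templates like unsloth_template above are plain Jinja2, so their output can be previewed without a tokenizer. A hedged sketch that renders a simplified variant directly with jinja2; the system-prompt branch is only partially visible in this hunk and is omitted here, so this is not the notebook's exact template:

    from jinja2 import Template  # illustrative, simplified variant of the template above

    simple_template = (
        "{{ bos_token }}"
        "{% for message in messages %}"
        "{% if message['role'] == 'user' %}{{ '>>> User: ' + message['content'] + '\n' }}"
        "{% elif message['role'] == 'assistant' %}{{ '>>> Assistant: ' + message['content'] + eos_token + '\n' }}"
        "{% endif %}"
        "{% endfor %}"
        "{% if add_generation_prompt %}{{ '>>> Assistant: ' }}{% endif %}"
    )
    print(Template(simple_template).render(
        bos_token = "<bos>", eos_token = "<eos>",
        messages = [{"role": "user", "content": "Hello!"}],
        add_generation_prompt = True,
    ))  # prints "<bos>>>> User: Hello!" followed by the generation prompt ">>> Assistant: "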
@@ -662,29 +627,24 @@
"\n",
"tokenizer = get_chat_template(\n",
" tokenizer,\n",
" chat_template=\"chatml\", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth\n",
" mapping={\n",
" \"role\": \"from\",\n",
" \"content\": \"value\",\n",
" \"user\": \"human\",\n",
" \"assistant\": \"gpt\",\n",
" }, # ShareGPT style\n",
" map_eos_token=True, # Maps <|im_end|> to </s> instead\n",
" chat_template = \"chatml\", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth\n",
" mapping = {\"role\" : \"from\", \"content\" : \"value\", \"user\" : \"human\", \"assistant\" : \"gpt\"}, # ShareGPT style\n",
" map_eos_token = True, # Maps <|im_end|> to </s> instead\n",
")\n",
"\n",
"FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
"FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
"\n",
"messages = [\n",
" {\"from\": \"human\", \"value\": \"Continue the fibonnaci sequence: 1, 1, 2, 3, 5, 8,\"},\n",
"]\n",
"inputs = tokenizer.apply_chat_template(\n",
" messages,\n",
" tokenize=True,\n",
" add_generation_prompt=True, # Must add for generation\n",
" return_tensors=\"pt\",\n",
" tokenize = True,\n",
" add_generation_prompt = True, # Must add for generation\n",
" return_tensors = \"pt\",\n",
").to(\"cuda\")\n",
"\n",
"outputs = model.generate(input_ids=inputs, max_new_tokens=64, use_cache=True)\n",
"outputs = model.generate(input_ids = inputs, max_new_tokens = 64, use_cache = True)\n",
"tokenizer.batch_decode(outputs)"
]
},
@@ -720,24 +680,21 @@
}
],
"source": [
"FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
"FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
"\n",
"messages = [\n",
" {\"from\": \"human\", \"value\": \"Continue the fibonnaci sequence: 1, 1, 2, 3, 5, 8,\"},\n",
"]\n",
"inputs = tokenizer.apply_chat_template(\n",
" messages,\n",
" tokenize=True,\n",
" add_generation_prompt=True, # Must add for generation\n",
" return_tensors=\"pt\",\n",
" tokenize = True,\n",
" add_generation_prompt = True, # Must add for generation\n",
" return_tensors = \"pt\",\n",
").to(\"cuda\")\n",
"\n",
"from transformers import TextStreamer\n",
"\n",
"text_streamer = TextStreamer(tokenizer)\n",
"_ = model.generate(\n",
" input_ids=inputs, streamer=text_streamer, max_new_tokens=128, use_cache=True\n",
")"
"_ = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = 128, use_cache = True)"
]
},
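A small optional tweak to the streaming cell above: transformers.TextStreamer accepts a skip_prompt flag, so the prompt is not echoed back before the generated continuation. A minimal sketch, reusing the tokenizer, model, and inputs defined above:

    from transformers import TextStreamer

    # skip_prompt = True streams only the newly generated tokens, not the prompt itself.
    text_streamer = TextStreamer(tokenizer, skip_prompt = True)
    _ = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = 128, use_cache = True)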
{
@@ -799,31 +756,27 @@
"source": [
"if False:\n",
" from unsloth import FastLanguageModel\n",
"\n",
" model, tokenizer = FastLanguageModel.from_pretrained(\n",
" model_name=\"lora_model\", # YOUR MODEL YOU USED FOR TRAINING\n",
" max_seq_length=max_seq_length,\n",
" dtype=dtype,\n",
" load_in_4bit=load_in_4bit,\n",
" model_name = \"lora_model\", # YOUR MODEL YOU USED FOR TRAINING\n",
" max_seq_length = max_seq_length,\n",
" dtype = dtype,\n",
" load_in_4bit = load_in_4bit,\n",
" )\n",
" FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
" FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
"\n",
"messages = [\n",
" {\"from\": \"human\", \"value\": \"What is a famous tall tower in Paris?\"},\n",
"]\n",
"inputs = tokenizer.apply_chat_template(\n",
" messages,\n",
" tokenize=True,\n",
" add_generation_prompt=True, # Must add for generation\n",
" return_tensors=\"pt\",\n",
" tokenize = True,\n",
" add_generation_prompt = True, # Must add for generation\n",
" return_tensors = \"pt\",\n",
").to(\"cuda\")\n",
"\n",
"from transformers import TextStreamer\n",
"\n",
"text_streamer = TextStreamer(tokenizer)\n",
"_ = model.generate(\n",
" input_ids=inputs, streamer=text_streamer, max_new_tokens=128, use_cache=True\n",
")"
"_ = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = 128, use_cache = True)"
]
},
{
@@ -875,36 +828,16 @@
"outputs": [],
"source": [
"# Merge to 16bit\n",
"if False:\n",
" model.save_pretrained_merged(\n",
" \"model\",\n",
" tokenizer,\n",
" save_method=\"merged_16bit\",\n",
" )\n",
"if False:\n",
" model.push_to_hub_merged(\n",
" \"hf/model\", tokenizer, save_method=\"merged_16bit\", token=\"\"\n",
" )\n",
"if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n",
"if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n",
"\n",
"# Merge to 4bit\n",
"if False:\n",
" model.save_pretrained_merged(\n",
" \"model\",\n",
" tokenizer,\n",
" save_method=\"merged_4bit\",\n",
" )\n",
"if False:\n",
" model.push_to_hub_merged(\"hf/model\", tokenizer, save_method=\"merged_4bit\", token=\"\")\n",
"if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n",
"if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n",
"\n",
"# Just LoRA adapters\n",
"if False:\n",
" model.save_pretrained_merged(\n",
" \"model\",\n",
" tokenizer,\n",
" save_method=\"lora\",\n",
" )\n",
"if False:\n",
" model.push_to_hub_merged(\"hf/model\", tokenizer, save_method=\"lora\", token=\"\")"
"if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"lora\",)\n",
"if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"lora\", token = \"\")"
]
},
{
@@ -931,27 +864,16 @@
"outputs": [],
"source": [
"# Save to 8bit Q8_0\n",
"if False:\n",
" model.save_pretrained_gguf(\n",
" \"model\",\n",
" tokenizer,\n",
" )\n",
"if False:\n",
" model.push_to_hub_gguf(\"hf/model\", tokenizer, token=\"\")\n",
"if False: model.save_pretrained_gguf(\"model\", tokenizer,)\n",
"if False: model.push_to_hub_gguf(\"hf/model\", tokenizer, token = \"\")\n",
"\n",
"# Save to 16bit GGUF\n",
"if False:\n",
" model.save_pretrained_gguf(\"model\", tokenizer, quantization_method=\"f16\")\n",
"if False:\n",
" model.push_to_hub_gguf(\"hf/model\", tokenizer, quantization_method=\"f16\", token=\"\")\n",
"if False: model.save_pretrained_gguf(\"model\", tokenizer, quantization_method = \"f16\")\n",
"if False: model.push_to_hub_gguf(\"hf/model\", tokenizer, quantization_method = \"f16\", token = \"\")\n",
"\n",
"# Save to q4_k_m GGUF\n",
"if False:\n",
" model.save_pretrained_gguf(\"model\", tokenizer, quantization_method=\"q4_k_m\")\n",
"if False:\n",
" model.push_to_hub_gguf(\n",
" \"hf/model\", tokenizer, quantization_method=\"q4_k_m\", token=\"\"\n",
" )"
"if False: model.save_pretrained_gguf(\"model\", tokenizer, quantization_method = \"q4_k_m\")\n",
"if False: model.push_to_hub_gguf(\"hf/model\", tokenizer, quantization_method = \"q4_k_m\", token = \"\")"
]
},
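Once a GGUF file has been exported as above, it can be run locally, for example through the llama-cpp-python bindings. A hedged sketch; the file path is an assumption, so point it at whatever .gguf file the save step actually produced:

    from llama_cpp import Llama  # pip install llama-cpp-python

    # Hypothetical path: use the .gguf file written by save_pretrained_gguf.
    llm = Llama(model_path = "model/unsloth.Q4_K_M.gguf")
    out = llm("Continue the fibonacci sequence: 1, 1, 2, 3, 5, 8,", max_tokens = 64)
    print(out["choices"][0]["text"])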
{
@@ -964,9 +886,9 @@
"\n",
"Some other links:\n",
"1. Llama 3.2 Conversational notebook. [Free Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.2_(1B_and_3B)-Conversational.ipynb)\n",
"2. Saving finetunes to Ollama. [Free notebook](https://colab.research.google.com/drive/1WZDi7APtQ9VsvOrQSSC5DDtxq159j8iZ?usp=sharing)\n",
"2. Saving finetunes to Ollama. [Free notebook](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3_(8B)-Ollama.ipynb)\n",
"3. Llama 3.2 Vision finetuning - Radiography use case. [Free Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.2_(11B)-Vision.ipynb)\n",
"6. More notebooks for DPO, ORPO, Continued pretraining, conversational finetuning and more on our [documentation](https://docs.unsloth.ai/get-started/unsloth-notebooks)!\n",
"6. See notebooks for DPO, ORPO, Continued pretraining, conversational finetuning and more on our [documentation](https://docs.unsloth.ai/get-started/unsloth-notebooks)!\n",
"\n",
"<div class=\"align-center\">\n",
" <a href=\"https://unsloth.ai\"><img src=\"https://github.com/unslothai/unsloth/raw/main/images/unsloth%20new%20logo.png\" width=\"115\"></a>\n",
@@ -994,4 +916,4 @@
},
"nbformat": 4,
"nbformat_minor": 0
}
}