Refactor notebook for improved readability and consistency #6

Merged (1 commit, Jan 11, 2025)
122 changes: 60 additions & 62 deletions README.md

Large diffs are not rendered by default.

218 changes: 70 additions & 148 deletions nb/CodeGemma_(7B)-Conversational.ipynb
@@ -140,23 +140,16 @@
"source": [
"model = FastLanguageModel.get_peft_model(\n",
" model,\n",
" r=16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128\n",
" target_modules=[\n",
" \"q_proj\",\n",
" \"k_proj\",\n",
" \"v_proj\",\n",
" \"o_proj\",\n",
" \"gate_proj\",\n",
" \"up_proj\",\n",
" \"down_proj\",\n",
" ],\n",
" lora_alpha=16,\n",
" lora_dropout=0, # Supports any, but = 0 is optimized\n",
" bias=\"none\", # Supports any, but = \"none\" is optimized\n",
" use_gradient_checkpointing=\"unsloth\", # True or \"unsloth\" for very long context\n",
" random_state=3407,\n",
" use_rslora=False, # We support rank stabilized LoRA\n",
" loftq_config=None, # And LoftQ\n",
" r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128\n",
" target_modules = [\"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
" \"gate_proj\", \"up_proj\", \"down_proj\",],\n",
" lora_alpha = 16,\n",
" lora_dropout = 0, # Supports any, but = 0 is optimized\n",
" bias = \"none\", # Supports any, but = \"none\" is optimized\n",
" use_gradient_checkpointing = \"unsloth\", # True or \"unsloth\" for very long context\n",
" random_state = 3407,\n",
" use_rslora = False, # We support rank stabilized LoRA\n",
" loftq_config = None, # And LoftQ\n",
")"
]
},
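For a sense of what the r = 16 and lora_alpha = 16 settings in the cell above amount to, here is a minimal back-of-the-envelope sketch in plain Python. The 4096 hidden size is an assumed, illustrative value, not read from the CodeGemma config:

    # Illustrative only: rough count of trainable parameters added by one LoRA pair
    # on a single square projection, assuming a 4096-dim hidden size.
    hidden = 4096                     # assumed hidden size, for illustration
    r, lora_alpha = 16, 16            # values used in the cell above
    lora_params = 2 * hidden * r      # two rank-r factors per adapted weight matrix
    scaling = lora_alpha / r          # LoRA scaling applied to the low-rank update
    print(lora_params, scaling)       # 131072 extra trainable params per matrix, 1.0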
@@ -202,39 +195,20 @@
"\n",
"tokenizer = get_chat_template(\n",
" tokenizer,\n",
" chat_template=\"chatml\", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth\n",
" mapping={\n",
" \"role\": \"from\",\n",
" \"content\": \"value\",\n",
" \"user\": \"human\",\n",
" \"assistant\": \"gpt\",\n",
" }, # ShareGPT style\n",
" map_eos_token=True, # Maps <|im_end|> to </s> instead\n",
" chat_template = \"chatml\", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth\n",
" mapping = {\"role\" : \"from\", \"content\" : \"value\", \"user\" : \"human\", \"assistant\" : \"gpt\"}, # ShareGPT style\n",
" map_eos_token = True, # Maps <|im_end|> to </s> instead\n",
")\n",
"\n",
"\n",
"def formatting_prompts_func(examples):\n",
" convos = examples[\"conversations\"]\n",
" texts = [\n",
" tokenizer.apply_chat_template(\n",
" convo, tokenize=False, add_generation_prompt=False\n",
" )\n",
" for convo in convos\n",
" ]\n",
" return {\n",
" \"text\": texts,\n",
" }\n",
"\n",
"\n",
" texts = [tokenizer.apply_chat_template(convo, tokenize = False, add_generation_prompt = False) for convo in convos]\n",
" return { \"text\" : texts, }\n",
"pass\n",
"\n",
"from datasets import load_dataset\n",
"\n",
"dataset = load_dataset(\"philschmid/guanaco-sharegpt-style\", split=\"train\")\n",
"dataset = dataset.map(\n",
" formatting_prompts_func,\n",
" batched=True,\n",
")"
"dataset = load_dataset(\"philschmid/guanaco-sharegpt-style\", split = \"train\")\n",
"dataset = dataset.map(formatting_prompts_func, batched = True,)"
]
},
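The mapping argument passed to get_chat_template above remaps ShareGPT-style keys (from/value, with human/gpt roles) onto the role/content convention before the ChatML template is applied. A self-contained sketch of that remapping, written without the tokenizer so it runs anywhere; the sample conversation is invented for illustration:

    # Illustrative only: mimic the ShareGPT -> ChatML remapping done via the mapping above.
    convo = [
        {"from": "human", "value": "Hello!"},
        {"from": "gpt", "value": "Hi, how can I help?"},
    ]
    role_map = {"human": "user", "gpt": "assistant"}
    chatml_text = "".join(
        f"<|im_start|>{role_map[turn['from']]}\n{turn['value']}<|im_end|>\n"
        for turn in convo
    )
    print(chatml_text)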
{
@@ -325,37 +299,28 @@
},
"outputs": [],
"source": [
"unsloth_template = (\n",
" \"{{ bos_token }}\"\n",
" \"{{ 'You are a helpful assistant to the user\\n' }}\"\n",
"unsloth_template = \\\n",
" \"{{ bos_token }}\"\\\n",
" \"{{ 'You are a helpful assistant to the user\\n' }}\"\\\n",
" \"{% endif %}\"\\\n",
" \"{% for message in messages %}\"\\\n",
" \"{% if message['role'] == 'user' %}\"\\\n",
" \"{{ '>>> User: ' + message['content'] + '\\n' }}\"\\\n",
" \"{% elif message['role'] == 'assistant' %}\"\\\n",
" \"{{ '>>> Assistant: ' + message['content'] + eos_token + '\\n' }}\"\\\n",
" \"{% endif %}\"\\\n",
" \"{% endfor %}\"\\\n",
" \"{% if add_generation_prompt %}\"\\\n",
" \"{{ '>>> Assistant: ' }}\"\\\n",
" \"{% endif %}\"\n",
" \"{% for message in messages %}\"\n",
" \"{% if message['role'] == 'user' %}\"\n",
" \"{{ '>>> User: ' + message['content'] + '\\n' }}\"\n",
" \"{% elif message['role'] == 'assistant' %}\"\n",
" \"{{ '>>> Assistant: ' + message['content'] + eos_token + '\\n' }}\"\n",
" \"{% endif %}\"\n",
" \"{% endfor %}\"\n",
" \"{% if add_generation_prompt %}\"\n",
" \"{{ '>>> Assistant: ' }}\"\n",
" \"{% endif %}\"\n",
")\n",
"unsloth_eos_token = \"eos_token\"\n",
"\n",
"if False:\n",
" tokenizer = get_chat_template(\n",
" tokenizer,\n",
" chat_template=(\n",
" unsloth_template,\n",
" unsloth_eos_token,\n",
" ), # You must provide a template and EOS token\n",
" mapping={\n",
" \"role\": \"from\",\n",
" \"content\": \"value\",\n",
" \"user\": \"human\",\n",
" \"assistant\": \"gpt\",\n",
" }, # ShareGPT style\n",
" map_eos_token=True, # Maps <|im_end|> to </s> instead\n",
" chat_template = (unsloth_template, unsloth_eos_token,), # You must provide a template and EOS token\n",
" mapping = {\"role\" : \"from\", \"content\" : \"value\", \"user\" : \"human\", \"assistant\" : \"gpt\"}, # ShareGPT style\n",
" map_eos_token = True, # Maps <|im_end|> to </s> instead\n",
" )"
]
},
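Chat templates like unsloth_template above are plain Jinja2, so their output can be previewed without a tokenizer. A hedged sketch that renders a simplified variant directly with jinja2; the system-prompt branch is only partially visible in this hunk and is omitted here, so this is not the notebook's exact template:

    from jinja2 import Template  # illustrative, simplified variant of the template above

    simple_template = (
        "{{ bos_token }}"
        "{% for message in messages %}"
        "{% if message['role'] == 'user' %}{{ '>>> User: ' + message['content'] + '\n' }}"
        "{% elif message['role'] == 'assistant' %}{{ '>>> Assistant: ' + message['content'] + eos_token + '\n' }}"
        "{% endif %}"
        "{% endfor %}"
        "{% if add_generation_prompt %}{{ '>>> Assistant: ' }}{% endif %}"
    )
    print(Template(simple_template).render(
        bos_token = "<bos>", eos_token = "<eos>",
        messages = [{"role": "user", "content": "Hello!"}],
        add_generation_prompt = True,
    ))  # prints "<bos>>>> User: Hello!" followed by the generation prompt ">>> Assistant: "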
@@ -662,29 +627,24 @@
"\n",
"tokenizer = get_chat_template(\n",
" tokenizer,\n",
" chat_template=\"chatml\", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth\n",
" mapping={\n",
" \"role\": \"from\",\n",
" \"content\": \"value\",\n",
" \"user\": \"human\",\n",
" \"assistant\": \"gpt\",\n",
" }, # ShareGPT style\n",
" map_eos_token=True, # Maps <|im_end|> to </s> instead\n",
" chat_template = \"chatml\", # Supports zephyr, chatml, mistral, llama, alpaca, vicuna, vicuna_old, unsloth\n",
" mapping = {\"role\" : \"from\", \"content\" : \"value\", \"user\" : \"human\", \"assistant\" : \"gpt\"}, # ShareGPT style\n",
" map_eos_token = True, # Maps <|im_end|> to </s> instead\n",
")\n",
"\n",
"FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
"FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
"\n",
"messages = [\n",
" {\"from\": \"human\", \"value\": \"Continue the fibonnaci sequence: 1, 1, 2, 3, 5, 8,\"},\n",
"]\n",
"inputs = tokenizer.apply_chat_template(\n",
" messages,\n",
" tokenize=True,\n",
" add_generation_prompt=True, # Must add for generation\n",
" return_tensors=\"pt\",\n",
" tokenize = True,\n",
" add_generation_prompt = True, # Must add for generation\n",
" return_tensors = \"pt\",\n",
").to(\"cuda\")\n",
"\n",
"outputs = model.generate(input_ids=inputs, max_new_tokens=64, use_cache=True)\n",
"outputs = model.generate(input_ids = inputs, max_new_tokens = 64, use_cache = True)\n",
"tokenizer.batch_decode(outputs)"
]
},
@@ -720,24 +680,21 @@
}
],
"source": [
"FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
"FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
"\n",
"messages = [\n",
" {\"from\": \"human\", \"value\": \"Continue the fibonnaci sequence: 1, 1, 2, 3, 5, 8,\"},\n",
"]\n",
"inputs = tokenizer.apply_chat_template(\n",
" messages,\n",
" tokenize=True,\n",
" add_generation_prompt=True, # Must add for generation\n",
" return_tensors=\"pt\",\n",
" tokenize = True,\n",
" add_generation_prompt = True, # Must add for generation\n",
" return_tensors = \"pt\",\n",
").to(\"cuda\")\n",
"\n",
"from transformers import TextStreamer\n",
"\n",
"text_streamer = TextStreamer(tokenizer)\n",
"_ = model.generate(\n",
" input_ids=inputs, streamer=text_streamer, max_new_tokens=128, use_cache=True\n",
")"
"_ = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = 128, use_cache = True)"
]
},
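A small optional tweak to the streaming cell above: transformers.TextStreamer accepts a skip_prompt flag, so the prompt is not echoed back before the generated continuation. A minimal sketch, reusing the tokenizer, model, and inputs defined above:

    from transformers import TextStreamer

    # skip_prompt = True streams only the newly generated tokens, not the prompt itself.
    text_streamer = TextStreamer(tokenizer, skip_prompt = True)
    _ = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = 128, use_cache = True)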
{
@@ -799,31 +756,27 @@
"source": [
"if False:\n",
" from unsloth import FastLanguageModel\n",
"\n",
" model, tokenizer = FastLanguageModel.from_pretrained(\n",
" model_name=\"lora_model\", # YOUR MODEL YOU USED FOR TRAINING\n",
" max_seq_length=max_seq_length,\n",
" dtype=dtype,\n",
" load_in_4bit=load_in_4bit,\n",
" model_name = \"lora_model\", # YOUR MODEL YOU USED FOR TRAINING\n",
" max_seq_length = max_seq_length,\n",
" dtype = dtype,\n",
" load_in_4bit = load_in_4bit,\n",
" )\n",
" FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
" FastLanguageModel.for_inference(model) # Enable native 2x faster inference\n",
"\n",
"messages = [\n",
" {\"from\": \"human\", \"value\": \"What is a famous tall tower in Paris?\"},\n",
"]\n",
"inputs = tokenizer.apply_chat_template(\n",
" messages,\n",
" tokenize=True,\n",
" add_generation_prompt=True, # Must add for generation\n",
" return_tensors=\"pt\",\n",
" tokenize = True,\n",
" add_generation_prompt = True, # Must add for generation\n",
" return_tensors = \"pt\",\n",
").to(\"cuda\")\n",
"\n",
"from transformers import TextStreamer\n",
"\n",
"text_streamer = TextStreamer(tokenizer)\n",
"_ = model.generate(\n",
" input_ids=inputs, streamer=text_streamer, max_new_tokens=128, use_cache=True\n",
")"
"_ = model.generate(input_ids = inputs, streamer = text_streamer, max_new_tokens = 128, use_cache = True)"
]
},
{
@@ -875,36 +828,16 @@
"outputs": [],
"source": [
"# Merge to 16bit\n",
"if False:\n",
" model.save_pretrained_merged(\n",
" \"model\",\n",
" tokenizer,\n",
" save_method=\"merged_16bit\",\n",
" )\n",
"if False:\n",
" model.push_to_hub_merged(\n",
" \"hf/model\", tokenizer, save_method=\"merged_16bit\", token=\"\"\n",
" )\n",
"if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_16bit\",)\n",
"if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_16bit\", token = \"\")\n",
"\n",
"# Merge to 4bit\n",
"if False:\n",
" model.save_pretrained_merged(\n",
" \"model\",\n",
" tokenizer,\n",
" save_method=\"merged_4bit\",\n",
" )\n",
"if False:\n",
" model.push_to_hub_merged(\"hf/model\", tokenizer, save_method=\"merged_4bit\", token=\"\")\n",
"if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"merged_4bit\",)\n",
"if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"merged_4bit\", token = \"\")\n",
"\n",
"# Just LoRA adapters\n",
"if False:\n",
" model.save_pretrained_merged(\n",
" \"model\",\n",
" tokenizer,\n",
" save_method=\"lora\",\n",
" )\n",
"if False:\n",
" model.push_to_hub_merged(\"hf/model\", tokenizer, save_method=\"lora\", token=\"\")"
"if False: model.save_pretrained_merged(\"model\", tokenizer, save_method = \"lora\",)\n",
"if False: model.push_to_hub_merged(\"hf/model\", tokenizer, save_method = \"lora\", token = \"\")"
]
},
{
@@ -931,27 +864,16 @@
"outputs": [],
"source": [
"# Save to 8bit Q8_0\n",
"if False:\n",
" model.save_pretrained_gguf(\n",
" \"model\",\n",
" tokenizer,\n",
" )\n",
"if False:\n",
" model.push_to_hub_gguf(\"hf/model\", tokenizer, token=\"\")\n",
"if False: model.save_pretrained_gguf(\"model\", tokenizer,)\n",
"if False: model.push_to_hub_gguf(\"hf/model\", tokenizer, token = \"\")\n",
"\n",
"# Save to 16bit GGUF\n",
"if False:\n",
" model.save_pretrained_gguf(\"model\", tokenizer, quantization_method=\"f16\")\n",
"if False:\n",
" model.push_to_hub_gguf(\"hf/model\", tokenizer, quantization_method=\"f16\", token=\"\")\n",
"if False: model.save_pretrained_gguf(\"model\", tokenizer, quantization_method = \"f16\")\n",
"if False: model.push_to_hub_gguf(\"hf/model\", tokenizer, quantization_method = \"f16\", token = \"\")\n",
"\n",
"# Save to q4_k_m GGUF\n",
"if False:\n",
" model.save_pretrained_gguf(\"model\", tokenizer, quantization_method=\"q4_k_m\")\n",
"if False:\n",
" model.push_to_hub_gguf(\n",
" \"hf/model\", tokenizer, quantization_method=\"q4_k_m\", token=\"\"\n",
" )"
"if False: model.save_pretrained_gguf(\"model\", tokenizer, quantization_method = \"q4_k_m\")\n",
"if False: model.push_to_hub_gguf(\"hf/model\", tokenizer, quantization_method = \"q4_k_m\", token = \"\")"
]
},
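Once a GGUF file has been exported as above, it can be run locally, for example through the llama-cpp-python bindings. A hedged sketch; the file path is an assumption, so point it at whatever .gguf file the save step actually produced:

    from llama_cpp import Llama  # pip install llama-cpp-python

    # Hypothetical path: use the .gguf file written by save_pretrained_gguf.
    llm = Llama(model_path = "model/unsloth.Q4_K_M.gguf")
    out = llm("Continue the fibonacci sequence: 1, 1, 2, 3, 5, 8,", max_tokens = 64)
    print(out["choices"][0]["text"])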
{
@@ -964,9 +886,9 @@
"\n",
"Some other links:\n",
"1. Llama 3.2 Conversational notebook. [Free Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.2_(1B_and_3B)-Conversational.ipynb)\n",
"2. Saving finetunes to Ollama. [Free notebook](https://colab.research.google.com/drive/1WZDi7APtQ9VsvOrQSSC5DDtxq159j8iZ?usp=sharing)\n",
"2. Saving finetunes to Ollama. [Free notebook](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3_(8B)-Ollama.ipynb)\n",
"3. Llama 3.2 Vision finetuning - Radiography use case. [Free Colab](https://colab.research.google.com/github/unslothai/notebooks/blob/main/nb/Llama3.2_(11B)-Vision.ipynb)\n",
"6. More notebooks for DPO, ORPO, Continued pretraining, conversational finetuning and more on our [documentation](https://docs.unsloth.ai/get-started/unsloth-notebooks)!\n",
"6. See notebooks for DPO, ORPO, Continued pretraining, conversational finetuning and more on our [documentation](https://docs.unsloth.ai/get-started/unsloth-notebooks)!\n",
"\n",
"<div class=\"align-center\">\n",
" <a href=\"https://unsloth.ai\"><img src=\"https://github.com/unslothai/unsloth/raw/main/images/unsloth%20new%20logo.png\" width=\"115\"></a>\n",
@@ -994,4 +916,4 @@
},
"nbformat": 4,
"nbformat_minor": 0
}
}