Commit: Move finetune inside the package (#1040)

carmocca authored and awaelchli committed Mar 15, 2024
1 parent 35008a4 commit 4be70ad
Showing 23 changed files with 101 additions and 98 deletions.
8 changes: 4 additions & 4 deletions README.md
@@ -131,25 +131,25 @@ We support 4-bit quantization (as in QLoRA), (bnb.nf4, bnb.nf4-dq, bnb.fp4, bnb.

## Finetune the model

We provide simple training scripts (`finetune/adapter.py`, `finetune/adapter_v2.py`, and `finetune/lora.py`) that instruction-tune a pretrained model on the [Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset.
We provide simple training scripts (`litgpt/finetune/*.py`) that instruction-tune a pretrained model on the [Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset.
For example, you can either use

Adapter ([Zhang et al. 2023](https://arxiv.org/abs/2303.16199)):

```bash
python finetune/adapter.py
python litgpt/finetune/adapter.py
```

or Adapter v2 ([Gao et al. 2023](https://arxiv.org/abs/2304.15010)):

```bash
python finetune/adapter_v2.py
python litgpt/finetune/adapter_v2.py
```

or LoRA ([Hu et al. 2021](https://arxiv.org/abs/2106.09685)):

```bash
python finetune/lora.py
python litgpt/finetune/lora.py
```

(Please see the [tutorials/finetune_adapter](tutorials/finetune_adapter.md) for details on the differences between the two adapter methods.)
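For reference, a fuller LoRA invocation might look like the sketch below. This is illustrative only: `--checkpoint_dir`, `--out_dir`, and `--train.epochs` mirror the flags used in the tutorials later in this diff, `--precision` maps to the script's `precision` argument, and all paths are placeholders.

```bash
# Illustrative sketch only; the checkpoint and output directories are placeholders.
python litgpt/finetune/lora.py \
  --checkpoint_dir checkpoints/<org>/<model> \
  --out_dir out/lora-demo \
  --train.epochs 1 \
  --precision bf16-true
```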
4 changes: 2 additions & 2 deletions generate/adapter.py
@@ -34,13 +34,13 @@ def main(
) -> None:
"""Generates a response based on a given instruction and an optional input.
This script will only work with checkpoints from the instruction-tuned GPT-Adapter model.
See `finetune/adapter.py`.
See `litgpt/finetune/adapter.py`.
Args:
prompt: The prompt/instruction (Alpaca style).
input: Optional input (Alpaca style).
adapter_path: Path to the checkpoint with trained adapter weights, which are the output of
`finetune/adapter.py`.
`litgpt/finetune/adapter.py`.
checkpoint_dir: The path to the checkpoint folder with pretrained GPT weights.
quantize: Whether to quantize the model and using which method:
- bnb.nf4, bnb.nf4-dq, bnb.fp4, bnb.fp4-dq: 4-bit quantization from bitsandbytes
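As a rough usage sketch (hypothetical paths and prompt; the flag names follow the `main()` arguments documented above, assuming the repository's CLI maps them one-to-one):

```bash
# Hypothetical invocation: generate from an adapter-finetuned checkpoint.
# --adapter_path is the output of litgpt/finetune/adapter.py; --checkpoint_dir holds the base model.
# Both paths below are placeholders.
python generate/adapter.py \
  --prompt "Summarize the plot of Hamlet." \
  --adapter_path out/adapter-finetuned/adapter-weights.pth \
  --checkpoint_dir checkpoints/<org>/<model>
```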
4 changes: 2 additions & 2 deletions generate/adapter_v2.py
@@ -33,13 +33,13 @@ def main(
) -> None:
"""Generates a response based on a given instruction and an optional input.
This script will only work with checkpoints from the instruction-tuned GPT-AdapterV2 model.
See `finetune/adapter_v2.py`.
See `litgpt/finetune/adapter_v2.py`.
Args:
prompt: The prompt/instruction (Alpaca style).
input: Optional input (Alpaca style).
adapter_path: Path to the checkpoint with trained adapter weights, which are the output of
`finetune/adapter_v2.py`.
`litgpt/finetune/adapter_v2.py`.
checkpoint_dir: The path to the checkpoint folder with pretrained GPT weights.
quantize: Whether to quantize the model and using which method:
- bnb.nf4, bnb.nf4-dq, bnb.fp4, bnb.fp4-dq: 4-bit quantization from bitsandbytes
4 changes: 2 additions & 2 deletions generate/full.py
@@ -32,13 +32,13 @@ def main(
) -> None:
"""Generates a response based on a given instruction and an optional input.
This script will only work with checkpoints from the instruction-tuned GPT model.
See `finetune/full.py`.
See `litgpt/finetune/full.py`.
Args:
prompt: The prompt/instruction (Alpaca style).
input: Optional input (Alpaca style).
finetuned_path: Path to the checkpoint with trained weights, which are the output of
`finetune/full.py`.
`litgpt/finetune/full.py`.
checkpoint_dir: The path to the checkpoint folder with pretrained GPT weights.
quantize: Whether to quantize the model and using which method:
- bnb.nf4, bnb.nf4-dq, bnb.fp4, bnb.fp4-dq: 4-bit quantization from bitsandbytes
4 changes: 2 additions & 2 deletions generate/lora.py
@@ -42,13 +42,13 @@ def main(
) -> None:
"""Generates a response based on a given instruction and an optional input.
This script will only work with checkpoints from the instruction-tuned GPT-LoRA model.
See `finetune/lora.py`.
See `litgpt/finetune/lora.py`.
Args:
prompt: The prompt/instruction (Alpaca style).
input: Optional input (Alpaca style).
lora_path: Path to the checkpoint with trained adapter weights, which are the output of
`finetune/lora.py`.
`litgpt/finetune/lora.py`.
checkpoint_dir: The path to the checkpoint folder with pretrained GPT weights.
quantize: Whether to quantize the model and using which method:
- bnb.nf4, bnb.nf4-dq, bnb.fp4, bnb.fp4-dq: 4-bit quantization from bitsandbytes
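A quantized variant might be sketched like this (again with hypothetical paths; `bnb.nf4` is one of the bitsandbytes modes listed above):

```bash
# Hypothetical invocation: generate with trained LoRA weights and 4-bit bitsandbytes quantization.
# Both paths are placeholders for your own finetuned and pretrained checkpoints.
python generate/lora.py \
  --prompt "Explain what LoRA finetuning changes in the model." \
  --lora_path out/lora-finetuned/lora-weights.pth \
  --checkpoint_dir checkpoints/<org>/<model> \
  --quantize bnb.nf4
```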
Empty file added litgpt/finetune/__init__.py
Empty file.
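The new empty `__init__.py` is what turns `litgpt/finetune` into a package, so the scripts below become importable as `litgpt.finetune.<name>` (which the updated tests rely on). A quick sanity check, assuming `litgpt` and its dependencies are installed or on `PYTHONPATH`:

```bash
# Should print the confirmation once the package and its dependencies (torch, lightning, ...) resolve.
python -c "import litgpt.finetune.adapter, litgpt.finetune.lora; print('finetune modules are importable')"
```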
13 changes: 7 additions & 6 deletions finetune/adapter.py → litgpt/finetune/adapter.py
@@ -9,17 +9,12 @@

import lightning as L
import torch
from torch.utils.data import DataLoader
from lightning.fabric.loggers import CSVLogger
from lightning.fabric.plugins import BitsandbytesPrecision
from lightning.fabric.strategies import FSDPStrategy
from lightning.fabric.utilities import ThroughputMonitor
from torch.utils.data import DataLoader

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from generate.base import generate
from litgpt.adapter import GPT, Block, Config, adapter_filter, mark_only_adapter_as_trainable
from litgpt.args import EvalArgs, TrainArgs
from litgpt.data import Alpaca, LitDataModule
@@ -38,6 +33,12 @@
save_hyperparameters,
)

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from generate.base import generate


def setup(
precision: Optional[str] = None,
13 changes: 7 additions & 6 deletions finetune/adapter_v2.py → litgpt/finetune/adapter_v2.py
@@ -9,17 +9,12 @@

import lightning as L
import torch
from torch.utils.data import DataLoader
from lightning.fabric.loggers import CSVLogger
from lightning.fabric.plugins import BitsandbytesPrecision
from lightning.fabric.strategies import FSDPStrategy
from lightning.fabric.utilities import ThroughputMonitor
from torch.utils.data import DataLoader

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from generate.base import generate
from litgpt.adapter_v2 import GPT, Block, Config, adapter_filter, mark_only_adapter_v2_as_trainable
from litgpt.args import EvalArgs, TrainArgs
from litgpt.data import Alpaca, LitDataModule
@@ -38,6 +33,12 @@
save_hyperparameters,
)

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from generate.base import generate


def setup(
precision: Optional[str] = None,
20 changes: 10 additions & 10 deletions finetune/full.py → litgpt/finetune/full.py
@@ -8,24 +8,18 @@
from pprint import pprint
from typing import Dict, List, Optional, Tuple, Union

import torch
from torch.utils.data import DataLoader

import lightning as L
import torch
from lightning.fabric.loggers import CSVLogger
from lightning.fabric.strategies import FSDPStrategy
from torch.utils.data import DataLoader
from torchmetrics import RunningMean

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from generate.base import generate
from litgpt.args import EvalArgs, TrainArgs
from litgpt.model import GPT, Block, Config
from litgpt.tokenizer import Tokenizer
from litgpt.data import Alpaca, LitDataModule
from litgpt.model import GPT, Block, Config
from litgpt.prompts import save_prompt_style
from litgpt.tokenizer import Tokenizer
from litgpt.utils import (
CLI,
check_valid_checkpoint_dir,
@@ -39,6 +33,12 @@
save_hyperparameters,
)

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from generate.base import generate


def setup(
precision: Optional[str] = None,
13 changes: 7 additions & 6 deletions finetune/lora.py → litgpt/finetune/lora.py
@@ -10,18 +10,13 @@

import lightning as L
import torch
from torch.utils.data import DataLoader
from lightning.fabric.loggers import CSVLogger
from lightning.fabric.plugins import BitsandbytesPrecision
from lightning.fabric.strategies import FSDPStrategy
from lightning.fabric.utilities import ThroughputMonitor
from torch.utils.data import DataLoader
from torchmetrics import RunningMean

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from generate.base import generate
from litgpt.args import EvalArgs, TrainArgs
from litgpt.data import LitDataModule, Alpaca
from litgpt.lora import GPT, Block, Config, lora_filter, mark_only_lora_as_trainable
@@ -40,6 +35,12 @@
save_hyperparameters,
)

# support running without installing as a package
wd = Path(__file__).parent.parent.resolve()
sys.path.append(str(wd))

from generate.base import generate


def setup(
precision: Optional[str] = None,
6 changes: 3 additions & 3 deletions scripts/merge_lora.py
@@ -24,14 +24,14 @@ def merge_lora(
pretrained_checkpoint_dir: Optional[Path] = None,
precision: Optional[str] = None,
) -> None:
"""Merges the LoRA weights with the base model. See `finetune/lora.py`.
"""Merges the LoRA weights with the base model. See `litgpt/finetune/lora.py`.
Merging happens in-place in the checkpoint directory that is given as input. It also saves
a backup file `lit_model.pth.lora` of the trained LoRA weights in case you still need it later.
Args:
checkpoint_dir: Path to the checkpoint directory with trained LoRA weights, which is the output of
`finetune/lora.py`.
`litgpt/finetune/lora.py`.
pretrained_checkpoint_dir: Optional path to the checkpoint directory with the weights of the base model
corresponding to the LoRA checkpoint. By default, this will automatically be inferred from the metadata
in the given `checkpoint_dir` directory. Only set this if the base model checkpoint directory
@@ -83,7 +83,7 @@ def load_lora_metadata(checkpoint_dir: Path) -> Tuple[Dict[str, Any], Path, Opti
raise FileNotFoundError(
f"The path {str(hparams_file)!r} is not a valid checkpoint directory. It is missing a"
f" `hyperparameters.yaml` file. Please point to the checkpoint directory that was produced by"
f" the `finetune/lora.py` script."
f" the `litgpt/finetune/lora.py` script."
)

with open(hparams_file, "r") as file:
4 changes: 2 additions & 2 deletions tests/test_adapter.py
@@ -51,7 +51,7 @@ def test_adapter_filter(tmp_path):

@mock.patch.dict(os.environ, {"LT_ACCELERATOR": "cpu"})
def test_adapter_script(tmp_path, fake_checkpoint_dir, monkeypatch, alpaca_path):
import finetune.adapter as module
import litgpt.finetune.adapter as module
from litgpt.data import Alpaca
from litgpt.args import EvalArgs, TrainArgs
from litgpt.config import name_to_config
@@ -146,7 +146,7 @@ def test_adapter_compile():
def test_adapter_bitsandbytes(monkeypatch, tmp_path, fake_checkpoint_dir, alpaca_path):
from litgpt.config import name_to_config
from litgpt.data import Alpaca
import finetune.adapter as module
import litgpt.finetune.adapter as module

if not _BITSANDBYTES_AVAILABLE:
pytest.skip("BNB not available")
4 changes: 2 additions & 2 deletions tests/test_adapter_v2.py
@@ -74,7 +74,7 @@ def test_adapter_v2_filter(tmp_path):

@mock.patch.dict(os.environ, {"LT_ACCELERATOR": "cpu"})
def test_adapter_v2_script(tmp_path, fake_checkpoint_dir, monkeypatch, alpaca_path):
import finetune.adapter_v2 as module
import litgpt.finetune.adapter_v2 as module
from litgpt.args import EvalArgs, TrainArgs
from litgpt.data import Alpaca
from litgpt.config import name_to_config
@@ -235,7 +235,7 @@ def test_against_hf_mixtral():
def test_adapter_v2_bitsandbytes(monkeypatch, tmp_path, fake_checkpoint_dir, alpaca_path):
from litgpt.config import name_to_config
from litgpt.data import Alpaca
import finetune.adapter_v2 as module
import litgpt.finetune.adapter_v2 as module

if not _BITSANDBYTES_AVAILABLE:
pytest.skip("BNB not available")
6 changes: 3 additions & 3 deletions tests/test_config_hub.py
@@ -11,9 +11,9 @@
("litgpt/pretrain.py", "pretrain/debug.yaml"),
("litgpt/pretrain.py", "pretrain/tinyllama.yaml"),
("litgpt/pretrain.py", "pretrain/tinystories.yaml"),
("finetune/full.py", "finetune/llama-2-7b/full.yaml"),
("finetune/lora.py", "finetune/llama-2-7b/lora.yaml"),
("finetune/lora.py", "finetune/tiny-llama/lora.yaml"),
("litgpt/finetune/full.py", "finetune/llama-2-7b/full.yaml"),
("litgpt/finetune/lora.py", "finetune/llama-2-7b/lora.yaml"),
("litgpt/finetune/lora.py", "finetune/tiny-llama/lora.yaml"),
])
def test_config_help(script_file, config_file, monkeypatch, tmp_path):
"""Test that configs validate against the signature in the scripts."""
2 changes: 1 addition & 1 deletion tests/test_full.py
@@ -11,7 +11,7 @@

@mock.patch.dict(os.environ, {"LT_ACCELERATOR": "cpu"})
def test_full_script(tmp_path, fake_checkpoint_dir, monkeypatch, alpaca_path):
import finetune.full as module
import litgpt.finetune.full as module
from litgpt.args import EvalArgs, TrainArgs
from litgpt.data import Alpaca
from litgpt.config import name_to_config
4 changes: 2 additions & 2 deletions tests/test_lora.py
@@ -182,7 +182,7 @@ def test_lora_filter(tmp_path):

@mock.patch.dict(os.environ, {"LT_ACCELERATOR": "cpu"})
def test_lora_script(tmp_path, fake_checkpoint_dir, monkeypatch, alpaca_path):
import finetune.lora as module
import litgpt.finetune.lora as module
from litgpt.args import EvalArgs, TrainArgs
from litgpt.data import Alpaca
from litgpt.config import name_to_config
@@ -593,7 +593,7 @@ def test_against_hf_mixtral():
def test_lora_bitsandbytes(monkeypatch, tmp_path, fake_checkpoint_dir, alpaca_path):
from litgpt.config import name_to_config
from litgpt.data import Alpaca
import finetune.lora as module
import litgpt.finetune.lora as module

if not _BITSANDBYTES_AVAILABLE:
pytest.skip("BNB not available")
2 changes: 1 addition & 1 deletion tutorials/convert_lit_models.md
@@ -93,7 +93,7 @@ python scripts/prepare_alpaca.py \
```bash
export finetuned_dir=out/lit-finetuned-model

python finetune/lora.py \
python litgpt/finetune/lora.py \
--checkpoint_dir checkpoints/$repo_id \
--out_dir $finetuned_dir \
--train.epochs 1 \