Commit 24b7726: Merge pull request #28 from SFI-Visual-Intelligence/main

Sync

Seilmast authored Feb 3, 2025
2 parents: 5b5da1f + 4350664
Showing 24 changed files with 844 additions and 237 deletions.
35 changes: 35 additions & 0 deletions .github/workflows/format.yml
@@ -0,0 +1,35 @@
+name: Format
+
+on:
+  push:
+    paths:
+      - 'utils/**'
+  pull_request:
+    paths:
+      - 'utils/**'
+
+jobs:
+  format:
+    name: Run Ruff and isort
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v4
+        with:
+          python-version: '3.x'
+
+      - name: Install dependencies
+        run: |
+          pip install ruff isort
+      - name: Run Ruff check
+        run: |
+          ruff check utils/
+      - name: Run isort check
+        run: |
+          isort --check-only utils/
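
To reproduce these checks locally before pushing, a small runner script is sketched below. It is hypothetical (not part of the repository) and assumes ruff and isort are installed in the active environment, mirroring the workflow's pip install ruff isort step.

    # Hypothetical local mirror of the Format workflow above; not part of the repo.
    # Assumes `pip install ruff isort` has been run, as in the CI job.
    import subprocess
    import sys


    def run_format_checks(target: str = "utils/") -> int:
        # Run the same two checks as CI; return the first non-zero exit code, else 0.
        for cmd in (["ruff", "check", target], ["isort", "--check-only", target]):
            result = subprocess.run(cmd)  # inherits stdout/stderr, so failures print
            if result.returncode != 0:
                return result.returncode
        return 0


    if __name__ == "__main__":
        sys.exit(run_format_checks())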
8 changes: 8 additions & 0 deletions environment.yml
@@ -9,6 +9,14 @@ dependencies:
   - sphinx-autobuild
   - sphinx-rtd-theme
   - pip
   - h5py
+  - black
+  - isort
+  - jupyterlab
+  - numpy
+  - pandas
+  - pytest
+  - ruff
+  - scalene
 prefix: /opt/miniconda3/envs/cc-exam
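
Assuming a standard conda setup (an assumption; the repository does not document this workflow), the updated environment can be recreated with conda env create -f environment.yml, or applied to an existing environment with conda env update -f environment.yml --prune. Note that the prefix: line is machine-specific and may need adjusting locally.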

237 changes: 160 additions & 77 deletions main.py
@@ -1,78 +1,158 @@
-import torch as th
-import torch.nn as nn
-from torch.utils.data import DataLoader
 import argparse
-import wandb
+from pathlib import Path

 import numpy as np
-from utils import MetricWrapper, load_model, load_data, createfolders
+import torch as th
+import torch.nn as nn
+import wandb
+from torch.utils.data import DataLoader
+
+from utils import MetricWrapper, createfolders, load_data, load_model


 def main():
-    '''
+    """
     Parameters
     ----------
     Returns
     -------
     Raises
     ------
-    '''
+    """
     parser = argparse.ArgumentParser(
-        prog='',
-        description='',
-        epilog='',
-    )
-    #Structuture related values
-    parser.add_argument('--datafolder', type=str, default='Data/', help='Path to where data will be saved during training.')
-    parser.add_argument('--resultfolder', type=str, default='Results/', help='Path to where results will be saved during evaluation.')
-    parser.add_argument('--modelfolder', type=str, default='Experiments/', help='Path to where model weights will be saved at the end of training.')
-    parser.add_argument('--savemodel', type=bool, default=False, help='Whether model should be saved or not.')
-
-    parser.add_argument('--download-data', type=bool, default=False, help='Whether the data should be downloaded or not. Might cause code to start a bit slowly.')
-
-    #Data/Model specific values
-    parser.add_argument('--modelname', type=str, default='MagnusModel',
-                        choices = ['MagnusModel'], help="Model which to be trained on")
-    parser.add_argument('--dataset', type=str, default='svhn',
-                        choices=['svhn'], help='Which dataset to train the model on.')
-
-    parser.add_argument('--EntropyPrediction', type=bool, default=True, help='Include the Entropy Prediction metric in evaluation')
-    parser.add_argument('--F1Score', type=bool, default=True, help='Include the F1Score metric in evaluation')
-    parser.add_argument('--Recall', type=bool, default=True, help='Include the Recall metric in evaluation')
-    parser.add_argument('--Precision', type=bool, default=True, help='Include the Precision metric in evaluation')
-    parser.add_argument('--Accuracy', type=bool, default=True, help='Include the Accuracy metric in evaluation')
-
-    #Training specific values
-    parser.add_argument('--epoch', type=int, default=20, help='Amount of training epochs the model will do.')
-    parser.add_argument('--learning_rate', type=float, default=0.001, help='Learning rate parameter for model training.')
-    parser.add_argument('--batchsize', type=int, default=64, help='Amount of training images loaded in one go')
-
+        prog="",
+        description="",
+        epilog="",
+    )
+    # Structuture related values
+    parser.add_argument(
+        "--datafolder",
+        type=Path,
+        default="Data",
+        help="Path to where data will be saved during training.",
+    )
+    parser.add_argument(
+        "--resultfolder",
+        type=Path,
+        default="Results",
+        help="Path to where results will be saved during evaluation.",
+    )
+    parser.add_argument(
+        "--modelfolder",
+        type=Path,
+        default="Experiments",
+        help="Path to where model weights will be saved at the end of training.",
+    )
+    parser.add_argument(
+        "--savemodel",
+        type=bool,
+        default=False,
+        help="Whether model should be saved or not.",
+    )
+
+    parser.add_argument(
+        "--download-data",
+        type=bool,
+        default=False,
+        help="Whether the data should be downloaded or not. Might cause code to start a bit slowly.",
+    )
+
+    # Data/Model specific values
+    parser.add_argument(
+        "--modelname",
+        type=str,
+        default="MagnusModel",
+        choices=["MagnusModel", "ChristianModel"],
+        help="Model which to be trained on",
+    )
+    parser.add_argument(
+        "--dataset",
+        type=str,
+        default="svhn",
+        choices=["svhn", "usps_0-6"],
+        help="Which dataset to train the model on.",
+    )
+
+    parser.add_argument(
+        "--metric",
+        type=str,
+        default=["entropy"],
+        choices=["entropy", "f1", "recall", "precision", "accuracy"],
+        nargs="+",
+        help="Which metric to use for evaluation",
+    )
+
+    # Training specific values
+    parser.add_argument(
+        "--epoch",
+        type=int,
+        default=20,
+        help="Amount of training epochs the model will do.",
+    )
+    parser.add_argument(
+        "--learning_rate",
+        type=float,
+        default=0.001,
+        help="Learning rate parameter for model training.",
+    )
+    parser.add_argument(
+        "--batchsize",
+        type=int,
+        default=64,
+        help="Amount of training images loaded in one go",
+    )
+    parser.add_argument(
+        "--device",
+        type=str,
+        default="cpu",
+        choices=["cuda", "cpu", "mps"],
+        help="Which device to run the training on.",
+    )
+    parser.add_argument(
+        "--dry_run",
+        action="store_true",
+        help="If true, the code will not run the training loop.",
+    )

     args = parser.parse_args()

-
-    createfolders(args)
-
-    device = 'cuda' if th.cuda.is_available() else 'cpu'
-
-    #load model
-    model = load_model()
+    createfolders(args.datafolder, args.resultfolder, args.modelfolder)
+
+    device = args.device
+
+    metrics = MetricWrapper(*args.metric)
+
+    # Dataset
+    traindata = load_data(
+        args.dataset,
+        train=True,
+        data_path=args.datafolder,
+        download=args.download_data,
+    )
+    validata = load_data(
+        args.dataset,
+        train=False,
+        data_path=args.datafolder,
+    )
+
+    # Find number of channels in the dataset
+    if len(traindata[0][0].shape) == 2:
+        channels = 1
+    else:
+        channels = traindata[0][0].shape[0]
+
+    # load model
+    model = load_model(
+        args.modelname,
+        in_channels=channels,
+        num_classes=traindata.num_classes,
+    )
     model.to(device)

-    metrics = MetricWrapper(
-        EntropyPred = args.EntropyPrediction,
-        F1Score = args.F1Score,
-        Recall = args.Recall,
-        Precision = args.Precision,
-        Accuracy = args.Accuracy
-    )
-
-    #Dataset
-    traindata = load_data(args.dataset)
-    validata = load_data(args.dataset)
-

     trainloader = DataLoader(traindata,
                              batch_size=args.batchsize,
                              shuffle=True,
@@ -82,48 +162,51 @@ def main():
                              batch_size=args.batchsize,
                              shuffle=False,
                              pin_memory=True)

     criterion = nn.CrossEntropyLoss()
-    optimizer = th.optim.Adam(model.parameters(), lr = args.learning_rate)
-
-
+    optimizer = th.optim.Adam(model.parameters(), lr=args.learning_rate)
+
+    # This allows us to load all the components without running the training loop
+    if args.dry_run:
+        print("Dry run completed")
+        exit(0)

     wandb.init(project='',
                tags=[])
     wandb.watch(model)

     for epoch in range(args.epoch):
-        #Training loop start
-
+        # Training loop start
         trainingloss = []
         model.train()
-        for x, y in traindata:
+        for x, y in trainloader:
+            x, y = x.to(device), y.to(device)
             pred = model.forward(x)

             loss = criterion(y, pred)
             loss.backward()

             optimizer.step()
             optimizer.zero_grad(set_to_none=True)
             trainingloss.append(loss.item())

         evalloss = []
-        #Eval loop start
+        # Eval loop start
         model.eval()
         with th.no_grad():
             for x, y in valiloader:
-                x = x.to(device)
+                x, y = x.to(device), y.to(device)
                 pred = model.forward(x)
                 loss = criterion(y, pred)
                 evalloss.append(loss.item())

         wandb.log({
             'Epoch': epoch,
             'Train loss': np.mean(trainingloss),
             'Evaluation Loss': np.mean(evalloss)
         })



 if __name__ == '__main__':
-    main()
+    main()
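
The notable refactor in main.py is collapsing the five boolean metric flags (--EntropyPrediction, --F1Score, --Recall, --Precision, --Accuracy) into one repeatable --metric flag whose values are unpacked directly into MetricWrapper(*args.metric). The wrapper's real implementation lives in utils/ and is not shown in this diff; a minimal sketch of the name-to-callable pattern that call implies, with a hypothetical registry and only an accuracy entry filled in, could look like:

    # A minimal sketch of the pattern MetricWrapper(*args.metric) suggests.
    # Hypothetical: the registry, _accuracy, and all signatures are assumptions,
    # not the repository's actual utils.MetricWrapper.
    from collections.abc import Callable

    import torch as th


    def _accuracy(y_true: th.Tensor, y_pred: th.Tensor) -> float:
        # y_pred holds per-class logits; compare argmax against integer labels.
        return (y_pred.argmax(dim=1) == y_true).float().mean().item()


    class MetricWrapper:
        # Maps metric names ("accuracy", "f1", ...) to callables and runs them all.
        _REGISTRY: dict[str, Callable] = {"accuracy": _accuracy}  # hypothetical registry

        def __init__(self, *names: str):
            unknown = [name for name in names if name not in self._REGISTRY]
            if unknown:
                raise ValueError(f"Unknown metrics: {unknown}")
            self.metrics = {name: self._REGISTRY[name] for name in names}

        def __call__(self, y_true: th.Tensor, y_pred: th.Tensor) -> dict[str, float]:
            return {name: fn(y_true, y_pred) for name, fn in self.metrics.items()}

With the new CLI, the added --dry_run flag gives a cheap smoke test that builds every component but skips training, e.g. python main.py --dataset svhn --metric accuracy f1 --device cpu --dry_run.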