determined-ai
diff --git a/‎.gitignore
+50 b/‎.gitignore
+50
diff --git a/‎README.md
+79-2 b/‎README.md
+79-2
diff --git a/‎computer_vision/byol_pytorch/README.md
+58 b/‎computer_vision/byol_pytorch/README.md
+58
diff --git a/‎computer_vision/byol_pytorch/backbone.py
+27 b/‎computer_vision/byol_pytorch/backbone.py
+27
diff --git a/‎computer_vision/byol_pytorch/const-cifar10.yaml
+81 b/‎computer_vision/byol_pytorch/const-cifar10.yaml
+81
@@ -0,0 +1,50 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+
+# All log files
+*.log
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# dotenv
+.env
+
+# virtualenv
+.venv
+venv/
+ENV/
+
+# mypy
+.mypy_cache/
+
+# Determined distributable package
+determined-*.tar.gz
+
+# All Python wheels
+*.whl
+
+# Node modules
+node_modules/
+
+# VSCode
+.vscode/
+
+# JetBrains IDEs (e.g., PyCharm and GoLand)
+.idea/
+
+# gobin directory used for tests
+gobin
+
+# MacOS system files
+*.DS_Store
+.dccache
+
+# Hydra output
+model_hub/mmdetection/hydra/outputs
+
+build/
@@ -1,3 +1,80 @@
-# Determined Examples
+# Determined Legacy Examples
 
-[CycleGAN](cyclegan)
+This repository contains Determined examples that are no longer actively maintained by the Determined team.
+
+## Tutorials
+
+| Example                                                       | Dataset          | Framework             |
+|:-------------------------------------------------------------:|:----------------:|:---------------------:|
+| [fashion\_mnist\_tf\_keras](tutorials/fashion_mnist_tf_keras) | Fashion MNIST    | TensorFlow (tf.keras) |
+
+## Computer Vision
+
+| Example                                                                      | Dataset                      | Framework                                |
+|:----------------------------------------------------------------------------:|:----------------------------:|:----------------------------------------:|
+| [cifar10\_pytorch](computer_vision/cifar10_pytorch)                          | CIFAR-10                     | PyTorch                                  |
+| [cifar10\_pytorch\_inference](computer_vision/cifar10_pytorch_inference)     | CIFAR-10                     | PyTorch                                  |
+| [fasterrcnn\_coco\_pytorch](computer_vision/fasterrcnn_coco_pytorch)         | Penn-Fudan Dataset           | PyTorch                                  |
+| [mmdetection\_pytorch](computer_vision/mmdetection_pytorch)                  | COCO                         | PyTorch                                  |
+| [detr\_coco\_pytorch](computer_vision/detr_coco_pytorch)                     | COCO                         | PyTorch                                  |
+| [deformabledetr\_coco\_pytorch](computer_vision/deformabledetr_coco_pytorch) | COCO                         | PyTorch                                  |
+| [iris\_tf\_keras](computer_vision/iris_tf_keras)                             | Iris Dataset                 | TensorFlow (tf.keras)                    |
+| [unets\_tf\_keras](computer_vision/unets_tf_keras)                           | Oxford-IIIT Pet Dataset      | TensorFlow (tf.keras)                    |
+| [efficientdet\_pytorch](computer_vision/efficientdet_pytorch)                | COCO                         | PyTorch                                  |
+| [byol\_pytorch](computer_vision/byol_pytorch)                                | CIFAR-10 / STL-10 / ImageNet | PyTorch                                  |
+| [deepspeed\_cifar10\_cpu\_offloading](deepspeed/cifar10_cpu_offloading)      | CIFAR-10                     | PyTorch (DeepSpeed)                      |
+
+## Natural Language Processing (NLP)
+
+| Example                                            | Dataset    | Framework |
+|:--------------------------------------------------:|:----------:|:---------:|
+| [albert\_squad\_pytorch](nlp/albert_squad_pytorch) | SQuAD      | PyTorch   |
+| [bert\_glue\_pytorch](nlp/bert_glue_pytorch)       | GLUE       | PyTorch   |
+| [word\_language\_model](nlp/word_language_model)   | WikiText-2 | PyTorch   |
+
+## HP Search Benchmarks
+
+| Example                                                                         | Dataset               | Framework |
+|:-------------------------------------------------------------------------------:|:---------------------:|:---------:|
+| [darts\_cifar10\_pytorch](hp_search_benchmarks/darts_cifar10_pytorch)           | CIFAR-10              | PyTorch   |
+| [darts\_penntreebank\_pytorch](hp_search_benchmarks/darts_penntreebank_pytorch) | Penn Treebank Dataset | PyTorch   |
+
+## Neural Architecture Search (NAS)
+
+| Example                            | Dataset | Framework |
+|:---------------------------------:|:-------:|:---------:|
+| [gaea\_pytorch](nas/gaea_pytorch) | DARTS   | PyTorch   |
+
+## Meta Learning
+
+| Example                                                                | Dataset  | Framework |
+|:----------------------------------------------------------------------:|:--------:|:---------:|
+| [protonet\_omniglot\_pytorch](meta_learning/protonet_omniglot_pytorch) | Omniglot | PyTorch   |
+
+## Generative Adversarial Networks (GAN)
+
+| Example                                       | Dataset          | Framework             |
+|:----------------------------------------------|:----------------:|:---------------------:|
+| [dc\_gan\_tf\_keras](gan/dcgan_tf_keras)      | MNIST            | TensorFlow (tf.keras) |
+| [gan\_mnist\_pytorch](gan/gan_mnist_pytorch)  | MNIST            | PyTorch               |
+| [deepspeed\_dcgan](deepspeed/deepspeed_dcgan) | MNIST / CIFAR-10 | PyTorch (DeepSpeed)   |
+| [pix2pix\_tf\_keras](gan/pix2pix_tf_keras)    | pix2pix          | TensorFlow (tf.keras) |
+| [cyclegan](gan/cyclegan)                      | monet2photo      | PyTorch               |
+
+## Custom Reducers
+
+| Example                                                                    | Dataset | Framework  |
+|:--------------------------------------------------------------------------:|:-------:|:----------:|
+| [custom\_reducers\_mnist\_pytorch](features/custom_reducers_mnist_pytorch) | MNIST   | PyTorch    |
+
+## HP Search Constraints
+
+| Example                                                                  | Dataset | Framework  |
+|:------------------------------------------------------------------------:|:-------:|:----------:|
+| [hp\_constraints\_mnist\_pytorch](features/hp_constraints_mnist_pytorch) | MNIST   | PyTorch    |
+
+## Custom Search Method
+
+| Example                                                                  | Dataset | Framework  |
+|:------------------------------------------------------------------------:|:-------:|:----------:|
+| [asha\_search\_method](custom_search_method/asha_search_method)          | MNIST   | PyTorch    |
@@ -0,0 +1,58 @@
+# PyTorch Bootstrap Your Own Latent (BYOL) Example
+
+This example shows how to perform self-supervised image classifier training with BYOL using
+Determined's PyTorch API.  This example is based on the [byol-pytorch](https://github.com/lucidrains/byol-pytorch/tree/master/byol_pytorch) package.
+
+Original BYOL paper: https://arxiv.org/abs/2006.07733
+
+Code and configuration details also sourced from the following BYOL implementations:
+  - (JAX, paper authors) https://github.com/deepmind/deepmind-research/tree/master/byol
+  - (Pytorch) https://github.com/untitled-ai/self_supervised
+
+# Files
+* [backbone.py](backbone.py): Backbone registry.
+* [data.py](data.py): Dataset downloading and metadata registry.
+* [evaluate_result.py](evaluate_result.py): Kicks off an evaluation run, for longer training of classifier heads.
+* [generate_blob_list.py](generate_blob_list.py): Script to generate a blob list from a GCS bucket + prefix.  Used to support GCS streaming for ImageNet dataset.
+* [model_def.py](model_def.py): Core trial and callback definitions.  This is the entrypoint for trials.
+* [optim.py](optim.py): Optimizer definitions and utilities.
+* [reducers.py](reducers.py): Custom reducers used for evaluation metrics.
+* [startup-hook.sh](startup-hook.sh): This script will automatically be run by Determined during startup of every container launched for this experiment.  This script installs some additional dependencies.
+* [utils.py](utils.py): Simple utility functions and classes.
+
+# Configuration Files
+* [const-cifar10.yaml](const-cifar10.yaml): Train with CIFAR-10 on a single GPU with constant hyperparameter values.
+* [distributed-stl10.yaml](distributed-stl10.yaml): Train with STL-10 using 8 GPU distributed training with constant hyperparameter values.
+* [distributed-imagenet.yaml](distributed-imagenet.yaml): Train with ImageNet using 64 GPU distributed training with constant hyperparameter values.
+
+# Data
+This repo uses three datasets:
+- CIFAR-10 (32x32, 10 classes), automatically downloaded via torchvision.
+- STL-10 (96x96, 10 classes), automatically downloaded via torchvision.
+- ImageNet-1k (1000 classes), which must be stored in a GCS bucket along with a blob index.  Information on downloading ImageNet-1k is available at the [ImageNet website](https://image-net.org/download.php).  See `distributed-imagenet.yaml` for an example bucket configuration, and `generate_blob_list.py` for a script to generate the blob list.
+
+# To Run
+If you have not yet installed Determined, installation instructions can be found under `docs/install-admin.html` or at https://docs.determined.ai/latest/index.html
+
+Run the following command to kick off self-supervised training: `det -m <master host:port> experiment create -f const-cifar10.yaml .`
+
+The other configurations can be run by specifying the appropriate configuration file in place of `const-cifar10.yaml`.
+
+
+To run classifier training and validation on a completed self-supervised training:
+
+1. Find the experiment ID of your self-supervised training.
+2. Run `python evaluate_result.py --experiment-id=<id> --classifier-train-epochs=<number>`
+
+This is necessary for ImageNet, where `hyperparameters.validate_with_classifier` is set to `false` during self-supervised training due to the time it takes to train the classifier.  Other configs have `hyperparameters.validate_with_classifier` set to true to collect `test_accuracy` during the self-supervised training.
+
+
+## Results
+
+For `const-cifar10.yaml` and `distributed-stl10.yaml`, results were taken from best `test_accuracy` achieved over the self-supervised training duration.  For `distributed-imagenet.yaml`, result was taken from running `evaluate_result.py` for 80 classifier training epochs.
+
+| Config file | Test Accuracy (%) |
+| ----------- | ------------- |
+| const-cifar10.yaml | 74.91 |
+| distributed-stl10.yaml | 91.10 |
+| distributed-imagenet.yaml | 71.37 |
@@ -0,0 +1,27 @@
+from dataclasses import dataclass
+from typing import Callable
+
+import torch.nn as nn
+import torchvision.models as models
+
+
+@dataclass
+class BackboneMetadata:  # Registry entry describing one backbone: a size plus a factory.
+    feature_size: int  # presumably the backbone's output feature dimension — confirm against model_def.py
+    build_fn: Callable[[], nn.Module]  # zero-argument factory that returns the backbone module
+
+
+BACKBONE_METADATA_BY_NAME = {
+    "resnet18": BackboneMetadata(
+        feature_size=512, build_fn=lambda: models.resnet18(pretrained=True)
+    ),
+    "resnet34": BackboneMetadata(
+        feature_size=512, build_fn=lambda: models.resnet34(pretrained=True)
+    ),
+    "resnet50": BackboneMetadata(
+        feature_size=2048, build_fn=lambda: models.resnet50(pretrained=True)
+    ),
+    "resnet101": BackboneMetadata(
+        feature_size=2048, build_fn=lambda: models.resnet101(pretrained=True)
+    ),
+}
@@ -0,0 +1,81 @@
+name: cifar10_byol_const  # Experiment header: BYOL on CIFAR-10 with constant hyperparameters
+entrypoint: model_def:BYOLTrial  # trial class BYOLTrial in model_def.py
+records_per_epoch: 45000  # presumably 50000 CIFAR-10 training images minus data.validation_subset_size (5000) — confirm in data.py
+resources:
+  slots_per_trial: 1  # single-slot run; the README describes this config as single-GPU
+  shm_size: 17179869184  # = 16 * 1024^3, i.e. 16 GiB if the unit is bytes
+min_validation_period:
+  epochs: 2  # validation period expressed in epochs
+
+data:  # Dataset selection, loader settings, and the eval / two training transform parameter sets
+  dataset_name: cifar10
+  download_dir: /data  # same path as bind_mounts.container_path in this file
+  num_workers: 8
+  validation_subset_size: 5000
+  eval_transform:
+    resize_short_edge: 32  # 32 matches the CIFAR-10 image size given in the README (32x32)
+    center_crop_size: 32
+  train_transform1:  # differs from train_transform2 only in gaussian_blur_prob and solarization_prob
+    random_crop_size: 32
+    random_crop_min_scale: 0.2
+    random_hflip_prob: 0.5
+    color_jitter_prob: 0.8
+    color_jitter_brightness: 0.4
+    color_jitter_contrast: 0.4
+    color_jitter_saturation: 0.2
+    color_jitter_hue: 0.1
+    grayscale_prob: 0.2
+    gaussian_blur_prob: 1.0  # blur probability 1.0 here vs 0.1 in train_transform2
+    gaussian_blur_kernel_size: 3
+    gaussian_blur_min_std: 0.1
+    gaussian_blur_max_std: 2.0
+    solarization_prob: 0.0  # solarization probability 0.0 here vs 0.2 in train_transform2
+  train_transform2:  # second training transform; same values as train_transform1 except the two probabilities noted below
+    random_crop_size: 32
+    random_crop_min_scale: 0.2
+    random_hflip_prob: 0.5
+    color_jitter_prob: 0.8
+    color_jitter_brightness: 0.4
+    color_jitter_contrast: 0.4
+    color_jitter_saturation: 0.2
+    color_jitter_hue: 0.1
+    grayscale_prob: 0.2
+    gaussian_blur_prob: 0.1  # 1.0 in train_transform1
+    gaussian_blur_kernel_size: 3
+    gaussian_blur_min_std: 0.1
+    gaussian_blur_max_std: 2.0
+    solarization_prob: 0.2  # 0.0 in train_transform1
+
+# Hyperparameters for the BYOL trial.  All values are constants (no search ranges).
+hyperparameters:
+  training_mode: SELF_SUPERVISED
+  # Per the README, `true` collects `test_accuracy` during self-supervised training.
+  validate_with_classifier: true
+  # Presumably a key of BACKBONE_METADATA_BY_NAME in backbone.py — confirm in model_def.py.
+  backbone_name: resnet18
+  global_batch_size: 256
+  # Settings for the classifier used during validation.
+  classifier:
+    learning_rates: [0.1, 0.05, 0.025, 0.01, 0.005]
+    logit_clipping:
+      enabled: true
+      alpha: 20
+    # Written as 0.01 rather than 1e-2: an exponent form without a decimal
+    # point is resolved as a string, not a float, by YAML 1.1 parsers (PyYAML).
+    logit_regularization_beta: 0.01
+    momentum: 0.9
+    train_epochs: 4
+  # Optimizer settings for the self-supervised phase.
+  self_supervised:
+    lars_eta: 0.001
+    momentum: 0.9
+    moving_average_decay_base: 0.996
+    weight_decay: 1.5e-6
+    learning_rate:
+      base: 0.2
+      # Equal to global_batch_size here; presumably the reference batch size
+      # used to scale `base` — confirm in optim.py.
+      base_batch_size: 256
+      warmup_epochs: 10
+
+searcher:  # Searcher named `single`, scored on test_accuracy
+  name: single
+  metric: test_accuracy  # per the README, collected when hyperparameters.validate_with_classifier is true
+  smaller_is_better: false  # accuracy metric: larger is better
+  max_length:
+    epochs: 100  # training length expressed in epochs
+
+bind_mounts:  # Maps host /tmp to container /data, the path used as data.download_dir in this file
+  - host_path: /tmp
+    container_path: /data