Merge pull request #77 from allenai/yawenz/improve_landsat_api
Wrap up Landsat API work
yawenzzzz authored Dec 2, 2024
2 parents 3abe564 + 936aa91 commit 83f9cdd
Showing 20 changed files with 780 additions and 201 deletions.
55 changes: 55 additions & 0 deletions .github/workflows/build_test.yaml
@@ -121,9 +121,64 @@ jobs:
-v ${{env.GOOGLE_GHA_CREDS_PATH}}:/tmp/gcp-credentials.json:ro \
-e GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcp-credentials.json \
-e RSLP_BUCKET=rslearn-eai \
-e RSLP_PREFIX=gs://rslearn-eai \
test pytest tests/ --ignore tests/integration_slow/
- name: Clean up
if: always()
run: |
docker compose -f docker-compose.yaml down
test-slow:
runs-on: GPU-Enabled-Runner
steps:
- name: Checkout repository
uses: actions/checkout@v4
- name: Log in to the Container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Log in to the Container registry
uses: docker/login-action@v3
with:
registry: ${{ env.REGISTRY }}
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Cleanup disk space
run: |
sudo docker rmi $(docker image ls -aq) >/dev/null 2>&1 || true
sudo docker image prune --all --force >/dev/null 2>&1 || true
sudo rm -rf /usr/share/dotnet
sudo rm -rf /opt/ghc
sudo rm -rf /usr/local/share/boost
- name: Build docker images
run: |
COMPOSE_DOCKER_CLI_BUILD=1 DOCKER_BUILDKIT=1 docker compose -f docker-compose.yaml build
- name: Authenticate into gcp
uses: "google-github-actions/auth@v2"
with:
credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }}

- name: Run tests with Docker Compose
run: |
docker run \
--gpus all \
--shm-size=15g \
-e AWS_ACCESS_KEY_ID=${{ secrets.AWS_ACCESS_KEY_ID }} \
-e AWS_SECRET_ACCESS_KEY=${{ secrets.AWS_SECRET_ACCESS_KEY }} \
-v ${{env.GOOGLE_GHA_CREDS_PATH}}:/tmp/gcp-credentials.json:ro \
-e GOOGLE_APPLICATION_CREDENTIALS=/tmp/gcp-credentials.json \
-e RSLP_BUCKET=rslearn-eai \
-e RSLP_PREFIX=gs://rslearn-eai \
rslearn_projects-test pytest tests/integration_slow/
- name: Clean up
if: always()
run: |
docker compose -f docker-compose.yaml down
49 changes: 49 additions & 0 deletions .github/workflows/landsat_vessel.yaml
@@ -0,0 +1,49 @@
name: Landsat-Vessel-Detection

on:
workflow_dispatch: # Manual trigger
push:
tags:
- "landsat_vessels_v*" # Trigger only when a version tag (e.g., landsat_vessels_v0.0.1) is pushed

jobs:
build-and-push:
runs-on: GPU-Enabled-Runner

steps:
# Step 1: Checkout the repository and fetch all tags
- name: Checkout repository
uses: actions/checkout@v3
with:
fetch-depth: 0 # Ensure all history and tags are fetched

- name: Fetch tags
run: git fetch --tags

# Step 2: Extract the version from the latest tag and its associated commit SHA
- name: Get latest tag and associated SHA
id: version
run: |
# Get the latest tag matching the pattern
LATEST_TAG=$(git tag --list "landsat_vessels_v*" --sort=-v:refname | head -n 1)
TAG_COMMIT=$(git rev-list -n 1 $LATEST_TAG)
SHORT_SHA=$(git rev-parse --short $TAG_COMMIT)
echo "LATEST_TAG=$LATEST_TAG" >> $GITHUB_ENV
echo "SHORT_SHA=$SHORT_SHA" >> $GITHUB_ENV
# Step 3: Log in to GHCR
- name: Log in to GitHub Container Registry
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

# Step 4: Build and push the Docker image
- name: Build and Push Docker Image
working-directory: rslp/landsat_vessels # Navigate to the required directory
run: |
docker compose build
docker tag landsat_vessels-landsat-vessels ghcr.io/allenai/landsat-vessel-detection:sha-${{ env.SHORT_SHA }}
docker tag landsat_vessels-landsat-vessels ghcr.io/allenai/landsat-vessel-detection:${{ env.LATEST_TAG }}
docker push ghcr.io/allenai/landsat-vessel-detection:sha-${{ env.SHORT_SHA }}
docker push ghcr.io/allenai/landsat-vessel-detection:${{ env.LATEST_TAG }}
3 changes: 2 additions & 1 deletion data/landsat_vessels/config.yaml
Expand Up @@ -63,6 +63,7 @@ data:
remap_values: [[0, 1], [0, 255]]
image_bands: [2, 1, 0]
exclude_by_center: true
score_threshold: 0.7
enable_map_metric: true
enable_f1_metric: true
f1_metric_thresholds: [[0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.95], [0.1], [0.2], [0.3], [0.4], [0.5], [0.6], [0.7], [0.8], [0.9]]
@@ -130,7 +131,7 @@ trainer:
class_path: rslp.utils.nms.NMSDistanceMerger
init_args:
grid_size: 64
distance_threshold: 15
distance_threshold: 10
property_name: "category" # same as task.property_name
class_agnostic: false
- class_path: lightning.pytorch.callbacks.ModelCheckpoint
157 changes: 157 additions & 0 deletions docs/landsat_vessels/api_use.md
@@ -0,0 +1,157 @@
# Landsat Vessel Detection API

The Landsat Vessel Detection API provides a way to run vessel detection on Landsat scenes. This guide explains how to set up and use the API, including running it locally or using prebuilt Docker images hosted on [GitHub Container Registry (GHCR)](https://github.com/allenai/rslearn_projects/pkgs/container/landsat-vessel-detection) and [Google Container Registry (GCR)](https://console.cloud.google.com/gcr/images/skylight-proto-1?referrer=search&inv=1&invt=Abh22Q&project=skylight-proto-1).


## Overview
- **Model Name**: Landsat Vessel Detection
- **Model Version**: `v0.0.1`
- **Tag**: `landsat_vessels_v0.0.1`
- **Last Updated**: `2024-11-21`


## Setting Up the Environment

First, create a `.env` file in the directory that you run the API or Docker container from, containing the following environment variables:

```bash
# Required
RSLP_PREFIX=<rslp_prefix>
GOOGLE_APPLICATION_CREDENTIALS=<path_to_service_account_key>

# Optional (with default values)
LANDSAT_HOST=<host_address>
LANDSAT_PORT=<port_number>

# Optional (only if you are fetching Landsat scenes from AWS S3 bucket)
AWS_ACCESS_KEY_ID=<aws_access_key_id>
AWS_SECRET_ACCESS_KEY=<aws_secret_access_key>
```

- `RSLP_PREFIX` is required and specifies the prefix of the GCS bucket where model checkpoints are stored.
- `LANDSAT_HOST` and `LANDSAT_PORT` are optional and configure the host and port of the Landsat service. The default values are `0.0.0.0` and `5555`.
- `GOOGLE_APPLICATION_CREDENTIALS` is required for fetching model checkpoints from the GCS bucket, and is also used for fetching downloaded Landsat scenes from GCS. The service account key file should have the `storage.admin` role.
- `AWS_ACCESS_KEY_ID` and `AWS_SECRET_ACCESS_KEY` are optional and only required when `scene_id` is used to fetch Landsat scenes from the AWS S3 bucket.
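
For reference, here is a minimal sketch of how these variables could be loaded on the Python side; the use of `python-dotenv` is an assumption for illustration, and the defaults match the values listed above:

```python
# Illustrative sketch only: assumes python-dotenv is available to load the .env file.
import os

from dotenv import load_dotenv

load_dotenv()  # reads variables from the .env file in the current directory

RSLP_PREFIX = os.environ["RSLP_PREFIX"]  # required
GOOGLE_APPLICATION_CREDENTIALS = os.environ["GOOGLE_APPLICATION_CREDENTIALS"]  # required
LANDSAT_HOST = os.environ.get("LANDSAT_HOST", "0.0.0.0")  # optional, default host
LANDSAT_PORT = int(os.environ.get("LANDSAT_PORT", "5555"))  # optional, default port
```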


## Running the API Server Locally

```bash
python rslp/landsat_vessels/api_main.py
```

This will start the API server on the specified host and port, reading the environment variables from the `.env` file.
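
To quickly confirm the server is reachable, you can request the Swagger UI page described later in this guide. Below is a small sketch using the Python `requests` library, assuming the default host and port:

```python
import requests

# Assumes the server is running locally on the default port from the .env file.
resp = requests.get("http://localhost:5555/docs", timeout=10)
print(resp.status_code)  # 200 means the API server is up and serving its docs page
```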

## Using Docker Images for API Deployment

Prebuilt Docker images are available on both GHCR and GCR. Use the following steps to pull and run the image (make sure the `.env` file is in the same directory as the Dockerfile, and at least 15GB of shared memory is available):

### GHCR image

1. Pull the image from GHCR.

```bash
docker pull ghcr.io/allenai/landsat-vessel-detection:v0.0.1
```

2. Run the container. Replace `<port_number>` with your actual `LANDSAT_PORT` (use `5555` if you kept the default) and `<path_to_service_account_key>` with the path to your local service account key file; keep the other arguments unchanged.

```bash
docker run \
--rm -p <port_number>:<port_number> \
-e GOOGLE_APPLICATION_CREDENTIALS=/app/credentials/key.json \
-v <path_to_service_account_key>:/app/credentials/key.json \
--env-file .env \
--shm-size=15g \
--gpus all \
ghcr.io/allenai/landsat-vessel-detection:v0.0.1
```

### GCR image

1. Pull the image from GCR.

```bash
docker pull gcr.io/skylight-proto-1/landsat-vessel-detection:v0.0.1
```

2. Run the container. Replace `<port_number>` with your actual `LANDSAT_PORT` (use `5555` if you kept the default) and `<path_to_service_account_key>` with the path to your local service account key file; keep the other arguments unchanged.

```bash
docker run \
--rm -p <port_number>:<port_number> \
-e GOOGLE_APPLICATION_CREDENTIALS=/app/credentials/key.json \
-v <path_to_service_account_key>:/app/credentials/key.json \
--env-file .env \
--shm-size=15g \
--gpus all \
gcr.io/skylight-proto-1/landsat-vessel-detection:v0.0.1
```

## Making Requests to the API

Once the API server is running, you can send requests to the `/detections` endpoint to perform vessel detection. The API accepts several types of payloads, depending on the source of your Landsat scene:

1. Fetch Landsat Scene from AWS S3 Bucket:

Provide the `scene_id` to retrieve the Landsat scene directly from the AWS S3 bucket.

Payload Example:
```json
{
"scene_id": scene_id
}
```

2. Fetch Zipped Landsat Scene from Local or GCS Storage:

Provide the `scene_zip_path` to specify the path to a zipped Landsat scene stored locally or in a GCS bucket (for the Skylight team).

Payload Example:
```json
{
"scene_zip_path": "gs://your_bucket/your_scene.zip"
}
```

3. Fetch Unzipped Landsat Scene from Local or GCS Storage:

Provide the `image_files` dictionary to specify paths to individual band files of the unzipped Landsat scene, either locally or in a GCS bucket.

Payload Example:
```json
{
"image_files": {
"B2": "path/to/B2.TIF",
"B3": "path/to/B3.TIF",
"B4": "path/to/B4.TIF",
"B5": "path/to/B5.TIF",
"B6": "path/to/B6.TIF",
"B7": "path/to/B7.TIF",
"B8": "path/to/B8.TIF"
}
}
```

You can send requests using `curl` or the Python `requests` library (a Python sketch follows the `curl` examples below).

Example with `curl`:

```bash
curl -X POST http://${LANDSAT_HOST}:${LANDSAT_PORT}/detections -H "Content-Type: application/json" -d '{"scene_zip_path": "gs://test-bucket-rslearn/Landsat/LC08_L1TP_162042_20241103_20241103_02_RT.zip"}'
```

The API will respond with the vessel detection results in JSON format.

Note that the above example uses a test zip file, which is a cropped Landsat scene rather than a full scene. To run the API on a full scene, use the command below:

```bash
curl -X POST http://${LANDSAT_HOST}:${LANDSAT_PORT}/detections -H "Content-Type: application/json" -d '{"scene_id": "LC09_L1GT_106084_20241002_20241002_02_T2"}'
```
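
The same requests can also be sent with the Python `requests` library; here is a minimal sketch mirroring the `scene_zip_path` example above (the timeout value is an arbitrary choice):

```python
import requests

# Assumes the API is reachable at the host/port configured in your .env file.
url = "http://localhost:5555/detections"
payload = {
    "scene_zip_path": "gs://test-bucket-rslearn/Landsat/LC08_L1TP_162042_20241103_20241103_02_RT.zip"
}

response = requests.post(url, json=payload, timeout=600)
response.raise_for_status()
print(response.json())  # vessel detection results in JSON format
```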


## Auto Documentation

The API has Swagger UI and ReDoc enabled for auto-generated documentation.

You can access the Swagger UI at `http://<your_address>:<port_number>/docs` and ReDoc at `http://<your_address>:<port_number>/redoc` for detailed documentation of the API. If you are running the API on a VM, `<your_address>` should be the public IP address of the VM, and you also need to open `<port_number>` to the public.
59 changes: 59 additions & 0 deletions docs/landsat_vessels/model_summary.md
@@ -0,0 +1,59 @@
# Model Summary

## Overview
- **Model Name**: Landsat Vessel Detection
- **Model Version**: `v0.0.1`
- **Tag**: `landsat_vessels_v0.0.1`
- **Last Updated**: `2024-11-25`

---

## Offline Evaluation Metrics

Note: The evaluation metrics are reported for the two-stage model (detector + classifier), without any filters.

| Date | Version | Precision | Recall | F1-Score |
|------------|---------|-----------|--------|----------|
| 2024-11-15 | 0.0.1 | 0.72 | 0.53 | 0.61 |
| YYYY-MM-DD | TBD | TBD | TBD | TBD |

## Offline Scenario Checks

| Mode | Status |
|---------------------------------------|---------|
| **True Positive** - Dense vessels | ✅ Pass |
| **True Positive** - Sparse vessels | ✅ Pass |
| **False Positive** - Icebergs | ✅ Pass |
| **False Positive** - Clouds | ✅ Pass |
| **False Positive** - Whitecaps | ✅ Pass |

---

## Model Configurations
- **Detector**: `rslearn_projects/data/landsat_vessels/config.yaml`
- **Classifier**: `rslearn_projects/landsat/recheck_landsat_labels/phase123_config.yaml`
- **Filters**: marine infrastructure filter (`rslearn_projects/rslp/utils/filter.py`)

---

## Known Issues & Next Steps

1. **Missed Small Vessels**: Recall is limited because the model misses many very small vessels.

Below is an example of missed vessels; many of them are only visible in the B8 band (15 m resolution) and not in the RGB image (30 m resolution):

<div style="text-align: center;">
<img src="images/missed_vessels_B8.png" alt="B8" width="40%" style="display: inline-block;"/>
<img src="images/missed_vessels_RGB.png" alt="RGB" width="40%" style="display: inline-block;"/>
</div>

*Possible solutions: (1) Add positive samples from the detector training set into the classifier training set.*

2. **False Positives**: Although the model is now more robust to false positives, it still occasionally misclassifies objects such as whitecaps and islands as vessels.

*Possible solutions: (1) Add more negative samples to the classifier training set. (2) Add a high-resolution distance-to-coastline filter to remove detections that are too close to the coastline.*

---

## Changelog
- **`v0.0.1`**: Initial model release. Offline evaluation metrics reported.