Merge pull request #49 from podaac/release/0.2.0
* /version 0.2.0a0

* feature: convertToPNG --> sendToHarmony (#21)

* update state_machine, sendToHarmony

* --changelog Issue-15

* moved to unreleased, changelog

* /version 0.2.0a1

* Update README.md (#22)

* /version 0.2.0a2

* Use -j to remove top-level directory in zip artifact (#25)

* /version 0.2.0a3

* Issue#16: Change apply opera treatment choice and lambda to be specific to HLS (#24)

* update for HLS -- lambda and state machine

* py lint error

* update file name

* /version 0.2.0a4

* /version 0.2.0a5

* Feature/issue 7 - Remove wait for GITC response (#30)

* fix typo in readme

* change task token to uuid in send to gitc

* remove wait for task token & gitc response handler from state machine

* change gitc response handler to invoke save cma message

* fix typo

* fix context parameter

* convert uuid to string

* fix json typo

* remove uuid lib to use stdlib uuid

* change identifier to image set name instead of uuid

* remove uuid import

* fix json formatting of cma invoke

* add granule concept ID to identifier

* update image set name with granule conceptid

* remove whitespace

* move get umm json to utils and save gitc outgoing cnm

* remove unused import

* try setting cmr query env based on stage

* remove task token from tests

* fix granule index

* change how image set name defined

* add region to ssm client in utils

* add ssm parameters to gitc lambdas

* change region reference

* update how granule name referenced in send to gitc

* remove unused cmr var

* fix case

* fix cnm parsing

* reformat save cnm to separate step

* update tf vars for save cnm

* fix typo in module definition

* fix cnm in cma

* fix collection reference in save cnm

* add debugging log statements

* fix input

* update save cnm input

* debugging

* debugging

* linting

* debugging

* debugging

* linting

* fix state machine

* update state machine

* reorg state machine

* missing comma

* move save cnm into map

* fix state machine transitions

* fix sm

* fix boolean

* debugging cnm input

* fix input

* change input

* debug input

* add cnm as config parameter

* lint

* fix cnm payload

* remove debugging statements

* add prefix to cnm path and fix gitc response

* increase handle gitc response timeout

* update changelog

* add original shortname as cnm prefix

* revert shortname and change collection ref to save cnm & cnm-r in same location

* remove save cma lambda no longer used

* remove EDL env params from sendtogitc

* change parsing of granule concept id

* /version 0.2.0a6

* Update README.md

* /version 0.2.0a7

* Support datetimes without microseconds (#37)

* Support datetimes without microseconds

* changelog

* Update generate_image_metadata.py (#39)

add docstring

* /version 0.2.0a8

* /version 0.2.0a9

* Issues/23: Enable Harmony write output to user bucket (#46)

* add s3 destination to harmony job

* syntax

* job success

* harmony block update

* --variable

* temp test in uat

* manual to uat

* test s1-v1

* lint

* deploy --sit

* Update to place harmony results in our bucket

* Update to place harmony results in our bucket

* Update to place harmony results in our bucket

* Update to place harmony results in our bucket

* Update to place harmony results in our bucket

* Update to place harmony results in our bucket

---------

Co-authored-by: Josh <[email protected]>

* /version 0.2.0a10

* fix deploy failure

* /version 0.2.0a11

* Add new message attribute response_topic_arn to all outgoing messages to GIBS (#47)

* /version 0.2.0a12

* prep for 0.2.0

* /version 0.2.0a13

* Fix build error

* /version 0.2.0a14

* /release 0.2.0

* Update graph

* /version 0.2.0rc1

* Turn prevent_destroy back on for gibs response topic

* /version 0.2.0rc2

* Improve error message when missing config

* /version 0.2.0rc3

* Improve error message when missing config

* /version 0.2.0rc4

* UAT is in east-1 region

* /version 0.2.0rc5

* fix gibs queue name

* /version 0.2.0rc6

* calculate md5sum for files from harmony

* calculate md5sum for files from harmony

* /version 0.2.0rc7

* bump chunk size to 100 MB

* /version 0.2.0rc8

* Update test message for UAT, add checksum to generated tif, add logging, fix type error

* Lint

* Lint

* /version 0.2.0rc9

* Change named variables for audit path from pobit to bignbit

* /version 0.2.0rc10

* Update docs

---------

Co-authored-by: frankinspace <[email protected]>
Co-authored-by: Josh Haile <[email protected]>
Co-authored-by: Victoria McDonald <[email protected]>
Co-authored-by: Jake Herrmann <[email protected]>
Co-authored-by: James Wood <[email protected]>
Co-authored-by: jamesfwood <[email protected]>
Co-authored-by: Josh <[email protected]>
8 people authored Feb 4, 2025
2 parents 1465b11 + b2d163c commit 43d56cb
Showing 47 changed files with 2,888 additions and 2,252 deletions.
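Three of the changes in this release are easier to follow with short sketches. Everything below is illustrative only; the helper names, payloads, and ARNs are placeholders, not the actual bignbit implementation.

"Support datetimes without microseconds" (#37): the fix amounts to tolerant timestamp parsing, trying the fractional-seconds format first and falling back. A minimal sketch, assuming UMM-G style UTC timestamps:

```python
from datetime import datetime

def parse_granule_datetime(value: str) -> datetime:
    """Parse a timestamp with or without fractional seconds (illustrative helper)."""
    for fmt in ("%Y-%m-%dT%H:%M:%S.%fZ", "%Y-%m-%dT%H:%M:%SZ"):
        try:
            return datetime.strptime(value, fmt)
        except ValueError:
            continue
    raise ValueError(f"Unrecognized datetime: {value!r}")
```

"Add new message attribute response_topic_arn to all outgoing messages to GIBS" (#47): carrying the response topic inside the message lets GIBS discover where to publish its CNM-R reply. A hedged sketch of attaching such an attribute with boto3 (both ARNs and the CNM payload are placeholders):

```python
import json

import boto3

sns = boto3.client('sns')
cnm_message = {"identifier": "my-image-set", "provider": "POCLOUD"}  # truncated CNM payload, illustrative

sns.publish(
    TopicArn='arn:aws:sns:us-west-2:111122223333:gitc-input-topic',  # placeholder GIBS input topic
    Message=json.dumps(cnm_message),
    MessageAttributes={
        'response_topic_arn': {  # tells GIBS where to send the CNM-R response
            'DataType': 'String',
            'StringValue': 'arn:aws:sns:us-west-2:111122223333:gibs-response-topic'  # placeholder
        }
    }
)
```

"calculate md5sum for files from harmony" and "bump chunk size to 100 MB": together these suggest a streamed digest, so large files from Harmony never have to fit in memory at once. A sketch under that assumption:

```python
import hashlib
import pathlib

CHUNK_SIZE = 100 * 1024 * 1024  # 100 MB, matching the "bump chunk size" commit

def md5sum(filepath: pathlib.Path, chunk_size: int = CHUNK_SIZE) -> str:
    """Compute an MD5 hex digest by reading the file in fixed-size chunks."""
    digest = hashlib.md5()
    with open(filepath, 'rb') as infile:
        for chunk in iter(lambda: infile.read(chunk_size), b''):
            digest.update(chunk)
    return digest.hexdigest()
```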
5 changes: 4 additions & 1 deletion .github/workflows/cicd-pipeline.yml
@@ -137,9 +137,12 @@ jobs:
- name: Install conda
uses: conda-incubator/setup-miniconda@v3
with:
channels: conda-forge
activate-environment: bignbit
environment-file: conda-environment.yaml
auto-activate-base: false
conda-remove-defaults: "true"
miniforge-version: latest
- name: Install package
run: poetry install
- name: Lint
@@ -159,7 +162,7 @@ jobs:
terraform init -backend=false -upgrade
terraform validate -no-color
- name: SonarCloud Scan
uses: sonarsource/sonarcloud-github-action@master
uses: sonarsource/sonarqube-scan-action@v4
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
8 changes: 4 additions & 4 deletions .gitignore
@@ -137,11 +137,11 @@ dmypy.json
# Generated samples
tests/2021*
/terraform/big-lambda.zip
/terraform_deploy/tfplan
/examples/cumulus-tf/tfplan
.terraform.d
.terraform
/.bash_history
/terraform_deploy/cumulus-message-adapter.zip
/terraform_deploy/.terraform.lock.hcl
/examples/cumulus-tf/cumulus-message-adapter.zip
bignbit-lambda.zip
terraform_deploy/bignbit_module/
/examples/cumulus-tf/bignbit_module/
!/examples/cumulus-tf/.terraform.lock.hcl
35 changes: 28 additions & 7 deletions CHANGELOG.md
@@ -7,31 +7,52 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]
### Added
### Changed
### Deprecated
### Removed
### Fixed
### Security

## [0.2.0]
### Added
- [issues/40](https://github.com/podaac/bignbit/issues/40): New message attribute `response_topic_arn` will be added to every message sent to GIBS
- [issues/9](https://github.com/podaac/bignbit/issues/9): Added some documentation for installing as cumulus module
### Changed
- [issues/15](https://github.com/podaac/bignbit/issues/15): Change 'convertToPNG' choice to a generic send to harmony choice
- [issues/16](https://github.com/podaac/bignbit/issues/16): Change apply opera treatment choice and lambda to be specific to HLS
- [issues/23](https://github.com/podaac/bignbit/issues/23): Harmony requests now include `destinationUrl` parameter to place output
directly in s3 bucket instead of requiring data to be copied.
- [issues/41](https://github.com/podaac/bignbit/issues/41): Module no longer depends on deprecated hashicorp/template provider
- [issues/42](https://github.com/podaac/bignbit/issues/42): Terraform version upgraded to v1.5.3
- Default values for `config_dir` and `bignbit_audit_path` have changed to `big-config` and `bignbit-cnm-output` respectively
### Deprecated
### Removed
- [issues/7](https://github.com/podaac/bignbit/issues/7): Remove the wait for GITC response
- [issues/23](https://github.com/podaac/bignbit/issues/23): Removed `lambda_role` module variable. The lambda role is now created as part of the module, `permissions_boundary_arn` is required instead.
### Fixed
- [issues/36](https://github.com/podaac/bignbit/issues/36): Support datetimes without microseconds
### Security


## [0.1.2]
### Added
### Changed
- BIG terraform failing in SWOT venues due to long function(lambda) names
### Deprecated
### Removed
### Fixed
### Security

## [0.1.1]
### Added
- [issues/2](https://github.com/podaac/bignbit/issues/2): Create github action pipeline to build artifacts
- [issues/3](https://github.com/podaac/bignbit/issues/3): Update terraform mock deployment of cumulus module to services accounts
- Initial port from JPL GHE to public github.com

### Changed
- [issues/10](https://github.com/podaac/bignbit/issues/10): Move combined big and pobit state machine into terraform module
- [issues/6](https://github.com/podaac/bignbit/issues/6): BIG terraform failing in SWOT venues due to long lambda name


[Unreleased]: https://github.com/podaac/bignbit/compare/0.1.1...HEAD
[0.1.1]: https://github.com/podaac/bignbit/releases/tag/0.1.1
### Deprecated
### Removed
### Fixed
### Security
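The `destinationUrl` entry above (issues/23) is the notable behavior change in this diff: Harmony now writes its output directly to a bucket the pipeline controls instead of bignbit copying results out of Harmony's staging area afterwards, which is presumably why `copy_harmony_output_to_s3.py` is deleted further down. A hedged sketch of what such a request looks like with harmony-py, assuming its `destination_url` parameter; the collection, granule, and bucket are placeholders:

```python
from harmony import Client, Collection, Request

harmony_client = Client()  # credentials are read from the environment or .netrc

request = Request(
    collection=Collection(id='C1234567890-POCLOUD'),           # placeholder concept ID
    granule_id=['G1234567890-POCLOUD'],                        # placeholder granule
    destination_url='s3://my-staging-bucket/harmony-output/',  # results land here
)

job_id = harmony_client.submit(request)
harmony_client.wait_for_processing(job_id)
```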
182 changes: 180 additions & 2 deletions README.md

Large diffs are not rendered by default.

bignbit/apply_opera_hls_treatment.py
@@ -2,6 +2,7 @@
"""
Transforms each image in the input using specific processing required to produce an image for display in GITC
"""
import datetime
import logging
import os
import pathlib
@@ -16,7 +17,7 @@

from bignbit import utils

CUMULUS_LOGGER = CumulusLogger('apply_opera_treatment')
CUMULUS_LOGGER = CumulusLogger('apply_opera_hls_treatment')


def load_mgrs_gibs_intersection():
@@ -51,17 +52,20 @@ def process(self) -> List[Dict]:
A list of CMA file dictionaries pointing to the transformed image(s)
"""
cma_file_list = self.input['big']
staging_bucket = self.config.get('bignbit_staging_bucket')

mgrs_grid_code = utils.extract_mgrs_grid_code(self.input['granule_umm_json'])
file_metadata_list = transform_images(cma_file_list, pathlib.Path(f"{self.path}"), mgrs_grid_code)
file_metadata_list = transform_images(cma_file_list, pathlib.Path(f"{self.path}"), mgrs_grid_code,
staging_bucket)
del self.input['big']
self.input['big'] = file_metadata_list
return self.input


def transform_images(cma_file_list: List[Dict], temp_dir: pathlib.Path, mgrs_grid_code: str) -> List[Dict]:
def transform_images(cma_file_list: List[Dict], temp_dir: pathlib.Path, mgrs_grid_code: str,
staging_bucket: str) -> List[Dict]:
"""
Applies special OPERA processing to each input image. Each input image will result in multiple output transformed
Applies special OPERA HLS processing to each input image. Each input image will result in multiple output transformed
images.
Parameters
@@ -72,6 +76,8 @@ def transform_images(cma_file_list: List[Dict], temp_dir: pathlib.Path, mgrs_gri
Temporary working directory on local disk
mgrs_grid_code
MGRS grid code for the current granule being processed
staging_bucket
Staging bucket to which transformed files should be written
Returns
-------
@@ -91,12 +97,12 @@ def transform_images(cma_file_list: List[Dict], temp_dir: pathlib.Path, mgrs_gri
# Reproject and resample image to sub-tiles
transformed_images_dirpath = temp_dir.joinpath(source_image_local_filepath.stem)
transformed_images_dirpath.mkdir(parents=True)
transformed_images_filepaths = the_opera_treatment(source_image_local_filepath, transformed_images_dirpath,
mgrs_grid_code)
transformed_images_filepaths = the_opera_hls_treatment(source_image_local_filepath, transformed_images_dirpath,
mgrs_grid_code)
CUMULUS_LOGGER.info(f'Created new images: {[str(t) for t in transformed_images_filepaths]}')

# Create new file metadata for each new image
file_metadata_dicts = create_file_metadata(cma_file_meta, transformed_images_filepaths)
file_metadata_dicts = create_file_metadata(transformed_images_filepaths, staging_bucket)
file_metadata_results.extend(file_metadata_dicts)

# Upload new images to s3
@@ -138,8 +144,8 @@ def get_file(bucket: str, key: str, local_filepath: pathlib.Path) -> pathlib.Pat
return local_filepath


def the_opera_treatment(source_image_filepath: pathlib.Path, working_dirpath: pathlib.Path,
mgrs_grid_code: str) -> List[pathlib.Path]:
def the_opera_hls_treatment(source_image_filepath: pathlib.Path, working_dirpath: pathlib.Path,
mgrs_grid_code: str) -> List[pathlib.Path]:
"""
What is the OPERA treatment? Well, it is special.
@@ -205,7 +211,7 @@ def the_opera_treatment(source_image_filepath: pathlib.Path, working_dirpath: pa
return result_image_filepaths


def create_file_metadata(original_cma_file_meta: dict, transformed_images_filepaths: List[pathlib.Path]) -> List[Dict]:
def create_file_metadata(transformed_images_filepaths: List[pathlib.Path], staging_bucket: str) -> List[Dict]:
"""
Generate a new CMA file metadata dictionary for each transformed image using the original CMA metadata as a
template.
@@ -215,10 +221,10 @@ def create_file_metadata(original_cma_file_meta: dict, transformed_images_filepa
Parameters
----------
original_cma_file_meta
CMA file metadata dict of the original source image
transformed_images_filepaths
Local filepaths to each output transformed image
staging_bucket
Staging bucket to which transformed files should be written
Returns
-------
Expand All @@ -227,14 +233,16 @@ def create_file_metadata(original_cma_file_meta: dict, transformed_images_filepa
"""
new_cma_file_meta_list = []
for transformed_image in transformed_images_filepaths:
new_cma_file_meta = original_cma_file_meta.copy()
new_cma_file_meta["fileName"] = transformed_image.name
# Takes the 'key' from the original and replace just the last part with the new filename
new_cma_file_meta["key"] = str(pathlib.Path(*pathlib.Path(original_cma_file_meta["key"]).parts[0:-1]).joinpath(
transformed_image.name))
new_cma_file_meta["local_filepath"] = str(transformed_image.resolve())

new_cma_file_meta_list.append(new_cma_file_meta)
file_dict = {
"fileName": transformed_image.name,
"bucket": staging_bucket,
"key": f'opera_hls_processing/{datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%d")}/{transformed_image.name}',
"local_filepath": str(transformed_image.resolve()),
"checksum": utils.sha512sum(transformed_image),
"checksumType": "SHA512"
}

new_cma_file_meta_list.append(file_dict)

return new_cma_file_meta_list

13 changes: 10 additions & 3 deletions bignbit/build_image_sets.py
@@ -6,7 +6,7 @@
from cumulus_logger import CumulusLogger
from cumulus_process import Process

from bignbit.image_set import from_big_output, IncompleteImageSet
from bignbit.image_set import from_big_output, IncompleteImageSet, ImageSet

CUMULUS_LOGGER = CumulusLogger('build_image_sets')

@@ -52,11 +52,18 @@ def process(self):
del response_payload['big']
response_payload['pobit'] = []

for image_set in image_sets:
for big_image_set in image_sets:
pobit_image_set = ImageSet(
name=big_image_set.name + '_' + self.input['granules'][0]['cmrConceptId'],
image=big_image_set.image,
image_metadata=big_image_set.image_metadata,
world_file=big_image_set.world_file)

response_payload['pobit'].append({
'image_set': image_set._asdict(),
'image_set': pobit_image_set._asdict(),
'cmr_provider': self.config.get('cmr_provider'),
'collection_name': self.config.get('collection').get('name'),
'granule_ur': self.input['granules'][0]['granuleId']
})

return response_payload
133 changes: 0 additions & 133 deletions bignbit/copy_harmony_output_to_s3.py

This file was deleted.

(Diffs for the remaining changed files are not rendered.)
