Skip to content

Commit

Permalink
Merge pull request #88 from aodn/ZarrImprovements
Browse files Browse the repository at this point in the history
Zarr improvements
  • Loading branch information
lbesnard authored Oct 23, 2024
2 parents 74fb62c + 76648c7 commit bc846ab
Show file tree
Hide file tree
Showing 26 changed files with 1,615 additions and 1,316 deletions.
12 changes: 9 additions & 3 deletions .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,13 @@ jobs:
virtualenvs-in-project: true
installer-parallel: true

- name: Run pre-commit
- name: Clear Poetry Cache
run: |
poetry cache clear PyPI --all
pre-commit run --all-files
poetry cache clear pypi --all
- name: Run pre-commit
uses: pre-commit/[email protected]
with:
extra_args: --all-files
hooks: |
poetry-lock
9 changes: 5 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ default_install_hook_types:

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
rev: v5.0.0
hooks:
- id: check-yaml
files: \.ya?ml$
Expand All @@ -22,7 +22,7 @@ repos:
exclude: ^aodn_cloud_optimised/config/dataset/dataset_template.json$

- repo: https://github.com/python-jsonschema/check-jsonschema
rev: 0.29.0
rev: 0.29.4
hooks:
- id: check-jsonschema
files: ^aodn_cloud_optimised/config/dataset/.*\.json$
Expand All @@ -36,7 +36,7 @@ repos:
#exclude: ^aodn_cloud_optimised/bin/create_aws_registry_dataset.py$ # issue with black!

- repo: https://github.com/python-poetry/poetry
rev: '1.8.3'
rev: '1.8.4'
hooks:
- id: poetry-check
args: ["--lock", "--quiet"] # Add --quiet to run poetry-check quietly
Expand All @@ -45,7 +45,8 @@ repos:
args: ["-f", "requirements.txt", "--quiet"]
verbose: false
- id: poetry-lock
args: ["--no-update"]
#args: ["--no-update"]
args: ["--check"]
verbose: true
- id: poetry-install
args: ["--quiet"]
4 changes: 3 additions & 1 deletion aodn_cloud_optimised/bin/generic_cloud_optimised_creation.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,9 @@ def main():

# Apply filters
for filter_str in args.filters:
nc_obj_ls = [s for s in nc_obj_ls if filter_str in s]
nc_obj_ls = list(
dict.fromkeys([s for s in nc_obj_ls if filter_str in s])
) # make the list unique!

if not nc_obj_ls:
raise ValueError("No files found matching the specified criteria.")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,25 @@ def main():
command = [
"generic_cloud_optimised_creation",
"--paths",
"IMOS/SRS/SST/ghrsst/L3SM-1dS/dn/2018", # OK
"IMOS/SRS/SST/ghrsst/L3SM-1dS/dn/2019", # OK
"IMOS/SRS/SST/ghrsst/L3SM-1dS/dn/2020", # OK
"IMOS/SRS/SST/ghrsst/L3SM-1dS/dn/2021", # 2021031* required a t3.large scheduler, otherwise it would fail!!
"IMOS/SRS/SST/ghrsst/L3SM-1dS/dn/2022", # OK
"IMOS/SRS/SST/ghrsst/L3SM-1dS/dn/2023", # OK
"IMOS/SRS/SST/ghrsst/L3SM-1dS/dn/2024", # OK
# "IMOS/SRS/SST/ghrsst/L3SM-1dS/dn/",
# "IMOS/SRS/SST/ghrsst/L3SM-1dS/dn/2018", # OK
# "IMOS/SRS/SST/ghrsst/L3SM-1dS/dn/2019", # OK
"IMOS/SRS/SST/ghrsst/L3SM-1dS/dn/2020",
"IMOS/SRS/SST/ghrsst/L3SM-1dS/dn/2021", # 2021031* 202102* required a t3.large scheduler, otherwise it would fail!!
"IMOS/SRS/SST/ghrsst/L3SM-1dS/dn/2022",
"IMOS/SRS/SST/ghrsst/L3SM-1dS/dn/2023",
"IMOS/SRS/SST/ghrsst/L3SM-1dS/dn/2024",
# "--filters",
# "FILTER_STRING_1",
# "2018121",
# "2019020", # watch out for 20190205 when sst got emptied because of map_blocks?
# "202306",
# "2021031",
# "2022021", # fails if the machine running the cluster (not the scheduler) does not have enough memory available
# "202103", # required a t3.xlarge for 20 days; failed with 30 days
# "FILTER_STRING_1",
"--dataset-config",
"satellite_ghrsst_l3s_1day_daynighttime_multi_sensor_southernocean.json",
"--clear-existing-data",
# "--clear-existing-data",
"--cluster-mode",
"remote",
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,39 +6,43 @@ def main():
command = [
"generic_cloud_optimised_creation",
"--paths",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/1992",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/1993",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/1994",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/1995",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/1996",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/1997",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/1998",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/1999",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2000",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2001",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2002",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2003",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2004",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2005",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2006",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2007",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2008",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2009",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2010",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2011",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2012",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2013",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2014",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2015",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2016",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2017",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2018",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2019",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2020",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2021",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2022",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2023",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn/2024" "--dataset-config",
"IMOS/SRS/SST/ghrsst/L3S-1d/dn",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/1992",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/1993",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/1994",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/1995",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/1996",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/1997",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/1998",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/1999",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2000",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2001",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2002",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2003",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2004",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2005",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2006",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2007",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2008",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2009",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2010",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2011",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2012",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2013",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2014",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2015",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2016",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2017",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2018",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2019",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2020",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2021",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2022",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2023",
# "IMOS/SRS/SST/ghrsst/L3S-1d/dn/2024",
# "--filters",
# "200005",
"--dataset-config",
"satellite_ghrsst_l3s_1day_daynighttime_single_sensor_australia.json",
"--clear-existing-data",
"--cluster-mode",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,45 +6,47 @@ def main():
command = [
"generic_cloud_optimised_creation",
"--paths",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/1992", # OK
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/1993", # OK
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/1994", # OK
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/1995", # OK
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/1996", # OK
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/1997", # OK
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/1998", # OK
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/1999",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2000",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2001",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2002",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2003",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2004",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2005",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2006",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2007",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2008",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2009",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2010",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2011",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2012",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2013",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2014",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2015",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2016",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2017",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2018",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2019",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2020",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2021",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2022",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2023",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2024",
"IMOS/SRS/SST/ghrsst/L3S-1dS/dn/",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/1992",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/1993",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/1994",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/1995",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/1996",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/1997",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/1998",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/1999",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2000",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2001",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2002",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2003",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2004",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2005",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2006",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2007",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2008",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2009",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2010",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2011",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2012",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2013",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2014",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2015",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2016",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2017",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2018",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2019",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2020",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2021",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2022",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2023",
# "IMOS/SRS/SST/ghrsst/L3S-1dS/dn/2024",
# "--filters",
# "200505",
# "FILTER_STRING_1",
# "FILTER_STRING_1",
"--dataset-config",
"satellite_ghrsst_l3s_1day_daynighttime_single_sensor_southernocean.json",
# "--clear-existing-data",
"--clear-existing-data",
"--cluster-mode",
"remote",
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ def main():
command = [
"generic_cloud_optimised_creation",
"--paths",
"IMOS/SRS/SST/ghrsst/L4/GAMSSA/",
"IMOS/SRS/SST/ghrsst/L4/GAMSSA",
# "--filters",
# "FILTER_STRING_1",
"--dataset-config",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,25 +6,26 @@ def main():
command = [
"generic_cloud_optimised_creation",
"--paths",
"IMOS/SRS/SST/ghrsst/L4/RAMSSA/2006",
"IMOS/SRS/SST/ghrsst/L4/RAMSSA/2007",
"IMOS/SRS/SST/ghrsst/L4/RAMSSA/2008",
"IMOS/SRS/SST/ghrsst/L4/RAMSSA/2009",
"IMOS/SRS/SST/ghrsst/L4/RAMSSA/2010",
"IMOS/SRS/SST/ghrsst/L4/RAMSSA/2011",
"IMOS/SRS/SST/ghrsst/L4/RAMSSA/2012",
"IMOS/SRS/SST/ghrsst/L4/RAMSSA/2013",
"IMOS/SRS/SST/ghrsst/L4/RAMSSA/2014",
"IMOS/SRS/SST/ghrsst/L4/RAMSSA/2015",
"IMOS/SRS/SST/ghrsst/L4/RAMSSA/2016",
"IMOS/SRS/SST/ghrsst/L4/RAMSSA/2017",
"IMOS/SRS/SST/ghrsst/L4/RAMSSA/2018",
"IMOS/SRS/SST/ghrsst/L4/RAMSSA/2019",
"IMOS/SRS/SST/ghrsst/L4/RAMSSA/2020",
"IMOS/SRS/SST/ghrsst/L4/RAMSSA/2021",
"IMOS/SRS/SST/ghrsst/L4/RAMSSA/2022",
"IMOS/SRS/SST/ghrsst/L4/RAMSSA/2023",
"IMOS/SRS/SST/ghrsst/L4/RAMSSA/2024",
"IMOS/SRS/SST/ghrsst/L4/RAMSSA/",
# "IMOS/SRS/SST/ghrsst/L4/RAMSSA/2006",
# "IMOS/SRS/SST/ghrsst/L4/RAMSSA/2007",
# "IMOS/SRS/SST/ghrsst/L4/RAMSSA/2008",
# "IMOS/SRS/SST/ghrsst/L4/RAMSSA/2009",
# "IMOS/SRS/SST/ghrsst/L4/RAMSSA/2010",
# "IMOS/SRS/SST/ghrsst/L4/RAMSSA/2011",
# "IMOS/SRS/SST/ghrsst/L4/RAMSSA/2012",
# "IMOS/SRS/SST/ghrsst/L4/RAMSSA/2013",
# "IMOS/SRS/SST/ghrsst/L4/RAMSSA/2014",
# "IMOS/SRS/SST/ghrsst/L4/RAMSSA/2015",
# "IMOS/SRS/SST/ghrsst/L4/RAMSSA/2016",
# "IMOS/SRS/SST/ghrsst/L4/RAMSSA/2017",
# "IMOS/SRS/SST/ghrsst/L4/RAMSSA/2018",
# "IMOS/SRS/SST/ghrsst/L4/RAMSSA/2019",
# "IMOS/SRS/SST/ghrsst/L4/RAMSSA/2020",
# "IMOS/SRS/SST/ghrsst/L4/RAMSSA/2021",
# "IMOS/SRS/SST/ghrsst/L4/RAMSSA/2022",
# "IMOS/SRS/SST/ghrsst/L4/RAMSSA/2023",
# "IMOS/SRS/SST/ghrsst/L4/RAMSSA/2024",
# "--filters",
# "FILTER_STRING_1",
# "FILTER_STRING_1",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@
"cloud_optimised_format": "zarr",
"cluster_options": {
"n_workers": [
10,
40
1,
120
],
"scheduler_vm_types": "t3.large",
"scheduler_vm_types": "t3.2xlarge",
"worker_vm_types": "t3.xlarge",
"allow_ingress_from": "me",
"compute_purchase_option": "spot_with_fallback",
"worker_options": {
"nthreads": 8,
"memory_limit": "16GB"
"memory_limit": "32GB"
}
},
"batch_size": 80,
"batch_size": 60,
"metadata_uuid": "",
"dimensions": {
"time": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,21 @@
"logger_name": "satellite_ghrsst_l3s_1day_daynighttime_single_sensor_australia",
"metadata_uuid": "a136eee7-a990-4c06-a4f6-915657a2464e",
"parent_config": "satellite_ghrsst_main.json",
"cluster_options": {
"n_workers": [
1,
80
],
"scheduler_vm_types": "t3.large",
"worker_vm_types": "t3.xlarge",
"allow_ingress_from": "me",
"compute_purchase_option": "spot_with_fallback",
"worker_options": {
"nthreads": 8,
"memory_limit": "16GB"
}
},
"batch_size": 30,
"aws_opendata_registry": {
"Name": "Satellite - Sea surface temperature - Level 3 - Single sensor - 1 day - Day and night time",
"Description": "This is a single-sensor multi-satellite SSTfnd product for a single 24 hour period, derived using observations from AVHRR instruments on all available NOAA polar-orbiting satellites. It is provided as a 0.02deg x 0.02deg cylindrical equidistant projected map over the region 70\u00b0E to 170\u00b0W, 20\u00b0N to 70\u00b0S. Each grid cell contains the 24 hour average of all the highest available quality SSTs that overlap with that cell, weighted by the area of overlap. The diagram at https://help.aodn.org.au/satellite-data-product-information/ indicates where this product fits within the GHRSST suite of NOAA/AVHRR products. The SSTfnd is derived by adding a constant 0.17 degC to the SSTskin observations after rejecting observations with low surface wind speeds (<6m/s by day and <2m/s at night) (see http://www.bom.gov.au/amoj/docs/2011/beggs.pdf). Matchups with buoy SSTfnd observations indicate typical 2014 biases of < 0.01 degC and standard deviations of 0.6 degC. Refer to the IMOS SST products web page at http://imos.org.au/sstproducts.html and Beggs et al. (2013) at http://imos.org.au/sstdata_references.html for further information.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,16 +4,16 @@
"cloud_optimised_format": "zarr",
"cluster_options": {
"n_workers": [
5,
60
1,
80
],
"scheduler_vm_types": "t3.medium",
"worker_vm_types": "t3.large",
"scheduler_vm_types": "t3.large",
"worker_vm_types": "t3.xlarge",
"allow_ingress_from": "me",
"compute_purchase_option": "spot_with_fallback",
"worker_options": {
"nthreads": 8,
"memory_limit": "6GB"
"memory_limit": "16GB"
}
},
"batch_size": 30,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
1,
30
],
"scheduler_vm_types": "t3.medium",
"scheduler_vm_types": "t3.large",
"worker_vm_types": "t3.large",
"allow_ingress_from": "me",
"compute_purchase_option": "spot_with_fallback",
Expand Down
Loading

0 comments on commit bc846ab

Please sign in to comment.