From 56d2d9a6fd3db3be8be0b61c679a78ef65d9726a Mon Sep 17 00:00:00 2001 From: Chris Holden Date: Wed, 8 Jan 2025 17:12:37 -0500 Subject: [PATCH] fix: Update metadata to include HLS granule ID and links to Fmask layer (#48) * Include Input_HLS_GranuleUR additional attribute * Fix some typos found by typos_lsp * Include links to Fmask from HLS granule in OnlineAccessURLs * format & lint * Bump lxml for indenting and numpy for easier Mac ARM installs * Explain pin for numpy * self review * fix typo * Revert version changes & figure out how to indent without py3.9 or lxml>=4.5 --- hls_vi/generate_metadata.py | 61 +++++++++++++++++-- hls_vi/schema/Granule.xsd | 3 +- hls_vi/schema/MetadataCommon.xsd | 2 +- ...-VI.L30.T06WVS.2024120T211159.v2.0.cmr.xml | 16 ++++- ...-VI.S30.T13RCN.2024128T173909.v2.0.cmr.xml | 16 ++++- 5 files changed, 89 insertions(+), 9 deletions(-) diff --git a/hls_vi/generate_metadata.py b/hls_vi/generate_metadata.py index 1bd6681..e5bd61b 100644 --- a/hls_vi/generate_metadata.py +++ b/hls_vi/generate_metadata.py @@ -3,6 +3,7 @@ import os import re import sys +from xml.dom import minidom from datetime import datetime, timezone from pathlib import Path @@ -92,7 +93,13 @@ def generate_metadata(input_dir: Path, output_dir: Path) -> None: processing_time = tags["HLS_VI_PROCESSING_TIME"] granule_ur = tree.find("GranuleUR") + input_granule_ur = granule_ur.text granule_ur.text = granule_ur.text.replace("HLS", "HLS-VI") + set_additional_attribute( + tree.find("AdditionalAttributes"), + "Input_HLS_GranuleUR", + input_granule_ur, + ) time_format = "%Y-%m-%dT%H:%M:%S.%fZ" formatted_date = datetime.now(timezone.utc).strftime(time_format) @@ -125,6 +132,11 @@ def generate_metadata(input_dir: Path, output_dir: Path) -> None: tree.find("DataFormat").text = "COG" + append_fmask_online_access_urls( + tree.find("OnlineAccessURLs"), + input_granule_ur, + ) + with ( importlib_resources.files("hls_vi") / "schema" @@ -132,17 +144,23 @@ def generate_metadata(input_dir: Path, 
output_dir: Path) -> None: ).open() as xsd: ET.XMLSchema(file=xsd).assertValid(tree) - tree.write( - str(output_dir / metadata_path.name.replace("HLS", "HLS-VI")), - encoding="utf-8", - xml_declaration=True, + # Python 3.9 or `lxml==4.5` add an `indent()` function to nicely format our XML + # Alas we cannot use those yet, so rely on this approach using `xml.dom.minidom` + dom = minidom.parseString( + ET.tostring(tree, xml_declaration=True, pretty_print=False) + ) + pretty_xml = os.linesep.join( + [line for line in dom.toprettyxml(indent=" ").splitlines() if line.strip()] ) + dest = output_dir / metadata_path.name.replace("HLS", "HLS-VI") + dest.write_text(pretty_xml, encoding="utf-8") + def normalize_additional_attributes(container: ElementBase) -> None: """Normalize additional attribute values. - On rare occassions, granule data is split and recombined upstream. When this + On rare occasions, granule data is split and recombined upstream. When this occurs, the associated metadata is also split and recombined, resulting in values for additional attributes that are created by joining the separate parts with the string `" + "`. @@ -193,6 +211,39 @@ def set_additional_attribute(attrs: ElementBase, name: str, value: str) -> None: attrs.append(attr) +def append_fmask_online_access_urls( + access_urls: ElementBase, hls_granule_ur: str +) -> None: + """Include links to Fmask layer from HLS granule in metadata + + This is intended to help users find the relevant Fmask band without + having to duplicate it into the HLS-VI product. 
See, + https://github.com/NASA-IMPACT/hls-vi/issues/47 + """ + prefix = "HLSL30.020" if hls_granule_ur.startswith("HLS.L30") else "HLSS30.020" + + http_attr = Element("OnlineAccessURL", None, None) + http_attr_url = Element("URL", None, None) + http_attr_url.text = f"https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/{prefix}/{hls_granule_ur}/{hls_granule_ur}.Fmask.tif" # noqa: E501 + http_attr_desc = Element("URLDescription", None, None) + http_attr_desc.text = f"Download Fmask quality layer {hls_granule_ur}.Fmask.tif" + http_attr.append(http_attr_url) + http_attr.append(http_attr_desc) + + s3_attr = Element("OnlineAccessURL", None, None) + s3_attr_url = Element("URL", None, None) + s3_attr_url.text = ( + f"s3://lp-prod-protected/{prefix}/{hls_granule_ur}/{hls_granule_ur}.Fmask.tif" + ) + s3_attr_desc = Element("URLDescription", None, None) + s3_attr_desc.text = f"This link provides direct download access via S3 to the Fmask quality layer {hls_granule_ur}.Fmask.tif" # noqa: E501 + s3_attr.append(s3_attr_url) + s3_attr.append(s3_attr_desc) + + access_urls.append(http_attr) + access_urls.append(s3_attr) + + def parse_args() -> Tuple[Path, Path]: short_options = "i:o:" long_options = ["instrument=", "inputdir=", "outputdir="] diff --git a/hls_vi/schema/Granule.xsd b/hls_vi/schema/Granule.xsd index ddb5b36..52e46e0 100644 --- a/hls_vi/schema/Granule.xsd +++ b/hls_vi/schema/Granule.xsd @@ -14,6 +14,7 @@ xmlns:xs="http://www.w3.org/2001/XMLSchema"> + @@ -1298,7 +1299,7 @@ xmlns:xs="http://www.w3.org/2001/XMLSchema"> type="ListOfAdditionalAttributeValues"> The ordered list of values of the - additioanl attribute for this granule. The values will be + additional attribute for this granule. The values will be kept in the order which they appear. 
diff --git a/hls_vi/schema/MetadataCommon.xsd b/hls_vi/schema/MetadataCommon.xsd index 4ef8338..12ac2b2 100644 --- a/hls_vi/schema/MetadataCommon.xsd +++ b/hls_vi/schema/MetadataCommon.xsd @@ -446,7 +446,7 @@ - + The element should contain no children. In diff --git a/tests/fixtures/HLS-VI.L30.T06WVS.2024120T211159.v2.0.cmr.xml b/tests/fixtures/HLS-VI.L30.T06WVS.2024120T211159.v2.0.cmr.xml index 01ba39b..5831deb 100644 --- a/tests/fixtures/HLS-VI.L30.T06WVS.2024120T211159.v2.0.cmr.xml +++ b/tests/fixtures/HLS-VI.L30.T06WVS.2024120T211159.v2.0.cmr.xml @@ -9,7 +9,7 @@ HLS-VI.L30.T06WVS.2024120T211159 DAY - UPDATE HLS Prodution DATETIME + UPDATE HLS Production DATETIME 2.0 @@ -240,8 +240,22 @@ https://doi.org + + Input_HLS_GranuleUR + + HLS.L30.T06WVS.2024120T211159.v2.0 + + + + https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSL30.020/HLS.L30.T06WVS.2024120T211159.v2.0/HLS.L30.T06WVS.2024120T211159.v2.0.Fmask.tif + Download Fmask quality layer HLS.L30.T06WVS.2024120T211159.v2.0.Fmask.tif + + + s3://lp-prod-protected/HLSL30.020/HLS.L30.T06WVS.2024120T211159.v2.0/HLS.L30.T06WVS.2024120T211159.v2.0.Fmask.tif + This link provides direct download access via S3 to the Fmask quality layer HLS.L30.T06WVS.2024120T211159.v2.0.Fmask.tif + diff --git a/tests/fixtures/HLS-VI.S30.T13RCN.2024128T173909.v2.0.cmr.xml b/tests/fixtures/HLS-VI.S30.T13RCN.2024128T173909.v2.0.cmr.xml index 9211d51..0f0e6b9 100644 --- a/tests/fixtures/HLS-VI.S30.T13RCN.2024128T173909.v2.0.cmr.xml +++ b/tests/fixtures/HLS-VI.S30.T13RCN.2024128T173909.v2.0.cmr.xml @@ -9,7 +9,7 @@ HLS-VI.S30.T13RCN.2024128T173909 DAY - UPDATE HLS Prodution DATETIME + UPDATE HLS Production DATETIME 2.0 @@ -302,8 +302,22 @@ https://doi.org + + Input_HLS_GranuleUR + + HLS.S30.T13RCN.2024128T173909.v2.0 + + + + https://data.lpdaac.earthdatacloud.nasa.gov/lp-prod-protected/HLSS30.020/HLS.S30.T13RCN.2024128T173909.v2.0/HLS.S30.T13RCN.2024128T173909.v2.0.Fmask.tif + Download Fmask quality layer 
HLS.S30.T13RCN.2024128T173909.v2.0.Fmask.tif + + + s3://lp-prod-protected/HLSS30.020/HLS.S30.T13RCN.2024128T173909.v2.0/HLS.S30.T13RCN.2024128T173909.v2.0.Fmask.tif + This link provides direct download access via S3 to the Fmask quality layer HLS.S30.T13RCN.2024128T173909.v2.0.Fmask.tif +