-
Notifications
You must be signed in to change notification settings - Fork 39
/
Copy path_canonical_json.py
38 lines (30 loc) · 1.62 KB
/
_canonical_json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
"""Module that defines the second iteration of the asset manifest"""
from __future__ import annotations
import dataclasses
import json
from .base_manifest import BaseAssetManifest, BaseManifestPath
def canonical_path_comparator(path: BaseManifestPath):
    """
    Sort key for manifest paths.

    Returns the big-endian UTF-16 encoding of ``path.path``. Comparing these
    byte strings orders paths by their UTF-16 code units, which is the ordering
    RFC 8785 prescribes for sorting object properties:
    https://www.rfc-editor.org/rfc/rfc8785.html#name-sorting-of-object-propertie

    Note: despite the name, this takes a single path and returns a key, so it
    is shaped for use as a ``key=`` argument rather than as a two-argument
    comparison function.
    """
    # Filenames encountered in the wild can contain lone surrogates, which the
    # default strict error handler refuses to encode. "surrogatepass" encodes
    # them as their raw code units instead of raising.
    utf16_code_units = path.path.encode(encoding="utf-16_be", errors="surrogatepass")
    return utf16_code_units
def manifest_to_canonical_json_string(manifest: BaseAssetManifest) -> str:
    """
    Serialize a manifest dataclass to a canonical JSON string.

    The output is meant to follow the JSON Canonicalization Scheme
    (https://www.rfc-editor.org/rfc/rfc8785.html), but only a simplified
    subset of it is implemented here:

    * No whitespace is emitted between JSON tokens.
    * Object keys are emitted in sorted order.
    * Non-ASCII characters in strings are written as ``\\uXXXX`` escapes.
    * Literals, strings and numbers are serialized with the standard ``json``
      module's rules rather than explicitly to the letter of the spec. The
      original author notes this matches the spec in practice because the
      object keys are all ASCII and this manifest version only serializes
      strings and integers.

    The paths array *MUST* be in lexicographical order by path. This function
    does not reorder lists, so the manifest is expected to arrive with its
    paths already sorted.
    """
    # Convert the dataclass (recursively) into plain dicts and lists first.
    manifest_fields = dataclasses.asdict(manifest)
    return json.dumps(
        manifest_fields,
        ensure_ascii=True,
        separators=(",", ":"),
        sort_keys=True,
    )