From a2b8399593e14a2b655a39da14cb60d7c302081b Mon Sep 17 00:00:00 2001 From: Laurent Querel Date: Fri, 9 Aug 2024 10:25:48 -0700 Subject: [PATCH 1/3] feat(resolver): Detect root attribute id duplicates during the resolution process --- Cargo.lock | 147 ++++++++---------- crates/weaver_resolved_schema/src/lineage.rs | 6 + .../expected-attribute-catalog.json | 7 + .../expected-registry.json | 4 +- .../registry/groups.yaml | 14 +- .../expected-attribute-catalog.json | 4 +- .../expected-registry.json | 10 +- .../registry/groups.yaml | 16 +- crates/weaver_resolver/src/lib.rs | 9 ++ crates/weaver_resolver/src/registry.rs | 104 ++++++++++++- crates/weaver_semconv_gen/Cargo.toml | 1 - 11 files changed, 214 insertions(+), 108 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 3262ee4b..8c7c2b2b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -150,13 +150,14 @@ checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711" [[package]] name = "assert_cmd" -version = "2.0.15" +version = "2.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc65048dd435533bb1baf2ed9956b9a278fbfdcf90301b39ee117f06c0199d37" +checksum = "dc1835b7f27878de8525dc71410b5a31cdcc5f230aed5ba5df968e09c201b23d" dependencies = [ "anstyle", "bstr", "doc-comment", + "libc", "predicates", "predicates-core", "predicates-tree", @@ -292,9 +293,9 @@ checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" [[package]] name = "bytes" -version = "1.7.0" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fca2be1d5c43812bae364ee3f30b3afcb7877cf59f4aeb94c66f313a41d2fac9" +checksum = "8318a53db07bb3f8dca91a600466bdb3f2eaadeedfdbcf02e1accbad9271ba50" [[package]] name = "bytesize" @@ -340,9 +341,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.1.7" +version = "1.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "26a5c3fd7bfa1ce3897a3a3501d362b2d87b7f2583ebcb4a949ec25911025cbc" +checksum = "504bdec147f2cc13c8b57ed9401fd8a147cc66b67ad5cb241394244f2c947549" dependencies = [ "jobserver", "libc", @@ -411,9 +412,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.13" +version = "4.5.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fbb260a053428790f3de475e304ff84cdbc4face759ea7a3e64c1edd938a7fc" +checksum = "c937d4061031a6d0c8da4b9a4f98a172fc2976dfb1c19213a9cf7d0d3c837e36" dependencies = [ "clap_builder", "clap_derive", @@ -421,9 +422,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.13" +version = "4.5.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64b17d7ea74e9f833c7dbf2cbe4fb12ff26783eda4782a8975b72f895c9b4d99" +checksum = "85379ba512b21a328adf887e85f7742d12e96eb31f3ef077df4ffc26b506ffed" dependencies = [ "anstream", "anstyle", @@ -740,9 +741,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "dunce" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56ce8c6da7551ec6c462cbaf3bfbc75131ebbfa1c944aeaa9dab51ca1c5f0c3b" +checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" [[package]] name = "dyn-clone" @@ -806,14 +807,14 @@ checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" [[package]] name = "filetime" -version = "0.2.23" +version = "0.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1ee447700ac8aa0b2f2bd7bc4462ad686ba06baa6727ac149a2d6277f0d240fd" +checksum = "bf401df4a4e3872c4fe8151134cf483738e74b67fc934d6532c882b3d24a4550" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.4.1", - "windows-sys 0.52.0", + "libredox", + "windows-sys 0.59.0", ] [[package]] @@ -1896,9 +1897,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ab92f4f49ee4fb4f997c784b7a2e0fa70050211e0b6a287f898c3c9785ca956" +checksum = "cde7055719c54e36e95e8719f95883f22072a48ede39db7fc17a4e1d5281e9b9" dependencies = [ "bytes", "futures-channel", @@ -2212,6 +2213,7 @@ checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ "bitflags 2.6.0", "libc", + "redox_syscall", ] [[package]] @@ -2352,9 +2354,9 @@ checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" [[package]] name = "minijinja" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4bf71af278c578cbcc91d0b1ff092910208bc86f7b3750364642bd424e3dcd3" +checksum = "bf369fce3289017a63e514dfca10a0a6f1a02216b21b588b79f6a1081eb999f5" dependencies = [ "aho-corasick", "memo-map", @@ -2366,9 +2368,9 @@ dependencies = [ [[package]] name = "minijinja-contrib" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b22bcb360a130647646fdda70742bff3cea5d4fe562d8a943f26675c84206088" +checksum = "e399c81b189a627d37d43a028451551b2942e8bbe36d5c79db6d81be7e5047d4" dependencies = [ "minijinja", "serde", @@ -2519,9 +2521,9 @@ dependencies = [ [[package]] name = "object" -version = "0.36.2" +version = "0.36.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f203fa8daa7bb185f760ae12bd8e097f63d17041dcdcaf675ac54cdf863170e" +checksum = "27b64972346851a39438c60b341ebc01bba47464ae329e55cf343eb93964efd9" dependencies = [ "memchr", ] @@ -2644,7 +2646,7 @@ checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" dependencies = [ "cfg-if", "libc", - "redox_syscall 0.5.3", + "redox_syscall", "smallvec", "windows-targets 0.52.6", ] @@ -2783,12 +2785,11 @@ checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" [[package]] name = "ppv-lite86" -version = "0.2.19" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2288c0e17cc8d342c712bb43a257a80ebffce59cdb33d5000d8348f3ec02528b" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" dependencies = [ "zerocopy", - "zerocopy-derive", ] [[package]] @@ -2839,9 +2840,9 @@ dependencies = [ [[package]] name = "quinn" -version = "0.11.2" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e4ceeeeabace7857413798eb1ffa1e9c905a9946a57d81fb69b4b71c4d8eb3ad" +checksum = "b22d8e7369034b9a7132bc2008cac12f2013c8132b45e0554e6e20e2617f2156" dependencies = [ "bytes", "pin-project-lite", @@ -2849,6 +2850,7 @@ dependencies = [ "quinn-udp", "rustc-hash", "rustls", + "socket2", "thiserror", "tokio", "tracing", @@ -2856,9 +2858,9 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.3" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ddf517c03a109db8100448a4be38d498df8a210a99fe0e1b9eaf39e78c640efe" +checksum = "ba92fb39ec7ad06ca2582c0ca834dfeadcaf06ddfc8e635c80aa7e1c05315fdd" dependencies = [ "bytes", "rand 0.8.5", @@ -2880,6 +2882,7 @@ dependencies = [ "libc", "once_cell", "socket2", + "tracing", "windows-sys 0.52.0", ] @@ -3003,15 +3006,6 @@ dependencies = [ "rand_core 0.3.1", ] -[[package]] -name = "redox_syscall" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4722d768eff46b75989dd134e5c353f0d6296e5aaa3132e776cbdb56be7731aa" -dependencies = [ - "bitflags 1.3.2", -] - [[package]] name = "redox_syscall" version = "0.5.3" @@ -3171,9 +3165,9 @@ checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" [[package]] name = "rustc-hash" -version = "1.1.0" +version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" +checksum = "583034fd73374156e66797ed8e5b0d5690409c9226b22d87cb7f19821c05d152" [[package]] name = "rustix" @@ -3205,9 +3199,9 @@ dependencies = [ [[package]] name = "rustls-pemfile" -version = "2.1.2" +version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d" +checksum = "196fe16b00e106300d3e45ecfcb764fa292a535d7326a29a5875c579c7417425" dependencies = [ "base64 0.22.1", "rustls-pki-types", @@ -3215,9 +3209,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.7.0" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" +checksum = "fc0a2ce646f8655401bb81e7927b812614bd5d91dbc968696be50603510fcaf0" [[package]] name = "rustls-webpki" @@ -3318,18 +3312,18 @@ dependencies = [ [[package]] name = "serde" -version = "1.0.204" +version = "1.0.205" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" +checksum = "e33aedb1a7135da52b7c21791455563facbbcc43d0f0f66165b42c21b3dfb150" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.204" +version = "1.0.205" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" +checksum = "692d6f5ac90220161d6774db30c662202721e64aed9058d2c394f451261420c1" dependencies = [ "proc-macro2", "quote", @@ -3656,14 +3650,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.10.1" +version = "3.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" +checksum = "04cbcdd0c794ebb0d4cf35e88edd2f7d2c4c3e9a5a6dab322839b321c6a87a64" dependencies = [ "cfg-if", "fastrand", + "once_cell", "rustix", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -3868,21 +3863,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ "pin-project-lite", - "tracing-attributes", "tracing-core", ] -[[package]] -name = "tracing-attributes" -version = "0.1.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" -dependencies = [ - "proc-macro2", - "quote", - "syn", -] - [[package]] name = "tracing-core" version = "0.1.32" @@ -3900,9 +3883,9 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "tui-textarea" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d8b352f15e70a6b6b450dc89f5d737f45b6f9d6bc0c2503b3d55dcb1d0c8553" +checksum = "00524c1366ee838839dd327d1f339ff51846ad4ea85bfa1332859e79adec612c" dependencies = [ "crossterm", "ratatui", @@ -4006,9 +3989,9 @@ checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "ureq" -version = "2.10.0" +version = "2.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72139d247e5f97a3eff96229a7ae85ead5328a39efe76f8bf5a06313d505b6ea" +checksum = "b74fc6b57825be3373f7054754755f03ac3a8f5d70015ccad699ba2029956f4a" dependencies = [ "base64 0.22.1", "flate2", @@ -4360,7 +4343,6 @@ dependencies = [ name = "weaver_semconv_gen" version = "0.8.0" dependencies = [ - "itertools 0.13.0", "miette", "nom", "serde", @@ -4422,11 +4404,11 @@ checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" [[package]] name = "winapi-util" -version = "0.1.8" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" +checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -4462,6 +4444,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.59.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" +dependencies = [ + "windows-targets 0.52.6", +] + [[package]] name = "windows-targets" version = "0.48.5" @@ -4716,18 +4707,18 @@ dependencies = [ [[package]] name = "zstd-safe" -version = "7.2.0" +version = "7.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa556e971e7b568dc775c136fc9de8c779b1c2fc3a63defaafadffdbd3181afa" +checksum = "54a3ab4db68cea366acc5c897c7b4d4d1b8994a9cd6e6f841f8964566a419059" dependencies = [ "zstd-sys", ] [[package]] name = "zstd-sys" -version = "2.0.12+zstd.1.5.6" +version = "2.0.13+zstd.1.5.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a4e40c320c3cb459d9a9ff6de98cff88f4751ee9275d140e2be94a2b74e4c13" +checksum = "38ff0f21cfee8f97d94cef41359e0c89aa6113028ab0291aa8ca0038995a95aa" dependencies = [ "cc", "pkg-config", diff --git a/crates/weaver_resolved_schema/src/lineage.rs b/crates/weaver_resolved_schema/src/lineage.rs index 32b127f3..ac194984 100644 --- a/crates/weaver_resolved_schema/src/lineage.rs +++ b/crates/weaver_resolved_schema/src/lineage.rs @@ -429,6 +429,12 @@ impl GroupLineage { _ = self.attributes.insert(attr_id, attribute_lineage); } + /// Checks if a given attribute is present in the group lineage. + #[must_use] + pub fn has_attribute(&self, attr_id: &str) -> bool { + self.attributes.contains_key(attr_id) + } + /// Returns the source file of the group (path or URL). #[must_use] pub fn source_file(&self) -> &str { diff --git a/crates/weaver_resolver/data/registry-test-lineage-0/expected-attribute-catalog.json b/crates/weaver_resolver/data/registry-test-lineage-0/expected-attribute-catalog.json index 65cc7ae0..7e149c9f 100644 --- a/crates/weaver_resolver/data/registry-test-lineage-0/expected-attribute-catalog.json +++ b/crates/weaver_resolver/data/registry-test-lineage-0/expected-attribute-catalog.json @@ -49,6 +49,13 @@ ], "requirement_level": "opt_in" }, + { + "name": "network.protocol.name2", + "type": "string", + "brief": "Network protocol name", + "examples": ["http", "spdy"], + "requirement_level": "recommended" + }, { "name": "network.protocol.version", "type": "string", diff --git a/crates/weaver_resolver/data/registry-test-lineage-0/expected-registry.json b/crates/weaver_resolver/data/registry-test-lineage-0/expected-registry.json index c1e396d2..08f357c8 100644 --- a/crates/weaver_resolver/data/registry-test-lineage-0/expected-registry.json +++ b/crates/weaver_resolver/data/registry-test-lineage-0/expected-registry.json @@ -30,10 +30,10 @@ "type": "attribute_group", "brief": "Top level group", "attributes": [ - 1, 3, 4, - 5 + 5, + 6 ], "lineage": { "source_file": "data/registry-test-lineage-0/registry/groups.yaml", diff --git a/crates/weaver_resolver/data/registry-test-lineage-0/registry/groups.yaml b/crates/weaver_resolver/data/registry-test-lineage-0/registry/groups.yaml index 32075590..c68c5cca 100644 --- a/crates/weaver_resolver/data/registry-test-lineage-0/registry/groups.yaml +++ b/crates/weaver_resolver/data/registry-test-lineage-0/registry/groups.yaml @@ -6,7 +6,7 @@ groups: - id: server.port type: string brief: Server port - examples: [ '80', '443' ] + examples: ["80", "443"] - id: registry.network type: attribute_group @@ -15,15 +15,15 @@ groups: - id: network.protocol.name type: string brief: Network protocol name - examples: [ 'http', 'spdy' ] + examples: ["http", "spdy"] - id: network.protocol.version type: string brief: Network protocol version - examples: [ '1.0', '2.0' ] + examples: ["1.0", "2.0"] - id: network.type type: string brief: Network type - examples: [ 'ipv4', 'ipv6' ] + examples: ["ipv4", "ipv6"] - id: top.level.group type: attribute_group @@ -31,10 +31,10 @@ groups: attributes: - ref: server.port requirement_level: opt_in - - id: network.protocol.name + - id: network.protocol.name2 type: string brief: Network protocol name - examples: [ 'http', 'spdy' ] + examples: ["http", "spdy"] - ref: network.protocol.version requirement_level: opt_in - - ref: network.type \ No newline at end of file + - ref: network.type diff --git a/crates/weaver_resolver/data/registry-test-lineage-2/expected-attribute-catalog.json b/crates/weaver_resolver/data/registry-test-lineage-2/expected-attribute-catalog.json index 8ad1b80d..a63bb84a 100644 --- a/crates/weaver_resolver/data/registry-test-lineage-2/expected-attribute-catalog.json +++ b/crates/weaver_resolver/data/registry-test-lineage-2/expected-attribute-catalog.json @@ -42,7 +42,7 @@ "requirement_level": "recommended" }, { - "name": "network.protocol.name", + "name": "network.protocol.name2", "type": "string", "brief": "Network protocol name", "examples": [ @@ -66,7 +66,7 @@ { "name": "network.protocol.name", "type": "string", - "brief": "Network protocol name", + "brief": "Network protocol name (registry)", "examples": [ "http", "spdy" diff --git a/crates/weaver_resolver/data/registry-test-lineage-2/expected-registry.json b/crates/weaver_resolver/data/registry-test-lineage-2/expected-registry.json index 72128eb0..5d8aa78c 100644 --- a/crates/weaver_resolver/data/registry-test-lineage-2/expected-registry.json +++ b/crates/weaver_resolver/data/registry-test-lineage-2/expected-registry.json @@ -57,6 +57,7 @@ "brief": "Top level", "attributes": [ 3, + 4, 6, 7, 8 @@ -65,13 +66,16 @@ "source_file": "data/registry-test-lineage-2/registry/groups.yaml", "attributes": { "network.protocol.name": { + "source_group": "registry.xyz", + "inherited_fields": ["brief", "examples", "note"], + "locally_overridden_fields": ["requirement_level"] + }, + "network.protocol.name2": { "source_group": "intermediate.level", "inherited_fields": [ "brief", "examples", - "note" - ], - "locally_overridden_fields": [ + "note", "requirement_level" ] }, diff --git a/crates/weaver_resolver/data/registry-test-lineage-2/registry/groups.yaml b/crates/weaver_resolver/data/registry-test-lineage-2/registry/groups.yaml index 66c876da..bb18d63d 100644 --- a/crates/weaver_resolver/data/registry-test-lineage-2/registry/groups.yaml +++ b/crates/weaver_resolver/data/registry-test-lineage-2/registry/groups.yaml @@ -7,7 +7,7 @@ groups: type: string brief: Server port stability: stable - examples: [ 80, 8080, 443 ] + examples: [80, 8080, 443] - id: registry.xyz type: attribute_group @@ -16,15 +16,15 @@ groups: - id: network.protocol.name type: string brief: Network protocol name (registry) - examples: [ 'http', 'spdy' ] + examples: ["http", "spdy"] - id: network.protocol.version type: string brief: Network protocol version (registry) - examples: [ '1.0', '2.0' ] + examples: ["1.0", "2.0"] - id: network.type type: string brief: Network type (registry) - examples: [ 'ipv4', 'ipv6' ] + examples: ["ipv4", "ipv6"] - id: intermediate.level type: attribute_group @@ -33,10 +33,10 @@ groups: attributes: - ref: server.port requirement_level: opt_in - - id: network.protocol.name + - id: network.protocol.name2 type: string brief: Network protocol name - examples: [ 'http', 'spdy' ] + examples: ["http", "spdy"] - id: top.level type: attribute_group @@ -45,9 +45,9 @@ groups: attributes: - ref: server.port brief: Brief of the top level - examples: [ 80, 8080, 443 ] + examples: [80, 8080, 443] - ref: network.protocol.name requirement_level: opt_in - ref: network.protocol.version requirement_level: opt_in - - ref: network.type \ No newline at end of file + - ref: network.type diff --git a/crates/weaver_resolver/src/lib.rs b/crates/weaver_resolver/src/lib.rs index c972ac73..f4fe9b27 100644 --- a/crates/weaver_resolver/src/lib.rs +++ b/crates/weaver_resolver/src/lib.rs @@ -139,6 +139,15 @@ pub enum Error { path: PathBuf, }, + /// A duplicate attribute id error. + #[error("The attribute id `{attribute_id}` is declared multiple times in the following groups:\n{group_ids:?}")] + DuplicateAttributeId { + /// The groups where this attribute is duplicated. + group_ids: Vec, + /// The attribute id. + attribute_id: String, + }, + /// A container for multiple errors. #[error("{:?}", format_errors(.0))] CompoundError(Vec), diff --git a/crates/weaver_resolver/src/registry.rs b/crates/weaver_resolver/src/registry.rs index fa7c6241..76f31f5c 100644 --- a/crates/weaver_resolver/src/registry.rs +++ b/crates/weaver_resolver/src/registry.rs @@ -104,6 +104,9 @@ pub fn resolve_semconv_registry( group.constraints.clear(); } + // Other complementary checks. + check_root_attribute_id_duplicates(&ureg.registry, &attr_name_index)?; + Ok(ureg.registry) } @@ -200,6 +203,68 @@ fn check_group_any_of_constraints( Ok(()) } +/// Checks for duplicate attribute IDs in the given registry. +/// +/// This function iterates over all groups in the registry that are of type `AttributeGroup`. +/// For each root attribute in these groups (i.e. the ones without lineage), it maps the root +/// attribute ID to the group ID. +/// It then checks if any root attribute ID is found in multiple groups and collects errors +/// for such duplicates. +/// +/// # Arguments +/// +/// * `registry` - The registry to check for duplicate attribute IDs. +/// * `attr_name_index` - The index of attribute names (catalog). +/// +/// # Returns +/// +/// This function returns `Ok(())` if no duplicate attribute IDs are found. Otherwise, it returns +/// an error indicating the duplicate attribute IDs. +pub fn check_root_attribute_id_duplicates( + registry: &Registry, + attr_name_index: &[String], +) -> Result<(), Error> { + // Map to track groups by their root attribute ID. + let mut groups_by_root_attr_id = HashMap::new(); + + // Iterate over all groups in the registry that are of type `AttributeGroup`. + registry + .groups + .iter() + .filter(|group| group.r#type == weaver_semconv::group::GroupType::AttributeGroup) + .for_each(|group| { + // Iterate over all attribute references in the group. + for attr_ref in group.attributes.iter() { + // Get the attribute ID from the attribute name index. + let attr_id = &attr_name_index[attr_ref.0 as usize]; + // Check if the group has a lineage and if the lineage does not already have the attribute. + if let Some(lineage) = group.lineage.as_ref() { + if !lineage.has_attribute(attr_id) { + // Add the group ID to the map entry for the attribute ID. + groups_by_root_attr_id + .entry(attr_id.clone()) + .or_insert_with(Vec::new) + .push(group.id.clone()); + } + } + } + }); + + // Collect errors for attribute IDs that are found in multiple groups. + let errors = groups_by_root_attr_id + .into_iter() + .filter(|(_, group_ids)| group_ids.len() > 1) + .map(|(attr_id, group_ids)| Error::DuplicateAttributeId { + attribute_id: attr_id, + group_ids, + }) + .collect(); + + // Handle any errors that were found. + handle_errors(errors)?; + Ok(()) +} + /// Creates a semantic convention registry from a set of semantic convention /// specifications. /// @@ -301,8 +366,10 @@ fn resolve_attribute_references( ureg: &mut UnresolvedRegistry, attr_catalog: &mut AttributeCatalog, ) -> Result<(), Error> { + let mut permanent_errors = vec![]; + loop { - let mut errors = vec![]; + let mut transient_errors = vec![]; let mut resolved_attr_count = 0; // Iterate over all groups and resolve the attributes. @@ -323,12 +390,19 @@ fn resolve_attribute_references( unresolved_group.group.lineage.as_mut(), ); if let Some(attr_ref) = attr_ref { + // Attribute reference resolved successfully. resolved_attr.push(attr_ref); resolved_attr_count += 1; + + // Return None to remove this attribute from the + // unresolved group. None } else { + // Attribute reference could not be resolved. if let AttributeSpec::Ref { r#ref, .. } = &attr.spec { - errors.push(Error::UnresolvedAttributeRef { + // Keep track of unresolved attribute references in + // the transient errors. + transient_errors.push(Error::UnresolvedAttributeRef { group_id: unresolved_group.group.id.clone(), attribute_ref: r#ref.clone(), provenance: unresolved_group.provenance.clone(), @@ -342,7 +416,7 @@ fn resolve_attribute_references( unresolved_group.group.attributes.extend(resolved_attr); } - if errors.is_empty() { + if transient_errors.is_empty() { break; } @@ -351,10 +425,14 @@ fn resolve_attribute_references( // It means that we have an issue with the semantic convention // specifications. if resolved_attr_count == 0 { - return Err(Error::CompoundError(errors)); + permanent_errors.extend(transient_errors); + break; } } + if !permanent_errors.is_empty() { + return Err(Error::CompoundError(permanent_errors)); + } Ok(()) } @@ -689,6 +767,7 @@ fn resolve_inheritance_attr( mod tests { use std::collections::HashSet; use std::error::Error; + use std::path::PathBuf; use glob::glob; use serde::Serialize; @@ -745,15 +824,26 @@ mod tests { let mut attr_catalog = AttributeCatalog::default(); let observed_registry = - resolve_semconv_registry(&mut attr_catalog, "https://127.0.0.1", &sc_specs) - .expect("Failed to resolve registry"); + resolve_semconv_registry(&mut attr_catalog, "https://127.0.0.1", &sc_specs); // Check that the resolved attribute catalog matches the expected attribute catalog. let observed_attr_catalog = attr_catalog.drain_attributes(); + // If the expected attr catalog is not defined in the test directory, then it means + // that the normal behavior of this test is to fail. + let expected_attr_catalog_file = + format!("{}/expected-attribute-catalog.json", test_dir); + if !PathBuf::from(&expected_attr_catalog_file).exists() { + assert!(observed_registry.is_err(), "This test is expected to fail"); + continue; + } + + // At this point, the normal behavior of this test is to pass. + let observed_registry = observed_registry.expect("Failed to resolve the registry"); + // Load the expected registry and attribute catalog. let expected_attr_catalog: Vec = serde_json::from_reader( - std::fs::File::open(format!("{}/expected-attribute-catalog.json", test_dir)) + std::fs::File::open(expected_attr_catalog_file) .expect("Failed to open expected attribute catalog"), ) .expect("Failed to deserialize expected attribute catalog"); diff --git a/crates/weaver_semconv_gen/Cargo.toml b/crates/weaver_semconv_gen/Cargo.toml index f87ca4b6..d8eb9402 100644 --- a/crates/weaver_semconv_gen/Cargo.toml +++ b/crates/weaver_semconv_gen/Cargo.toml @@ -18,7 +18,6 @@ weaver_resolved_schema = { path = "../weaver_resolved_schema" } weaver_semconv = { path = "../weaver_semconv" } nom = "7.1.3" -itertools.workspace = true thiserror.workspace = true serde.workspace = true miette.workspace = true From d6fd673ca9e7d07044e3073847cc3adf9eb9af1d Mon Sep 17 00:00:00 2001 From: Laurent Querel Date: Fri, 9 Aug 2024 10:37:18 -0700 Subject: [PATCH 2/3] feat(resolver): Detect root attribute id duplicates during the resolution process --- crates/weaver_resolver/src/registry.rs | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/crates/weaver_resolver/src/registry.rs b/crates/weaver_resolver/src/registry.rs index 76f31f5c..60709faf 100644 --- a/crates/weaver_resolver/src/registry.rs +++ b/crates/weaver_resolver/src/registry.rs @@ -366,10 +366,8 @@ fn resolve_attribute_references( ureg: &mut UnresolvedRegistry, attr_catalog: &mut AttributeCatalog, ) -> Result<(), Error> { - let mut permanent_errors = vec![]; - loop { - let mut transient_errors = vec![]; + let mut errors = vec![]; let mut resolved_attr_count = 0; // Iterate over all groups and resolve the attributes. @@ -401,8 +399,8 @@ fn resolve_attribute_references( // Attribute reference could not be resolved. if let AttributeSpec::Ref { r#ref, .. } = &attr.spec { // Keep track of unresolved attribute references in - // the transient errors. - transient_errors.push(Error::UnresolvedAttributeRef { + // the errors. + errors.push(Error::UnresolvedAttributeRef { group_id: unresolved_group.group.id.clone(), attribute_ref: r#ref.clone(), provenance: unresolved_group.provenance.clone(), @@ -416,7 +414,7 @@ fn resolve_attribute_references( unresolved_group.group.attributes.extend(resolved_attr); } - if transient_errors.is_empty() { + if errors.is_empty() { break; } @@ -425,14 +423,10 @@ fn resolve_attribute_references( // It means that we have an issue with the semantic convention // specifications. if resolved_attr_count == 0 { - permanent_errors.extend(transient_errors); - break; + return Err(Error::CompoundError(errors)); } } - if !permanent_errors.is_empty() { - return Err(Error::CompoundError(permanent_errors)); - } Ok(()) } From 2193df140d7aad893fbd2a6c3624917db05d894c Mon Sep 17 00:00:00 2001 From: Laurent Querel Date: Fri, 9 Aug 2024 11:56:50 -0700 Subject: [PATCH 3/3] chore(test): Add missing test data --- .../README.md | 2 + .../registry/metrics-messaging.yaml | 11 + .../registry/registry-messaging.yaml | 258 ++++++++++++++++++ 3 files changed, 271 insertions(+) create mode 100644 crates/weaver_resolver/data/registry-test-10-duplicate-attr-name/README.md create mode 100644 crates/weaver_resolver/data/registry-test-10-duplicate-attr-name/registry/metrics-messaging.yaml create mode 100644 crates/weaver_resolver/data/registry-test-10-duplicate-attr-name/registry/registry-messaging.yaml diff --git a/crates/weaver_resolver/data/registry-test-10-duplicate-attr-name/README.md b/crates/weaver_resolver/data/registry-test-10-duplicate-attr-name/README.md new file mode 100644 index 00000000..0497bcf1 --- /dev/null +++ b/crates/weaver_resolver/data/registry-test-10-duplicate-attr-name/README.md @@ -0,0 +1,2 @@ +Test that duplicate attribute names are detected and reported. +This test must fail. diff --git a/crates/weaver_resolver/data/registry-test-10-duplicate-attr-name/registry/metrics-messaging.yaml b/crates/weaver_resolver/data/registry-test-10-duplicate-attr-name/registry/metrics-messaging.yaml new file mode 100644 index 00000000..b5787179 --- /dev/null +++ b/crates/weaver_resolver/data/registry-test-10-duplicate-attr-name/registry/metrics-messaging.yaml @@ -0,0 +1,11 @@ +groups: + - id: metric.messaging.attributes + type: attribute_group + brief: "Common messaging metrics attributes." + attributes: + - ref: messaging.destination.name + requirement_level: + conditionally_required: if and only if `messaging.destination.name` is known to have low cardinality. Otherwise, `messaging.destination.template` MAY be populated. + - ref: messaging.destination.template + requirement_level: + conditionally_required: if available. diff --git a/crates/weaver_resolver/data/registry-test-10-duplicate-attr-name/registry/registry-messaging.yaml b/crates/weaver_resolver/data/registry-test-10-duplicate-attr-name/registry/registry-messaging.yaml new file mode 100644 index 00000000..b21283b6 --- /dev/null +++ b/crates/weaver_resolver/data/registry-test-10-duplicate-attr-name/registry/registry-messaging.yaml @@ -0,0 +1,258 @@ +groups: + - id: registry.messaging_bis + prefix: messaging + type: attribute_group + brief: "Attributes describing telemetry around messaging systems and messaging activities." + attributes: + - id: batch.message_count + type: int + brief: The number of messages sent, received, or processed in the scope of the batching operation. + note: > + Instrumentations SHOULD NOT set `messaging.batch.message_count` on spans that operate with a single message. + When a messaging client library supports both batch and single-message API for the same operation, instrumentations SHOULD + use `messaging.batch.message_count` for batching APIs and SHOULD NOT use it for single-message APIs. + examples: [0, 1, 2] + - id: registry.messaging + prefix: messaging + type: attribute_group + brief: "Attributes describing telemetry around messaging systems and messaging activities." + attributes: + - id: batch.message_count + type: int + brief: The number of messages sent, received, or processed in the scope of the batching operation. + note: > + Instrumentations SHOULD NOT set `messaging.batch.message_count` on spans that operate with a single message. + When a messaging client library supports both batch and single-message API for the same operation, instrumentations SHOULD + use `messaging.batch.message_count` for batching APIs and SHOULD NOT use it for single-message APIs. + examples: [0, 1, 2] + - id: client_id + type: string + brief: > + A unique identifier for the client that consumes or produces a message. + examples: ["client-5", "myhost@8742@s8083jm"] + - id: destination.name + type: string + brief: "The message destination name" + note: | + Destination name SHOULD uniquely identify a specific queue, topic or other entity within the broker. If + the broker doesn't have such notion, the destination name SHOULD uniquely identify the broker. + examples: ["MyQueue", "MyTopic"] + - id: destination.template + type: string + brief: Low cardinality representation of the messaging destination name + note: > + Destination names could be constructed from templates. + An example would be a destination name involving a user name or product id. + Although the destination name in this case is of high cardinality, + the underlying template is of low cardinality and can be effectively + used for grouping and aggregation. + examples: ["/customers/{customerId}"] + - id: destination.anonymous + type: boolean + brief: "A boolean that is true if the message destination is anonymous (could be unnamed or have auto-generated name)." + - id: destination.temporary + type: boolean + brief: "A boolean that is true if the message destination is temporary and might not exist anymore after messages are processed." + - id: destination_publish.anonymous + type: boolean + brief: "A boolean that is true if the publish message destination is anonymous (could be unnamed or have auto-generated name)." + - id: destination_publish.name + type: string + brief: "The name of the original destination the message was published to" + note: | + The name SHOULD uniquely identify a specific queue, topic, or other entity within the broker. If + the broker doesn't have such notion, the original destination name SHOULD uniquely identify the broker. + examples: ["MyQueue", "MyTopic"] + - id: kafka.consumer.group + type: string + brief: > + Name of the Kafka Consumer Group that is handling the message. + Only applies to consumers, not producers. + examples: "my-group" + - id: kafka.destination.partition + type: int + brief: > + Partition the message is sent to. + examples: 2 + - id: kafka.message.key + type: string + brief: > + Message keys in Kafka are used for grouping alike messages to ensure they're processed on the same partition. + They differ from `messaging.message.id` in that they're not unique. + If the key is `null`, the attribute MUST NOT be set. + note: > + If the key type is not string, it's string representation has to be supplied for the attribute. + If the key has no unambiguous, canonical string form, don't include its value. + examples: "myKey" + - id: kafka.message.offset + type: int + brief: > + The offset of a record in the corresponding Kafka partition. + examples: 42 + - id: kafka.message.tombstone + type: boolean + brief: "A boolean that is true if the message is a tombstone." + - id: message.conversation_id + type: string + brief: > + The conversation ID identifying the conversation to which the message belongs, + represented as a string. Sometimes called "Correlation ID". + examples: "MyConversationId" + - id: message.envelope.size + type: int + brief: > + The size of the message body and metadata in bytes. + note: | + This can refer to both the compressed or uncompressed size. If both sizes are known, the uncompressed + size should be used. + examples: 2738 + - id: message.id + type: string + brief: "A value used by the messaging system as an identifier for the message, represented as a string." + examples: "452a7c7c7c7048c2f887f61572b18fc2" + - id: message.body.size + type: int + brief: > + The size of the message body in bytes. + note: | + This can refer to both the compressed or uncompressed body size. If both sizes are known, the uncompressed + body size should be used. + examples: 1439 + - id: operation + type: + allow_custom_values: true + members: + - id: publish + value: "publish" + brief: > + One or more messages are provided for publishing to an intermediary. + If a single message is published, the context of the "Publish" span can be used as the creation context and no "Create" span needs to be created. + - id: create + value: "create" + brief: > + A message is created. + "Create" spans always refer to a single message and are used to provide a unique creation context for messages in batch publishing scenarios. + - id: receive + value: "receive" + brief: > + One or more messages are requested by a consumer. + This operation refers to pull-based scenarios, where consumers explicitly call methods of messaging SDKs to receive messages. + - id: deliver + value: "deliver" + brief: > + One or more messages are passed to a consumer. + This operation refers to push-based scenarios, where consumer register callbacks which get called by messaging SDKs. + brief: > + A string identifying the kind of messaging operation. + note: If a custom value is used, it MUST be of low cardinality. + - id: rabbitmq.destination.routing_key + type: string + brief: > + RabbitMQ message routing key. + examples: "myKey" + - id: rocketmq.client_group + type: string + brief: > + Name of the RocketMQ producer/consumer group that is handling the message. The client type is identified by the SpanKind. + examples: "myConsumerGroup" + - id: rocketmq.consumption_model + type: + allow_custom_values: false + members: + - id: clustering + value: "clustering" + brief: "Clustering consumption model" + - id: broadcasting + value: "broadcasting" + brief: "Broadcasting consumption model" + brief: > + Model of message consumption. This only applies to consumer spans. + - id: rocketmq.message.delay_time_level + type: int + brief: > + The delay time level for delay message, which determines the message delay time. + examples: 3 + - id: rocketmq.message.delivery_timestamp + type: int + brief: > + The timestamp in milliseconds that the delay message is expected to be delivered to consumer. + examples: 1665987217045 + - id: rocketmq.message.group + type: string + brief: > + It is essential for FIFO message. Messages that belong to the same message group are always processed one by one within the same consumer group. + examples: "myMessageGroup" + - id: rocketmq.message.keys + type: string[] + brief: > + Key(s) of message, another way to mark message besides message id. + examples: ["keyA", "keyB"] + - id: rocketmq.message.tag + type: string + brief: > + The secondary classifier of message besides topic. + examples: tagA + - id: rocketmq.message.type + type: + allow_custom_values: false + members: + - id: normal + value: "normal" + brief: "Normal message" + - id: fifo + value: "fifo" + brief: "FIFO message" + - id: delay + value: "delay" + brief: "Delay message" + - id: transaction + value: "transaction" + brief: "Transaction message" + brief: > + Type of message. + - id: rocketmq.namespace + type: string + brief: > + Namespace of RocketMQ resources, resources in different namespaces are individual. + examples: "myNamespace" + - id: gcp_pubsub.message.ordering_key + type: string + brief: > + The ordering key for a given message. If the attribute is not present, the message does not have an ordering key. + examples: "ordering_key" + - id: system + brief: > + An identifier for the messaging system being used. See below for a list of well-known identifiers. + type: + allow_custom_values: true + members: + - id: activemq + value: "activemq" + brief: "Apache ActiveMQ" + - id: aws_sqs + value: "aws_sqs" + brief: "Amazon Simple Queue Service (SQS)" + - id: azure_eventgrid + value: "azure_eventgrid" + brief: "Azure Event Grid" + - id: azure_eventhubs + value: "azure_eventhubs" + brief: "Azure Event Hubs" + - id: azure_servicebus + value: "azure_servicebus" + brief: "Azure Service Bus" + - id: gcp_pubsub + value: "gcp_pubsub" + brief: "Google Cloud Pub/Sub" + - id: jms + value: "jms" + brief: "Java Message Service" + - id: kafka + value: "kafka" + brief: "Apache Kafka" + - id: rabbitmq + value: "rabbitmq" + brief: "RabbitMQ" + - id: rocketmq + value: "rocketmq" + brief: "Apache RocketMQ"