Skip to content

Commit 3b38ba8

Browse files
authored
chore(config): emit human-friendly version of enum variant/property names in schema (vectordotdev#17171)
1 parent 3c92556 commit 3b38ba8

File tree

14 files changed

+454
-135
lines changed

14 files changed

+454
-135
lines changed

Cargo.lock

+14-41
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

lib/vector-config-common/Cargo.toml

+2
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,10 @@ edition = "2021"
55
license = "MPL-2.0"
66

77
[dependencies]
8+
convert_case = { version = "0.6", default-features = false }
89
darling = { version = "0.13", default-features = false, features = ["suggestions"] }
910
indexmap = { version = "1.9", default-features = false, features = ["serde"] }
11+
once_cell = { version = "1", default-features = false, features = ["std"] }
1012
proc-macro2 = { version = "1.0", default-features = false }
1113
serde = { version = "1.0", default-features = false, features = ["derive"] }
1214
serde_json = { version = "1.0", default-features = false, features = ["std"] }

lib/vector-config-common/src/constants.rs

+2-1
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,14 @@ pub const DOCS_META_ENUM_TAG_FIELD: &str = "docs::enum_tag_field";
1818
pub const DOCS_META_ENUM_TAGGING: &str = "docs::enum_tagging";
1919
pub const DOCS_META_EXAMPLES: &str = "docs::examples";
2020
pub const DOCS_META_HIDDEN: &str = "docs::hidden";
21-
pub const DOCS_META_LABEL: &str = "docs::label";
21+
pub const DOCS_META_HUMAN_NAME: &str = "docs::human_name";
2222
pub const DOCS_META_NUMERIC_TYPE: &str = "docs::numeric_type";
2323
pub const DOCS_META_OPTIONAL: &str = "docs::optional";
2424
pub const DOCS_META_SYNTAX_OVERRIDE: &str = "docs::syntax_override";
2525
pub const DOCS_META_TEMPLATEABLE: &str = "docs::templateable";
2626
pub const DOCS_META_TYPE_OVERRIDE: &str = "docs::type_override";
2727
pub const DOCS_META_TYPE_UNIT: &str = "docs::type_unit";
28+
pub const LOGICAL_NAME: &str = "logical_name";
2829
pub const METADATA: &str = "_metadata";
2930

3031
/// Well-known component types.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
use std::collections::{HashMap, HashSet};
2+
3+
use convert_case::{Boundary, Case, Converter};
4+
use once_cell::sync::Lazy;
5+
6+
/// Well-known replacements.
7+
///
8+
/// Replacements are instances of strings with unique capitalization that cannot be achieved
9+
/// programmatically, as well as the potential insertion of additional characters, such as the
10+
/// replacement of "pubsub" with "Pub/Sub".
11+
static WELL_KNOWN_REPLACEMENTS: Lazy<HashMap<String, &'static str>> = Lazy::new(|| {
12+
// Each pair is (word to match, exact text to emit in its place).
let pairs = vec![
13+
("eventstoredb", "EventStoreDB"),
14+
("mongodb", "MongoDB"),
15+
("opentelemetry", "OpenTelemetry"),
16+
// NOTE(review): "otel" is also listed in `WELL_KNOWN_ACRONYMS`. Replacements are checked
// before acronyms in `replace_well_known_segments`, so this entry is the one that applies.
("otel", "OTEL"),
17+
("postgresql", "PostgreSQL"),
18+
("pubsub", "Pub/Sub"),
19+
("statsd", "StatsD"),
20+
("journald", "JournalD"),
21+
("appsignal", "AppSignal"),
22+
("clickhouse", "ClickHouse"),
23+
("influxdb", "InfluxDB"),
24+
("webhdfs", "WebHDFS"),
25+
("cloudwatch", "CloudWatch"),
26+
("logdna", "LogDNA"),
27+
("geoip", "GeoIP"),
28+
("ssekms", "SSE-KMS"),
29+
("aes256", "AES-256"),
30+
("apiserver", "API Server"),
31+
("dir", "Directory"),
32+
("ids", "IDs"),
33+
("ips", "IPs"),
34+
("grpc", "gRPC"),
35+
("oauth2", "OAuth2"),
36+
];
37+
38+
// Keys are stored lowercased so that a lowercased word can be looked up directly.
pairs.iter().map(|(k, v)| (k.to_lowercase(), *v)).collect()
39+
});
40+
41+
/// Well-known acronyms.
42+
///
43+
/// Acronyms are distinct from replacements because they should be entirely capitalized (i.e. "aws"
44+
/// or "aWs" or "Aws" should always be replaced with "AWS") whereas replacements may insert
45+
/// additional characters or capitalize specific characters within the original string.
46+
static WELL_KNOWN_ACRONYMS: Lazy<HashSet<String>> = Lazy::new(|| {
47+
let acronyms = &[
48+
"api", "amqp", "aws", "ec2", "ecs", "gcp", "hec", "http", "https", "nats", "nginx", "s3",
49+
"sqs", "tls", "ssl", "otel", "gelf", "csv", "json", "rfc3339", "lz4", "us", "eu", "bsd",
50+
"vrl", "tcp", "udp", "id", "uuid", "kms", "uri", "url", "acp", "uid", "ip", "pid",
51+
"ndjson", "ewma", "rtt", "cpu", "acl",
52+
];
53+
54+
// Entries are stored lowercased so that membership can be tested with a lowercased word.
acronyms.iter().map(|s| s.to_lowercase()).collect()
55+
});
56+
57+
/// Generates a human-friendly version of the given string.
58+
///
59+
/// Many instances exist where type names, or string constants, represent a condensed form of an
60+
/// otherwise human-friendly/recognizable string, such as "aws_s3" (for AWS S3) or "InfluxdbMetrics"
61+
/// (for InfluxDB Metrics) and so on.
62+
///
63+
/// This function takes a given input and restores it back to the human-friendly version by
64+
/// splitting it on the relevant word boundaries, adjusting the input to title case, and applying
65+
/// well-known replacements to ensure that brand-specific casing (such as "CloudWatch" instead of
66+
/// "Cloudwatch", or handling acronyms like AWS, GCP, and so on) makes it into the final version.
67+
pub fn generate_human_friendly_string(input: &str) -> String {
68+
// Create our case converter, which specifically ignores letter/digit boundaries, which is
69+
// important for not turning substrings like "Ec2" or "S3" into "Ec"/"2" and "S"/"3",
70+
// respectively.
71+
let converter = Converter::new()
72+
.to_case(Case::Title)
73+
.remove_boundaries(&[Boundary::LowerDigit, Boundary::UpperDigit]);
74+
let normalized = converter.convert(input);
75+
// Handle each space-separated word of the normalized string on its own, so that well-known
// replacements and acronyms are matched per word, then join the words back with single spaces.
76+
let replaced_segments = normalized
77+
.split(' ')
78+
.map(replace_well_known_segments)
79+
.collect::<Vec<_>>();
80+
replaced_segments.join(" ")
81+
}
82+
83+
/// Maps a single word to its well-known human-friendly form, if it has one.
///
/// The word is lowercased and looked up first in `WELL_KNOWN_REPLACEMENTS`, then in
/// `WELL_KNOWN_ACRONYMS`, so matching is case-insensitive and replacements take precedence over
/// acronyms. A word that matches neither is returned unchanged.
fn replace_well_known_segments(input: &str) -> String {
84+
// Both lookup tables are keyed by lowercased strings.
let as_lower = input.to_lowercase();
85+
if let Some(replacement) = WELL_KNOWN_REPLACEMENTS.get(&as_lower) {
86+
// A replacement dictates the exact output text, including any inserted characters.
replacement.to_string()
87+
} else if WELL_KNOWN_ACRONYMS.contains(&as_lower) {
88+
// Acronyms are emitted fully uppercased.
input.to_uppercase()
89+
} else {
90+
input.to_string()
91+
}
92+
}
93+
94+
#[cfg(test)]
94+
mod tests {
95+
use super::generate_human_friendly_string;
96+
97+
// PascalCase and snake_case spellings of the same name must produce the same friendly string.
#[test]
98+
fn autodetect_input_case() {
99+
let pascal_input = "LogToMetric";
100+
let snake_input = "log_to_metric";
101+
102+
let pascal_friendly = generate_human_friendly_string(pascal_input);
103+
let snake_friendly = generate_human_friendly_string(snake_input);
104+
105+
let expected = "Log To Metric";
106+
assert_eq!(expected, pascal_friendly);
107+
assert_eq!(expected, snake_friendly);
108+
}
109+
110+
// Words containing digits ("Ec2", "S3") must stay whole so they can be matched as acronyms.
#[test]
111+
fn digit_letter_boundaries() {
112+
let input1 = "Ec2Metadata";
113+
let expected1 = "EC2 Metadata";
114+
let actual1 = generate_human_friendly_string(input1);
115+
assert_eq!(expected1, actual1);
116+
117+
let input2 = "AwsS3";
118+
let expected2 = "AWS S3";
119+
let actual2 = generate_human_friendly_string(input2);
120+
assert_eq!(expected2, actual2);
121+
}
122+
}

lib/vector-config-common/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
#![deny(warnings)]
1414
pub mod attributes;
1515
pub mod constants;
16+
pub mod human_friendly;
1617
pub mod num;
1718
pub mod schema;
1819
pub mod validation;

lib/vector-config-macros/Cargo.toml

-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,6 @@ proc-macro = true
99

1010
[dependencies]
1111
darling = { version = "0.13", default-features = false, features = ["suggestions"] }
12-
itertools = { version = "0.10.5", default-features = false, features = ["use_std"] }
1312
proc-macro2 = { version = "1.0", default-features = false }
1413
quote = { version = "1.0", default-features = false }
1514
serde_derive_internals = "0.26"

lib/vector-config-macros/src/configurable_component.rs

+5-31
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
use darling::{Error, FromMeta};
2-
use itertools::Itertools as _;
32
use proc_macro::TokenStream;
43
use proc_macro2::{Ident, Span};
54
use quote::{quote, quote_spanned};
65
use syn::{
76
parse_macro_input, parse_quote, parse_quote_spanned, punctuated::Punctuated, spanned::Spanned,
87
token::Comma, AttributeArgs, DeriveInput, Lit, LitStr, Meta, MetaList, NestedMeta, Path,
98
};
10-
use vector_config_common::constants::ComponentType;
9+
use vector_config_common::{
10+
constants::ComponentType, human_friendly::generate_human_friendly_string,
11+
};
1112

1213
use crate::attrs;
1314

@@ -95,8 +96,8 @@ impl TypedComponent {
9596
}
9697
};
9798

98-
// Derive the label from the component name, but capitalized.
99-
let label = capitalize_words(&component_name.value());
99+
// Derive the human-friendly name from the component name.
100+
let label = generate_human_friendly_string(&component_name.value());
100101

101102
// Derive the logical name from the config type, with the trailing "Config" dropped.
102103
let logical_name = config_ty.to_string();
@@ -327,33 +328,6 @@ pub fn configurable_component_impl(args: TokenStream, item: TokenStream) -> Toke
327328
derived.into()
328329
}
329330

330-
// Properly capitalize labels, accounting for some exceptions
331-
// TODO: Replace this with an explicit requirement for a "component_human_name" or similar.
332-
fn capitalize(s: &str) -> String {
333-
match s {
334-
"Amqp" | "Aws" | "Ec2" | "Ecs" | "Gcp" | "Hec" | "Http" | "Nats" | "Nginx" | "Sqs" => {
335-
s.to_uppercase()
336-
}
337-
"Eventstoredb" => String::from("EventStoreDB"),
338-
"Mongodb" => String::from("MongoDB"),
339-
"Opentelemetry" => String::from("OpenTelemetry"),
340-
"Postgresql" => String::from("PostgreSQL"),
341-
"Pubsub" => String::from("Pub/Sub"),
342-
"Statsd" => String::from("StatsD"),
343-
_ => {
344-
let mut iter = s.chars();
345-
match iter.next() {
346-
None => String::new(),
347-
Some(first) => first.to_uppercase().collect::<String>() + iter.as_str(),
348-
}
349-
}
350-
}
351-
}
352-
353-
fn capitalize_words(s: &str) -> String {
354-
s.split('_').map(capitalize).join(" ")
355-
}
356-
357331
/// Gets the ident of the component type-specific helper attribute for the `NamedComponent` derive.
358332
///
359333
/// When we emit code for a configurable item that has been marked as a typed component, we

lib/vector-config/Cargo.toml

+1-1
Original file line numberDiff line numberDiff line change
@@ -32,5 +32,5 @@ vector-config-common = { path = "../vector-config-common" }
3232
vector-config-macros = { path = "../vector-config-macros" }
3333

3434
[dev-dependencies]
35-
pretty_assertions = { version = "1.3.0", default-features = false, features = ["std"] }
35+
assert-json-diff = { version = "2", default-features = false }
3636
serde_with = { version = "2.3.2", default-features = false, features = ["std", "macros"] }

lib/vector-config/src/component/description.rs

+4-2
Original file line numberDiff line numberDiff line change
@@ -119,8 +119,10 @@ where
119119

120120
let mut variant_metadata = Metadata::default();
121121
variant_metadata.set_description(self.description);
122-
variant_metadata
123-
.add_custom_attribute(CustomAttribute::kv(constants::DOCS_META_LABEL, self.label));
122+
variant_metadata.add_custom_attribute(CustomAttribute::kv(
123+
constants::DOCS_META_HUMAN_NAME,
124+
self.label,
125+
));
124126
variant_metadata
125127
.add_custom_attribute(CustomAttribute::kv("logical_name", self.logical_name));
126128
schema::apply_base_metadata(&mut subschema, variant_metadata);

0 commit comments

Comments
 (0)