Skip to content

Commit

Permalink
chore(config): emit human-friendly version of enum variant/property n…
Browse files Browse the repository at this point in the history
…ames in schema (vectordotdev#17171)
  • Loading branch information
tobz authored Apr 19, 2023
1 parent 3c92556 commit 3b38ba8
Show file tree
Hide file tree
Showing 14 changed files with 454 additions and 135 deletions.
55 changes: 14 additions & 41 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions lib/vector-config-common/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@ edition = "2021"
license = "MPL-2.0"

[dependencies]
convert_case = { version = "0.6", default-features = false }
darling = { version = "0.13", default-features = false, features = ["suggestions"] }
indexmap = { version = "1.9", default-features = false, features = ["serde"] }
once_cell = { version = "1", default-features = false, features = ["std"] }
proc-macro2 = { version = "1.0", default-features = false }
serde = { version = "1.0", default-features = false, features = ["derive"] }
serde_json = { version = "1.0", default-features = false, features = ["std"] }
Expand Down
3 changes: 2 additions & 1 deletion lib/vector-config-common/src/constants.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@ pub const DOCS_META_ENUM_TAG_FIELD: &str = "docs::enum_tag_field";
pub const DOCS_META_ENUM_TAGGING: &str = "docs::enum_tagging";
pub const DOCS_META_EXAMPLES: &str = "docs::examples";
pub const DOCS_META_HIDDEN: &str = "docs::hidden";
pub const DOCS_META_LABEL: &str = "docs::label";
pub const DOCS_META_HUMAN_NAME: &str = "docs::human_name";
pub const DOCS_META_NUMERIC_TYPE: &str = "docs::numeric_type";
pub const DOCS_META_OPTIONAL: &str = "docs::optional";
pub const DOCS_META_SYNTAX_OVERRIDE: &str = "docs::syntax_override";
pub const DOCS_META_TEMPLATEABLE: &str = "docs::templateable";
pub const DOCS_META_TYPE_OVERRIDE: &str = "docs::type_override";
pub const DOCS_META_TYPE_UNIT: &str = "docs::type_unit";
pub const LOGICAL_NAME: &str = "logical_name";
pub const METADATA: &str = "_metadata";

/// Well-known component types.
Expand Down
123 changes: 123 additions & 0 deletions lib/vector-config-common/src/human_friendly.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
use std::collections::{HashMap, HashSet};

use convert_case::{Boundary, Case, Converter};
use once_cell::sync::Lazy;

/// Lookup table of well-known replacements, keyed by the lowercased segment.
///
/// A replacement covers a string whose canonical spelling cannot be derived mechanically: it may
/// capitalize specific characters within the word (such as "clickhouse" becoming "ClickHouse") or
/// insert characters that are not present in the input at all (such as "pubsub" becoming
/// "Pub/Sub").
static WELL_KNOWN_REPLACEMENTS: Lazy<HashMap<String, &'static str>> = Lazy::new(|| {
    const REPLACEMENTS: &[(&str, &str)] = &[
        ("eventstoredb", "EventStoreDB"),
        ("mongodb", "MongoDB"),
        ("opentelemetry", "OpenTelemetry"),
        ("otel", "OTEL"),
        ("postgresql", "PostgreSQL"),
        ("pubsub", "Pub/Sub"),
        ("statsd", "StatsD"),
        ("journald", "JournalD"),
        ("appsignal", "AppSignal"),
        ("clickhouse", "ClickHouse"),
        ("influxdb", "InfluxDB"),
        ("webhdfs", "WebHDFS"),
        ("cloudwatch", "CloudWatch"),
        ("logdna", "LogDNA"),
        ("geoip", "GeoIP"),
        ("ssekms", "SSE-KMS"),
        ("aes256", "AES-256"),
        ("apiserver", "API Server"),
        ("dir", "Directory"),
        ("ids", "IDs"),
        ("ips", "IPs"),
        ("grpc", "gRPC"),
        ("oauth2", "OAuth2"),
    ];

    // Keys are lowercased on insertion so that a lowercased segment can be used for lookups.
    let mut lookup = HashMap::with_capacity(REPLACEMENTS.len());
    for &(segment, replacement) in REPLACEMENTS {
        lookup.insert(segment.to_lowercase(), replacement);
    }
    lookup
});

/// Set of well-known acronyms, stored lowercased.
///
/// Unlike replacements, an acronym needs no special spelling: whatever casing it arrives in
/// ("aws", "aWs", "Aws"), the correct rendering is simply the fully uppercased form ("AWS").
/// Replacements, by contrast, may insert additional characters or capitalize only specific
/// characters of the original string.
static WELL_KNOWN_ACRONYMS: Lazy<HashSet<String>> = Lazy::new(|| {
    const ACRONYMS: &[&str] = &[
        "api", "amqp", "aws", "ec2", "ecs", "gcp", "hec", "http", "https", "nats", "nginx", "s3",
        "sqs", "tls", "ssl", "otel", "gelf", "csv", "json", "rfc3339", "lz4", "us", "eu", "bsd",
        "vrl", "tcp", "udp", "id", "uuid", "kms", "uri", "url", "acp", "uid", "ip", "pid",
        "ndjson", "ewma", "rtt", "cpu", "acl",
    ];

    // Entries are lowercased on insertion so that membership can be tested with a lowercased
    // segment.
    let mut known = HashSet::with_capacity(ACRONYMS.len());
    for acronym in ACRONYMS {
        known.insert(acronym.to_lowercase());
    }
    known
});

/// Generates a human-friendly version of the given string.
///
/// Many instances exist where type names, or string constants, represent a condensed form of an
/// otherwise human-friendly/recognize string, such as "aws_s3" (for AWS S3) or "InfluxdbMetrics"
/// (for InfluxDB Metrics) and so on.
///
/// This function takes a given input and restores it back to the human-friendly version by
/// splitting it on the relevant word boundaries, adjusting the input to title case, and applying
/// well-known replacements to ensure that brand-specific casing (such as "CloudWatch" instead of
/// "Cloudwatch", or handling acronyms like AWS, GCP, and so on) makes it into the final version.
pub fn generate_human_friendly_string(input: &str) -> String {
// Create our case converter, which specifically ignores letter/digit boundaries, which is
// important for not turning substrings like "Ec2" or "S3" into "Ec"/"2" and "S"/"3",
// respectively.
let converter = Converter::new()
.to_case(Case::Title)
.remove_boundaries(&[Boundary::LowerDigit, Boundary::UpperDigit]);
let normalized = converter.convert(input);

let replaced_segments = normalized
.split(' ')
.map(replace_well_known_segments)
.collect::<Vec<_>>();
replaced_segments.join(" ")
}

/// Maps a single segment to its well-known form, if it has one.
///
/// The segment is compared case-insensitively (via its lowercased form). A match in the
/// replacement table wins and yields the table's spelling; otherwise a match in the acronym set
/// yields the segment fully uppercased; otherwise the segment is returned unchanged.
fn replace_well_known_segments(input: &str) -> String {
    let key = input.to_lowercase();
    match WELL_KNOWN_REPLACEMENTS.get(&key) {
        Some(&canonical) => canonical.to_string(),
        None if WELL_KNOWN_ACRONYMS.contains(&key) => input.to_uppercase(),
        None => input.to_string(),
    }
}

#[cfg(test)]
mod tests {
    use super::generate_human_friendly_string;

    /// Pascal-case and snake-case spellings of the same name must yield the same output.
    #[test]
    fn autodetect_input_case() {
        let expected = "Log To Metric";

        for input in ["LogToMetric", "log_to_metric"] {
            assert_eq!(expected, generate_human_friendly_string(input));
        }
    }

    /// Segments that mix letters and digits (such as "Ec2" and "S3") must stay in one piece.
    #[test]
    fn digit_letter_boundaries() {
        let cases = [("Ec2Metadata", "EC2 Metadata"), ("AwsS3", "AWS S3")];

        for (input, expected) in cases {
            assert_eq!(expected, generate_human_friendly_string(input));
        }
    }
}
1 change: 1 addition & 0 deletions lib/vector-config-common/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#![deny(warnings)]
pub mod attributes;
pub mod constants;
pub mod human_friendly;
pub mod num;
pub mod schema;
pub mod validation;
1 change: 0 additions & 1 deletion lib/vector-config-macros/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ proc-macro = true

[dependencies]
darling = { version = "0.13", default-features = false, features = ["suggestions"] }
itertools = { version = "0.10.5", default-features = false, features = ["use_std"] }
proc-macro2 = { version = "1.0", default-features = false }
quote = { version = "1.0", default-features = false }
serde_derive_internals = "0.26"
Expand Down
36 changes: 5 additions & 31 deletions lib/vector-config-macros/src/configurable_component.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
use darling::{Error, FromMeta};
use itertools::Itertools as _;
use proc_macro::TokenStream;
use proc_macro2::{Ident, Span};
use quote::{quote, quote_spanned};
use syn::{
parse_macro_input, parse_quote, parse_quote_spanned, punctuated::Punctuated, spanned::Spanned,
token::Comma, AttributeArgs, DeriveInput, Lit, LitStr, Meta, MetaList, NestedMeta, Path,
};
use vector_config_common::constants::ComponentType;
use vector_config_common::{
constants::ComponentType, human_friendly::generate_human_friendly_string,
};

use crate::attrs;

Expand Down Expand Up @@ -95,8 +96,8 @@ impl TypedComponent {
}
};

// Derive the label from the component name, but capitalized.
let label = capitalize_words(&component_name.value());
// Derive the human-friendly name from the component name.
let label = generate_human_friendly_string(&component_name.value());

// Derive the logical name from the config type, with the trailing "Config" dropped.
let logical_name = config_ty.to_string();
Expand Down Expand Up @@ -327,33 +328,6 @@ pub fn configurable_component_impl(args: TokenStream, item: TokenStream) -> Toke
derived.into()
}

// Capitalizes a single label word, with special handling for a fixed set of exceptions.
//
// Matching against the exceptions is exact and case-sensitive: a listed acronym (e.g. "Aws") is
// fully uppercased, a listed brand name (e.g. "Pubsub") is swapped for its canonical spelling,
// and any other word only has its first character uppercased. An empty word stays empty.
//
// TODO: Replace this with an explicit requirement for a "component_human_name" or similar.
fn capitalize(s: &str) -> String {
    const ACRONYMS: [&str; 10] = [
        "Amqp", "Aws", "Ec2", "Ecs", "Gcp", "Hec", "Http", "Nats", "Nginx", "Sqs",
    ];
    if ACRONYMS.contains(&s) {
        return s.to_uppercase();
    }

    let branded = match s {
        "Eventstoredb" => Some("EventStoreDB"),
        "Mongodb" => Some("MongoDB"),
        "Opentelemetry" => Some("OpenTelemetry"),
        "Postgresql" => Some("PostgreSQL"),
        "Pubsub" => Some("Pub/Sub"),
        "Statsd" => Some("StatsD"),
        _ => None,
    };
    if let Some(name) = branded {
        return String::from(name);
    }

    // Generic case: uppercase the first character and keep the remainder untouched.
    let mut remainder = s.chars();
    match remainder.next() {
        Some(first) => {
            let mut capitalized: String = first.to_uppercase().collect();
            capitalized.push_str(remainder.as_str());
            capitalized
        }
        None => String::new(),
    }
}

// Splits `s` on underscores, capitalizes every piece, and joins the pieces with single spaces.
fn capitalize_words(s: &str) -> String {
    let words: Vec<String> = s.split('_').map(capitalize).collect();
    words.join(" ")
}

/// Gets the ident of the component type-specific helper attribute for the `NamedComponent` derive.
///
/// When we emit code for a configurable item that has been marked as a typed component, we
Expand Down
2 changes: 1 addition & 1 deletion lib/vector-config/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,5 @@ vector-config-common = { path = "../vector-config-common" }
vector-config-macros = { path = "../vector-config-macros" }

[dev-dependencies]
pretty_assertions = { version = "1.3.0", default-features = false, features = ["std"] }
assert-json-diff = { version = "2", default-features = false }
serde_with = { version = "2.3.2", default-features = false, features = ["std", "macros"] }
6 changes: 4 additions & 2 deletions lib/vector-config/src/component/description.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,8 +119,10 @@ where

let mut variant_metadata = Metadata::default();
variant_metadata.set_description(self.description);
variant_metadata
.add_custom_attribute(CustomAttribute::kv(constants::DOCS_META_LABEL, self.label));
variant_metadata.add_custom_attribute(CustomAttribute::kv(
constants::DOCS_META_HUMAN_NAME,
self.label,
));
variant_metadata
.add_custom_attribute(CustomAttribute::kv("logical_name", self.logical_name));
schema::apply_base_metadata(&mut subschema, variant_metadata);
Expand Down
Loading

0 comments on commit 3b38ba8

Please sign in to comment.