From 3b38ba82c3727eac93c0d0a992f248b72435dac6 Mon Sep 17 00:00:00 2001 From: Toby Lawrence Date: Wed, 19 Apr 2023 12:52:30 -0400 Subject: [PATCH] chore(config): emit human-friendly version of enum variant/property names in schema (#17171) --- Cargo.lock | 55 +--- lib/vector-config-common/Cargo.toml | 2 + lib/vector-config-common/src/constants.rs | 3 +- .../src/human_friendly.rs | 123 ++++++++ lib/vector-config-common/src/lib.rs | 1 + lib/vector-config-macros/Cargo.toml | 1 - .../src/configurable_component.rs | 36 +-- lib/vector-config/Cargo.toml | 2 +- .../src/component/description.rs | 6 +- lib/vector-config/src/schema/helpers.rs | 6 +- .../src/schema/visitors/human_name.rs | 286 ++++++++++++++++++ lib/vector-config/src/schema/visitors/mod.rs | 2 + lib/vector-config/src/schema/visitors/test.rs | 8 +- src/sinks/mod.rs | 58 +--- 14 files changed, 454 insertions(+), 135 deletions(-) create mode 100644 lib/vector-config-common/src/human_friendly.rs create mode 100644 lib/vector-config/src/schema/visitors/human_name.rs diff --git a/Cargo.lock b/Cargo.lock index ad05c290a99f8..aa09fb4026c35 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2151,6 +2151,15 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" +[[package]] +name = "convert_case" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec182b0ca2f35d8fc196cf3404988fd8b8c739a4d270ff118a398feb0cbec1ca" +dependencies = [ + "unicode-segmentation", +] + [[package]] name = "cookie-factory" version = "0.3.2" @@ -2385,16 +2394,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "ctor" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096" -dependencies = [ - "quote 1.0.26", - "syn 1.0.109", -] - [[package]] name = "ctor" version = "0.2.0" @@ -2675,7 +2674,7 @@ 
version = "0.99.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" dependencies = [ - "convert_case", + "convert_case 0.4.0", "proc-macro2 1.0.56", "quote 1.0.26", "rustc_version 0.4.0", @@ -4178,7 +4177,7 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7741301a6d6a9b28ce77c0fb77a4eb116b6bc8f3bef09923f7743d059c4157d3" dependencies = [ - "ctor 0.2.0", + "ctor", "ghost", ] @@ -5721,15 +5720,6 @@ version = "6.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b7820b9daea5457c9f21c69448905d723fbd21136ccf521748f23fd49e723ee" -[[package]] -name = "output_vt100" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "628223faebab4e3e40667ee0b2336d34a5b960ff60ea743ddfdbcf7770bcfb66" -dependencies = [ - "winapi", -] - [[package]] name = "outref" version = "0.5.1" @@ -6147,18 +6137,6 @@ dependencies = [ "termtree", ] -[[package]] -name = "pretty_assertions" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a25e9bcb20aa780fd0bb16b72403a9064d6b3f22f026946029acb941a50af755" -dependencies = [ - "ctor 0.1.26", - "diff", - "output_vt100", - "yansi", -] - [[package]] name = "prettydiff" version = "0.6.2" @@ -9481,6 +9459,7 @@ dependencies = [ name = "vector-config" version = "0.1.0" dependencies = [ + "assert-json-diff", "chrono", "chrono-tz", "encoding_rs", @@ -9489,7 +9468,6 @@ dependencies = [ "no-proxy", "num-traits", "once_cell", - "pretty_assertions", "serde", "serde_json", "serde_with 2.3.2", @@ -9507,8 +9485,10 @@ dependencies = [ name = "vector-config-common" version = "0.1.0" dependencies = [ + "convert_case 0.6.0", "darling 0.13.4", "indexmap", + "once_cell", "proc-macro2 1.0.56", "quote 1.0.26", "serde", @@ -9522,7 +9502,6 @@ name = "vector-config-macros" version = "0.1.0" dependencies = [ "darling 0.13.4", 
- "itertools", "proc-macro2 1.0.56", "quote 1.0.26", "serde", @@ -10425,12 +10404,6 @@ dependencies = [ "linked-hash-map", ] -[[package]] -name = "yansi" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09041cd90cf85f7f8b2df60c646f853b7f535ce68f85244eb6731cf89fa498ec" - [[package]] name = "zerocopy" version = "0.6.1" diff --git a/lib/vector-config-common/Cargo.toml b/lib/vector-config-common/Cargo.toml index d8eac2cfd0485..9e29eb538f98a 100644 --- a/lib/vector-config-common/Cargo.toml +++ b/lib/vector-config-common/Cargo.toml @@ -5,8 +5,10 @@ edition = "2021" license = "MPL-2.0" [dependencies] +convert_case = { version = "0.6", default-features = false } darling = { version = "0.13", default-features = false, features = ["suggestions"] } indexmap = { version = "1.9", default-features = false, features = ["serde"] } +once_cell = { version = "1", default-features = false, features = ["std"] } proc-macro2 = { version = "1.0", default-features = false } serde = { version = "1.0", default-features = false, features = ["derive"] } serde_json = { version = "1.0", default-features = false, features = ["std"] } diff --git a/lib/vector-config-common/src/constants.rs b/lib/vector-config-common/src/constants.rs index c7a06a0d3a6ec..864b72098acec 100644 --- a/lib/vector-config-common/src/constants.rs +++ b/lib/vector-config-common/src/constants.rs @@ -18,13 +18,14 @@ pub const DOCS_META_ENUM_TAG_FIELD: &str = "docs::enum_tag_field"; pub const DOCS_META_ENUM_TAGGING: &str = "docs::enum_tagging"; pub const DOCS_META_EXAMPLES: &str = "docs::examples"; pub const DOCS_META_HIDDEN: &str = "docs::hidden"; -pub const DOCS_META_LABEL: &str = "docs::label"; +pub const DOCS_META_HUMAN_NAME: &str = "docs::human_name"; pub const DOCS_META_NUMERIC_TYPE: &str = "docs::numeric_type"; pub const DOCS_META_OPTIONAL: &str = "docs::optional"; pub const DOCS_META_SYNTAX_OVERRIDE: &str = "docs::syntax_override"; pub const DOCS_META_TEMPLATEABLE: &str = 
"docs::templateable"; pub const DOCS_META_TYPE_OVERRIDE: &str = "docs::type_override"; pub const DOCS_META_TYPE_UNIT: &str = "docs::type_unit"; +pub const LOGICAL_NAME: &str = "logical_name"; pub const METADATA: &str = "_metadata"; /// Well-known component types. diff --git a/lib/vector-config-common/src/human_friendly.rs b/lib/vector-config-common/src/human_friendly.rs new file mode 100644 index 0000000000000..29c9a290efb93 --- /dev/null +++ b/lib/vector-config-common/src/human_friendly.rs @@ -0,0 +1,123 @@ +use std::collections::{HashMap, HashSet}; + +use convert_case::{Boundary, Case, Converter}; +use once_cell::sync::Lazy; + +/// Well-known replacements. +/// +/// Replacements are instances of strings with unique capitalization that cannot be achieved +/// programmatically, as well as the potential insertion of additional characters, such as the +/// replacement of "pubsub" with "Pub/Sub". +static WELL_KNOWN_REPLACEMENTS: Lazy> = Lazy::new(|| { + let pairs = vec![ + ("eventstoredb", "EventStoreDB"), + ("mongodb", "MongoDB"), + ("opentelemetry", "OpenTelemetry"), + ("otel", "OTEL"), + ("postgresql", "PostgreSQL"), + ("pubsub", "Pub/Sub"), + ("statsd", "StatsD"), + ("journald", "JournalD"), + ("appsignal", "AppSignal"), + ("clickhouse", "ClickHouse"), + ("influxdb", "InfluxDB"), + ("webhdfs", "WebHDFS"), + ("cloudwatch", "CloudWatch"), + ("logdna", "LogDNA"), + ("geoip", "GeoIP"), + ("ssekms", "SSE-KMS"), + ("aes256", "AES-256"), + ("apiserver", "API Server"), + ("dir", "Directory"), + ("ids", "IDs"), + ("ips", "IPs"), + ("grpc", "gRPC"), + ("oauth2", "OAuth2"), + ]; + + pairs.iter().map(|(k, v)| (k.to_lowercase(), *v)).collect() +}); + +/// Well-known acronyms. +/// +/// Acronyms are distinct from replacements because they should be entirely capitalized (i.e. "aws" +/// or "aWs" or "Aws" should always be replaced with "AWS") whereas replacements may insert +/// additional characters or capitalize specific characters within the original string. 
+static WELL_KNOWN_ACRONYMS: Lazy> = Lazy::new(|| { + let acronyms = &[ + "api", "amqp", "aws", "ec2", "ecs", "gcp", "hec", "http", "https", "nats", "nginx", "s3", + "sqs", "tls", "ssl", "otel", "gelf", "csv", "json", "rfc3339", "lz4", "us", "eu", "bsd", + "vrl", "tcp", "udp", "id", "uuid", "kms", "uri", "url", "acp", "uid", "ip", "pid", + "ndjson", "ewma", "rtt", "cpu", "acl", + ]; + + acronyms.iter().map(|s| s.to_lowercase()).collect() +}); + +/// Generates a human-friendly version of the given string. +/// +/// Many instances exist where type names, or string constants, represent a condensed form of an +/// otherwise human-friendly/recognizable string, such as "aws_s3" (for AWS S3) or "InfluxdbMetrics" +/// (for InfluxDB Metrics) and so on. +/// +/// This function takes a given input and restores it back to the human-friendly version by +/// splitting it on the relevant word boundaries, adjusting the input to title case, and applying +/// well-known replacements to ensure that brand-specific casing (such as "CloudWatch" instead of +/// "Cloudwatch", or handling acronyms like AWS, GCP, and so on) makes it into the final version. +pub fn generate_human_friendly_string(input: &str) -> String { + // Create our case converter, which specifically ignores letter/digit boundaries, which is + // important for not turning substrings like "Ec2" or "S3" into "Ec"/"2" and "S"/"3", + // respectively. 
+ let converter = Converter::new() + .to_case(Case::Title) + .remove_boundaries(&[Boundary::LowerDigit, Boundary::UpperDigit]); + let normalized = converter.convert(input); + + let replaced_segments = normalized + .split(' ') + .map(replace_well_known_segments) + .collect::>(); + replaced_segments.join(" ") +} + +fn replace_well_known_segments(input: &str) -> String { + let as_lower = input.to_lowercase(); + if let Some(replacement) = WELL_KNOWN_REPLACEMENTS.get(&as_lower) { + replacement.to_string() + } else if WELL_KNOWN_ACRONYMS.contains(&as_lower) { + input.to_uppercase() + } else { + input.to_string() + } +} + +#[cfg(test)] +mod tests { + use super::generate_human_friendly_string; + + #[test] + fn autodetect_input_case() { + let pascal_input = "LogToMetric"; + let snake_input = "log_to_metric"; + + let pascal_friendly = generate_human_friendly_string(pascal_input); + let snake_friendly = generate_human_friendly_string(snake_input); + + let expected = "Log To Metric"; + assert_eq!(expected, pascal_friendly); + assert_eq!(expected, snake_friendly); + } + + #[test] + fn digit_letter_boundaries() { + let input1 = "Ec2Metadata"; + let expected1 = "EC2 Metadata"; + let actual1 = generate_human_friendly_string(input1); + assert_eq!(expected1, actual1); + + let input2 = "AwsS3"; + let expected2 = "AWS S3"; + let actual2 = generate_human_friendly_string(input2); + assert_eq!(expected2, actual2); + } +} diff --git a/lib/vector-config-common/src/lib.rs b/lib/vector-config-common/src/lib.rs index 35286bb11036c..f7305a3abea50 100644 --- a/lib/vector-config-common/src/lib.rs +++ b/lib/vector-config-common/src/lib.rs @@ -13,6 +13,7 @@ #![deny(warnings)] pub mod attributes; pub mod constants; +pub mod human_friendly; pub mod num; pub mod schema; pub mod validation; diff --git a/lib/vector-config-macros/Cargo.toml b/lib/vector-config-macros/Cargo.toml index a8ce2a3dde76a..bd67a370190cb 100644 --- a/lib/vector-config-macros/Cargo.toml +++ b/lib/vector-config-macros/Cargo.toml 
@@ -9,7 +9,6 @@ proc-macro = true [dependencies] darling = { version = "0.13", default-features = false, features = ["suggestions"] } -itertools = { version = "0.10.5", default-features = false, features = ["use_std"] } proc-macro2 = { version = "1.0", default-features = false } quote = { version = "1.0", default-features = false } serde_derive_internals = "0.26" diff --git a/lib/vector-config-macros/src/configurable_component.rs b/lib/vector-config-macros/src/configurable_component.rs index b0381c6b2bf90..37acc7703f24d 100644 --- a/lib/vector-config-macros/src/configurable_component.rs +++ b/lib/vector-config-macros/src/configurable_component.rs @@ -1,5 +1,4 @@ use darling::{Error, FromMeta}; -use itertools::Itertools as _; use proc_macro::TokenStream; use proc_macro2::{Ident, Span}; use quote::{quote, quote_spanned}; @@ -7,7 +6,9 @@ use syn::{ parse_macro_input, parse_quote, parse_quote_spanned, punctuated::Punctuated, spanned::Spanned, token::Comma, AttributeArgs, DeriveInput, Lit, LitStr, Meta, MetaList, NestedMeta, Path, }; -use vector_config_common::constants::ComponentType; +use vector_config_common::{ + constants::ComponentType, human_friendly::generate_human_friendly_string, +}; use crate::attrs; @@ -95,8 +96,8 @@ impl TypedComponent { } }; - // Derive the label from the component name, but capitalized. - let label = capitalize_words(&component_name.value()); + // Derive the human-friendly name from the component name. + let label = generate_human_friendly_string(&component_name.value()); // Derive the logical name from the config type, with the trailing "Config" dropped. let logical_name = config_ty.to_string(); @@ -327,33 +328,6 @@ pub fn configurable_component_impl(args: TokenStream, item: TokenStream) -> Toke derived.into() } -// Properly capitalize labels, accounting for some exceptions -// TODO: Replace this with an explicit requirement for a "component_human_name" or similar. 
-fn capitalize(s: &str) -> String { - match s { - "Amqp" | "Aws" | "Ec2" | "Ecs" | "Gcp" | "Hec" | "Http" | "Nats" | "Nginx" | "Sqs" => { - s.to_uppercase() - } - "Eventstoredb" => String::from("EventStoreDB"), - "Mongodb" => String::from("MongoDB"), - "Opentelemetry" => String::from("OpenTelemetry"), - "Postgresql" => String::from("PostgreSQL"), - "Pubsub" => String::from("Pub/Sub"), - "Statsd" => String::from("StatsD"), - _ => { - let mut iter = s.chars(); - match iter.next() { - None => String::new(), - Some(first) => first.to_uppercase().collect::() + iter.as_str(), - } - } - } -} - -fn capitalize_words(s: &str) -> String { - s.split('_').map(capitalize).join(" ") -} - /// Gets the ident of the component type-specific helper attribute for the `NamedComponent` derive. /// /// When we emit code for a configurable item that has been marked as a typed component, we diff --git a/lib/vector-config/Cargo.toml b/lib/vector-config/Cargo.toml index 6c69e7aed1ecc..ce5411e7082a7 100644 --- a/lib/vector-config/Cargo.toml +++ b/lib/vector-config/Cargo.toml @@ -32,5 +32,5 @@ vector-config-common = { path = "../vector-config-common" } vector-config-macros = { path = "../vector-config-macros" } [dev-dependencies] -pretty_assertions = { version = "1.3.0", default-features = false, features = ["std"] } +assert-json-diff = { version = "2", default-features = false } serde_with = { version = "2.3.2", default-features = false, features = ["std", "macros"] } diff --git a/lib/vector-config/src/component/description.rs b/lib/vector-config/src/component/description.rs index df18fa919f612..b64239aafcdea 100644 --- a/lib/vector-config/src/component/description.rs +++ b/lib/vector-config/src/component/description.rs @@ -119,8 +119,10 @@ where let mut variant_metadata = Metadata::default(); variant_metadata.set_description(self.description); - variant_metadata - .add_custom_attribute(CustomAttribute::kv(constants::DOCS_META_LABEL, self.label)); + 
variant_metadata.add_custom_attribute(CustomAttribute::kv( + constants::DOCS_META_HUMAN_NAME, + self.label, + )); variant_metadata .add_custom_attribute(CustomAttribute::kv("logical_name", self.logical_name)); schema::apply_base_metadata(&mut subschema, variant_metadata); diff --git a/lib/vector-config/src/schema/helpers.rs b/lib/vector-config/src/schema/helpers.rs index 108b3e3a2f42a..40a0ce1ccb2f8 100644 --- a/lib/vector-config/src/schema/helpers.rs +++ b/lib/vector-config/src/schema/helpers.rs @@ -12,7 +12,10 @@ use crate::{ num::ConfigurableNumber, Configurable, ConfigurableRef, GenerateError, Metadata, ToValue, }; -use super::visitors::{DisallowUnevaluatedPropertiesVisitor, InlineSingleUseReferencesVisitor}; +use super::visitors::{ + DisallowUnevaluatedPropertiesVisitor, GenerateHumanFriendlyNameVisitor, + InlineSingleUseReferencesVisitor, +}; /// Applies metadata to the given schema. /// @@ -483,6 +486,7 @@ pub fn default_schema_settings() -> SchemaSettings { SchemaSettings::new() .with_visitor(InlineSingleUseReferencesVisitor::from_settings) .with_visitor(DisallowUnevaluatedPropertiesVisitor::from_settings) + .with_visitor(GenerateHumanFriendlyNameVisitor::from_settings) } pub fn generate_root_schema() -> Result diff --git a/lib/vector-config/src/schema/visitors/human_name.rs b/lib/vector-config/src/schema/visitors/human_name.rs new file mode 100644 index 0000000000000..dc2fb7e57cca9 --- /dev/null +++ b/lib/vector-config/src/schema/visitors/human_name.rs @@ -0,0 +1,286 @@ +use serde_json::Value; +use vector_config_common::{ + constants, + human_friendly::generate_human_friendly_string, + schema::{visit::Visitor, *}, +}; + +/// A visitor that generates a human-friendly name for enum variants and fields as metadata. 
+/// +/// Generally, we rely on rich documentation to provide human-friendly descriptions of types and +/// fields, but there is no such mechanism to provide a human-friendly name for types and fields +/// directly from their documentation comments. While it is possible to do so with manual metadata +/// annotations, it is laborious and prone to error. +/// +/// This visitor generates a human-friendly name for types and fields, stored in metadata +/// (`docs::human_name`) using a simple set of heuristics to figure out how to break apart +/// type/field names, as well as what the case of each word should be, including accommodations for +/// well-known technical terms/acronyms, and so on. +/// +/// ## Opting out of the visitor behavior +/// +/// This approach has a very high hit rate, as the corpus we're operating on is generally small and +/// well contained, leading to requiring only a small set of replacements and logic. However, for +/// cases when this approach is not suitable, upstream usages can declare `docs::human_name` +/// themselves. Whenever the visitor sees that the metadata annotation is already present, it will +/// skip generating it. +#[derive(Debug, Default)] +pub struct GenerateHumanFriendlyNameVisitor; + +impl GenerateHumanFriendlyNameVisitor { + pub fn from_settings(_: &SchemaSettings) -> Self { + Self + } +} + +impl Visitor for GenerateHumanFriendlyNameVisitor { + fn visit_schema_object( + &mut self, + definitions: &mut Map, + schema: &mut SchemaObject, + ) { + // Recursively visit this schema first. + visit::visit_schema_object(self, definitions, schema); + + // Skip this schema if it already has a human-friendly name defined. + if has_schema_metadata_attr_str(schema, constants::DOCS_META_HUMAN_NAME) { + return; + } + + // When a logical name (via `logical_name`) is present, we use that as the source for + // generating the human-friendly name. Logical name is populated for schemas that represent + // an enum variant. 
+ if let Some(logical_name) = get_schema_metadata_attr_str(schema, constants::LOGICAL_NAME) { + let human_name = generate_human_friendly_string(logical_name); + set_schema_metadata_attr_str(schema, constants::DOCS_META_HUMAN_NAME, human_name); + } + + // If the schema has object properties, we'll individually add the human name to each + // property's schema if it doesn't already have a human-friendly name defined. + if let Some(properties) = schema.object.as_mut().map(|object| &mut object.properties) { + for (property_name, property_schema) in properties.iter_mut() { + if let Some(property_schema) = property_schema.as_object_mut() { + if !has_schema_metadata_attr_str( + property_schema, + constants::DOCS_META_HUMAN_NAME, + ) { + let human_name = generate_human_friendly_string(property_name); + set_schema_metadata_attr_str( + property_schema, + constants::DOCS_META_HUMAN_NAME, + human_name, + ); + } + } + } + } + } +} + +fn has_schema_metadata_attr_str(schema: &SchemaObject, key: &str) -> bool { + get_schema_metadata_attr_str(schema, key).is_some() +} + +fn get_schema_metadata_attr_str<'a>(schema: &'a SchemaObject, key: &str) -> Option<&'a str> { + schema + .extensions + .get(constants::METADATA) + .and_then(|metadata| metadata.get(key)) + .and_then(|value| value.as_str()) +} + +fn set_schema_metadata_attr_str(schema: &mut SchemaObject, key: &str, value: String) { + let metadata = schema + .extensions + .entry(constants::METADATA.to_string()) + .or_insert_with(|| Value::Object(serde_json::Map::new())); + + let metadata_map = metadata + .as_object_mut() + .expect("schema metadata must always be an object"); + metadata_map.insert(key.to_string(), Value::String(value)); +} + +#[cfg(test)] +mod tests { + use serde_json::json; + use vector_config_common::schema::visit::Visitor; + + use crate::schema::visitors::test::{as_schema, assert_schemas_eq}; + + use super::GenerateHumanFriendlyNameVisitor; + + #[test] + fn logical_name() { + let mut actual_schema = 
as_schema(json!({ + "type": "string", + "_metadata": { + "logical_name": "LogToMetric" + } + })); + + let expected_schema = as_schema(json!({ + "type": "string", + "_metadata": { + "docs::human_name": "Log To Metric", + "logical_name": "LogToMetric" + } + })); + + let mut visitor = GenerateHumanFriendlyNameVisitor::default(); + visitor.visit_root_schema(&mut actual_schema); + + assert_schemas_eq(expected_schema, actual_schema); + } + + #[test] + fn logical_name_with_replacement() { + let mut actual_schema = as_schema(json!({ + "type": "string", + "_metadata": { + "logical_name": "AwsCloudwatchLogs" + } + })); + + let expected_schema = as_schema(json!({ + "type": "string", + "_metadata": { + "docs::human_name": "AWS CloudWatch Logs", + "logical_name": "AwsCloudwatchLogs" + } + })); + + let mut visitor = GenerateHumanFriendlyNameVisitor::default(); + visitor.visit_root_schema(&mut actual_schema); + + assert_schemas_eq(expected_schema, actual_schema); + } + + #[test] + fn property_name() { + let mut actual_schema = as_schema(json!({ + "type": "object", + "properties": { + "store_key": { "type": "boolean" } + } + })); + + let expected_schema = as_schema(json!({ + "type": "object", + "properties": { + "store_key": { + "type": "boolean", + "_metadata": { + "docs::human_name": "Store Key" + } + } + } + })); + + let mut visitor = GenerateHumanFriendlyNameVisitor::default(); + visitor.visit_root_schema(&mut actual_schema); + + assert_schemas_eq(expected_schema, actual_schema); + } + + #[test] + fn property_name_with_replacement() { + let mut actual_schema = as_schema(json!({ + "type": "object", + "properties": { + "store_api_key": { "type": "boolean" } + } + })); + + let expected_schema = as_schema(json!({ + "type": "object", + "properties": { + "store_api_key": { + "type": "boolean", + "_metadata": { + "docs::human_name": "Store API Key" + } + } + } + })); + + let mut visitor = GenerateHumanFriendlyNameVisitor::default(); + visitor.visit_root_schema(&mut actual_schema); + 
+ assert_schemas_eq(expected_schema, actual_schema); + } + + #[test] + fn logical_name_override() { + let mut actual_schema = as_schema(json!({ + "type": "string", + "_metadata": { + "docs::human_name": "AWS EC2 Metadata", + "logical_name": "Ec2Metadata" + } + })); + + let expected_schema = actual_schema.clone(); + + let mut visitor = GenerateHumanFriendlyNameVisitor::default(); + visitor.visit_root_schema(&mut actual_schema); + + assert_schemas_eq(expected_schema, actual_schema); + } + + #[test] + fn property_name_override() { + let mut actual_schema = as_schema(json!({ + "type": "object", + "properties": { + "store_api_key": { + "type": "boolean", + "_metadata": { + "docs::human_name": "Store_api_key" + } + } + } + })); + + let expected_schema = actual_schema.clone(); + + let mut visitor = GenerateHumanFriendlyNameVisitor::default(); + visitor.visit_root_schema(&mut actual_schema); + + assert_schemas_eq(expected_schema, actual_schema); + } + + #[test] + fn mixed_with_replacement() { + let mut actual_schema = as_schema(json!({ + "type": "object", + "properties": { + "store_api_key": { "type": "boolean" } + }, + "_metadata": { + "logical_name": "AwsEc2Metadata" + } + })); + + let expected_schema = as_schema(json!({ + "type": "object", + "properties": { + "store_api_key": { + "type": "boolean", + "_metadata": { + "docs::human_name": "Store API Key" + } + } + }, + "_metadata": { + "docs::human_name": "AWS EC2 Metadata", + "logical_name": "AwsEc2Metadata" + } + })); + + let mut visitor = GenerateHumanFriendlyNameVisitor::default(); + visitor.visit_root_schema(&mut actual_schema); + + assert_schemas_eq(expected_schema, actual_schema); + } +} diff --git a/lib/vector-config/src/schema/visitors/mod.rs b/lib/vector-config/src/schema/visitors/mod.rs index 3d919f2dea740..ae5551c0c01d2 100644 --- a/lib/vector-config/src/schema/visitors/mod.rs +++ b/lib/vector-config/src/schema/visitors/mod.rs @@ -1,3 +1,4 @@ +mod human_name; mod inline_single; pub mod merge; pub mod 
scoped_visit; @@ -6,5 +7,6 @@ mod unevaluated; #[cfg(test)] pub(self) mod test; +pub use self::human_name::GenerateHumanFriendlyNameVisitor; pub use self::inline_single::InlineSingleUseReferencesVisitor; pub use self::unevaluated::DisallowUnevaluatedPropertiesVisitor; diff --git a/lib/vector-config/src/schema/visitors/test.rs b/lib/vector-config/src/schema/visitors/test.rs index 1f1793cb8c071..468560997260b 100644 --- a/lib/vector-config/src/schema/visitors/test.rs +++ b/lib/vector-config/src/schema/visitors/test.rs @@ -1,4 +1,4 @@ -use pretty_assertions::assert_eq; +use assert_json_diff::assert_json_eq; use serde_json::Value; use vector_config_common::schema::RootSchema; @@ -8,8 +8,8 @@ pub fn as_schema(value: Value) -> RootSchema { #[track_caller] pub fn assert_schemas_eq(expected: RootSchema, actual: RootSchema) { - let expected_json = serde_json::to_string_pretty(&expected).expect("should not fail"); - let actual_json = serde_json::to_string_pretty(&actual).expect("should not fail"); + let expected_json = serde_json::to_value(&expected).expect("should not fail"); + let actual_json = serde_json::to_value(&actual).expect("should not fail"); - assert_eq!(expected_json, actual_json); + assert_json_eq!(expected_json, actual_json); } diff --git a/src/sinks/mod.rs b/src/sinks/mod.rs index a992c11a76baf..bce42f6769ba6 100644 --- a/src/sinks/mod.rs +++ b/src/sinks/mod.rs @@ -142,72 +142,61 @@ pub enum HealthcheckError { pub enum Sinks { /// Send events to AMQP 0.9.1 compatible brokers like RabbitMQ. #[cfg(feature = "sinks-amqp")] - #[configurable(metadata(docs::label = "AMQP"))] Amqp(amqp::AmqpSinkConfig), /// Send events to AppSignal. #[cfg(feature = "sinks-appsignal")] - #[configurable(metadata(docs::label = "AppSignal"))] Appsignal(appsignal::AppsignalSinkConfig), /// Publish log events to AWS CloudWatch Logs. 
#[cfg(feature = "sinks-aws_cloudwatch_logs")] - #[configurable(metadata(docs::label = "AWS CloudWatch Logs"))] AwsCloudwatchLogs(aws_cloudwatch_logs::CloudwatchLogsSinkConfig), /// Publish metric events to AWS CloudWatch Metrics. #[cfg(feature = "sinks-aws_cloudwatch_metrics")] - #[configurable(metadata(docs::label = "AWS CloudWatch Metrics"))] AwsCloudwatchMetrics(aws_cloudwatch_metrics::CloudWatchMetricsSinkConfig), /// Publish logs to AWS Kinesis Data Firehose topics. #[cfg(feature = "sinks-aws_kinesis_firehose")] - #[configurable(metadata(docs::label = "AWS Kinesis Data Firehose Logs"))] + #[configurable(metadata(docs::human_name = "AWS Kinesis Data Firehose Logs"))] AwsKinesisFirehose(aws_kinesis::firehose::KinesisFirehoseSinkConfig), /// Publish logs to AWS Kinesis Streams topics. #[cfg(feature = "sinks-aws_kinesis_streams")] - #[configurable(metadata(docs::label = "AWS Kinesis Streams Logs"))] + #[configurable(metadata(docs::human_name = "AWS Kinesis Streams Logs"))] AwsKinesisStreams(aws_kinesis::streams::KinesisStreamsSinkConfig), /// Store observability events in the AWS S3 object storage system. #[cfg(feature = "sinks-aws_s3")] - #[configurable(metadata(docs::label = "AWS S3"))] AwsS3(aws_s3::S3SinkConfig), /// Publish observability events to AWS Simple Queue Service topics. #[cfg(feature = "sinks-aws_sqs")] - #[configurable(metadata(docs::label = "AWS SQS"))] AwsSqs(aws_sqs::SqsSinkConfig), /// Deliver log events to Axiom. #[cfg(feature = "sinks-axiom")] - #[configurable(metadata(docs::label = "Axiom"))] Axiom(axiom::AxiomConfig), /// Store your observability data in Azure Blob Storage. #[cfg(feature = "sinks-azure_blob")] - #[configurable(metadata(docs::label = "Azure Blob Storage"))] + #[configurable(metadata(docs::human_name = "Azure Blob Storage"))] AzureBlob(azure_blob::AzureBlobSinkConfig), /// Publish log events to the Azure Monitor Logs service. 
#[cfg(feature = "sinks-azure_monitor_logs")] - #[configurable(metadata(docs::label = "Azure Monitor Logs"))] AzureMonitorLogs(azure_monitor_logs::AzureMonitorLogsConfig), /// Send observability events nowhere, which can be useful for debugging purposes. #[cfg(feature = "sinks-blackhole")] - #[configurable(metadata(docs::label = "Blackhole"))] Blackhole(blackhole::BlackholeConfig), /// Deliver log data to a ClickHouse database. #[cfg(feature = "sinks-clickhouse")] - #[configurable(metadata(docs::label = "ClickHouse"))] Clickhouse(clickhouse::ClickhouseConfig), /// Display observability events in the console, which can be useful for debugging purposes. #[cfg(feature = "sinks-console")] - #[configurable(metadata(docs::label = "Console"))] Console(console::ConsoleSinkConfig), /// Deliver log data to a Databend database. @@ -216,187 +205,152 @@ pub enum Sinks { /// Send events to Datadog Archives. #[cfg(feature = "sinks-datadog_archives")] - #[configurable(metadata(docs::label = "Datadog Archives"))] DatadogArchives(datadog_archives::DatadogArchivesSinkConfig), /// Publish observability events to the Datadog Events API. #[cfg(feature = "sinks-datadog_events")] - #[configurable(metadata(docs::label = "Datadog Events"))] DatadogEvents(datadog::events::DatadogEventsConfig), /// Publish log events to Datadog. #[cfg(feature = "sinks-datadog_logs")] - #[configurable(metadata(docs::label = "Datadog Logs"))] DatadogLogs(datadog::logs::DatadogLogsConfig), /// Publish metric events to Datadog. #[cfg(feature = "sinks-datadog_metrics")] - #[configurable(metadata(docs::label = "Datadog Metrics"))] DatadogMetrics(datadog::metrics::DatadogMetricsConfig), /// Publish traces to Datadog. #[cfg(feature = "sinks-datadog_traces")] - #[configurable(metadata(docs::label = "Datadog Traces"))] DatadogTraces(datadog::traces::DatadogTracesConfig), /// Index observability events in Elasticsearch. 
#[cfg(feature = "sinks-elasticsearch")] - #[configurable(metadata(docs::label = "Elasticsearch"))] Elasticsearch(elasticsearch::ElasticsearchConfig), /// Output observability events into files. #[cfg(feature = "sinks-file")] - #[configurable(metadata(docs::label = "File"))] File(file::FileSinkConfig), /// Store unstructured log events in Google Chronicle. #[cfg(feature = "sinks-gcp")] - #[configurable(metadata(docs::label = "GCP Chronicle Unstructured"))] GcpChronicleUnstructured(gcp::chronicle_unstructured::ChronicleUnstructuredConfig), /// Deliver logs to GCP's Cloud Operations suite. #[cfg(feature = "sinks-gcp")] - #[configurable(metadata(docs::label = "GCP Operations (Stackdriver)"))] + #[configurable(metadata(docs::human_name = "GCP Operations (Stackdriver)"))] GcpStackdriverLogs(gcp::stackdriver_logs::StackdriverConfig), /// Deliver metrics to GCP's Cloud Monitoring system. #[cfg(feature = "sinks-gcp")] - #[configurable(metadata(docs::label = "GCP Cloud Monitoring (Stackdriver)"))] + #[configurable(metadata(docs::human_name = "GCP Cloud Monitoring (Stackdriver)"))] GcpStackdriverMetrics(gcp::stackdriver_metrics::StackdriverConfig), /// Store observability events in GCP Cloud Storage. #[cfg(feature = "sinks-gcp")] - #[configurable(metadata(docs::label = "GCP Cloud Storage"))] GcpCloudStorage(gcp::cloud_storage::GcsSinkConfig), /// Publish observability events to GCP's Pub/Sub messaging system. #[cfg(feature = "sinks-gcp")] - #[configurable(metadata(docs::label = "GCP Pub/Sub"))] GcpPubsub(gcp::pubsub::PubsubConfig), /// WebHDFS. #[cfg(feature = "sinks-webhdfs")] - #[configurable(metadata(docs::label = "WebHDFS"))] Webhdfs(webhdfs::WebHdfsConfig), /// Deliver log events to Honeycomb. #[cfg(feature = "sinks-honeycomb")] - #[configurable(metadata(docs::label = "Honeycomb"))] Honeycomb(honeycomb::HoneycombConfig), /// Deliver observability event data to an HTTP server. 
#[cfg(feature = "sinks-http")] - #[configurable(metadata(docs::label = "HTTP"))] Http(http::HttpSinkConfig), /// Deliver log event data to Humio. #[cfg(feature = "sinks-humio")] - #[configurable(metadata(docs::label = "Humio Logs"))] HumioLogs(humio::logs::HumioLogsConfig), /// Deliver metric event data to Humio. #[cfg(feature = "sinks-humio")] - #[configurable(metadata(docs::label = "Humio Metrics"))] HumioMetrics(humio::metrics::HumioMetricsConfig), /// Deliver log event data to InfluxDB. #[cfg(any(feature = "sinks-influxdb", feature = "prometheus-integration-tests"))] - #[configurable(metadata(docs::label = "InfluxDB Logs"))] InfluxdbLogs(influxdb::logs::InfluxDbLogsConfig), /// Deliver metric event data to InfluxDB. #[cfg(any(feature = "sinks-influxdb", feature = "prometheus-integration-tests"))] - #[configurable(metadata(docs::label = "InfluxDB Metrics"))] InfluxdbMetrics(influxdb::metrics::InfluxDbConfig), /// Publish observability event data to Apache Kafka topics. #[cfg(feature = "sinks-kafka")] - #[configurable(metadata(docs::label = "Kafka"))] Kafka(kafka::KafkaSinkConfig), /// Deliver log event data to Mezmo. #[cfg(feature = "sinks-mezmo")] - #[configurable(metadata(docs::label = "Mezmo"))] Mezmo(mezmo::MezmoConfig), /// Deliver log event data to LogDNA. #[cfg(feature = "sinks-mezmo")] - #[configurable(metadata(docs::label = "LogDNA"))] Logdna(mezmo::LogdnaConfig), /// Deliver log event data to the Loki aggregation system. #[cfg(feature = "sinks-loki")] - #[configurable(metadata(docs::label = "Loki"))] Loki(loki::LokiConfig), /// Publish observability data to subjects on the NATS messaging system. #[cfg(feature = "sinks-nats")] - #[configurable(metadata(docs::label = "NATS"))] Nats(self::nats::NatsSinkConfig), /// Deliver events to New Relic. #[cfg(feature = "sinks-new_relic")] - #[configurable(metadata(docs::label = "New Relic"))] NewRelic(new_relic::NewRelicConfig), /// Deliver log events to Papertrail from SolarWinds. 
#[cfg(feature = "sinks-papertrail")] - #[configurable(metadata(docs::label = "Papertrail"))] Papertrail(papertrail::PapertrailConfig), /// Expose metric events on a Prometheus compatible endpoint. #[cfg(feature = "sinks-prometheus")] - #[configurable(metadata(docs::label = "Prometheus Exporter"))] PrometheusExporter(prometheus::exporter::PrometheusExporterConfig), /// Deliver metric data to a Prometheus remote write endpoint. #[cfg(feature = "sinks-prometheus")] - #[configurable(metadata(docs::label = "Prometheus Remote Write"))] PrometheusRemoteWrite(prometheus::remote_write::RemoteWriteConfig), /// Publish observability events to Apache Pulsar topics. #[cfg(feature = "sinks-pulsar")] - #[configurable(metadata(docs::label = "Pulsar"))] Pulsar(pulsar::config::PulsarSinkConfig), /// Publish observability data to Redis. #[cfg(feature = "sinks-redis")] - #[configurable(metadata(docs::label = "Redis"))] Redis(redis::RedisSinkConfig), /// Publish log events to Sematext. #[cfg(feature = "sinks-sematext")] - #[configurable(metadata(docs::label = "Sematext Logs"))] SematextLogs(sematext::logs::SematextLogsConfig), /// Publish metric events to Sematext. #[cfg(feature = "sinks-sematext")] - #[configurable(metadata(docs::label = "Sematext Metrics"))] SematextMetrics(sematext::metrics::SematextMetricsConfig), /// Deliver logs to a remote socket endpoint. #[cfg(feature = "sinks-socket")] - #[configurable(metadata(docs::label = "Socket"))] Socket(socket::SocketSinkConfig), /// Deliver log data to Splunk's HTTP Event Collector. #[cfg(feature = "sinks-splunk_hec")] - #[configurable(metadata(docs::label = "Splunk HEC Logs"))] SplunkHecLogs(splunk_hec::logs::config::HecLogsSinkConfig), /// Deliver metric data to Splunk's HTTP Event Collector. #[cfg(feature = "sinks-splunk_hec")] - #[configurable(metadata(docs::label = "Splunk HEC Metrics"))] SplunkHecMetrics(splunk_hec::metrics::config::HecMetricsSinkConfig), /// Deliver metric data to a StatsD aggregator. 
#[cfg(feature = "sinks-statsd")] - #[configurable(metadata(docs::label = "Statsd"))] Statsd(statsd::StatsdSinkConfig), /// Test (adaptive concurrency). #[cfg(all(test, feature = "sources-demo_logs"))] - #[configurable(metadata(docs::label = ""))] TestArc(self::util::adaptive_concurrency::tests::TestConfig), /// Test (backpressure). @@ -427,12 +381,10 @@ pub enum Sinks { /// Relay observability data to a Vector instance. #[cfg(feature = "sinks-vector")] - #[configurable(metadata(docs::label = "Vector"))] Vector(vector::VectorConfig), /// Deliver observability event data to a websocket listener. #[cfg(feature = "sinks-websocket")] - #[configurable(metadata(docs::label = "Websocket"))] Websocket(websocket::WebSocketSinkConfig), }