diff --git a/website/content/en/docs/reference/configuration/_index.md b/website/content/en/docs/reference/configuration/_index.md index 699fc2a1c6a2a..285bb6f7a875d 100644 --- a/website/content/en/docs/reference/configuration/_index.md +++ b/website/content/en/docs/reference/configuration/_index.md @@ -14,7 +14,71 @@ The following is an example of a popular Vector configuration that ingests logs from a file and routes them to both Elasticsearch and AWS S3. Your configuration will differ based on your needs. -{{< tabs default="vector.toml" >}} +{{< tabs default="vector.yaml" >}} +{{< tab title="vector.yaml" >}} + +```yaml +# Set global options +data_dir: "/var/lib/vector" + +# Vector's API (disabled by default) +# Enable and try it out with the `vector top` command +api: + enabled: false + # address: "127.0.0.1:8686" + +# Ingest data by tailing one or more files +sources: + apache_logs: + type: "file" + include: + - "/var/log/apache2/*.log" # supports globbing + ignore_older: 86400 # 1 day + +# Structure and parse via Vector's Remap Language +transforms: + apache_parser: + inputs: + - "apache_logs" + type: "remap" + source: ". = parse_apache_log(.message)" + + # Sample the data to save on cost + apache_sampler: + inputs: + - "apache_parser" + type: "sample" + rate: 2 # only keep 50% (1/`rate`) + +# Send structured data to a short-term storage +sinks: + es_cluster: + inputs: + - "apache_sampler" # only take sampled data + type: "elasticsearch" + endpoints: + - "http://79.12.221.222:9200" + bulk: + index: "vector-%Y-%m-%d" # daily indices + + # Send structured data to a cost-effective long-term storage + s3_archives: + inputs: + - "apache_parser" # don't sample for S3 + type: "aws_s3" + region: "us-east-1" + bucket: "my-log-archives" + key_prefix: "date=%Y-%m-%d" # daily partitions, hive-friendly format + compression: "gzip" # compress final objects + framing: + method: "newline_delimited" # new line delimited... 
+ encoding: + codec: "json" # ...JSON + batch: + max_bytes: 10000000 # 10mb uncompressed +``` + +{{< /tab >}} {{< tab title="vector.toml" >}} ```toml @@ -67,53 +131,6 @@ encoding.codec = "json" # ...JSON batch.max_bytes = 10000000 # 10mb uncompressed ``` -{{< /tab >}} -{{< tab title="vector.yaml" >}} - -```yaml -data_dir: /var/lib/vector -sources: - apache_logs: - type: file - include: - - /var/log/apache2/*.log - ignore_older: 86400 -transforms: - apache_parser: - inputs: - - apache_logs - type: remap - source: | - . = parse_apache_log(.message) - apache_sampler: - inputs: - - apache_parser - type: sample - rate: 50 -sinks: - es_cluster: - inputs: - - apache_sampler - type: elasticsearch - endpoints: ['http://79.12.221.222:9200'] - bulk: - index: vector-%Y-%m-%d - s3_archives: - inputs: - - apache_parser - type: aws_s3 - region: us-east-1 - bucket: my-log-archives - key_prefix: date=%Y-%m-%d - compression: gzip - framing: - method: newline_delimited - encoding: - codec: json - batch: - max_bytes: 10000000 -``` - {{< /tab >}} {{< tab title="vector.json" >}} @@ -185,18 +202,18 @@ sinks: To use this configuration file, specify it with the `--config` flag when starting Vector: -{{< tabs default="TOML" >}} -{{< tab title="TOML" >}} +{{< tabs default="YAML" >}} +{{< tab title="YAML" >}} ```shell -vector --config /etc/vector/vector.toml +vector --config /etc/vector/vector.yaml ``` {{< /tab >}} -{{< tab title="YAML" >}} +{{< tab title="TOML" >}} ```shell -vector --config /etc/vector/vector.yaml +vector --config /etc/vector/vector.toml ``` {{< /tab >}} @@ -229,37 +246,37 @@ vector --config /etc/vector/vector.json Vector interpolates environment variables within your configuration file with the following syntax: -```toml -[transforms.add_host] -type = "remap" -source = ''' -# Basic usage. "$HOSTNAME" also works. -.host = "${HOSTNAME}" # or "$HOSTNAME" +```yaml +transforms: + add_host: + type: "remap" + source: | + # Basic usage. "$HOSTNAME" also works. 
+ .host = "${HOSTNAME}" # or "$HOSTNAME" -# Setting a default value when not present. -.environment = "${ENV:-development}" + # Setting a default value when not present. + .environment = "${ENV:-development}" -# Requiring an environment variable to be present. -.tenant = "${TENANT:?tenant must be supplied}" -''' + # Requiring an environment variable to be present. + .tenant = "${TENANT:?tenant must be supplied}" ``` #### Default values Default values can be supplied using `:-` or `-` syntax: -```toml -option = "${ENV_VAR:-default}" # default value if variable is unset or empty -option = "${ENV_VAR-default}" # default value only if variable is unset +```yaml +option: "${ENV_VAR:-default}" # default value if variable is unset or empty +option: "${ENV_VAR-default}" # default value only if variable is unset ``` #### Required variables Environment variables that are required can be specified using `:?` or `?` syntax: -```toml -option = "${ENV_VAR:?err}" # Vector exits with 'err' message if variable is unset or empty -option = "${ENV_VAR?err}" # Vector exits with 'err' message only if variable is unset +```yaml +option: "${ENV_VAR:?err}" # Vector exits with 'err' message if variable is unset or empty +option: "${ENV_VAR?err}" # Vector exits with 'err' message only if variable is unset ``` #### Escaping @@ -285,89 +302,98 @@ method. For most Linux-based systems, the file can be found at You can pass multiple configuration files when starting Vector: ```shell -vector --config vector1.toml --config vector2.toml +vector --config vector1.yaml --config vector2.yaml ``` Or using a [globbing syntax][glob]: ```shell -vector --config /etc/vector/*.toml +vector --config /etc/vector/*.yaml ``` #### Automatic namespacing You can also split your configuration by grouping the components by their type, one directory per component type, where the file name is used as the component id. 
For example: -{{< tabs default="vector.toml" >}} -{{< tab title="vector.toml" >}} +{{< tabs default="vector.yaml" >}} +{{< tab title="vector.yaml" >}} -```toml +```yaml # Set global options -data_dir = "/var/lib/vector" +data_dir: "/var/lib/vector" # Vector's API (disabled by default) # Enable and try it out with the `vector top` command -[api] -enabled = false -# address = "127.0.0.1:8686" +api: + enabled: false + # address: "127.0.0.1:8686" ``` {{< /tab >}} -{{< tab title="sources/apache_logs.toml" >}} +{{< tab title="sources/apache_logs.yaml" >}} -```toml +```yaml # Ingest data by tailing one or more files -type = "file" -include = ["/var/log/apache2/*.log"] # supports globbing -ignore_older = 86400 # 1 day +type: "file" +include: ["/var/log/apache2/*.log"] # supports globbing +ignore_older: 86400 # 1 day ``` {{< /tab >}} -{{< tab title="transforms/apache_parser.toml" >}} +{{< tab title="transforms/apache_parser.yaml" >}} -```toml +```yaml # Structure and parse via Vector Remap Language -inputs = ["apache_logs"] -type = "remap" -source = ''' -. = parse_apache_log(.message) +inputs: + - "apache_logs" +type: "remap" +source: | + . 
= parse_apache_log(.message) ``` {{< /tab >}} -{{< tab title="transforms/apache_sampler.toml" >}} +{{< tab title="transforms/apache_sampler.yaml" >}} -```toml +```yaml # Sample the data to save on cost -inputs = ["apache_parser"] -type = "sample" -rate = 2 # only keep 50% (1/`rate`) +inputs: + - "apache_parser" +type: "sample" +rate: 2 # only keep 50% (1/`rate`) ``` {{< /tab >}} -{{< tab title="sinks/es_cluster.toml" >}} +{{< tab title="sinks/es_cluster.yaml" >}} -```toml +```yaml # Send structured data to a short-term storage -inputs = ["apache_sampler"] # only take sampled data -type = "elasticsearch" -endpoints = ["http://79.12.221.222:9200"] # local or external host -bulk.index = "vector-%Y-%m-%d" # daily indices +inputs: + - "apache_sampler" # only take sampled data +type: "elasticsearch" +endpoints: + - "http://79.12.221.222:9200" # local or external host +bulk: + index: "vector-%Y-%m-%d" # daily indices ``` {{< /tab >}} -{{< tab title="sinks/s3_archives.toml" >}} +{{< tab title="sinks/s3_archives.yaml" >}} -```toml +```yaml # Send structured data to a cost-effective long-term storage -inputs = ["apache_parser"] # don't sample for S3 -type = "aws_s3" -region = "us-east-1" -bucket = "my-log-archives" -key_prefix = "date=%Y-%m-%d" # daily partitions, hive friendly format -compression = "gzip" # compress final objects -framing.method = "newline_delimited" # new line delimited... -encoding.codec = "json" # ...JSON -batch.max_bytes = 10000000 # 10mb uncompressed +inputs: + - "apache_parser" # don't sample for S3 +type: "aws_s3" +region: "us-east-1" +bucket: "my-log-archives" +key_prefix: "date=%Y-%m-%d" # daily partitions, hive-friendly format +compression: "gzip" # compress final objects +framing: + method: "newline_delimited" # new line delimited... 
+encoding: + codec: "json" # ...JSON +batch: + max_bytes: 10000000 # 10mb uncompressed ``` {{< /tab >}} @@ -384,26 +410,28 @@ vector --config-dir /etc/vector Vector supports wildcards (`*`) in component IDs when building your topology. For example: -```toml -[sources.app1_logs] -type = "file" -includes = ["/var/log/app1.log"] +```yaml +sources: + app1_logs: + type: "file" + include: ["/var/log/app1.log"] -[sources.app2_logs] -type = "file" -includes = ["/var/log/app.log"] + app2_logs: + type: "file" + include: ["/var/log/app.log"] -[sources.system_logs] -type = "file" -includes = ["/var/log/system.log"] + system_logs: + type: "file" + include: ["/var/log/system.log"] -[sinks.app_logs] -type = "datadog_logs" -inputs = ["app*"] +sinks: + app_logs: + type: "datadog_logs" + inputs: ["app*"] -[sinks.archive] -type = "aws_s3" -inputs = ["app*", "system_logs"] + archive: + type: "aws_s3" + inputs: ["app*", "system_logs"] ``` ## Sections