From e168f780fb291e7f2b48db4041c39e2246603d26 Mon Sep 17 00:00:00 2001 From: Antanas Bastys Date: Tue, 21 Sep 2021 12:18:45 +0300 Subject: [PATCH 1/5] [coordinator] docker integration tests for prom-remote storage --- .../docker-compose.yml | 114 +++++++++ .../m3aggregator.yml | 216 ++++++++++++++++++ .../m3coordinator-admin.yml | 29 +++ .../m3coordinator.yml | 86 +++++++ .../prom_remote_write_backend/prometheus.yml | 16 ++ .../prom_remote_write_backend/test.sh | 139 +++++++++++ .../prom_remote_write_backend/tests.sh | 76 ++++++ .../prometheus/prometheus.yml | 2 +- scripts/docker-integration-tests/run.sh | 15 +- src/query/storage/promremote/storage.go | 1 + 10 files changed, 679 insertions(+), 15 deletions(-) create mode 100644 scripts/docker-integration-tests/prom_remote_write_backend/docker-compose.yml create mode 100644 scripts/docker-integration-tests/prom_remote_write_backend/m3aggregator.yml create mode 100644 scripts/docker-integration-tests/prom_remote_write_backend/m3coordinator-admin.yml create mode 100644 scripts/docker-integration-tests/prom_remote_write_backend/m3coordinator.yml create mode 100644 scripts/docker-integration-tests/prom_remote_write_backend/prometheus.yml create mode 100755 scripts/docker-integration-tests/prom_remote_write_backend/test.sh create mode 100644 scripts/docker-integration-tests/prom_remote_write_backend/tests.sh diff --git a/scripts/docker-integration-tests/prom_remote_write_backend/docker-compose.yml b/scripts/docker-integration-tests/prom_remote_write_backend/docker-compose.yml new file mode 100644 index 0000000000..6d72dd4614 --- /dev/null +++ b/scripts/docker-integration-tests/prom_remote_write_backend/docker-compose.yml @@ -0,0 +1,114 @@ +version: "3.5" +services: + dbnode01: + expose: + - "9000-9004" + - "7201" + ports: + - "0.0.0.0:9000-9004:9000-9004" + - "0.0.0.0:7201:7201" + networks: + - backend + image: "m3dbnode_integration:${REVISION}" + m3aggregator01: + expose: + - "6001" + ports: + - "127.0.0.1:6001:6001" + networks: + - backend + environment: + - M3AGGREGATOR_HOST_ID=m3aggregator01 + image: "m3aggregator_integration:${REVISION}" + volumes: + - "./m3aggregator.yml:/etc/m3aggregator/m3aggregator.yml" + m3aggregator02: + networks: + - backend + environment: + - M3AGGREGATOR_HOST_ID=m3aggregator02 + image: "m3aggregator_integration:${REVISION}" + volumes: + - "./m3aggregator.yml:/etc/m3aggregator/m3aggregator.yml" + m3coordinator01: + expose: + - "7202" + ports: + - "0.0.0.0:7202:7201" + networks: + - backend + image: "m3coordinator_integration:${REVISION}" + volumes: + - "./:/etc/m3coordinator/" + coordinatoradmin: + expose: + - "7201" + ports: + - "0.0.0.0:7201:7201" + networks: + - backend + image: "m3coordinator_integration:${REVISION}" + volumes: + - "./m3coordinator-admin.yml:/etc/m3coordinator/m3coordinator.yml" + prometheusraw: + expose: + - "9090" + ports: + - "0.0.0.0:9090:9090" + networks: + - backend + image: prom/prometheus:latest + volumes: + - "./prometheus.yml:/etc/prometheus/prometheus.yml" + command: + - "--config.file=/etc/prometheus/prometheus.yml" + - "--storage.tsdb.path=/prometheus" + - "--web.console.libraries=/usr/share/prometheus/console_libraries" + - "--web.console.templates=/usr/share/prometheus/consoles" + - "--enable-feature=remote-write-receiver" + prometheusagg: + expose: + - "9091" + ports: + - "0.0.0.0:9091:9090" + networks: + - backend + image: prom/prometheus:latest + volumes: + - "./prometheus.yml:/etc/prometheus/prometheus.yml" + command: + - "--config.file=/etc/prometheus/prometheus.yml" + - 
"--storage.tsdb.path=/prometheus" + - "--web.console.libraries=/usr/share/prometheus/console_libraries" + - "--web.console.templates=/usr/share/prometheus/consoles" + - "--enable-feature=remote-write-receiver" + etcd01: + expose: + - "2379-2380" + ports: + - "0.0.0.0:2379-2380:2379-2380" + networks: + - backend + image: quay.io/coreos/etcd:v3.4.3 + command: + - "etcd" + - "--name" + - "etcd01" + - "--listen-peer-urls" + - "http://0.0.0.0:2380" + - "--listen-client-urls" + - "http://0.0.0.0:2379" + - "--advertise-client-urls" + - "http://etcd01:2379" + - "--initial-cluster-token" + - "etcd-cluster-1" + - "--initial-advertise-peer-urls" + - "http://etcd01:2380" + - "--initial-cluster" + - "etcd01=http://etcd01:2380" + - "--initial-cluster-state" + - "new" + - "--data-dir" + - "/var/lib/etcd" +networks: + backend: diff --git a/scripts/docker-integration-tests/prom_remote_write_backend/m3aggregator.yml b/scripts/docker-integration-tests/prom_remote_write_backend/m3aggregator.yml new file mode 100644 index 0000000000..6b6fa0a4f3 --- /dev/null +++ b/scripts/docker-integration-tests/prom_remote_write_backend/m3aggregator.yml @@ -0,0 +1,216 @@ +logging: + level: info + +metrics: + scope: + prefix: m3aggregator + prometheus: + onError: none + handlerPath: /metrics + listenAddress: 0.0.0.0:6002 + timerType: histogram + sanitization: prometheus + samplingRate: 1.0 + extended: none + +m3msg: + server: + listenAddress: 0.0.0.0:6000 + retry: + maxBackoff: 10s + jitter: true + consumer: + messagePool: + size: 16384 + watermark: + low: 0.2 + high: 0.5 + +http: + listenAddress: 0.0.0.0:6001 + readTimeout: 60s + writeTimeout: 60s + +kvClient: + etcd: + env: override_test_env + zone: embedded + service: m3aggregator + cacheDir: /var/lib/m3kv + etcdClusters: + - zone: embedded + endpoints: + - etcd01:2379 + +runtimeOptions: + kvConfig: + environment: override_test_env + zone: embedded + writeValuesPerMetricLimitPerSecondKey: write-values-per-metric-limit-per-second + writeValuesPerMetricLimitPerSecond: 0 + writeNewMetricLimitClusterPerSecondKey: write-new-metric-limit-cluster-per-second + writeNewMetricLimitClusterPerSecond: 0 + writeNewMetricNoLimitWarmupDuration: 0 + +aggregator: + hostID: + resolver: environment + envVarName: M3AGGREGATOR_HOST_ID + instanceID: + type: host_id + verboseErrors: true + metricPrefix: "" + counterPrefix: "" + timerPrefix: "" + gaugePrefix: "" + aggregationTypes: + counterTransformFnType: empty + timerTransformFnType: suffix + gaugeTransformFnType: empty + aggregationTypesPool: + size: 1024 + quantilesPool: + buckets: + - count: 256 + capacity: 4 + - count: 128 + capacity: 8 + stream: + eps: 0.001 + capacity: 32 + streamPool: + size: 4096 + samplePool: + size: 4096 + floatsPool: + buckets: + - count: 4096 + capacity: 16 + - count: 2048 + capacity: 32 + - count: 1024 + capacity: 64 + client: + type: m3msg + m3msg: + producer: + writer: + topicName: aggregator_ingest + topicServiceOverride: + zone: embedded + environment: override_test_env + placement: + isStaged: true + placementServiceOverride: + namespaces: + placement: /placement + messagePool: + size: 16384 + watermark: + low: 0.2 + high: 0.5 + placementManager: + kvConfig: + namespace: /placement + environment: override_test_env + zone: embedded + placementWatcher: + key: m3aggregator + initWatchTimeout: 10s + hashType: murmur32 + bufferDurationBeforeShardCutover: 10m + bufferDurationAfterShardCutoff: 10m + bufferDurationForFutureTimedMetric: 10s # Allow test to write into future. 
+ bufferDurationForPastTimedMetric: 10s # Don't wait too long for timed metrics to flush. + resignTimeout: 1m + flushTimesManager: + kvConfig: + environment: override_test_env + zone: embedded + flushTimesKeyFmt: shardset/%d/flush + flushTimesPersistRetrier: + initialBackoff: 100ms + backoffFactor: 2.0 + maxBackoff: 2s + maxRetries: 3 + electionManager: + election: + leaderTimeout: 10s + resignTimeout: 10s + ttlSeconds: 10 + serviceID: + name: m3aggregator + environment: override_test_env + zone: embedded + electionKeyFmt: shardset/%d/lock + campaignRetrier: + initialBackoff: 100ms + backoffFactor: 2.0 + maxBackoff: 2s + forever: true + jitter: true + changeRetrier: + initialBackoff: 100ms + backoffFactor: 2.0 + maxBackoff: 5s + forever: true + jitter: true + resignRetrier: + initialBackoff: 100ms + backoffFactor: 2.0 + maxBackoff: 5s + forever: true + jitter: true + campaignStateCheckInterval: 1s + shardCutoffCheckOffset: 30s + flushManager: + checkEvery: 1s + jitterEnabled: true + maxJitters: + - flushInterval: 5s + maxJitterPercent: 1.0 + - flushInterval: 10s + maxJitterPercent: 0.5 + - flushInterval: 1m + maxJitterPercent: 0.5 + - flushInterval: 10m + maxJitterPercent: 0.5 + - flushInterval: 1h + maxJitterPercent: 0.25 + numWorkersPerCPU: 0.5 + flushTimesPersistEvery: 10s + maxBufferSize: 5m + forcedFlushWindowSize: 10s + flush: + handlers: + - dynamicBackend: + name: m3msg + hashType: murmur32 + producer: + writer: + topicName: aggregated_metrics + topicServiceOverride: + zone: embedded + environment: override_test_env + messagePool: + size: 16384 + watermark: + low: 0.2 + high: 0.5 + passthrough: + enabled: true + forwarding: + maxConstDelay: 1m # Need to add some buffer window, since timed metrics by default are delayed by 1min. + entryTTL: 1h + entryCheckInterval: 10m + maxTimerBatchSizePerWrite: 140 + maxNumCachedSourceSets: 2 + discardNaNAggregatedValues: true + entryPool: + size: 4096 + counterElemPool: + size: 4096 + timerElemPool: + size: 4096 + gaugeElemPool: + size: 4096 diff --git a/scripts/docker-integration-tests/prom_remote_write_backend/m3coordinator-admin.yml b/scripts/docker-integration-tests/prom_remote_write_backend/m3coordinator-admin.yml new file mode 100644 index 0000000000..c3c08c0104 --- /dev/null +++ b/scripts/docker-integration-tests/prom_remote_write_backend/m3coordinator-admin.yml @@ -0,0 +1,29 @@ +listenAddress: 0.0.0.0:7201 + +logging: + level: info + +metrics: + scope: + prefix: "coordinator" + prometheus: + handlerPath: /metrics + listenAddress: 0.0.0.0:7203 # until https://github.com/m3db/m3/issues/682 is resolved + sanitization: prometheus + samplingRate: 1.0 + extended: none + +backend: noop-etcd +clusterManagement: + etcd: + env: default_env + zone: embedded + service: m3db + cacheDir: /var/lib/m3kv + etcdClusters: + - zone: embedded + endpoints: + - etcd01:2379 + +tagOptions: + idScheme: quoted diff --git a/scripts/docker-integration-tests/prom_remote_write_backend/m3coordinator.yml b/scripts/docker-integration-tests/prom_remote_write_backend/m3coordinator.yml new file mode 100644 index 0000000000..de408673d2 --- /dev/null +++ b/scripts/docker-integration-tests/prom_remote_write_backend/m3coordinator.yml @@ -0,0 +1,86 @@ +listenAddress: 0.0.0.0:7201 + +logging: + level: info + +metrics: + scope: + prefix: "coordinator" + prometheus: + handlerPath: /metrics + listenAddress: 0.0.0.0:7203 # until https://github.com/m3db/m3/issues/682 is resolved + sanitization: prometheus + samplingRate: 1.0 + extended: none + +backend: prom-remote + 
+prometheusRemoteBackend: + endpoints: + - name: raw + address: "http://prometheusraw:9090/api/v1/write" + - name: aggregated + address: "http://prometheusagg:9090/api/v1/write" + storagePolicy: + retention: 1h + resolution: 5s + downsample: + all: true + +clusterManagement: + etcd: + env: default_env + zone: embedded + service: m3db + cacheDir: /var/lib/m3kv + etcdClusters: + - zone: embedded + endpoints: + - etcd01:2379 + +tagOptions: + idScheme: quoted + +downsample: + matcher: + requireNamespaceWatchOnInit: false + remoteAggregator: + client: + type: m3msg + m3msg: + producer: + writer: + topicName: aggregator_ingest + topicServiceOverride: + zone: embedded + environment: override_test_env + placement: + isStaged: true + placementServiceOverride: + namespaces: + placement: /placement + connection: + numConnections: 4 + messagePool: + size: 16384 + watermark: + low: 0.2 + high: 0.5 + +ingest: + ingester: + workerPoolSize: 10000 + opPool: + size: 10000 + retry: + maxRetries: 3 + jitter: true + logSampleRate: 0.01 + m3msg: + server: + listenAddress: "0.0.0.0:7507" + retry: + maxBackoff: 10s + jitter: true + +storeMetricsType: true \ No newline at end of file diff --git a/scripts/docker-integration-tests/prom_remote_write_backend/prometheus.yml b/scripts/docker-integration-tests/prom_remote_write_backend/prometheus.yml new file mode 100644 index 0000000000..ddc9637b89 --- /dev/null +++ b/scripts/docker-integration-tests/prom_remote_write_backend/prometheus.yml @@ -0,0 +1,16 @@ +global: + external_labels: + role: "remote" + +alerting: + alertmanagers: + - static_configs: + - targets: + +rule_files: + +scrape_configs: + +remote_read: + +remote_write: diff --git a/scripts/docker-integration-tests/prom_remote_write_backend/test.sh b/scripts/docker-integration-tests/prom_remote_write_backend/test.sh new file mode 100755 index 0000000000..85781a8094 --- /dev/null +++ b/scripts/docker-integration-tests/prom_remote_write_backend/test.sh @@ -0,0 +1,139 @@ +#!/usr/bin/env bash + +set -xe + +M3_PATH=${M3_PATH:-$GOPATH/src/github.com/m3db/m3} +TESTDIR="$M3_PATH"/scripts/docker-integration-tests/ +source "$TESTDIR"/common.sh +source "$TESTDIR"/prom_remote_write_backend/tests.sh +REVISION=$(git rev-parse HEAD) +COMPOSE_FILE="$TESTDIR"/prom_remote_write_backend/docker-compose.yml +# quay.io/m3db/prometheus_remote_client_golang @ v0.4.3 +PROMREMOTECLI_IMAGE=quay.io/m3db/prometheus_remote_client_golang:v0.4.3 +JQ_IMAGE=realguess/jq:1.4@sha256:300c5d9fb1d74154248d155ce182e207cf6630acccbaadd0168e18b15bfaa786 +export REVISION + +echo "Pull containers required for test" +docker pull $PROMREMOTECLI_IMAGE +docker pull $JQ_IMAGE + +function defer { + if [[ "$TEST_SUCCESS" != "true" ]]; then + echo "Test failure, printing docker-compose logs" + docker-compose -f "${COMPOSE_FILE}" logs + fi + + docker-compose -f "${COMPOSE_FILE}" down || echo "unable to shutdown containers" # CI fails to stop all containers sometimes +} +trap defer EXIT + +echo "Run ETCD" +docker-compose -f "${COMPOSE_FILE}" up -d etcd01 + +echo "Run Coordinator in Admin mode" +docker-compose -f "${COMPOSE_FILE}" up -d coordinatoradmin +wait_until_ready "0.0.0.0:7201" + +echo "Initializing aggregator topology" +curl -vvvsSf -X POST -H "Cluster-Environment-Name: override_test_env" localhost:7201/api/v1/services/m3aggregator/placement/init -d '{ + "num_shards": 64, + "replication_factor": 2, + "instances": [ + { + "id": "m3aggregator01", + "isolation_group": "availability-zone-a", + "zone": "embedded", + "weight": 100, + "endpoint": 
"m3aggregator01:6000", + "hostname": "m3aggregator01", + "port": 6000 + }, + { + "id": "m3aggregator02", + "isolation_group": "availability-zone-b", + "zone": "embedded", + "weight": 100, + "endpoint": "m3aggregator02:6000", + "hostname": "m3aggregator02", + "port": 6000 + } + ] +}' + +echo "Initializing m3msg inbound topic for m3aggregator ingestion from m3coordinators" +curl -vvvsSf -X POST -H "Topic-Name: aggregator_ingest" -H "Cluster-Environment-Name: override_test_env" localhost:7201/api/v1/topic/init -d '{ + "numberOfShards": 64 +}' + +# Do this after placement and topic for m3aggregator is created. +echo "Adding m3aggregator as a consumer to the aggregator ingest topic" +curl -vvvsSf -X POST -H "Topic-Name: aggregator_ingest" -H "Cluster-Environment-Name: override_test_env" localhost:7201/api/v1/topic -d '{ + "consumerService": { + "serviceId": { + "name": "m3aggregator", + "environment": "override_test_env", + "zone": "embedded" + }, + "consumptionType": "REPLICATED", + "messageTtlNanos": "600000000000" + } +}' # msgs will be discarded after 600000000000ns = 10mins + +# TODO paziuret ar nereik +echo "Initializing m3coordinator topology" +curl -vvvsSf -X POST localhost:7201/api/v1/services/m3coordinator/placement/init -d '{ + "instances": [ + { + "id": "m3coordinator01", + "zone": "embedded", + "endpoint": "m3coordinator01:7507", + "hostname": "m3coordinator01", + "port": 7507 + } + ] +}' +echo "Done initializing m3coordinator topology" + +echo "Validating m3coordinator topology" +[ "$(curl -sSf localhost:7201/api/v1/services/m3coordinator/placement | jq .placement.instances.m3coordinator01.id)" == '"m3coordinator01"' ] +echo "Done validating topology" + +# Do this after placement for m3coordinator is created. +echo "Initializing m3msg outbound topic for m3coordinator ingestion from m3aggregators" +curl -vvvsSf -X POST -H "Topic-Name: aggregated_metrics" -H "Cluster-Environment-Name: override_test_env" 0.0.0.0:7201/api/v1/topic/init -d '{ + "numberOfShards": 64 +}' + +echo "Adding m3coordinator as a consumer to the aggregator publish topic" +curl -vvvsSf -X POST -H "Topic-Name: aggregated_metrics" -H "Cluster-Environment-Name: override_test_env" 0.0.0.0:7201/api/v1/topic -d '{ + "consumerService": { + "serviceId": { + "name": "m3coordinator", + "environment": "default_env", + "zone": "embedded" + }, + "consumptionType": "SHARED", + "messageTtlNanos": "600000000000" + } +}' # msgs will be discarded after 600000000000ns = 10mins + +echo "Run M3 containers" +docker-compose -f "${COMPOSE_FILE}" up -d m3aggregator01 +docker-compose -f "${COMPOSE_FILE}" up -d m3aggregator02 +sleep 10 +docker-compose -f "${COMPOSE_FILE}" up -d m3coordinator01 + +wait_until_ready "0.0.0.0:7202" + +echo "Start Prometheus containers" +docker-compose -f "${COMPOSE_FILE}" up -d prometheusraw +docker-compose -f "${COMPOSE_FILE}" up -d prometheusagg + +sleep 10 + +TEST_SUCCESS=false + +echo "Running write tests" +test_prometheus_remote_write_multi_namespaces + +TEST_SUCCESS=true diff --git a/scripts/docker-integration-tests/prom_remote_write_backend/tests.sh b/scripts/docker-integration-tests/prom_remote_write_backend/tests.sh new file mode 100644 index 0000000000..3e93757c20 --- /dev/null +++ b/scripts/docker-integration-tests/prom_remote_write_backend/tests.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash + +set -xe + +source "$M3_PATH"/scripts/docker-integration-tests/common.sh + +function prometheus_remote_write { + local metric_name=$1 + local datapoint_timestamp=$2 + local datapoint_value=$3 + local 
expect_success=$4
+  local expect_success_err=$5
+  local expect_status=$6
+  local expect_status_err=$7
+
+  network_name="prom_remote_write_backend_backend"
+  network=$(docker network ls | grep -F $network_name | tr -s ' ' | cut -f 1 -d ' ' | tail -n 1)
+
+  out=$( (docker run -it --rm --network "$network" \
+    "$PROMREMOTECLI_IMAGE" \
+    -u http://m3coordinator01:7201/api/v1/prom/remote/write \
+    -t __name__:"${metric_name}" \
+    -d "${datapoint_timestamp}","${datapoint_value}" | grep -v promremotecli_log) || true)
+
+  success=$(echo "$out" | grep -v promremotecli_log | docker run --rm -i "$JQ_IMAGE" jq .success)
+  status=$(echo "$out" | grep -v promremotecli_log | docker run --rm -i "$JQ_IMAGE" jq .statusCode)
+  if [[ "$success" != "$expect_success" ]]; then
+    echo "$expect_success_err"
+    return 1
+  fi
+  if [[ "$status" != "$expect_status" ]]; then
+    echo "${expect_status_err}: actual=${status}"
+    return 1
+  fi
+  echo "Returned success=${success}, status=${status} as expected"
+  return 0
+}
+
+function wait_until_ready {
+  host=$1
+  # Check readiness probe eventually succeeds
+  echo "Check readiness probe eventually succeeds"
+  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
+    "[[ \$(curl --write-out \"%{http_code}\" --silent --output /dev/null $host/ready) -eq \"200\" ]]"
+}
+
+function query_metric {
+  metric_name=$1
+  host=$2
+  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
+    "[[ \$(curl -sSf $host/api/v1/query?query=$metric_name | jq -r .data.result[0].value[1]) -gt 0 ]]"
+}
+
+function test_prometheus_remote_write_multi_namespaces {
+  now=$(date +"%s")
+  now_truncate_by=$(( now % 5 ))
+  now_truncated=$(( now - now_truncate_by ))
+  prometheus_raw_local_address="0.0.0.0:9090"
+  prometheus_agg_local_address="0.0.0.0:9091"
+  metric_name=foo_metric
+
+  for _ in {1..5} ; do
+    prometheus_remote_write \
+      $metric_name $now_truncated 42 \
+      true "Expected request to succeed" \
+      200 "Expected request to return status code 200"
+  done
+
+  # Make sure we're proxying writes to the unaggregated namespace
+  echo "Wait until data begins being written to remote storage for the unaggregated namespace"
+  query_metric $metric_name $prometheus_raw_local_address
+
+  # Make sure we're proxying writes to the aggregated namespace
+  echo "Wait until data begins being written to remote storage for the aggregated namespace"
+  query_metric $metric_name $prometheus_agg_local_address
+}
\ No newline at end of file
diff --git a/scripts/docker-integration-tests/prometheus/prometheus.yml b/scripts/docker-integration-tests/prometheus/prometheus.yml
index c11e4d7239..6c7b7ee2ca 100644
--- a/scripts/docker-integration-tests/prometheus/prometheus.yml
+++ b/scripts/docker-integration-tests/prometheus/prometheus.yml
@@ -1,4 +1,4 @@
-# my global config
+d# my global config
 global:
   external_labels:
     role: "remote"
diff --git a/scripts/docker-integration-tests/run.sh b/scripts/docker-integration-tests/run.sh
index c0ff48903c..054ddca714 100755
--- a/scripts/docker-integration-tests/run.sh
+++ b/scripts/docker-integration-tests/run.sh
@@ -3,20 +3,7 @@
 set -ex
 
 TESTS=(
-  scripts/docker-integration-tests/simple/test.sh
-  scripts/docker-integration-tests/cold_writes_simple/test.sh
-  scripts/docker-integration-tests/prometheus/test.sh
-  scripts/docker-integration-tests/prometheus_replication/test.sh
-  scripts/docker-integration-tests/carbon/test.sh
-  scripts/docker-integration-tests/aggregator/test.sh
-  scripts/docker-integration-tests/aggregator_legacy/test.sh
-  scripts/docker-integration-tests/query_fanout/test.sh
-  
scripts/docker-integration-tests/repair/test.sh - scripts/docker-integration-tests/replication/test.sh - scripts/docker-integration-tests/repair_and_replication/test.sh - scripts/docker-integration-tests/multi_cluster_write/test.sh - scripts/docker-integration-tests/coordinator_config_rules/test.sh - scripts/docker-integration-tests/coordinator_noop/test.sh + scripts/docker-integration-tests/prom_remote_write_backend/test.sh ) # Some systems, including our default Buildkite hosts, don't come with netcat diff --git a/src/query/storage/promremote/storage.go b/src/query/storage/promremote/storage.go index b514596821..ec24199365 100644 --- a/src/query/storage/promremote/storage.go +++ b/src/query/storage/promremote/storage.go @@ -92,6 +92,7 @@ func (p *promStorage) Write(ctx context.Context, query *storage.WriteQuery) erro metrics := p.endpointMetrics[endpoint.name] atLeastOneEndpointMatched = true + p.logger.Info("sending metric", zap.Duration("retention", endpoint.attributes.Retention), zap.Any("tags", query.Tags()), zap.Any("dp", query.Datapoints())) wg.Add(1) go func() { defer wg.Done() From 56877931953b60847cc3005267bc50c505803b71 Mon Sep 17 00:00:00 2001 From: Antanas Bastys Date: Thu, 23 Sep 2021 15:29:33 +0300 Subject: [PATCH 2/5] cleanup, wait for leader --- .../docker-compose.yml | 14 +- .../m3aggregator.yml | 2 +- .../prom_remote_write_backend/test.sh | 112 ++----------- .../prom_remote_write_backend/tests.sh | 57 +------ .../prom_remote_write_backend/utils.sh | 153 ++++++++++++++++++ .../prometheus/prometheus.yml | 2 +- scripts/docker-integration-tests/run.sh | 15 +- src/query/storage/promremote/storage.go | 1 - 8 files changed, 189 insertions(+), 167 deletions(-) create mode 100644 scripts/docker-integration-tests/prom_remote_write_backend/utils.sh diff --git a/scripts/docker-integration-tests/prom_remote_write_backend/docker-compose.yml b/scripts/docker-integration-tests/prom_remote_write_backend/docker-compose.yml index 6d72dd4614..6c38ae39de 100644 --- a/scripts/docker-integration-tests/prom_remote_write_backend/docker-compose.yml +++ b/scripts/docker-integration-tests/prom_remote_write_backend/docker-compose.yml @@ -1,15 +1,5 @@ version: "3.5" services: - dbnode01: - expose: - - "9000-9004" - - "7201" - ports: - - "0.0.0.0:9000-9004:9000-9004" - - "0.0.0.0:7201:7201" - networks: - - backend - image: "m3dbnode_integration:${REVISION}" m3aggregator01: expose: - "6001" @@ -23,6 +13,10 @@ services: volumes: - "./m3aggregator.yml:/etc/m3aggregator/m3aggregator.yml" m3aggregator02: + expose: + - "6002" + ports: + - "127.0.0.1:6002:6001" networks: - backend environment: diff --git a/scripts/docker-integration-tests/prom_remote_write_backend/m3aggregator.yml b/scripts/docker-integration-tests/prom_remote_write_backend/m3aggregator.yml index 6b6fa0a4f3..c9ee22de23 100644 --- a/scripts/docker-integration-tests/prom_remote_write_backend/m3aggregator.yml +++ b/scripts/docker-integration-tests/prom_remote_write_backend/m3aggregator.yml @@ -179,7 +179,7 @@ aggregator: maxJitterPercent: 0.25 numWorkersPerCPU: 0.5 flushTimesPersistEvery: 10s - maxBufferSize: 5m + maxBufferSize: 10s forcedFlushWindowSize: 10s flush: handlers: diff --git a/scripts/docker-integration-tests/prom_remote_write_backend/test.sh b/scripts/docker-integration-tests/prom_remote_write_backend/test.sh index 85781a8094..a72d00a2e4 100755 --- a/scripts/docker-integration-tests/prom_remote_write_backend/test.sh +++ b/scripts/docker-integration-tests/prom_remote_write_backend/test.sh @@ -4,28 +4,22 @@ set -xe 
M3_PATH=${M3_PATH:-$GOPATH/src/github.com/m3db/m3}
 TESTDIR="$M3_PATH"/scripts/docker-integration-tests/
-source "$TESTDIR"/common.sh
-source "$TESTDIR"/prom_remote_write_backend/tests.sh
 REVISION=$(git rev-parse HEAD)
+export REVISION
 COMPOSE_FILE="$TESTDIR"/prom_remote_write_backend/docker-compose.yml
-# quay.io/m3db/prometheus_remote_client_golang @ v0.4.3
 PROMREMOTECLI_IMAGE=quay.io/m3db/prometheus_remote_client_golang:v0.4.3
 JQ_IMAGE=realguess/jq:1.4@sha256:300c5d9fb1d74154248d155ce182e207cf6630acccbaadd0168e18b15bfaa786
-export REVISION
+TEST_SUCCESS=false
+
+source "$TESTDIR"/common.sh
+source "$TESTDIR"/prom_remote_write_backend/utils.sh
+source "$TESTDIR"/prom_remote_write_backend/tests.sh
 
 echo "Pull containers required for test"
 docker pull $PROMREMOTECLI_IMAGE
 docker pull $JQ_IMAGE
 
-function defer {
-  if [[ "$TEST_SUCCESS" != "true" ]]; then
-    echo "Test failure, printing docker-compose logs"
-    docker-compose -f "${COMPOSE_FILE}" logs
-  fi
-
-  docker-compose -f "${COMPOSE_FILE}" down || echo "unable to shutdown containers" # CI fails to stop all containers sometimes
-}
-trap defer EXIT
+trap 'cleanup ${COMPOSE_FILE} ${TEST_SUCCESS}' EXIT
 
 echo "Run ETCD"
 docker-compose -f "${COMPOSE_FILE}" up -d etcd01
@@ -34,106 +28,22 @@ echo "Run Coordinator in Admin mode"
 docker-compose -f "${COMPOSE_FILE}" up -d coordinatoradmin
 wait_until_ready "0.0.0.0:7201"
 
-echo "Initializing aggregator topology"
-curl -vvvsSf -X POST -H "Cluster-Environment-Name: override_test_env" localhost:7201/api/v1/services/m3aggregator/placement/init -d '{
-  "num_shards": 64,
-  "replication_factor": 2,
-  "instances": [
-    {
-      "id": "m3aggregator01",
-      "isolation_group": "availability-zone-a",
-      "zone": "embedded",
-      "weight": 100,
-      "endpoint": "m3aggregator01:6000",
-      "hostname": "m3aggregator01",
-      "port": 6000
-    },
-    {
-      "id": "m3aggregator02",
-      "isolation_group": "availability-zone-b",
-      "zone": "embedded",
-      "weight": 100,
-      "endpoint": "m3aggregator02:6000",
-      "hostname": "m3aggregator02",
-      "port": 6000
-    }
-  ]
-}'
-
-echo "Initializing m3msg inbound topic for m3aggregator ingestion from m3coordinators"
-curl -vvvsSf -X POST -H "Topic-Name: aggregator_ingest" -H "Cluster-Environment-Name: override_test_env" localhost:7201/api/v1/topic/init -d '{
-  "numberOfShards": 64
-}'
-
-# Do this after placement and topic for m3aggregator is created.
-echo "Adding m3aggregator as a consumer to the aggregator ingest topic"
-curl -vvvsSf -X POST -H "Topic-Name: aggregator_ingest" -H "Cluster-Environment-Name: override_test_env" localhost:7201/api/v1/topic -d '{
-  "consumerService": {
-    "serviceId": {
-      "name": "m3aggregator",
-      "environment": "override_test_env",
-      "zone": "embedded"
-    },
-    "consumptionType": "REPLICATED",
-    "messageTtlNanos": "600000000000"
-  }
-}' # msgs will be discarded after 600000000000ns = 10mins
-
-# TODO: check whether this is needed
-echo "Initializing m3coordinator topology"
-curl -vvvsSf -X POST localhost:7201/api/v1/services/m3coordinator/placement/init -d '{
-  "instances": [
-    {
-      "id": "m3coordinator01",
-      "zone": "embedded",
-      "endpoint": "m3coordinator01:7507",
-      "hostname": "m3coordinator01",
-      "port": 7507
-    }
-  ]
-}'
-echo "Done initializing m3coordinator topology"
-
-echo "Validating m3coordinator topology"
-[ "$(curl -sSf localhost:7201/api/v1/services/m3coordinator/placement | jq .placement.instances.m3coordinator01.id)" == '"m3coordinator01"' ]
-echo "Done validating topology"
-
-# Do this after placement for m3coordinator is created. 
-echo "Initializing m3msg outbound topic for m3coordinator ingestion from m3aggregators" -curl -vvvsSf -X POST -H "Topic-Name: aggregated_metrics" -H "Cluster-Environment-Name: override_test_env" 0.0.0.0:7201/api/v1/topic/init -d '{ - "numberOfShards": 64 -}' - -echo "Adding m3coordinator as a consumer to the aggregator publish topic" -curl -vvvsSf -X POST -H "Topic-Name: aggregated_metrics" -H "Cluster-Environment-Name: override_test_env" 0.0.0.0:7201/api/v1/topic -d '{ - "consumerService": { - "serviceId": { - "name": "m3coordinator", - "environment": "default_env", - "zone": "embedded" - }, - "consumptionType": "SHARED", - "messageTtlNanos": "600000000000" - } -}' # msgs will be discarded after 600000000000ns = 10mins +initialize_m3_via_coordinator_admin echo "Run M3 containers" docker-compose -f "${COMPOSE_FILE}" up -d m3aggregator01 docker-compose -f "${COMPOSE_FILE}" up -d m3aggregator02 -sleep 10 docker-compose -f "${COMPOSE_FILE}" up -d m3coordinator01 -wait_until_ready "0.0.0.0:7202" - echo "Start Prometheus containers" docker-compose -f "${COMPOSE_FILE}" up -d prometheusraw docker-compose -f "${COMPOSE_FILE}" up -d prometheusagg -sleep 10 +wait_until_leader_elected +wait_until_ready "0.0.0.0:7202" -TEST_SUCCESS=false +echo "Running tests" -echo "Running write tests" test_prometheus_remote_write_multi_namespaces TEST_SUCCESS=true diff --git a/scripts/docker-integration-tests/prom_remote_write_backend/tests.sh b/scripts/docker-integration-tests/prom_remote_write_backend/tests.sh index 3e93757c20..e17d23999c 100644 --- a/scripts/docker-integration-tests/prom_remote_write_backend/tests.sh +++ b/scripts/docker-integration-tests/prom_remote_write_backend/tests.sh @@ -3,53 +3,7 @@ set -xe source "$M3_PATH"/scripts/docker-integration-tests/common.sh - -function prometheus_remote_write { - local metric_name=$1 - local datapoint_timestamp=$2 - local datapoint_value=$3 - local expect_success=$4 - local expect_success_err=$5 - local expect_status=$6 - local expect_status_err=$7 - - network_name="prom_remote_write_backend_backend" - network=$(docker network ls | grep -F $network_name | tr -s ' ' | cut -f 1 -d ' ' | tail -n 1) - - out=$( (docker run -it --rm --network "$network" \ - "$PROMREMOTECLI_IMAGE" \ - -u http://m3coordinator01:7201/api/v1/prom/remote/write \ - -t __name__:"${metric_name}" \ - -d "${datapoint_timestamp}","${datapoint_value}" | grep -v promremotecli_log) || true) - - success=$(echo "$out" | grep -v promremotecli_log | docker run --rm -i "$JQ_IMAGE" jq .success) - status=$(echo "$out" | grep -v promremotecli_log | docker run --rm -i "$JQ_IMAGE" jq .statusCode) - if [[ "$success" != "$expect_success" ]]; then - echo "$expect_success_err" - return 1 - fi - if [[ "$status" != "$expect_status" ]]; then - echo "${expect_status_err}: actual=${status}" - return 1 - fi - echo "Returned success=${success}, status=${status} as expected" - return 0 -} - -function wait_until_ready { - host=$1 - # Check readiness probe eventually succeeds - echo "Check readiness probe eventually succeeds" - ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \ - "[[ \$(curl --write-out \"%{http_code}\" --silent --output /dev/null $host/ready) -eq \"200\" ]]" -} - -function query_metric { - metric_name=$1 - host=$2 - ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \ - "[[ \$(curl -sSf $host/api/v1/query?query=$metric_name | jq -r .data.result[0].value[1]) -gt 0 ]]" -} +source "$M3_PATH"/scripts/docker-integration-tests/prom_remote_write_backend/utils.sh function 
test_prometheus_remote_write_multi_namespaces {
   now=$(date +"%s")
@@ -59,18 +13,17 @@ function test_prometheus_remote_write_multi_namespaces {
   now_truncate_by=$(( now % 5 ))
   now_truncated=$(( now - now_truncate_by ))
   prometheus_raw_local_address="0.0.0.0:9090"
   prometheus_agg_local_address="0.0.0.0:9091"
   metric_name=foo_metric
 
-  for _ in {1..5} ; do
+  # NB(antanas): just sending metrics multiple times to make sure everything is stable after startup.
+  for _ in {1..10} ; do
     prometheus_remote_write \
       $metric_name $now_truncated 42 \
       true "Expected request to succeed" \
       200 "Expected request to return status code 200"
   done
 
-  # Make sure we're proxying writes to the unaggregated namespace
-  echo "Wait until data begins being written to remote storage for the unaggregated namespace"
+  echo "Querying for data in raw prometheus"
   query_metric $metric_name $prometheus_raw_local_address
 
-  # Make sure we're proxying writes to the aggregated namespace
-  echo "Wait until data begins being written to remote storage for the aggregated namespace"
+  echo "Querying for data in aggregated prometheus"
   query_metric $metric_name $prometheus_agg_local_address
 }
\ No newline at end of file
diff --git a/scripts/docker-integration-tests/prom_remote_write_backend/utils.sh b/scripts/docker-integration-tests/prom_remote_write_backend/utils.sh
new file mode 100644
index 0000000000..470b1adb20
--- /dev/null
+++ b/scripts/docker-integration-tests/prom_remote_write_backend/utils.sh
@@ -0,0 +1,153 @@
+#!/usr/bin/env bash
+
+set -xe
+
+source "$M3_PATH"/scripts/docker-integration-tests/common.sh
+
+function prometheus_remote_write {
+  local metric_name=$1
+  local datapoint_timestamp=$2
+  local datapoint_value=$3
+  local expect_success=$4
+  local expect_success_err=$5
+  local expect_status=$6
+  local expect_status_err=$7
+
+  network_name="prom_remote_write_backend_backend"
+  network=$(docker network ls | grep -F $network_name | tr -s ' ' | cut -f 1 -d ' ' | tail -n 1)
+
+  out=$( (docker run -it --rm --network "$network" \
+    "$PROMREMOTECLI_IMAGE" \
+    -u http://m3coordinator01:7201/api/v1/prom/remote/write \
+    -t __name__:"${metric_name}" \
+    -d "${datapoint_timestamp}","${datapoint_value}" | grep -v promremotecli_log) || true)
+
+  success=$(echo "$out" | grep -v promremotecli_log | docker run --rm -i "$JQ_IMAGE" jq .success)
+  status=$(echo "$out" | grep -v promremotecli_log | docker run --rm -i "$JQ_IMAGE" jq .statusCode)
+  if [[ "$success" != "$expect_success" ]]; then
+    echo "$expect_success_err"
+    return 1
+  fi
+  if [[ "$status" != "$expect_status" ]]; then
+    echo "${expect_status_err}: actual=${status}"
+    return 1
+  fi
+  echo "Returned success=${success}, status=${status} as expected"
+  return 0
+}
+
+function wait_until_ready {
+  host=$1
+  # Check readiness probe eventually succeeds
+  echo "Check readiness probe eventually succeeds"
+  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
+    "[[ \$(curl --write-out \"%{http_code}\" --silent --output /dev/null $host/ready) -eq \"200\" ]]"
+}
+
+function query_metric {
+  metric_name=$1
+  host=$2
+  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
+    "[[ \$(curl -sSf $host/api/v1/query?query=$metric_name | jq -r .data.result[0].value[1]) -gt 0 ]]"
+}
+
+function wait_until_leader_elected {
+  ATTEMPTS=50 TIMEOUT=2 MAX_TIMEOUT=4 retry_with_backoff \
+    "[[ \$(curl localhost:6001/status localhost:6002/status | grep leader) ]]"
+}
+
+function cleanup {
+  local compose_file=$1
+  local success=$2
+  if [[ "$success" != "true" ]]; then
+    echo "Test failure, printing docker-compose logs"
+    docker-compose -f "${compose_file}" logs
+  fi
+
+  docker-compose -f "${compose_file}" down || echo 
"unable to shutdown containers" # CI fails to stop all containers sometimes +} + +function initialize_m3_via_coordinator_admin { + echo "Initializing aggregator topology" + curl -vvvsSf -X POST -H "Cluster-Environment-Name: override_test_env" localhost:7201/api/v1/services/m3aggregator/placement/init -d '{ + "num_shards": 64, + "replication_factor": 2, + "instances": [ + { + "id": "m3aggregator01", + "isolation_group": "availability-zone-a", + "zone": "embedded", + "weight": 100, + "endpoint": "m3aggregator01:6000", + "hostname": "m3aggregator01", + "port": 6000 + }, + { + "id": "m3aggregator02", + "isolation_group": "availability-zone-b", + "zone": "embedded", + "weight": 100, + "endpoint": "m3aggregator02:6000", + "hostname": "m3aggregator02", + "port": 6000 + } + ] + }' + + echo "Initializing m3msg inbound topic for m3aggregator ingestion from m3coordinators" + curl -vvvsSf -X POST -H "Topic-Name: aggregator_ingest" -H "Cluster-Environment-Name: override_test_env" localhost:7201/api/v1/topic/init -d '{ + "numberOfShards": 64 + }' + + # Do this after placement and topic for m3aggregator is created. + echo "Adding m3aggregator as a consumer to the aggregator ingest topic" + curl -vvvsSf -X POST -H "Topic-Name: aggregator_ingest" -H "Cluster-Environment-Name: override_test_env" localhost:7201/api/v1/topic -d '{ + "consumerService": { + "serviceId": { + "name": "m3aggregator", + "environment": "override_test_env", + "zone": "embedded" + }, + "consumptionType": "REPLICATED", + "messageTtlNanos": "600000000000" + } + }' # msgs will be discarded after 600000000000ns = 10mins + + # TODO paziuret ar nereik + echo "Initializing m3coordinator topology" + curl -vvvsSf -X POST localhost:7201/api/v1/services/m3coordinator/placement/init -d '{ + "instances": [ + { + "id": "m3coordinator01", + "zone": "embedded", + "endpoint": "m3coordinator01:7507", + "hostname": "m3coordinator01", + "port": 7507 + } + ] + }' + echo "Done initializing m3coordinator topology" + + echo "Validating m3coordinator topology" + [ "$(curl -sSf localhost:7201/api/v1/services/m3coordinator/placement | jq .placement.instances.m3coordinator01.id)" == '"m3coordinator01"' ] + echo "Done validating topology" + + # Do this after placement for m3coordinator is created. 
+ echo "Initializing m3msg outbound topic for m3coordinator ingestion from m3aggregators" + curl -vvvsSf -X POST -H "Topic-Name: aggregated_metrics" -H "Cluster-Environment-Name: override_test_env" 0.0.0.0:7201/api/v1/topic/init -d '{ + "numberOfShards": 64 + }' + + echo "Adding m3coordinator as a consumer to the aggregator publish topic" + curl -vvvsSf -X POST -H "Topic-Name: aggregated_metrics" -H "Cluster-Environment-Name: override_test_env" 0.0.0.0:7201/api/v1/topic -d '{ + "consumerService": { + "serviceId": { + "name": "m3coordinator", + "environment": "default_env", + "zone": "embedded" + }, + "consumptionType": "SHARED", + "messageTtlNanos": "600000000000" + } + }' # msgs will be discarded after 600000000000ns = 10mins +} \ No newline at end of file diff --git a/scripts/docker-integration-tests/prometheus/prometheus.yml b/scripts/docker-integration-tests/prometheus/prometheus.yml index 6c7b7ee2ca..c11e4d7239 100644 --- a/scripts/docker-integration-tests/prometheus/prometheus.yml +++ b/scripts/docker-integration-tests/prometheus/prometheus.yml @@ -1,4 +1,4 @@ -d# my global config +# my global config global: external_labels: role: "remote" diff --git a/scripts/docker-integration-tests/run.sh b/scripts/docker-integration-tests/run.sh index 054ddca714..c0ff48903c 100755 --- a/scripts/docker-integration-tests/run.sh +++ b/scripts/docker-integration-tests/run.sh @@ -3,7 +3,20 @@ set -ex TESTS=( - scripts/docker-integration-tests/prom_remote_write_backend/test.sh + scripts/docker-integration-tests/simple/test.sh + scripts/docker-integration-tests/cold_writes_simple/test.sh + scripts/docker-integration-tests/prometheus/test.sh + scripts/docker-integration-tests/prometheus_replication/test.sh + scripts/docker-integration-tests/carbon/test.sh + scripts/docker-integration-tests/aggregator/test.sh + scripts/docker-integration-tests/aggregator_legacy/test.sh + scripts/docker-integration-tests/query_fanout/test.sh + scripts/docker-integration-tests/repair/test.sh + scripts/docker-integration-tests/replication/test.sh + scripts/docker-integration-tests/repair_and_replication/test.sh + scripts/docker-integration-tests/multi_cluster_write/test.sh + scripts/docker-integration-tests/coordinator_config_rules/test.sh + scripts/docker-integration-tests/coordinator_noop/test.sh ) # Some systems, including our default Buildkite hosts, don't come with netcat diff --git a/src/query/storage/promremote/storage.go b/src/query/storage/promremote/storage.go index ec24199365..b514596821 100644 --- a/src/query/storage/promremote/storage.go +++ b/src/query/storage/promremote/storage.go @@ -92,7 +92,6 @@ func (p *promStorage) Write(ctx context.Context, query *storage.WriteQuery) erro metrics := p.endpointMetrics[endpoint.name] atLeastOneEndpointMatched = true - p.logger.Info("sending metric", zap.Duration("retention", endpoint.attributes.Retention), zap.Any("tags", query.Tags()), zap.Any("dp", query.Datapoints())) wg.Add(1) go func() { defer wg.Done() From 3e2944296b2088492b394183420363c5f5ada1ad Mon Sep 17 00:00:00 2001 From: Antanas Bastys Date: Thu, 23 Sep 2021 15:52:47 +0300 Subject: [PATCH 3/5] remove todo --- .../docker-integration-tests/prom_remote_write_backend/utils.sh | 1 - scripts/docker-integration-tests/run.sh | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/docker-integration-tests/prom_remote_write_backend/utils.sh b/scripts/docker-integration-tests/prom_remote_write_backend/utils.sh index 470b1adb20..880d8267ff 100644 --- 
a/scripts/docker-integration-tests/prom_remote_write_backend/utils.sh
+++ b/scripts/docker-integration-tests/prom_remote_write_backend/utils.sh
@@ -113,7 +113,6 @@ function initialize_m3_via_coordinator_admin {
     }
   }' # msgs will be discarded after 600000000000ns = 10mins
 
-  # TODO: check whether this is needed
   echo "Initializing m3coordinator topology"
   curl -vvvsSf -X POST localhost:7201/api/v1/services/m3coordinator/placement/init -d '{
     "instances": [
diff --git a/scripts/docker-integration-tests/run.sh b/scripts/docker-integration-tests/run.sh
index c0ff48903c..844d924fc6 100755
--- a/scripts/docker-integration-tests/run.sh
+++ b/scripts/docker-integration-tests/run.sh
@@ -17,6 +17,7 @@ TESTS=(
   scripts/docker-integration-tests/multi_cluster_write/test.sh
   scripts/docker-integration-tests/coordinator_config_rules/test.sh
   scripts/docker-integration-tests/coordinator_noop/test.sh
+  scripts/docker-integration-tests/prom_remote_write_backend/test.sh
 )
 
 # Some systems, including our default Buildkite hosts, don't come with netcat

From 188b7a519b034c4a739cda14f7ceccf037c27fd1 Mon Sep 17 00:00:00 2001
From: Antanas Bastys
Date: Thu, 23 Sep 2021 17:34:48 +0300
Subject: [PATCH 4/5] fixes

---
 .../prom_remote_write_backend/m3aggregator.yml   |  4 ++--
 .../prom_remote_write_backend/m3coordinator.yml  | 16 ++++++++++++++++
 .../prom_remote_write_backend/tests.sh           |  4 ++--
 3 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/scripts/docker-integration-tests/prom_remote_write_backend/m3aggregator.yml b/scripts/docker-integration-tests/prom_remote_write_backend/m3aggregator.yml
index c9ee22de23..1d77b0a035 100644
--- a/scripts/docker-integration-tests/prom_remote_write_backend/m3aggregator.yml
+++ b/scripts/docker-integration-tests/prom_remote_write_backend/m3aggregator.yml
@@ -120,7 +120,7 @@ aggregator:
   hashType: murmur32
   bufferDurationBeforeShardCutover: 10m
   bufferDurationAfterShardCutoff: 10m
-  bufferDurationForFutureTimedMetric: 10s # Allow test to write into future.
+  bufferDurationForFutureTimedMetric: 10m # Allow test to write into future.
   bufferDurationForPastTimedMetric: 10s # Don't wait too long for timed metrics to flush. 
resignTimeout: 1m flushTimesManager: @@ -179,7 +179,7 @@ aggregator: maxJitterPercent: 0.25 numWorkersPerCPU: 0.5 flushTimesPersistEvery: 10s - maxBufferSize: 10s + maxBufferSize: 10m forcedFlushWindowSize: 10s flush: handlers: diff --git a/scripts/docker-integration-tests/prom_remote_write_backend/m3coordinator.yml b/scripts/docker-integration-tests/prom_remote_write_backend/m3coordinator.yml index de408673d2..d6c54c8430 100644 --- a/scripts/docker-integration-tests/prom_remote_write_backend/m3coordinator.yml +++ b/scripts/docker-integration-tests/prom_remote_write_backend/m3coordinator.yml @@ -42,6 +42,22 @@ tagOptions: idScheme: quoted downsample: + rules: + rollupRules: + - name: "prom_remote_write_test_metric rolled up" + filter: "__name__:prom_remote_write_test_metric" + transforms: + - transform: + type: "Increase" + - rollup: + metricName: "prom_remote_write_test_metric_rolled_up" + groupBy: [] + aggregations: ["Sum"] + - transform: + type: "Add" + storagePolicies: + - resolution: 5s + retention: 1h matcher: requireNamespaceWatchOnInit: false remoteAggregator: diff --git a/scripts/docker-integration-tests/prom_remote_write_backend/tests.sh b/scripts/docker-integration-tests/prom_remote_write_backend/tests.sh index e17d23999c..b53403ef13 100644 --- a/scripts/docker-integration-tests/prom_remote_write_backend/tests.sh +++ b/scripts/docker-integration-tests/prom_remote_write_backend/tests.sh @@ -11,7 +11,7 @@ function test_prometheus_remote_write_multi_namespaces { now_truncated=$(( now - now_truncate_by )) prometheus_raw_local_address="0.0.0.0:9090" prometheus_agg_local_address="0.0.0.0:9091" - metric_name=foo_metric + metric_name=prom_remote_write_test_metric # NB(antanas): just sending metrics multiple times to make sure everything is stable after startup. 
for _ in {1..10} ; do @@ -25,5 +25,5 @@ function test_prometheus_remote_write_multi_namespaces { query_metric $metric_name $prometheus_raw_local_address echo "Querying for data in aggregated prometheus" - query_metric $metric_name $prometheus_agg_local_address + query_metric "${metric_name}_rolled_up" $prometheus_agg_local_address } \ No newline at end of file From 6fca768d75d1e77d816916438cca403c51b91882 Mon Sep 17 00:00:00 2001 From: Antanas Bastys Date: Tue, 28 Sep 2021 17:12:52 +0300 Subject: [PATCH 5/5] use local jq --- .../prom_remote_write_backend/test.sh | 2 -- .../prom_remote_write_backend/utils.sh | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/scripts/docker-integration-tests/prom_remote_write_backend/test.sh b/scripts/docker-integration-tests/prom_remote_write_backend/test.sh index a72d00a2e4..208bb88565 100755 --- a/scripts/docker-integration-tests/prom_remote_write_backend/test.sh +++ b/scripts/docker-integration-tests/prom_remote_write_backend/test.sh @@ -8,7 +8,6 @@ REVISION=$(git rev-parse HEAD) export REVISION COMPOSE_FILE="$TESTDIR"/prom_remote_write_backend/docker-compose.yml PROMREMOTECLI_IMAGE=quay.io/m3db/prometheus_remote_client_golang:v0.4.3 -JQ_IMAGE=realguess/jq:1.4@sha256:300c5d9fb1d74154248d155ce182e207cf6630acccbaadd0168e18b15bfaa786 TEST_SUCCESS=false source "$TESTDIR"/common.sh @@ -17,7 +16,6 @@ source "$TESTDIR"/prom_remote_write_backend/tests.sh echo "Pull containers required for test" docker pull $PROMREMOTECLI_IMAGE -docker pull $JQ_IMAGE trap 'cleanup ${COMPOSE_FILE} ${TEST_SUCCESS}' EXIT diff --git a/scripts/docker-integration-tests/prom_remote_write_backend/utils.sh b/scripts/docker-integration-tests/prom_remote_write_backend/utils.sh index 880d8267ff..96ffa2211e 100644 --- a/scripts/docker-integration-tests/prom_remote_write_backend/utils.sh +++ b/scripts/docker-integration-tests/prom_remote_write_backend/utils.sh @@ -22,8 +22,8 @@ function prometheus_remote_write { -t __name__:"${metric_name}" \ -d "${datapoint_timestamp}","${datapoint_value}" | grep -v promremotecli_log) || true) - success=$(echo "$out" | grep -v promremotecli_log | docker run --rm -i "$JQ_IMAGE" jq .success) - status=$(echo "$out" | grep -v promremotecli_log | docker run --rm -i "$JQ_IMAGE" jq .statusCode) + success=$(echo "$out" | grep -v promremotecli_log | jq .success) + status=$(echo "$out" | grep -v promremotecli_log | jq .statusCode) if [[ "$success" != "$expect_success" ]]; then echo "$expect_success_err" return 1