Skip to content

Commit

Permalink
Add rudimentary multi-cluster support for m3coordinator (#785)
Browse files Browse the repository at this point in the history
This adds rudimentary multi-cluster support for m3coordinator so that we can begin to send metrics to different namespaces for different retention periods, etc.

Right now this is not optimal: it retrieves data from all matching namespaces that have retention long enough to cover the query, then returns the most granular datapoints that come back and discards any lower-granularity datapoints that also came back. This is because we do not have a distributed index in the current M3 open source offering. For read workloads that do not require tens or hundreds of thousands of realtime alert evaluations, this should be quite sufficient, given that the write volume absolutely dwarfs the read volume.

At some point we'll hopefully have a distributed index that we can use for all use cases and deprecate this behavior.
  • Loading branch information
robskillington authored Jul 11, 2018
1 parent 9fa2377 commit 69b0ea0
Show file tree
Hide file tree
Showing 41 changed files with 2,749 additions and 454 deletions.
73 changes: 65 additions & 8 deletions glide.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

23 changes: 22 additions & 1 deletion glide.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ import:
- package: github.com/m3db/m3em
version: ed532baee45a440f0b08b6893c816634c6978d4d

- package: github.com/m3db/m3aggregator
version: fd38c07d1a94b8598b6839a9c471e0a6feacfafb

- package: github.com/m3db/m3ninx
version: 7556fa8339674f1d9f559486d1feca18c17d1190

- package: github.com/m3db/bitset
version: 07973db6b78acb62ac207d0538055e874b49d90d

Expand Down Expand Up @@ -54,6 +60,7 @@ import:

- package: github.com/golang/protobuf
version: ^1.1.0

subpackages:
- proto
- ptypes/timestamp
Expand Down Expand Up @@ -145,7 +152,7 @@ import:
version: ^2.2.6

- package: github.com/m3db/m3metrics
version: 17e4ddf89f2b0c8fbb16b6d5b71a871b793d2a00
version: f22d8684fa8b42ff30f1d68f6f2be5e465db9a9d
subpackages:
- policy

Expand All @@ -161,14 +168,28 @@ import:
subpackages:
- cmp

# START_PROMETHEUS_DEPS
- package: github.com/prometheus/prometheus
version: 998dfcbac689ae832ea64ca134fcb096f61a7f62

# To avoid prometheus/prometheus dependencies from breaking,
# pin the transitive dependencies
- package: github.com/prometheus/common
version: 9e0844febd9e2856f839c9cb974fbd676d1755a8

- package: github.com/prometheus/procfs
version: a1dba9ce8baed984a2495b658c82687f8157b98f

- package: github.com/prometheus/tsdb
version: 16b2bf1b45ce3e3536c78ebec5116ea09a69786e
# END_PROMETHEUS_DEPS

- package: github.com/coreos/pkg
version: 4
subpackages:
- capnslog

# To avoid conflicting packages not resolving the latest GRPC
- package: google.golang.org/grpc
version: ~1.7.3
subpackages:
Expand Down
5 changes: 4 additions & 1 deletion scripts/integration-tests/prometheus/m3dbnode-local-etcd.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
coordinator:
listenAddress: 0.0.0.0:7201

local:
namespace: prometheus_metrics
retention: 48h

metrics:
scope:
prefix: "coordinator"
Expand All @@ -10,7 +14,6 @@ coordinator:
sanitization: prometheus
samplingRate: 1.0
extended: none
dbNamespace: prometheus_metrics

db:
logging:
Expand Down
22 changes: 11 additions & 11 deletions scripts/integration-tests/prometheus/prometheus-integration-test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ set -xe
rm -rf /tmp/m3dbdata/
mkdir -p /tmp/m3dbdata/

echo "Build M3DB docker image"
echo "Build M3DB docker image"

docker-compose -f docker-compose.yml build

echo "Run M3DB docker container"
echo "Run M3DB docker container"

docker-compose -f docker-compose.yml up -d dbnode01

Expand Down Expand Up @@ -43,13 +43,13 @@ curl -vvvsSf -X POST localhost:7201/api/v1/namespace -d '{
}
}'

echo "Sleep while namespace is init'd"
echo "Sleep while namespace is init'd"

sleep 10 # TODO Replace sleeps with logic to determine when to proceed

[ "$(curl -sSf localhost:7201/api/v1/namespace | jq .registry.namespaces.prometheus_metrics.indexOptions.enabled)" == true ]

echo "Initialization placement"
echo "Initialization placement"

curl -vvvsSf -X POST localhost:7201/api/v1/placement/init -d '{
"num_shards": 64,
Expand All @@ -69,17 +69,17 @@ curl -vvvsSf -X POST localhost:7201/api/v1/placement/init -d '{

[ "$(curl -sSf localhost:7201/api/v1/placement | jq .placement.instances.m3db_local.id)" == '"m3db_local"' ]

echo "Wait for placement to fully initialize"
echo "Wait for placement to fully initialize"

sleep 60 # TODO Replace sleeps with logic to determine when to proceed

echo "Start Prometheus container"
echo "Start Prometheus container"

docker-compose -f docker-compose.yml up -d prometheus01

sleep 10

echo "Write data"
echo "Write data"

curl -vvvsSf -X POST localhost:9003/writetagged -d '{
"namespace": "prometheus_metrics",
Expand Down Expand Up @@ -112,12 +112,12 @@ queryResult=$(curl -sSf -X POST localhost:9003/query -d '{
},
"rangeStart": 0,
"rangeEnd":'"$(date +"%s")"'
}' | jq '.results | length')
}' | jq '.results | length')

if [ "$queryResult" -lt 1 ]; then
echo "Result not found"
if [ "$queryResult" -lt 1 ]; then
echo "Result not found"
exit 1
else
else
echo "Result found"
fi

Expand Down
45 changes: 36 additions & 9 deletions src/cmd/services/m3coordinator/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,34 +21,61 @@
package config

import (
"github.com/m3db/m3db/src/dbnode/client"
"time"

etcdclient "github.com/m3db/m3cluster/client/etcd"
"github.com/m3db/m3db/src/coordinator/storage/local"
"github.com/m3db/m3x/instrument"
)

// Configuration is the configuration for the coordinator.
type Configuration struct {
// DBClient is the DB client configuration.
DBClient *client.Configuration `yaml:"dbClient"`

// Metrics is the metrics configuration.
Metrics instrument.MetricsConfiguration `yaml:"metrics"`

// Clusters is the DB cluster configurations for read, write and
// query endpoints.
Clusters local.ClustersStaticConfiguration `yaml:"clusters"`

// Local is the local embedded configuration if running
// coordinator embedded in the DB.
Local *LocalConfiguration `yaml:"local"`

// ClusterManagement is the configuration for placement, namespaces and
// database management endpoints (optional).
ClusterManagement *ClusterManagementConfiguration `yaml:"clusterManagement"`

// ListenAddress is the server listen address. It is validated as nonzero.
ListenAddress string `yaml:"listenAddress" validate:"nonzero"`

// RPC is the RPC configuration.
RPC *RPCConfiguration `yaml:"rpc"`

// DBNamespace is the namespace string to use for reads and writes.
DBNamespace string `yaml:"dbNamespace"`

// DecompressWorkerPoolCount is the number of decompression worker pools.
// Note the YAML key is "workerPoolCount", not the field name.
DecompressWorkerPoolCount int `yaml:"workerPoolCount"`

// DecompressWorkerPoolSize is the size of the worker pool given to each
// fetch request. Note the YAML key is "workerPoolSize", not the field name.
DecompressWorkerPoolSize int `yaml:"workerPoolSize"`
}

// LocalConfiguration is the local embedded configuration if running
// coordinator embedded in the DB. Both fields are validated as nonzero.
type LocalConfiguration struct {
// Namespace is the name of the local namespace to write to and read from.
Namespace string `yaml:"namespace" validate:"nonzero"`

// Retention is the retention of the local namespace to write to and
// read from.
Retention time.Duration `yaml:"retention" validate:"nonzero"`
}

// ClusterManagementConfiguration is configuration for the placement,
// namespaces and database management endpoints (optional).
type ClusterManagementConfiguration struct {
// Etcd is the client configuration for etcd.
Etcd etcdclient.Configuration `yaml:"etcd"`
}

// RPCConfiguration is the RPC configuration for the coordinator for
// the GRPC server used for remote coordinator to coordinator calls.
type RPCConfiguration struct {
Expand Down
2 changes: 1 addition & 1 deletion src/cmd/services/m3dbnode/main/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func main() {
go func() {
coordinatorserver.Run(coordinatorserver.RunOptions{
Config: *cfg.Coordinator,
DBConfig: *cfg.DB,
DBConfig: cfg.DB,
DBClient: dbClientCh,
ClusterClient: clusterClientCh,
})
Expand Down
9 changes: 7 additions & 2 deletions src/coordinator/api/v1/handler/database/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,13 @@ type Handler struct {
}

// RegisterRoutes registers the namespace routes
func RegisterRoutes(r *mux.Router, client clusterclient.Client, cfg config.Configuration, dbCfg dbconfig.DBConfiguration) {
func RegisterRoutes(
r *mux.Router,
client clusterclient.Client,
cfg config.Configuration,
embeddedDbCfg *dbconfig.DBConfiguration,
) {
logged := logging.WithResponseTimeLogging

r.HandleFunc(CreateURL, logged(NewCreateHandler(client, cfg, dbCfg)).ServeHTTP).Methods(CreateHTTPMethod)
r.HandleFunc(CreateURL, logged(NewCreateHandler(client, cfg, embeddedDbCfg)).ServeHTTP).Methods(CreateHTTPMethod)
}
Loading

0 comments on commit 69b0ea0

Please sign in to comment.