Skip to content

Commit

Permalink
[targetallocator] PrometheusOperator CRD MVC (open-telemetry#653)
Browse files Browse the repository at this point in the history
* feat(target-allocator): allow custom config file path

* feat(target-allocator): move CLI config options to config package

* feat(target-allocator): allow running outside of cluster for debugging

* introduce meta watcher

* add event source

* fix: log panics

* fix: race condition

* fixup! fix: log panics

* feat: implement promCR retrieval

* feat: functioning

* refactor: some cleanup

* feat(target-allocator): escape job names in query parameters

* feat(target-allocator): make prometheusCR lookup optional and allow using kubernetes_sd outside of cluster

* refactor(target-allocator): improve memory usage and comments

* chore(target-allocator): update PromOperator and Kubernetes deps

* refactor(target-allocator): use exclude instead of replace directive

* ci: add Makefile targets for target allocator

* tests: add kuttl tests for PrometheusCR feature of target allocator

* docs(targetAllocator): add README.md

* fixup CRD docs

* fix(Makefile): add missing PHONY tags

* implement change requests

* go mod tidy and fix linting
  • Loading branch information
secustor authored Apr 22, 2022
1 parent 7e5c232 commit fbc1122
Show file tree
Hide file tree
Showing 23 changed files with 2,119 additions and 274 deletions.
20 changes: 19 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,9 @@ IMG_REPO ?= opentelemetry-operator
IMG ?= ${IMG_PREFIX}/${IMG_REPO}:${VERSION}
BUNDLE_IMG ?= ${IMG_PREFIX}/${IMG_REPO}-bundle:${VERSION}

TARGETALLOCATOR_IMG_REPO ?= target-allocator
TARGETALLOCATOR_IMG ?= ${IMG_PREFIX}/${TARGETALLOCATOR_IMG_REPO}:$(addprefix v,${VERSION})

# Options for 'bundle-build'
ifneq ($(origin CHANNELS), undefined)
BUNDLE_CHANNELS := --channels=$(CHANNELS)
Expand Down Expand Up @@ -149,7 +152,7 @@ e2e:
$(KUTTL) test

.PHONY: prepare-e2e
prepare-e2e: kuttl set-test-image-vars set-image-controller container start-kind
prepare-e2e: kuttl set-test-image-vars set-image-controller container container-target-allocator start-kind load-image-all
mkdir -p tests/_build/crds tests/_build/manifests
$(KUSTOMIZE) build config/default -o tests/_build/manifests/01-opentelemetry-operator.yaml
$(KUSTOMIZE) build config/crd -o tests/_build/crds/
Expand All @@ -161,6 +164,7 @@ scorecard-tests: operator-sdk
.PHONY: set-test-image-vars
set-test-image-vars:
$(eval IMG=local/opentelemetry-operator:e2e)
$(eval TARGETALLOCATOR_IMG=local/opentelemetry-operator-targetallocator:e2e)

# Build the container image, used only for local dev purposes
.PHONY: container
Expand All @@ -172,11 +176,25 @@ container:
container-push:
docker push ${IMG}

.PHONY: container-target-allocator
container-target-allocator:
docker build -t ${TARGETALLOCATOR_IMG} cmd/otel-allocator

.PHONY: start-kind
start-kind:
kind create cluster --config $(KIND_CONFIG)

.PHONY: load-image-all
load-image-all: load-image-operator load-image-target-allocator

.PHONY: load-image-operator
load-image-operator:
kind load docker-image local/opentelemetry-operator:e2e

.PHONY: load-image-target-allocator
load-image-target-allocator:
kind load docker-image ${TARGETALLOCATOR_IMG}

.PHONY: cert-manager
cert-manager: cmctl
# Consider using cmctl to install the cert-manager once install command is not experimental
Expand Down
11 changes: 11 additions & 0 deletions apis/v1alpha1/opentelemetrycollector_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -129,11 +129,22 @@ type OpenTelemetryTargetAllocator struct {
// +optional
Enabled bool `json:"enabled,omitempty"`

// PrometheusCR defines the configuration for the retrieval of PrometheusOperator CRDs ( servicemonitor.monitoring.coreos.com/v1 and podmonitor.monitoring.coreos.com/v1 ) retrieval.
// All CR instances which the ServiceAccount has access to will be retrieved. This includes other namespaces.
// +optional
PrometheusCR OpenTelemetryTargetAllocatorPrometheusCR `json:"prometheusCR,omitempty"`

// Image indicates the container image to use for the OpenTelemetry TargetAllocator.
// +optional
Image string `json:"image,omitempty"`
}

type OpenTelemetryTargetAllocatorPrometheusCR struct {
// Enabled indicates whether to use a PrometheusOperator custom resources as targets or not.
// +optional
Enabled bool `json:"enabled,omitempty"`
}

// ScaleSubresourceStatus defines the observed state of the OpenTelemetryCollector's
// scale subresource.
type ScaleSubresourceStatus struct {
Expand Down
16 changes: 16 additions & 0 deletions apis/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 12 additions & 0 deletions bundle/manifests/opentelemetry.io_opentelemetrycollectors.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -680,6 +680,18 @@ spec:
description: Image indicates the container image to use for the
OpenTelemetry TargetAllocator.
type: string
prometheusCR:
description: PrometheusCR defines the configuration for the retrieval
of PrometheusOperator CRDs ( servicemonitor.monitoring.coreos.com/v1
and podmonitor.monitoring.coreos.com/v1 ) retrieval. All CR
instances which the ServiceAccount has access to will be retrieved.
This includes other namespaces.
properties:
enabled:
description: Enabled indicates whether to use a PrometheusOperator
custom resources as targets or not.
type: boolean
type: object
type: object
tolerations:
description: Toleration to schedule OpenTelemetry Collector pods.
Expand Down
2 changes: 1 addition & 1 deletion cmd/otel-allocator/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,4 @@ WORKDIR /root/
# Copy the pre-built binary file from the previous stage
COPY --from=builder /app/main .

CMD ["./main"]
ENTRYPOINT ["./main"]
85 changes: 85 additions & 0 deletions cmd/otel-allocator/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
# Target Allocator

The TargetAllocator is an optional separately deployed component of an OpenTelemetry Collector setup, which is used to
distribute targets of the PrometheusReceiver on all deployed Collector instances.

# Design

If the Allocator is activated, all Prometheus configurations will be transferred in a separate ConfigMap which get in
turn mounted to the Allocator.
This configuration will be resolved to target configurations and then split across all OpenTelemetryCollector instances.

TargetAllocators expose the results as [HTTP_SD endpoints](https://prometheus.io/docs/prometheus/latest/http_sd/)
split by collector.

#### Endpoints
`/jobs`:

```json
{
"job1": {
"_link": "/jobs/job1/targets"
},
"job2": {
"_link": "/jobs/job1/targets"
}
}

```

`/jobs/{jobID}/targets`:

```json
{
"collector-1": {
"_link": "/jobs/job1/targets?collector_id=collector-1",
"targets": [
{
"Targets": [
"10.100.100.100",
"10.100.100.101",
"10.100.100.102"
],
"Labels": {
"namespace": "a_namespace",
"pod": "a_pod"
}
}
]
}
}
```

`/jobs/{jobID}/targets?collector_id={collectorID}`:

```json
[
{
"targets": [
"10.100.100.100",
"10.100.100.101",
"10.100.100.102"
],
"labels": {
"namespace": "a_namespace",
"pod": "a_pod"
}
}
]
```


## Packages
### Watchers
Watchers are responsible for the translation of external sources into Prometheus readable scrape configurations and
triggers updates to the DiscoveryManager

### DiscoveryManager
Watches the Prometheus service discovery for new targets and sets targets to the Allocator

### Allocator
Shards the received targets based on the discovered Collector instances

### Collector
Client to watch for deployed Collector instances which will then provided to the Allocator.

5 changes: 3 additions & 2 deletions cmd/otel-allocator/allocation/allocator.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package allocation

import (
"fmt"
"net/url"
"sync"

"github.com/go-logr/logr"
Expand Down Expand Up @@ -80,7 +81,7 @@ func (allocator *Allocator) SetWaitingTargets(targets []TargetItem) {
// SetCollectors sets the set of collectors with key=collectorName, value=Collector object.
// SetCollectors is called when Collectors are added or removed
func (allocator *Allocator) SetCollectors(collectors []string) {
log := allocator.log.WithValues("opentelemetry-targetallocator")
log := allocator.log.WithValues("component", "opentelemetry-targetallocator")

allocator.m.Lock()
defer allocator.m.Unlock()
Expand Down Expand Up @@ -132,7 +133,7 @@ func (allocator *Allocator) processWaitingTargets() {
allocator.TargetItems[k] = &v
targetItem := TargetItem{
JobName: v.JobName,
Link: LinkJSON{fmt.Sprintf("/jobs/%s/targets", v.JobName)},
Link: LinkJSON{fmt.Sprintf("/jobs/%s/targets", url.QueryEscape(v.JobName))},
TargetURL: v.TargetURL,
Label: v.Label,
Collector: col,
Expand Down
3 changes: 2 additions & 1 deletion cmd/otel-allocator/allocation/http.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ package allocation

import (
"fmt"
"net/url"

"github.com/prometheus/common/model"
)
Expand Down Expand Up @@ -43,7 +44,7 @@ func GetAllTargetsByJob(job string, cMap map[string][]TargetItem, allocator *All
targetGroupList = append(targetGroupList, targetGroupJSON{Targets: targets, Labels: labelSetMap[targets[0]]})

}
displayData[j.Collector.Name] = collectorJSON{Link: fmt.Sprintf("/jobs/%s/targets?collector_id=%s", j.JobName, j.Collector.Name), Jobs: targetGroupList}
displayData[j.Collector.Name] = collectorJSON{Link: fmt.Sprintf("/jobs/%s/targets?collector_id=%s", url.QueryEscape(j.JobName), j.Collector.Name), Jobs: targetGroupList}

}
}
Expand Down
15 changes: 5 additions & 10 deletions cmd/otel-allocator/collector/collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,8 @@ type Client struct {
close chan struct{}
}

func NewClient(logger logr.Logger) (*Client, error) {
config, err := rest.InClusterConfig()
if err != nil {
return &Client{}, err
}

clientset, err := kubernetes.NewForConfig(config)
func NewClient(logger logr.Logger, kubeConfig *rest.Config) (*Client, error) {
clientset, err := kubernetes.NewForConfig(kubeConfig)
if err != nil {
return &Client{}, err
}
Expand All @@ -50,7 +45,7 @@ func NewClient(logger logr.Logger) (*Client, error) {

func (k *Client) Watch(ctx context.Context, labelMap map[string]string, fn func(collectors []string)) {
collectorMap := map[string]bool{}
log := k.log.WithValues("opentelemetry-targetallocator")
log := k.log.WithValues("component", "opentelemetry-targetallocator")

opts := metav1.ListOptions{
LabelSelector: labels.SelectorFromSet(labelMap).String(),
Expand Down Expand Up @@ -83,15 +78,15 @@ func (k *Client) Watch(ctx context.Context, labelMap map[string]string, fn func(
return
}
if msg := runWatch(ctx, k, watcher.ResultChan(), collectorMap, fn); msg != "" {
log.Info("Collector pod watch event stopped", msg)
log.Info("Collector pod watch event stopped " + msg)
return
}
}
}()
}

func runWatch(ctx context.Context, k *Client, c <-chan watch.Event, collectorMap map[string]bool, fn func(collectors []string)) string {
log := k.log.WithValues("opentelemetry-targetallocator")
log := k.log.WithValues("component", "opentelemetry-targetallocator")
for {
select {
case <-k.close:
Expand Down
66 changes: 61 additions & 5 deletions cmd/otel-allocator/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,31 +2,54 @@ package config

import (
"errors"
"flag"
"fmt"
"io/fs"
"io/ioutil"
"path/filepath"
"time"

"github.com/go-logr/logr"
promconfig "github.com/prometheus/prometheus/config"
_ "github.com/prometheus/prometheus/discovery/install"
"github.com/spf13/pflag"
"gopkg.in/yaml.v2"
"k8s.io/client-go/rest"
"k8s.io/client-go/tools/clientcmd"
"k8s.io/client-go/util/homedir"
"k8s.io/klog/v2"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/log/zap"
)

// ErrInvalidYAML represents an error in the format of the original YAML configuration file.
var (
ErrInvalidYAML = errors.New("couldn't parse the loadbalancer configuration")
)

const defaultConfigFile string = "/conf/targetallocator.yaml"
const DefaultResyncTime = 5 * time.Minute
const DefaultConfigFilePath string = "/conf/targetallocator.yaml"

type Config struct {
LabelSelector map[string]string `yaml:"label_selector,omitempty"`
Config *promconfig.Config `yaml:"config"`
}

func Load(file string) (Config, error) {
if file == "" {
file = defaultConfigFile
}
type PrometheusCRWatcherConfig struct {
Enabled *bool
}

type CLIConfig struct {
ListenAddr *string
ConfigFilePath *string
ClusterConfig *rest.Config
// KubeConfigFilePath empty if in cluster configuration is in use
KubeConfigFilePath string
RootLogger logr.Logger
PromCRWatcherConf PrometheusCRWatcherConfig
}

func Load(file string) (Config, error) {
var cfg Config
if err := unmarshal(&cfg, file); err != nil {
return Config{}, err
Expand All @@ -45,3 +68,36 @@ func unmarshal(cfg *Config, configFile string) error {
}
return nil
}

func ParseCLI() (CLIConfig, error) {
opts := zap.Options{}
opts.BindFlags(flag.CommandLine)
cLIConf := CLIConfig{
ListenAddr: pflag.String("listen-addr", ":8080", "The address where this service serves."),
ConfigFilePath: pflag.String("config-file", DefaultConfigFilePath, "The path to the config file."),
PromCRWatcherConf: PrometheusCRWatcherConfig{
Enabled: pflag.Bool("enable-prometheus-cr-watcher", false, "Enable Prometheus CRs as target sources"),
},
}
kubeconfigPath := pflag.String("kubeconfig-path", filepath.Join(homedir.HomeDir(), ".kube", "config"), "absolute path to the KubeconfigPath file")
pflag.Parse()

cLIConf.RootLogger = zap.New(zap.UseFlagOptions(&opts))
klog.SetLogger(cLIConf.RootLogger)
ctrl.SetLogger(cLIConf.RootLogger)

clusterConfig, err := clientcmd.BuildConfigFromFlags("", *kubeconfigPath)
cLIConf.KubeConfigFilePath = *kubeconfigPath
if err != nil {
if _, ok := err.(*fs.PathError); !ok {
return CLIConfig{}, err
}
clusterConfig, err = rest.InClusterConfig()
if err != nil {
return CLIConfig{}, err
}
cLIConf.KubeConfigFilePath = "" // reset as we use in cluster configuration
}
cLIConf.ClusterConfig = clusterConfig
return cLIConf, nil
}
Loading

0 comments on commit fbc1122

Please sign in to comment.