Skip to content

Commit

Permalink
Merge pull request #207 from czerwonk/feature/otel_tracing
Browse files Browse the repository at this point in the history
Add tracing using OpenTelemetry
  • Loading branch information
czerwonk authored Feb 10, 2023
2 parents 54e75a1 + 942328c commit c094c52
Show file tree
Hide file tree
Showing 43 changed files with 466 additions and 144 deletions.
21 changes: 20 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,39 @@ require (
github.com/prometheus/client_model v0.3.0
github.com/sirupsen/logrus v1.9.0
github.com/stretchr/testify v1.8.1
go.opentelemetry.io/otel v1.12.0
go.opentelemetry.io/otel/exporters/stdout/stdouttrace v1.12.0
go.opentelemetry.io/otel/sdk v1.12.0
go.opentelemetry.io/otel/trace v1.12.0
golang.org/x/crypto v0.3.0
gopkg.in/yaml.v2 v2.4.0
)

require (
github.com/cenkalti/backoff/v4 v4.2.0 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.7.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/internal/retry v1.12.0 // indirect
golang.org/x/net v0.4.0 // indirect
golang.org/x/text v0.5.0 // indirect
google.golang.org/genproto v0.0.0-20221118155620-16455021b5e6 // indirect
google.golang.org/grpc v1.52.0 // indirect
)

require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.1.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/go-logr/logr v1.2.3 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/common v0.37.0 // indirect
github.com/prometheus/procfs v0.8.0 // indirect
golang.org/x/sys v0.2.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.12.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.12.0
go.opentelemetry.io/proto/otlp v0.19.0 // indirect
golang.org/x/sys v0.5.0 // indirect
google.golang.org/protobuf v1.28.1 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
)
72 changes: 71 additions & 1 deletion go.sum

Large diffs are not rendered by default.

11 changes: 11 additions & 0 deletions internal/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ type Config struct {
Features FeatureConfig `yaml:"features,omitempty"`
LSEnabled bool `yaml:"logical_systems,omitempty"`
IfDescReg string `yaml:"interface_description_regex,omitempty"`
Tracing TracingConfig `yaml:"tracing"`
}

// DeviceConfig is the config representation of 1 device
Expand Down Expand Up @@ -67,6 +68,16 @@ type FeatureConfig struct {
VRRP bool `yaml:"vrrp,omitempty"`
}

// TracingConfig is the config representation of the tracing settings.
// It is read from the top-level "tracing" key of the config file.
type TracingConfig struct {
// Enabled is read from the "enabled" key.
// NOTE(review): presumably switches tracing on or off as a whole - confirm against the tracing setup code.
Enabled bool `yaml:"enabled"`
// Provider is read from the "provider" key; the test fixture uses the value "collector".
// NOTE(review): presumably selects the trace exporter - confirm the set of supported values.
Provider string `yaml:"provider"`
// Collector holds the settings read from the nested "collector" key.
Collector TracingCollectorConfig `yaml:"collector"`
}

// TracingCollectorConfig is the config representation of the tracing collector settings
type TracingCollectorConfig struct {
// GRPCAddress is read from the "grpc_address" key (e.g. "localhost:12345" in the test fixture).
// NOTE(review): presumably the host:port of the collector's gRPC endpoint - confirm.
GRPCAddress string `yaml:"grpc_address"`
}

// New creates a new config
func New() *Config {
c := &Config{
Expand Down
21 changes: 21 additions & 0 deletions internal/config/config_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -210,3 +210,24 @@ func TestFindDeviceConfig(t *testing.T) {
t.Fatal("Unexpected device for switch-oob")
}
}

// TestTracingConfig loads tests/config6.yml and checks that its tracing
// section ends up in Config.Tracing with the expected values.
func TestTracingConfig(t *testing.T) {
	raw, err := os.ReadFile("tests/config6.yml")
	if err != nil {
		t.Fatal(err)
	}

	cfg, err := Load(bytes.NewReader(raw))
	if err != nil {
		t.Fatal(err)
	}

	// Build the expected value field by field, starting from the zero value.
	var want TracingConfig
	want.Enabled = true
	want.Provider = "collector"
	want.Collector.GRPCAddress = "localhost:12345"

	assert.Equal(t, want, cfg.Tracing)
}
9 changes: 9 additions & 0 deletions internal/config/tests/config6.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
devices:
- host: router1
username: router

tracing:
enabled: true
provider: collector
collector:
grpc_address: "localhost:12345"
9 changes: 9 additions & 0 deletions internal/config/tests/config7.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
devices:
- host: router1
username: router

tracing:
enabled: true
provider: collector
collector:
grpc_address: "localhost:12345"
50 changes: 40 additions & 10 deletions junos_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
package main

import (
"context"
"regexp"
"sync"
"time"
Expand All @@ -12,6 +13,9 @@ import (
"github.com/czerwonk/junos_exporter/pkg/rpc"
"github.com/prometheus/client_golang/prometheus"
log "github.com/sirupsen/logrus"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/codes"
"go.opentelemetry.io/otel/trace"
)

const prefix = "junos_"
Expand All @@ -34,9 +38,10 @@ type junosCollector struct {
devices []*connector.Device
clients map[*connector.Device]*rpc.Client
collectors *collectors
ctx context.Context
}

func newJunosCollector(devices []*connector.Device, connectionManager *connector.SSHConnectionManager, logicalSystem string) *junosCollector {
func newJunosCollector(ctx context.Context, devices []*connector.Device, connectionManager *connector.SSHConnectionManager, logicalSystem string) *junosCollector {
l := interfacelabels.NewDynamicLabels()

clients := make(map[*connector.Device]*rpc.Client)
Expand All @@ -49,10 +54,14 @@ func newJunosCollector(devices []*connector.Device, connectionManager *connector
}

clients[d] = cl
cta := &clientTracingAdapter{
cl: cl,
ctx: ctx,
}

if *dynamicIfaceLabels {
regex := deviceInterfaceRegex(d.Host)
err = l.CollectDescriptions(d, cl, regex)
err = l.CollectDescriptions(d, cta, regex)
if err != nil {
log.Errorf("Could not get interface descriptions %s: %s", d, err)
continue
Expand All @@ -64,6 +73,7 @@ func newJunosCollector(devices []*connector.Device, connectionManager *connector
devices: devices,
collectors: collectorsForDevices(devices, cfg, logicalSystem, l),
clients: clients,
ctx: ctx,
}
}

Expand Down Expand Up @@ -97,16 +107,16 @@ func clientForDevice(device *connector.Device, connManager *connector.SSHConnect
return nil, err
}

c := rpc.NewClient(conn)

opts := []rpc.ClientOption{}
if *debug {
c.EnableDebug()
opts = append(opts, rpc.WithDebug())
}

if cfg.Features.Satellite {
c.EnableSatellite()
opts = append(opts, rpc.WithSatellite())
}

c := rpc.NewClient(conn, opts...)
return c, nil
}

Expand All @@ -123,27 +133,35 @@ func (c *junosCollector) Describe(ch chan<- *prometheus.Desc) {

// Collect implements prometheus.Collector interface
func (c *junosCollector) Collect(ch chan<- prometheus.Metric) {
ctx, span := tracer.Start(c.ctx, "Collect")
defer span.End()

wg := &sync.WaitGroup{}

wg.Add(len(c.devices))
for _, d := range c.devices {
go c.collectForHost(d, ch, wg)
go c.collectForHost(ctx, d, ch, wg)
}

wg.Wait()
}

func (c *junosCollector) collectForHost(device *connector.Device, ch chan<- prometheus.Metric, wg *sync.WaitGroup) {
func (c *junosCollector) collectForHost(ctx context.Context, device *connector.Device, ch chan<- prometheus.Metric, wg *sync.WaitGroup) {
defer wg.Done()

ctx, span := tracer.Start(ctx, "CollectForHost", trace.WithAttributes(
attribute.String("host", device.Host),
))
defer span.End()

l := []string{device.Host}

t := time.Now()
defer func() {
ch <- prometheus.MustNewConstMetric(scrapeDurationDesc, prometheus.GaugeValue, time.Since(t).Seconds(), l...)
}()

rpc, found := c.clients[device]
cl, found := c.clients[device]
if !found {
ch <- prometheus.MustNewConstMetric(upDesc, prometheus.GaugeValue, 0, l...)
return
Expand All @@ -152,13 +170,25 @@ func (c *junosCollector) collectForHost(device *connector.Device, ch chan<- prom
ch <- prometheus.MustNewConstMetric(upDesc, prometheus.GaugeValue, 1, l...)

for _, col := range c.collectors.collectorsForDevice(device) {
ctx, sp := tracer.Start(ctx, "CollectForHostWithCollector", trace.WithAttributes(
attribute.String("collector", col.Name()),
))

cta := &clientTracingAdapter{
cl: cl,
ctx: ctx,
}

ct := time.Now()
err := col.Collect(rpc, ch, l)
err := col.Collect(cta, ch, l)

if err != nil && err.Error() != "EOF" {
sp.RecordError(err)
sp.SetStatus(codes.Error, err.Error())
log.Errorln(col.Name() + ": " + err.Error())
}

ch <- prometheus.MustNewConstMetric(scrapeCollectorDurationDesc, prometheus.GaugeValue, time.Since(ct).Seconds(), append(l, col.Name())...)
sp.End()
}
}
24 changes: 21 additions & 3 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ package main

import (
"bytes"
"context"
"flag"
"fmt"
"net/http"
Expand All @@ -15,14 +16,15 @@ import (
"time"

"github.com/czerwonk/junos_exporter/pkg/connector"
"go.opentelemetry.io/otel/codes"

"github.com/czerwonk/junos_exporter/internal/config"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
log "github.com/sirupsen/logrus"
)

const version string = "0.10.2"
const version string = "0.11.0"

var (
showVersion = flag.Bool("version", false, "Print version information.")
Expand Down Expand Up @@ -103,6 +105,15 @@ func main() {
log.Fatalf("could not initialize exporter. %v", err)
}

ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt)
defer cancel()

shutdownTracing, err := initTracing(ctx)
if err != nil {
log.Fatalf("could not initialize tracing: %v", err)
}
defer shutdownTracing()

initChannels()

startServer()
Expand Down Expand Up @@ -244,7 +255,7 @@ func connectionManager() *connector.SSHConnectionManager {
}

func startServer() {
log.Infof("Starting JunOS exporter (Version: %s)\n", version)
log.Infof("Starting JunOS exporter (Version: %s)", version)
http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
w.Write([]byte(`<html>
<head><title>JunOS Exporter (Version ` + version + `)</title></head>
Expand Down Expand Up @@ -286,21 +297,28 @@ func handleMetricsRequest(w http.ResponseWriter, r *http.Request) {
configMu.RLock()
defer configMu.RUnlock()

ctx, span := tracer.Start(context.Background(), "HandleMetricsRequest")
defer span.End()

reg := prometheus.NewRegistry()

devs, err := devicesForRequest(r)
if err != nil {
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
http.Error(w, err.Error(), 400)
return
}

logicalSystem := r.URL.Query().Get("ls")
if !cfg.LSEnabled && logicalSystem != "" {
span.RecordError(err)
span.SetStatus(codes.Error, err.Error())
http.Error(w, fmt.Sprintf("Logical systems not enabled but the logical system '%s' in parameters", logicalSystem), 400)
return
}

c := newJunosCollector(devs, connManager, logicalSystem)
c := newJunosCollector(ctx, devs, connManager, logicalSystem)
reg.MustRegister(c)

l := log.New()
Expand Down
23 changes: 21 additions & 2 deletions pkg/collector/rpc_collector.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,30 @@
package collector

import (
"github.com/czerwonk/junos_exporter/pkg/rpc"
"context"

"github.com/czerwonk/junos_exporter/pkg/connector"
"github.com/czerwonk/junos_exporter/pkg/rpc"
"github.com/prometheus/client_golang/prometheus"
)

// Client is the client interface handed to RPCCollector.Collect. It exposes
// command execution, device information and the context of the client.
type Client interface {
// RunCommandAndParse runs a command on JunOS and unmarshals the XML result
RunCommandAndParse(cmd string, obj interface{}) error

// RunCommandAndParseWithParser runs a command on JunOS and unmarshals the XML result using the specified parser function
RunCommandAndParseWithParser(cmd string, parser rpc.Parser) error

// IsSatelliteEnabled reports whether satellite features are enabled on the device
IsSatelliteEnabled() bool

// Device returns device information for the connected device
Device() *connector.Device

// Context returns the context the client is running in
Context() context.Context
}

// RPCCollector collects metrics from JunOS using rpc.Client
type RPCCollector interface {
// Name returns an human readable name for logging and debugging purposes
Expand All @@ -17,5 +36,5 @@ type RPCCollector interface {
Describe(ch chan<- *prometheus.Desc)

// Collect collects metrics from JunOS
Collect(client *rpc.Client, ch chan<- prometheus.Metric, labelValues []string) error
Collect(client Client, ch chan<- prometheus.Metric, labelValues []string) error
}
Loading

0 comments on commit c094c52

Please sign in to comment.