feat: PC-13045 Good total single query experimental support (#458)

## Motivation

Enables gathering good and total points using a single query.
Currently supported for Splunk only.

Sample YAML:
```
---
apiVersion: n9/v1alpha
kind: SLO
metadata:
  name: splunk-counts-calendar
  project: splunk
spec:
  service: splunk-service
  indicator:
    metricSource:
      kind: Agent
      name: splunk
      project: splunk
  timeWindows:
    - unit: Day
      count: 1
      calendar:
        startTime: 2021-04-09 00:00:00
        timeZone: Europe/Warsaw
  budgetingMethod: Occurrences
  objectives:
    - displayName: So so
      target: 0.80
      name: objective-1
      countMetrics:
        incremental: false
        goodTotal:
          splunk:
            query: |-
              | mstats avg("spl.intr.resource_usage.IOWait.data.avg_cpu_pct") as n9good WHERE index="_metrics" span=15s 
              | join type=left _time [
              | mstats avg("spl.intr.resource_usage.IOWait.data.max_cpus_pct") as n9total WHERE index="_metrics" span=15s
              ]
              | rename _time as n9time
              | fields n9time n9good n9total
```
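
For reference, each datapoint returned by such a query carries three required aliases: `n9time` (the timestamp), `n9good` (the good count), and `n9total` (the total count). A hypothetical Go sketch of that row shape (illustrative only, not part of this change):

```go
package main

import (
	"fmt"
	"time"
)

// goodTotalPoint models one row of the combined query result;
// the field tags mirror the aliases the validation requires.
type goodTotalPoint struct {
	N9Time  time.Time `json:"n9time"`
	N9Good  float64   `json:"n9good"`
	N9Total float64   `json:"n9total"`
}

func main() {
	p := goodTotalPoint{N9Time: time.Now(), N9Good: 98, N9Total: 100}
	fmt.Printf("good/total at %s: %.2f\n", p.N9Time.Format(time.RFC3339), p.N9Good/p.N9Total)
}
```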

## Summary

- Added a new `goodTotal` field to the count metrics spec (see the sketch below).
- Added validation for the Splunk query, requiring the new field names `n9time`, `n9good`, and `n9total`.
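
A minimal sketch of populating and serializing the new field, assuming the exported `CountMetricsSpec`, `MetricSpec`, and `SplunkMetric` types shown in this diff (the `splunk` JSON tag on `MetricSpec` is an assumption, and the import path mirrors the one used in these files):

```go
package main

import (
	"encoding/json"
	"fmt"

	"github.com/nobl9/nobl9-go/manifest/v1alpha/slo"
)

func main() {
	query := `| mstats avg("avg_cpu_pct") as n9good WHERE index="_metrics" span=15s
| join type=left _time [
| mstats avg("max_cpus_pct") as n9total WHERE index="_metrics" span=15s
]
| rename _time as n9time
| fields n9time n9good n9total`
	incremental := false

	// goodTotal replaces the separate good and total specs; setting it
	// together with good, bad, or total fails the mutual-exclusion rule.
	countMetrics := slo.CountMetricsSpec{
		Incremental:     &incremental,
		GoodTotalMetric: &slo.MetricSpec{Splunk: &slo.SplunkMetric{Query: &query}},
	}
	out, err := json.MarshalIndent(countMetrics, "", "  ")
	if err != nil {
		panic(err)
	}
	fmt.Println(string(out)) // {"incremental": false, "goodTotal": {"splunk": {...}}}
}
```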

## Testing

- Unit tests
- Manual tests planned once the sloctl and platform changes land

No release notes, as this feature is still in the experimental stage.

marcinlawnik authored Jul 11, 2024
1 parent 1a7c326 commit 77ff3d4
Showing 9 changed files with 366 additions and 8 deletions.
1 change: 1 addition & 0 deletions cspell.yaml
@@ -100,6 +100,7 @@ words:
- mockgen
- mprofile
- msgf
- mstats
- msteams
- ningxia
- nobl
11 changes: 10 additions & 1 deletion manifest/v1alpha/slo/metrics.go
@@ -11,7 +11,9 @@ type CountMetricsSpec struct {
Incremental *bool `json:"incremental"`
GoodMetric *MetricSpec `json:"good,omitempty"`
BadMetric *MetricSpec `json:"bad,omitempty"`
- TotalMetric *MetricSpec `json:"total"`
+ TotalMetric *MetricSpec `json:"total,omitempty"`
+ // Experimental: Splunk only, a single query returning both good and total counts.
+ GoodTotalMetric *MetricSpec `json:"goodTotal,omitempty"`
}

// RawMetricSpec represents integration with a metric source for a particular objective.
@@ -143,6 +145,9 @@ func (s *Spec) CountMetricsCount() int {
if objective.CountMetrics.BadMetric != nil {
count++
}
if objective.CountMetrics.GoodTotalMetric != nil {
count++
}
}
}
return count
@@ -168,6 +173,10 @@ func (s *Spec) CountMetrics() []*MetricSpec {
countMetrics[i] = objective.CountMetrics.BadMetric
i++
}
if objective.CountMetrics.GoodTotalMetric != nil {
countMetrics[i] = objective.CountMetrics.GoodTotalMetric
i++
}
}
return countMetrics
}
12 changes: 12 additions & 0 deletions manifest/v1alpha/slo/metrics_bigquery_test.go
@@ -14,6 +14,18 @@ func TestBigQuery_CountMetrics(t *testing.T) {
err := validate(slo)
testutils.AssertNoError(t, slo, err)
})
t.Run("unsupported goodTotal single query", func(t *testing.T) {
slo := validCountMetricSLO(v1alpha.BigQuery)
slo.Spec.Objectives[0].CountMetrics = &CountMetricsSpec{
Incremental: ptr(false),
GoodTotalMetric: validMetricSpec(v1alpha.BigQuery),
}
err := validate(slo)
testutils.AssertContainsErrors(t, slo, err, 1, testutils.ExpectedError{
Prop: "spec.objectives[0].countMetrics.goodTotal",
Code: joinErrorCodes(errCodeSingleQueryGoodOverTotalDisabled, validation.ErrorCodeOneOf),
})
})
t.Run("projectId must be the same for good and total", func(t *testing.T) {
slo := validCountMetricSLO(v1alpha.BigQuery)
slo.Spec.Objectives[0].CountMetrics.TotalMetric.BigQuery.ProjectID = "1"
33 changes: 33 additions & 0 deletions manifest/v1alpha/slo/metrics_splunk.go
@@ -3,14 +3,33 @@ package slo
import (
"regexp"

"github.com/pkg/errors"

"github.com/nobl9/nobl9-go/internal/validation"
"github.com/nobl9/nobl9-go/manifest/v1alpha"
)

// SplunkMetric represents metric from Splunk
type SplunkMetric struct {
Query *string `json:"query"`
}

var splunkCountMetricsLevelValidation = validation.New[CountMetricsSpec](
validation.For(validation.GetSelf[CountMetricsSpec]()).
Rules(
validation.NewSingleRule(func(c CountMetricsSpec) error {
if c.GoodTotalMetric != nil {
if c.GoodMetric != nil || c.BadMetric != nil || c.TotalMetric != nil {
return errors.New("goodTotal is mutually exclusive with good, bad, and total")
}
}
return nil
}).WithErrorCode(validation.ErrorCodeMutuallyExclusive)),
).When(
whenCountMetricsIs(v1alpha.Splunk),
validation.WhenDescription("countMetrics is splunk"),
)

var splunkValidation = validation.New[SplunkMetric](
validation.ForPointer(func(s SplunkMetric) *string { return s.Query }).
WithName("query").
@@ -24,3 +43,17 @@ var splunkValidation = validation.New[SplunkMetric](
"index=svc-events", `"index"=svc-events`).
WithDetails(`query has to contain index=<NAME> or "index"=<NAME>`)),
)

var splunkSingleQueryValidation = validation.New[SplunkMetric](
validation.ForPointer(func(s SplunkMetric) *string { return s.Query }).
WithName("query").
Required().
Cascade(validation.CascadeModeStop).
Rules(validation.StringNotEmpty()).
Rules(
validation.StringContains("n9time", "n9good", "n9total"),
validation.StringMatchRegexp(
regexp.MustCompile(`(\bindex\s*=.+)|("\bindex"\s*=.+)`),
"index=svc-events", `"index"=svc-events`).
WithDetails(`query has to contain index=<NAME> or "index"=<NAME>`)),
)
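
For illustration, a standalone approximation of what `splunkSingleQueryValidation` enforces, written against the standard library only (this sketch is not the validation package's API):

```go
package main

import (
	"fmt"
	"regexp"
	"strings"
)

// indexPattern mirrors the regexp used by splunkSingleQueryValidation above.
var indexPattern = regexp.MustCompile(`(\bindex\s*=.+)|("\bindex"\s*=.+)`)

// checkSingleQuery reports the rule violations a single-query Splunk
// metric would trigger: empty query, missing n9 field aliases, or a
// missing index=<NAME> clause.
func checkSingleQuery(query string) []string {
	if strings.TrimSpace(query) == "" {
		return []string{"query must not be empty"}
	}
	var problems []string
	for _, alias := range []string{"n9time", "n9good", "n9total"} {
		if !strings.Contains(query, alias) {
			problems = append(problems, "query must contain "+alias)
		}
	}
	if !indexPattern.MatchString(query) {
		problems = append(problems, `query must contain index=<NAME> or "index"=<NAME>`)
	}
	return problems
}

func main() {
	valid := `| mstats avg("avg_cpu_pct") as n9good WHERE index="_metrics" span=15s
| join type=left _time [
| mstats avg("max_cpus_pct") as n9total WHERE index="_metrics" span=15s
]
| rename _time as n9time
| fields n9time n9good n9total`
	fmt.Println(checkSingleQuery(valid))                  // []
	fmt.Println(checkSingleQuery("| fields n9time only")) // missing aliases and index
}
```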
113 changes: 113 additions & 0 deletions manifest/v1alpha/slo/metrics_splunk_test.go
@@ -77,3 +77,116 @@ fields n9time n9value`,
}
})
}

func TestSplunk_CountMetrics_SingleQuery(t *testing.T) {
t.Run("passes", func(t *testing.T) {
slo := validSingleQueryGoodOverTotalCountMetricSLO(v1alpha.Splunk)
err := validate(slo)
testutils.AssertNoError(t, slo, err)
})
t.Run("required", func(t *testing.T) {
slo := validSingleQueryGoodOverTotalCountMetricSLO(v1alpha.Splunk)
slo.Spec.Objectives[0].CountMetrics.GoodTotalMetric.Splunk.Query = nil
err := validate(slo)
testutils.AssertContainsErrors(t, slo, err, 1, testutils.ExpectedError{
Prop: "spec.objectives[0].countMetrics.goodTotal.splunk.query",
Code: validation.ErrorCodeRequired,
})
})
t.Run("empty", func(t *testing.T) {
slo := validSingleQueryGoodOverTotalCountMetricSLO(v1alpha.Splunk)
slo.Spec.Objectives[0].CountMetrics.GoodTotalMetric.Splunk.Query = ptr("")
err := validate(slo)
testutils.AssertContainsErrors(t, slo, err, 1, testutils.ExpectedError{
Prop: "spec.objectives[0].countMetrics.goodTotal.splunk.query",
Code: validation.ErrorCodeStringNotEmpty,
})
})
t.Run("goodTotal mixed with total", func(t *testing.T) {
slo := validSingleQueryGoodOverTotalCountMetricSLO(v1alpha.Splunk)
slo.Spec.Objectives[0].CountMetrics.TotalMetric = validMetricSpec(v1alpha.Splunk)
err := validate(slo)
testutils.AssertContainsErrors(t, slo, err, 1, testutils.ExpectedError{
Prop: "spec.objectives[0].countMetrics",
Code: validation.ErrorCodeMutuallyExclusive,
})
})
t.Run("goodTotal mixed with good", func(t *testing.T) {
slo := validSingleQueryGoodOverTotalCountMetricSLO(v1alpha.Splunk)
slo.Spec.Objectives[0].CountMetrics.GoodMetric = validMetricSpec(v1alpha.Splunk)
err := validate(slo)
testutils.AssertContainsErrors(t, slo, err, 1, testutils.ExpectedError{
Prop: "spec.objectives[0].countMetrics",
Code: validation.ErrorCodeMutuallyExclusive,
})
})
t.Run("goodTotal mixed with bad", func(t *testing.T) {
slo := validSingleQueryGoodOverTotalCountMetricSLO(v1alpha.Splunk)
slo.Spec.Objectives[0].CountMetrics.BadMetric = validMetricSpec(v1alpha.Splunk)
err := validate(slo)
testutils.AssertContainsErrors(t, slo, err, 2, testutils.ExpectedError{
Prop: "spec.objectives[0].countMetrics.bad",
Code: joinErrorCodes(errCodeBadOverTotalDisabled, validation.ErrorCodeOneOf),
}, testutils.ExpectedError{
Prop: "spec.objectives[0].countMetrics",
Code: validation.ErrorCodeMutuallyExclusive,
})
})
t.Run("invalid query", func(t *testing.T) {
tests := map[string]struct {
Query string
ExpectedCode string
}{
"missing n9time": {
Query: `
| mstats avg("spl.intr.resource_usage.IOWait.data.avg_cpu_pct") as n9good WHERE index="_metrics" span=15s
| join type=left _time [
| mstats avg("spl.intr.resource_usage.IOWait.data.max_cpus_pct") as n9total WHERE index="_metrics" span=15s
]
| fields _time n9good n9total`,
ExpectedCode: validation.ErrorCodeStringContains,
},
"missing n9good": {
Query: `
| mstats avg("spl.intr.resource_usage.IOWait.data.avg_cpu_pct") as good WHERE index="_metrics" span=15s
| join type=left _time [
| mstats avg("spl.intr.resource_usage.IOWait.data.max_cpus_pct") as n9total WHERE index="_metrics" span=15s
]
| rename _time as n9time
| fields n9time good n9total`,
ExpectedCode: validation.ErrorCodeStringContains,
},
"missing n9total": {
Query: `
| mstats avg("spl.intr.resource_usage.IOWait.data.avg_cpu_pct") as n9good WHERE index="_metrics" span=15s
| join type=left _time [
| mstats avg("spl.intr.resource_usage.IOWait.data.max_cpus_pct") as total WHERE index="_metrics" span=15s
]
| rename _time as n9time
| fields n9time n9good total`,
ExpectedCode: validation.ErrorCodeStringContains,
},
"missing index": {
Query: `
| mstats avg("spl.intr.resource_usage.IOWait.data.avg_cpu_pct") as n9good span=15s
| join type=left _time [
| mstats avg("spl.intr.resource_usage.IOWait.data.max_cpus_pct") as n9total span=15s
]
| rename _time as n9time
| fields n9time n9good n9total`,
ExpectedCode: validation.ErrorCodeStringMatchRegexp,
},
}
for name, test := range tests {
t.Run(name, func(t *testing.T) {
slo := validSingleQueryGoodOverTotalCountMetricSLO(v1alpha.Splunk)
slo.Spec.Objectives[0].CountMetrics.GoodTotalMetric.Splunk.Query = ptr(test.Query)
err := validate(slo)
testutils.AssertContainsErrors(t, slo, err, 1, testutils.ExpectedError{
Prop: "spec.objectives[0].countMetrics.goodTotal.splunk.query",
Code: test.ExpectedCode,
})
})
}
})
}
28 changes: 28 additions & 0 deletions manifest/v1alpha/slo/metrics_test.go
@@ -1,8 +1,12 @@
package slo

import (
"slices"
"testing"

"github.com/nobl9/nobl9-go/internal/testutils"
"github.com/nobl9/nobl9-go/internal/validation"

"github.com/stretchr/testify/assert"

"github.com/nobl9/nobl9-go/manifest/v1alpha"
@@ -21,3 +25,27 @@ func TestQuery(t *testing.T) {
assert.NotEmpty(t, spec)
}
}

func Test_SingleQueryDisabled(t *testing.T) {
skippedDataSources := []v1alpha.DataSourceType{
v1alpha.ThousandEyes, // query is forbidden for this plugin
}
for _, src := range v1alpha.DataSourceTypeValues() {
if slices.Contains(singleQueryGoodOverTotalEnabledSources, src) {
continue
}
if slices.Contains(skippedDataSources, src) {
continue
}
slo := validCountMetricSLO(src)
slo.Spec.Objectives[0].CountMetrics = &CountMetricsSpec{
Incremental: ptr(false),
GoodTotalMetric: validMetricSpec(src),
}
err := validate(slo)
testutils.AssertContainsErrors(t, slo, err, 1, testutils.ExpectedError{
Prop: "spec.objectives[0].countMetrics.goodTotal",
Code: joinErrorCodes(errCodeSingleQueryGoodOverTotalDisabled, validation.ErrorCodeOneOf),
})
}
}
44 changes: 37 additions & 7 deletions manifest/v1alpha/slo/metrics_validation.go
@@ -11,11 +11,12 @@ import (
)

const (
- errCodeExactlyOneMetricType = "exactly_one_metric_type"
- errCodeBadOverTotalDisabled = "bad_over_total_disabled"
- errCodeExactlyOneMetricSpecType = "exactly_one_metric_spec_type"
- errCodeEitherBadOrGoodCountMetric = "either_bad_or_good_count_metric"
- errCodeTimeSliceTarget = "time_slice_target"
+ errCodeExactlyOneMetricType = "exactly_one_metric_type"
+ errCodeBadOverTotalDisabled = "bad_over_total_disabled"
+ errCodeSingleQueryGoodOverTotalDisabled = "single_query_good_over_total_disabled"
+ errCodeExactlyOneMetricSpecType = "exactly_one_metric_spec_type"
+ errCodeEitherBadOrGoodCountMetric = "either_bad_or_good_count_metric"
+ errCodeTimeSliceTarget = "time_slice_target"
)

var specMetricsValidation = validation.New[Spec](
@@ -61,13 +62,13 @@ var countMetricsSpecValidation = validation.New[CountMetricsSpec](
sumoLogicCountMetricsLevelValidation,
instanaCountMetricsLevelValidation,
redshiftCountMetricsLevelValidation,
- bigQueryCountMetricsLevelValidation),
+ bigQueryCountMetricsLevelValidation,
+ splunkCountMetricsLevelValidation),
validation.ForPointer(func(c CountMetricsSpec) *bool { return c.Incremental }).
WithName("incremental").
Required(),
validation.ForPointer(func(c CountMetricsSpec) *MetricSpec { return c.TotalMetric }).
WithName("total").
- Required().
Include(
metricSpecValidation,
countMetricsValidation,
@@ -84,6 +85,12 @@ var countMetricsSpecValidation = validation.New[CountMetricsSpec](
Include(
countMetricsValidation,
metricSpecValidation),
validation.ForPointer(func(c CountMetricsSpec) *MetricSpec { return c.GoodTotalMetric }).
WithName("goodTotal").
Rules(oneOfSingleQueryGoodOverTotalValidationRule).
Include(
countMetricsValidation,
singleQueryMetricSpecValidation),
)

var rawMetricsValidation = validation.New[RawMetricSpec](
@@ -106,6 +113,12 @@ var countMetricsValidation = validation.New[MetricSpec](
instanaCountMetricsValidation),
)

var singleQueryMetricSpecValidation = validation.New[MetricSpec](
validation.ForPointer(func(m MetricSpec) *SplunkMetric { return m.Splunk }).
WithName("splunk").
Include(splunkSingleQueryValidation),
)

var metricSpecValidation = validation.New[MetricSpec](
validation.ForPointer(func(m MetricSpec) *AppDynamicsMetric { return m.AppDynamics }).
WithName("appDynamics").
@@ -200,6 +213,17 @@ var oneOfBadOverTotalValidationRule = validation.NewSingleRule(func(v MetricSpec
return validation.OneOf(badOverTotalEnabledSources...).Validate(v.DataSourceType())
}).WithErrorCode(errCodeBadOverTotalDisabled)

var singleQueryGoodOverTotalEnabledSources = []v1alpha.DataSourceType{
v1alpha.Splunk,
}

// Support for single query good/total metrics is experimental.
// Splunk is the only datasource integration to have this feature
// - extend the list while adding support for next integrations.
var oneOfSingleQueryGoodOverTotalValidationRule = validation.NewSingleRule(func(v MetricSpec) error {
return validation.OneOf(singleQueryGoodOverTotalEnabledSources...).Validate(v.DataSourceType())
}).WithErrorCode(errCodeSingleQueryGoodOverTotalDisabled)

var exactlyOneMetricSpecTypeValidationRule = validation.NewSingleRule(func(v Spec) error {
if v.Indicator == nil {
return nil
@@ -401,6 +425,12 @@ var timeSliceTargetsValidationRule = validation.NewSingleRule[Spec](func(s Spec)
// the count metrics is of the given type.
func whenCountMetricsIs(typ v1alpha.DataSourceType) func(c CountMetricsSpec) bool {
return func(c CountMetricsSpec) bool {
if slices.Contains(singleQueryGoodOverTotalEnabledSources, typ) {
if c.GoodTotalMetric != nil && typ != c.GoodTotalMetric.DataSourceType() {
return false
}
return c.GoodMetric != nil || c.BadMetric != nil || c.TotalMetric != nil
}
if c.TotalMetric == nil {
return false
}