Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

store: label_values: fetch less postings #7814

Merged
merged 6 commits into from
Oct 17, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
- [#7658](https://github.com/thanos-io/thanos/pull/7658) Store: Fix panic because too small buffer in pool.
- [#7643](https://github.com/thanos-io/thanos/pull/7643) Receive: fix thanos_receive_write_{timeseries,samples} stats
- [#7644](https://github.com/thanos-io/thanos/pull/7644) fix(ui): add null check to find overlapping blocks logic
- [#7814](https://github.com/thanos-io/thanos/pull/7814) Store: label_values: fetch less postings.
xBazilio marked this conversation as resolved.
Show resolved Hide resolved
- [#7679](https://github.com/thanos-io/thanos/pull/7679) Query: respect store.limit.* flags when evaluating queries

### Added
Expand Down
40 changes: 40 additions & 0 deletions pkg/store/acceptance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -740,6 +740,46 @@ func testStoreAPIsAcceptance(t *testing.T, startStore startStoreFn) {
},
},
},
{
desc: "label_values(kube_pod_info{}, pod) don't fetch postings for pod!=''",
appendFn: func(app storage.Appender) {
_, err := app.Append(0, labels.FromStrings("__name__", "up", "pod", "pod-1"), timestamp.FromTime(now), 1)
testutil.Ok(t, err)
_, err = app.Append(0, labels.FromStrings("__name__", "up", "pod", "pod-2"), timestamp.FromTime(now), 1)
testutil.Ok(t, err)
_, err = app.Append(0, labels.FromStrings("__name__", "kube_pod_info", "pod", "pod-1"), timestamp.FromTime(now), 1)
testutil.Ok(t, err)
testutil.Ok(t, app.Commit())
},
labelNameCalls: []labelNameCallCase{
{
start: timestamp.FromTime(minTime),
end: timestamp.FromTime(maxTime),
expectedNames: []string{"__name__", "pod", "region"},
matchers: []storepb.LabelMatcher{{Type: storepb.LabelMatcher_EQ, Name: "__name__", Value: "kube_pod_info"}},
},
{
start: timestamp.FromTime(minTime),
end: timestamp.FromTime(maxTime),
expectedNames: []string{"__name__", "pod", "region"},
},
},
labelValuesCalls: []labelValuesCallCase{
{
start: timestamp.FromTime(minTime),
end: timestamp.FromTime(maxTime),
label: "pod",
expectedValues: []string{"pod-1"},
matchers: []storepb.LabelMatcher{{Type: storepb.LabelMatcher_EQ, Name: "__name__", Value: "kube_pod_info"}},
},
{
start: timestamp.FromTime(minTime),
end: timestamp.FromTime(maxTime),
label: "pod",
expectedValues: []string{"pod-1", "pod-2"},
},
},
},
} {
t.Run(tc.desc, func(t *testing.T) {
appendFn := tc.appendFn
Expand Down
30 changes: 27 additions & 3 deletions pkg/store/bucket.go
Original file line number Diff line number Diff line change
Expand Up @@ -1992,6 +1992,14 @@ func (s *BucketStore) LabelValues(ctx context.Context, req *storepb.LabelValuesR

resHints := &hintspb.LabelValuesResponseHints{}

var hasMetricNameMatcher = false
for _, m := range reqSeriesMatchers {
if m.Name == labels.MetricName {
hasMetricNameMatcher = true
break
}
}

g, gctx := errgroup.WithContext(ctx)

var reqBlockMatchers []*labels.Matcher
Expand All @@ -2015,6 +2023,7 @@ func (s *BucketStore) LabelValues(ctx context.Context, req *storepb.LabelValuesR
var seriesLimiter = s.seriesLimiterFactory(s.metrics.queriesDropped.WithLabelValues("series", tenant))
var bytesLimiter = s.bytesLimiterFactory(s.metrics.queriesDropped.WithLabelValues("bytes", tenant))
var logger = s.requestLoggerFunc(ctx, s.logger)
var stats = &queryStats{}

for _, b := range s.blocks {
b := b
Expand All @@ -2033,7 +2042,8 @@ func (s *BucketStore) LabelValues(ctx context.Context, req *storepb.LabelValuesR

// If we have series matchers and the Label is not an external one, add <labelName> != "" matcher
// to only select series that have given label name.
if len(reqSeriesMatchersNoExtLabels) > 0 && !b.extLset.Has(req.Label) {
// We don't need such matcher if matchers already contain __name__ matcher.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Trying to understand this comment. Why it is specific to the metric name label?
It is just a tradeoff of fetching more postings or series.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From my experience, if __name__ is specified, it means, user knows, that the metric contains requested label. The method will select all series with such __name__ and they 99% will have the requested label.
As for random queries, where there can be results with the __name__ but without specified labels, normally, they should be rare. Users still can make queries like label_values({}, pod). They will work, but will fetch alot of data.
So the point is, if the user knows what they need and specifies __name__, we don't need to save them from fetching some extra series. But we do save them from fetching all references to all kube_state_metrics from object storage for example.
Other popular labels may be service, application, job, instance. If __name__ is specified, it is guaranteed it'll be less data, but all the label values.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not against this. Just hope we have a better way to cover more labels because we have those information from posting cardinality and series size.

if !hasMetricNameMatcher && len(reqSeriesMatchersNoExtLabels) > 0 && !b.extLset.Has(req.Label) {
m, err := labels.NewMatcher(labels.MatchNotEqual, req.Label, "")
if err != nil {
return nil, status.Error(codes.InvalidArgument, err.Error())
Expand Down Expand Up @@ -2101,7 +2111,12 @@ func (s *BucketStore) LabelValues(ctx context.Context, req *storepb.LabelValuesR
s.metrics.lazyExpandedPostingSeriesOverfetchedSizeBytes,
tenant,
)
defer blockClient.Close()
defer func() {
mtx.Lock()
stats = blockClient.MergeStats(stats)
mtx.Unlock()
blockClient.Close()
}()

if err := blockClient.ExpandPostings(
sortedReqSeriesMatchersNoExtLabels,
Expand Down Expand Up @@ -2130,7 +2145,7 @@ func (s *BucketStore) LabelValues(ctx context.Context, req *storepb.LabelValuesR
}

val := labelpb.ZLabelsToPromLabels(ls.GetSeries().Labels).Get(req.Label)
if val != "" { // Should never be empty since we added labelName!="" matcher to the list of matchers.
if val != "" {
values[val] = struct{}{}
}
}
Expand All @@ -2154,6 +2169,15 @@ func (s *BucketStore) LabelValues(ctx context.Context, req *storepb.LabelValuesR

s.mtx.RUnlock()

defer func() {
if s.debugLogging {
level.Debug(logger).Log("msg", "stats query processed",
"request", req,
"tenant", tenant,
"stats", fmt.Sprintf("%+v", stats), "err", err)
}
}()

if err := g.Wait(); err != nil {
code := codes.Internal
if s, ok := status.FromError(errors.Cause(err)); ok {
Expand Down
Loading