Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add sanity check at startup to ensure the configured filesystem directories don't overlap for different components #2828

Merged
merged 5 commits into from
Aug 26, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
* [ENHANCEMENT] Distributor: Add `cortex_distributor_received_requests_total` and `cortex_distributor_requests_in_total` metrics to provide visiblity into appropriate per-tenant request limits. #2770
* [ENHANCEMENT] Distributor: Add single forwarding remote-write endpoint for a tenant (`forwarding_endpoint`), instead of using per-rule endpoints. This takes precendence over per-rule endpoints. #2801
* [ENHANCEMENT] Added `err-mimir-distributor-max-write-message-size` to the errors catalog. #2470
* [ENHANCEMENT] Add sanity check at startup to ensure the configured filesystem directories don't overlap for different components. #2828
* [BUGFIX] Ruler: fix not restoring alerts' state at startup. #2648
* [BUGFIX] Ingester: Fix disk filling up after restarting ingesters with out-of-order support disabled while it was enabled before. #2799
* [BUGFIX] Memberlist: retry joining memberlist cluster on startup when no nodes are resolved. #2837
Expand Down
2 changes: 2 additions & 0 deletions integration/ingester_limits_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,9 @@ overrides:
"-runtime-config.reload-period": "100ms",
"-blocks-storage.backend": "filesystem",
"-blocks-storage.filesystem.dir": "/tmp",
"-blocks-storage.storage-prefix": "blocks",
"-blocks-storage.bucket-store.bucket-index.enabled": "false",
"-ruler-storage.backend": "filesystem",
"-ruler-storage.local.directory": "/tmp", // Avoid warning "unable to list rules".
"-runtime-config.file": filepath.Join(e2e.ContainerSharedDir, overridesFile),
}
Expand Down
1 change: 1 addition & 0 deletions integration/single_binary_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ func TestMimirCanParseIntZeroAsZeroDuration(t *testing.T) {
flags := map[string]string{
"-common.storage.backend": "filesystem",
"-common.storage.filesystem.dir": "./bucket",
"-blocks-storage.storage-prefix": "blocks",
}

mimir := e2emimir.NewSingleBinary("mimir-1", flags, e2emimir.WithPorts(9009, 9095), e2emimir.WithConfigFile(mimirConfigFile))
Expand Down
12 changes: 6 additions & 6 deletions pkg/alertmanager/alertstore/bucketclient/bucket_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ import (
)

const (
// The bucket prefix under which all tenants alertmanager configs are stored.
// AlertsPrefix is the bucket prefix under which all tenants alertmanager configs are stored.
// Note that objects stored under this prefix follow the pattern:
// alerts/<user-id>
alertsPrefix = "alerts"
AlertsPrefix = "alerts"

// The bucket prefix under which other alertmanager state is stored.
// AlertmanagerPrefix is the bucket prefix under which other alertmanager state is stored.
// Note that objects stored under this prefix follow the pattern:
// alertmanager/<user-id>/<object>
alertmanagerPrefix = "alertmanager"
AlertmanagerPrefix = "alertmanager"

// The name of alertmanager full state objects (notification log + silences).
fullStateName = "fullstate"
Expand All @@ -52,8 +52,8 @@ type BucketAlertStore struct {

func NewBucketAlertStore(bkt objstore.Bucket, cfgProvider bucket.TenantConfigProvider, logger log.Logger) *BucketAlertStore {
return &BucketAlertStore{
alertsBucket: bucket.NewPrefixedBucketClient(bkt, alertsPrefix),
amBucket: bucket.NewPrefixedBucketClient(bkt, alertmanagerPrefix),
alertsBucket: bucket.NewPrefixedBucketClient(bkt, AlertsPrefix),
amBucket: bucket.NewPrefixedBucketClient(bkt, AlertmanagerPrefix),
cfgProvider: cfgProvider,
logger: logger,
}
Expand Down
157 changes: 157 additions & 0 deletions pkg/mimir/mimir.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"fmt"
"net/http"
"os"
"path/filepath"
"strconv"
"strings"

Expand Down Expand Up @@ -39,6 +40,7 @@ import (

"github.com/grafana/mimir/pkg/alertmanager"
"github.com/grafana/mimir/pkg/alertmanager/alertstore"
alertbucketclient "github.com/grafana/mimir/pkg/alertmanager/alertstore/bucketclient"
alertstorelocal "github.com/grafana/mimir/pkg/alertmanager/alertstore/local"
"github.com/grafana/mimir/pkg/api"
"github.com/grafana/mimir/pkg/compactor"
Expand All @@ -54,6 +56,7 @@ import (
querier_worker "github.com/grafana/mimir/pkg/querier/worker"
"github.com/grafana/mimir/pkg/ruler"
"github.com/grafana/mimir/pkg/ruler/rulestore"
rulebucketclient "github.com/grafana/mimir/pkg/ruler/rulestore/bucketclient"
rulestorelocal "github.com/grafana/mimir/pkg/ruler/rulestore/local"
"github.com/grafana/mimir/pkg/scheduler"
"github.com/grafana/mimir/pkg/storage/bucket"
Expand Down Expand Up @@ -200,6 +203,9 @@ func (c *Config) Validate(log log.Logger) error {
if err := c.validateBucketConfigs(); err != nil {
return fmt.Errorf("%w: %s", errInvalidBucketConfig, err)
}
if err := c.validateFilesystemPaths(log); err != nil {
return err
}
if err := c.RulerStorage.Validate(); err != nil {
return errors.Wrap(err, "invalid rulestore config")
}
Expand Down Expand Up @@ -308,6 +314,157 @@ func validateBucketConfig(cfg bucket.Config, blockStorageBucketCfg bucket.Config
return nil
}

// validateFilesystemPaths checks all configured filesystem paths and return error if it finds two of them
// overlapping (Mimir expects all filesystem paths to not overlap).
func (c *Config) validateFilesystemPaths(logger log.Logger) error {
type pathConfig struct {
name string
cfgValue string
checkValue string
}

var paths []pathConfig

if c.BlocksStorage.Bucket.Backend == bucket.Filesystem {
// Add the optional prefix to the path, because that's the actual location where blocks will be stored.
paths = append(paths, pathConfig{
name: "blocks storage filesystem directory",
cfgValue: c.BlocksStorage.Bucket.Filesystem.Directory,
checkValue: filepath.Join(c.BlocksStorage.Bucket.Filesystem.Directory, c.BlocksStorage.Bucket.StoragePrefix),
})
}

// Ingester.
if c.isAnyModuleEnabled(All, Ingester, Write) {
paths = append(paths, pathConfig{
name: "tsdb directory",
cfgValue: c.BlocksStorage.TSDB.Dir,
checkValue: c.BlocksStorage.TSDB.Dir,
})
}

// Store-gateway.
if c.isAnyModuleEnabled(All, StoreGateway, Backend) {
paths = append(paths, pathConfig{
name: "bucket store sync directory",
cfgValue: c.BlocksStorage.BucketStore.SyncDir,
checkValue: c.BlocksStorage.BucketStore.SyncDir,
})
}

// Compactor.
if c.isAnyModuleEnabled(All, Compactor, Backend) {
paths = append(paths, pathConfig{
name: "compactor data directory",
cfgValue: c.Compactor.DataDir,
checkValue: c.Compactor.DataDir,
})
}

// Ruler.
if c.isAnyModuleEnabled(All, Ruler, Backend) {
paths = append(paths, pathConfig{
name: "ruler data directory",
cfgValue: c.Ruler.RulePath,
checkValue: c.Ruler.RulePath,
})

if c.RulerStorage.Backend == bucket.Filesystem {
// All ruler configuration is stored under an hardcoded prefix that we're taking in account here.
paths = append(paths, pathConfig{
name: "ruler storage filesystem directory",
cfgValue: c.RulerStorage.Filesystem.Directory,
checkValue: filepath.Join(c.RulerStorage.Filesystem.Directory, rulebucketclient.RulesPrefix),
})
}
if c.RulerStorage.Backend == rulestorelocal.Name {
paths = append(paths, pathConfig{
name: "ruler storage local directory",
cfgValue: c.RulerStorage.Local.Directory,
checkValue: c.RulerStorage.Local.Directory,
})
}
}

// Alertmanager.
if c.isAnyModuleEnabled(AlertManager) {
paths = append(paths, pathConfig{
name: "alertmanager data directory",
cfgValue: c.Alertmanager.DataDir,
checkValue: c.Alertmanager.DataDir,
})

if c.AlertmanagerStorage.Backend == bucket.Filesystem {
var (
name = "alertmanager storage filesystem directory"
cfgValue = c.AlertmanagerStorage.Filesystem.Directory
)

// All ruler configuration is stored under an hardcoded prefix that we're taking into account here.
paths = append(paths, pathConfig{name: name, cfgValue: cfgValue, checkValue: filepath.Join(c.AlertmanagerStorage.Filesystem.Directory, alertbucketclient.AlertsPrefix)})
paths = append(paths, pathConfig{name: name, cfgValue: cfgValue, checkValue: filepath.Join(c.AlertmanagerStorage.Filesystem.Directory, alertbucketclient.AlertmanagerPrefix)})
}

if c.AlertmanagerStorage.Backend == alertstorelocal.Name {
paths = append(paths, pathConfig{
name: "alertmanager storage local directory",
cfgValue: c.AlertmanagerStorage.Local.Path,
checkValue: c.AlertmanagerStorage.Local.Path,
})
}
}

// Convert all check paths to absolute clean paths.
for idx, path := range paths {
abs, err := filepath.Abs(path.checkValue)
if err != nil {
// We prefer to log a warning instead of returning an error to ensure that if we're unable to
// run the sanity check Mimir could start anyway.
level.Warn(logger).Log("msg", "the configuration sanity check for the filesystem directory has been skipped because can't get the absolute path", "path", path, "err", err)
paths[idx].checkValue = ""
continue
}

paths[idx].checkValue = abs
}

for _, firstPath := range paths {
for _, secondPath := range paths {
// Skip the same config field.
if firstPath.name == secondPath.name {
continue
}

// Skip if we've been unable to get the absolute path of one of the two paths.
if firstPath.checkValue == "" || secondPath.checkValue == "" {
continue
}

if isAbsPathOverlapping(firstPath.checkValue, secondPath.checkValue) {
// Report the configured path in the error message, otherwise it's harder for the user to spot it.
return fmt.Errorf("the configured %s %q cannot overlap with the configured %s %q; please set different paths, also ensuring one is not a subdirectory of the other one", firstPath.name, firstPath.cfgValue, secondPath.name, secondPath.cfgValue)
}
}
}

return nil
}

// isAbsPathOverlapping returns whether the two input absolute paths overlap.
func isAbsPathOverlapping(firstAbsPath, secondAbsPath string) bool {
firstBase, firstName := filepath.Split(firstAbsPath)
secondBase, secondName := filepath.Split(secondAbsPath)

if firstBase == secondBase {
// The base directories are the same, so they overlap if the last segment of the path (name)
// is the same or it's missing (happens when the input path is the root "/").
return firstName == secondName || firstName == "" || secondName == ""
}

// The base directories are different, but they could still overlap if one is the child of the other one.
return strings.HasPrefix(firstAbsPath, secondAbsPath) || strings.HasPrefix(secondAbsPath, firstAbsPath)
}

func (c *Config) registerServerFlagsWithChangedDefaultValues(fs *flag.FlagSet) {
throwaway := flag.NewFlagSet("throwaway", flag.PanicOnError)

Expand Down
Loading