Skip to content

Commit

Permalink
Log compaction failure error and delete temporary blocks from disk (#2261)
Browse files Browse the repository at this point in the history

* Log compaction failure error and delete temporary blocks from disk

Signed-off-by: Marco Pracucci <marco@pracucci.com>

* Well, we have to always delete local dir

Signed-off-by: Marco Pracucci <marco@pracucci.com>

* Fix unit tests

Signed-off-by: Marco Pracucci <marco@pracucci.com>
  • Loading branch information
pracucci authored Jun 29, 2022
1 parent 8fabc83 commit e9bba6b
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 11 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

### Grafana Mimir

* [CHANGE] Compactor: delete source and output blocks from local disk when compaction fails, to reduce the likelihood that subsequent compactions fail because there is no space left on disk. #2261
* [BUGFIX] Compactor: log the actual error when compaction fails. #2261

### Mixin

### Jsonnet
Expand Down
10 changes: 5 additions & 5 deletions pkg/compactor/bucket_compactor.go
Original file line number Diff line number Diff line change
Expand Up @@ -280,13 +280,13 @@ func (c *BucketCompactor) runCompactionJob(ctx context.Context, job *Job) (shoul

defer func() {
elapsed := time.Since(jobBeginTime)
level.Info(jobLogger).Log("msg", "compaction job finished", "success", rerr == nil, "duration", elapsed, "duration_ms", elapsed.Milliseconds())

// Leave the compact directory for inspection if it is a halt error
// or if it is not then so that possibly we would not have to download everything again.
if rerr != nil {
return
if rerr == nil {
level.Info(jobLogger).Log("msg", "compaction job succeeded", "duration", elapsed, "duration_ms", elapsed.Milliseconds())
} else {
level.Error(jobLogger).Log("msg", "compaction job failed", "duration", elapsed, "duration_ms", elapsed.Milliseconds(), "err", rerr)
}

if err := os.RemoveAll(subDir); err != nil {
level.Error(jobLogger).Log("msg", "failed to remove compaction group work directory", "path", subDir, "err", err)
}
Expand Down
12 changes: 6 additions & 6 deletions pkg/compactor/compactor_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -537,15 +537,15 @@ func TestMultitenantCompactor_ShouldIterateOverUsersAndRunCompaction(t *testing.
`level=info component=compactor user=user-1 msg="start of GC"`,
`level=debug component=compactor user=user-1 msg="grouper found a compactable blocks group" groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 job="stage: merge, range start: 1574776800000, range end: 1574784000000, shard: , blocks: 01DTVP434PA9VFXSW2JKB3392D (min time: 2019-11-26 14:00:00 +0000 UTC, max time: 2019-11-26 16:00:00 +0000 UTC),01FS51A7GQ1RQWV35DBVYQM4KF (min time: 2019-11-26 14:00:00 +0000 UTC, max time: 2019-11-26 16:00:00 +0000 UTC)"`,
`level=info component=compactor user=user-1 msg="start of compactions"`,
`level=info component=compactor user=user-1 groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 msg="compaction job finished" success=true`,
`level=info component=compactor user=user-1 groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 msg="compaction job succeeded"`,
`level=info component=compactor user=user-1 msg="compaction iterations done"`,
`level=info component=compactor msg="successfully compacted user blocks" user=user-1`,
`level=info component=compactor msg="starting compaction of user blocks" user=user-2`,
`level=info component=compactor user=user-2 msg="start sync of metas"`,
`level=info component=compactor user=user-2 msg="start of GC"`,
`level=debug component=compactor user=user-2 msg="grouper found a compactable blocks group" groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 job="stage: merge, range start: 1574776800000, range end: 1574784000000, shard: , blocks: 01DTW0ZCPDDNV4BV83Q2SV4QAZ (min time: 2019-11-26 14:00:00 +0000 UTC, max time: 2019-11-26 16:00:00 +0000 UTC),01FRSF035J26D6CGX7STCSD1KG (min time: 2019-11-26 14:00:00 +0000 UTC, max time: 2019-11-26 16:00:00 +0000 UTC)"`,
`level=info component=compactor user=user-2 msg="start of compactions"`,
`level=info component=compactor user=user-2 groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 msg="compaction job finished" success=true`,
`level=info component=compactor user=user-2 groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 msg="compaction job succeeded"`,
`level=info component=compactor user=user-2 msg="compaction iterations done"`,
`level=info component=compactor msg="successfully compacted user blocks" user=user-2`,
}, removeIgnoredLogs(strings.Split(strings.TrimSpace(logs.String()), "\n")))
Expand Down Expand Up @@ -677,7 +677,7 @@ func TestMultitenantCompactor_ShouldStopCompactingTenantOnReachingMaxCompactionT
`level=debug component=compactor user=user-1 msg="grouper found a compactable blocks group" groupKey=0@414047632870839233-merge--1574776800000-1574784000000 job="stage: merge, range start: 1574776800000, range end: 1574784000000, shard: , blocks: 01DTVP434PA9VFXSW2JKB3392D (min time: 2019-11-26 14:00:00 +0000 UTC, max time: 2019-11-26 16:00:00 +0000 UTC),01FS51A7GQ1RQWV35DBVYQM4KF (min time: 2019-11-26 14:00:00 +0000 UTC, max time: 2019-11-26 16:00:00 +0000 UTC)"`,
`level=info component=compactor user=user-1 msg="start of compactions"`,
`level=info component=compactor user=user-1 msg="max compaction time reached, no more compactions will be started"`,
`level=info component=compactor user=user-1 groupKey=0@12695595599644216241-merge--1574776800000-1574784000000 msg="compaction job finished" success=true`,
`level=info component=compactor user=user-1 groupKey=0@12695595599644216241-merge--1574776800000-1574784000000 msg="compaction job succeeded"`,
`level=info component=compactor user=user-1 msg="compaction iterations done"`,
`level=info component=compactor msg="successfully compacted user blocks" user=user-1`,
}, removeIgnoredLogs(strings.Split(strings.TrimSpace(logs.String()), "\n")))
Expand Down Expand Up @@ -1024,15 +1024,15 @@ func TestMultitenantCompactor_ShouldCompactAllUsersOnShardingEnabledButOnlyOneIn
`level=info component=compactor user=user-1 msg="start of GC"`,
`level=debug component=compactor user=user-1 msg="grouper found a compactable blocks group" groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 job="stage: merge, range start: 1574776800000, range end: 1574784000000, shard: , blocks: 01DTVP434PA9VFXSW2JKB3392D (min time: 2019-11-26 14:00:00 +0000 UTC, max time: 2019-11-26 16:00:00 +0000 UTC),01FSTQ95C8FS0ZAGTQS2EF1NEG (min time: 2019-11-26 14:00:00 +0000 UTC, max time: 2019-11-26 16:00:00 +0000 UTC)"`,
`level=info component=compactor user=user-1 msg="start of compactions"`,
`level=info component=compactor user=user-1 groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 msg="compaction job finished" success=true`,
`level=info component=compactor user=user-1 groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 msg="compaction job succeeded"`,
`level=info component=compactor user=user-1 msg="compaction iterations done"`,
`level=info component=compactor msg="successfully compacted user blocks" user=user-1`,
`level=info component=compactor msg="starting compaction of user blocks" user=user-2`,
`level=info component=compactor user=user-2 msg="start sync of metas"`,
`level=info component=compactor user=user-2 msg="start of GC"`,
`level=debug component=compactor user=user-2 msg="grouper found a compactable blocks group" groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 job="stage: merge, range start: 1574776800000, range end: 1574784000000, shard: , blocks: 01DTW0ZCPDDNV4BV83Q2SV4QAZ (min time: 2019-11-26 14:00:00 +0000 UTC, max time: 2019-11-26 16:00:00 +0000 UTC),01FSV54G6QFQH1G9QE93G3B9TB (min time: 2019-11-26 14:00:00 +0000 UTC, max time: 2019-11-26 16:00:00 +0000 UTC)"`,
`level=info component=compactor user=user-2 msg="start of compactions"`,
`level=info component=compactor user=user-2 groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 msg="compaction job finished" success=true`,
`level=info component=compactor user=user-2 groupKey=0@17241709254077376921-merge--1574776800000-1574784000000 msg="compaction job succeeded"`,
`level=info component=compactor user=user-2 msg="compaction iterations done"`,
`level=info component=compactor msg="successfully compacted user blocks" user=user-2`,
}, removeIgnoredLogs(strings.Split(strings.TrimSpace(logs.String()), "\n")))
Expand Down Expand Up @@ -1241,7 +1241,7 @@ func TestMultitenantCompactor_ShouldSkipCompactionForJobsNoMoreOwnedAfterPlannin
`level=debug component=compactor user=user-1 msg="grouper found a compactable blocks group" groupKey=0@17241709254077376921-split-1_of_4-1574863200000-1574870400000 job="stage: split, range start: 1574863200000, range end: 1574870400000, shard: 1_of_4, blocks: 01DTVP434PA9VFXSW2JK000002 (min time: 2019-11-27 14:00:00 +0000 UTC, max time: 2019-11-27 16:00:00 +0000 UTC)"`,
// The ownership check is failing because, to keep this test simple, we've just switched
// the instance state to LEAVING and there are no other instances in the ring.
`level=info component=compactor user=user-1 groupKey=0@17241709254077376921-split-4_of_4-1574776800000-1574784000000 msg="compaction job finished" success=true`,
`level=info component=compactor user=user-1 groupKey=0@17241709254077376921-split-4_of_4-1574776800000-1574784000000 msg="compaction job succeeded"`,
`level=info component=compactor user=user-1 msg="skipped compaction because unable to check whether the job is owned by the compactor instance" groupKey=0@17241709254077376921-split-1_of_4-1574863200000-1574870400000 err="at least 1 live replicas required, could only find 0 - unhealthy instances: 1.2.3.4:0"`,
`level=info component=compactor user=user-1 msg="compaction iterations done"`,
`level=info component=compactor msg="successfully compacted user blocks" user=user-1`,
Expand Down

0 comments on commit e9bba6b

Please sign in to comment.