Skip to content

Commit

Permalink
*: Add dbSizeInUse to StatusResponse
Browse files Browse the repository at this point in the history
Existing dbSize shows physically allocated DB size and the backend
(boltdb) won't shrink it after a compaction until a user runs the defrag command.
The new dbSizeInUse shows the DB size that excludes free pages created
by compactions so that users can see the actual DB usage. dbSize >=
dbSizeInUse is always true.
Note that dbSizeInUse shows a page-based size and not byte level usage.
  • Loading branch information
Iwasaki Yudai committed Jan 31, 2018
1 parent dafbc04 commit 03054b8
Show file tree
Hide file tree
Showing 12 changed files with 380 additions and 245 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG-3.4.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@
- Add [`raftAppliedIndex` field to `etcdserverpb.StatusResponse`](https://github.com/coreos/etcd/pull/9176) for current Raft applied index.
- Add [`errors` field to `etcdserverpb.StatusResponse`](https://github.com/coreos/etcd/pull/9206) for server-side error.
- e.g. `"etcdserver: no leader", "NOSPACE", "CORRUPT"`
- Add [`dbSizeInUse` field to `etcdserverpb.StatusResponse`](https://github.com/coreos/etcd/pull/9256) for actual DB size after compaction.
- Also exposed as the metric `etcd_debugging_mvcc_db_total_size_in_use_in_bytes`

### Added(v3 `etcdctl`)

Expand Down
7 changes: 6 additions & 1 deletion Documentation/dev-guide/apispec/swagger/rpc.swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -2164,7 +2164,12 @@
"type": "object",
"properties": {
"dbSize": {
"description": "dbSize is the size of the backend database, in bytes, of the responding member.",
"description": "dbSize is the size of the backend database physically allocated, in bytes, of the responding member.",
"type": "string",
"format": "int64"
},
"dbSizeInUse": {
"description": "dbSizeInUse is the size of the backend database logically in use, in bytes, of the responding member.",
"type": "string",
"format": "int64"
},
Expand Down
2 changes: 2 additions & 0 deletions Documentation/op-guide/maintenance.md
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,8 @@ $ ETCDCTL_API=3 etcdctl put newkey 123
OK
```

The metric `etcd_debugging_mvcc_db_total_size_in_use_in_bytes` indicates the actual database usage after a history compaction, while `etcd_debugging_mvcc_db_total_size_in_bytes` shows the database size including free space waiting for defragmentation. The latter increases only when the former equals it, meaning that when both of these metrics are close to the quota, a history compaction is required to avoid triggering the space quota.

## Snapshot backup

Snapshotting the `etcd` cluster on a regular basis serves as a durable backup for an etcd keyspace. By taking periodic snapshots of an etcd member's backend database, an `etcd` cluster can be recovered to a point in time with a known good state.
Expand Down
1 change: 1 addition & 0 deletions etcdserver/api/v3rpc/maintenance.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,7 @@ func (ms *maintenanceServer) Status(ctx context.Context, ar *pb.StatusRequest) (
RaftIndex: ms.rg.Index(),
RaftTerm: ms.rg.Term(),
RaftAppliedIndex: ms.rg.AppliedIndex(),
DbSizeInUse: ms.bg.Backend().SizeInUse(),
}
if uint64(ms.rg.Leader()) == raft.None {
resp.Errors = append(resp.Errors, etcdserver.ErrNoLeader.Error())
Expand Down
503 changes: 270 additions & 233 deletions etcdserver/etcdserverpb/rpc.pb.go

Large diffs are not rendered by default.

4 changes: 3 additions & 1 deletion etcdserver/etcdserverpb/rpc.proto
Original file line number Diff line number Diff line change
Expand Up @@ -899,7 +899,7 @@ message StatusResponse {
ResponseHeader header = 1;
// version is the cluster protocol version used by the responding member.
string version = 2;
// dbSize is the size of the backend database, in bytes, of the responding member.
// dbSize is the size of the backend database physically allocated, in bytes, of the responding member.
int64 dbSize = 3;
// leader is the member ID which the responding member believes is the current leader.
uint64 leader = 4;
Expand All @@ -911,6 +911,8 @@ message StatusResponse {
uint64 raftAppliedIndex = 7;
// errors contains alarm/health information and status.
repeated string errors = 8;
// dbSizeInUse is the size of the backend database logically in use, in bytes, of the responding member.
int64 dbSizeInUse = 9;
}

message AuthEnableRequest {
Expand Down
46 changes: 43 additions & 3 deletions integration/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,26 +72,66 @@ func TestMetricDbSizeDefrag(t *testing.T) {
if expected := numPuts * len(putreq.Value); bv < expected {
t.Fatalf("expected db size greater than %d, got %d", expected, bv)
}
beforeDefragInUse, err := clus.Members[0].Metric("etcd_debugging_mvcc_db_total_size_in_use_in_bytes")
if err != nil {
t.Fatal(err)
}
biu, err := strconv.Atoi(beforeDefragInUse)
if err != nil {
t.Fatal(err)
}
if biu != bv {
t.Fatalf("when db size is growing, db size (%d) and db size in use (%d) is expected to be equal", bv, biu)
}

// clear out historical keys
// clear out historical keys; in-use bytes should decrease as pages are freed
creq := &pb.CompactionRequest{Revision: int64(numPuts), Physical: true}
if _, kerr := kvc.Compact(context.TODO(), creq); kerr != nil {
t.Fatal(kerr)
}

// Put to move PendingPages to FreePages
if _, err := kvc.Put(context.TODO(), putreq); err != nil {
t.Fatal(err)
}
time.Sleep(500 * time.Millisecond)

afterCompactionInUse, err := clus.Members[0].Metric("etcd_debugging_mvcc_db_total_size_in_use_in_bytes")
if err != nil {
t.Fatal(err)
}
aciu, err := strconv.Atoi(afterCompactionInUse)
if err != nil {
t.Fatal(err)
}
if biu <= aciu {
t.Fatalf("expected less than %d, got %d after compaction", biu, aciu)
}

// defrag should give freed space back to fs
mc.Defragment(context.TODO(), &pb.DefragmentRequest{})

afterDefrag, err := clus.Members[0].Metric("etcd_debugging_mvcc_db_total_size_in_bytes")
if err != nil {
t.Fatal(err)
}

av, err := strconv.Atoi(afterDefrag)
if err != nil {
t.Fatal(err)
}

if bv <= av {
t.Fatalf("expected less than %d, got %d after defrag", bv, av)
}

afterDefragInUse, err := clus.Members[0].Metric("etcd_debugging_mvcc_db_total_size_in_use_in_bytes")
if err != nil {
t.Fatal(err)
}
adiu, err := strconv.Atoi(afterDefragInUse)
if err != nil {
t.Fatal(err)
}
if adiu > av {
t.Fatalf("db size in use (%d) is expected less than db size (%d) after defrag", adiu, av)
}
}
30 changes: 26 additions & 4 deletions internal/mvcc/backend/backend.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,15 @@ type Backend interface {

Snapshot() Snapshot
Hash(ignores map[IgnoreKey]struct{}) (uint32, error)
// Size returns the current size of the backend.
// Size returns the current size of the backend physically allocated.
// The backend can hold DB space that is not utilized at the moment,
// since it can conduct pre-allocation or spare unused space for recycling.
// Use SizeInUse() instead for the actual DB size.
Size() int64
// SizeInUse returns the current size of the backend logically in use.
// Since the backend can manage free space in a non-byte unit such as
// number of pages, the returned value may not be exactly accurate in bytes.
SizeInUse() int64
Defrag() error
ForceCommit()
Close() error
Expand All @@ -72,8 +79,10 @@ type backend struct {
// size and commits are used with atomic operations so they must be
// 64-bit aligned, otherwise 32-bit tests will crash

// size is the number of bytes in the backend
// size is the number of bytes allocated in the backend
size int64
// sizeInUse is the number of bytes actually used in the backend
sizeInUse int64
// commits counts number of commits since start
commits int64

Expand Down Expand Up @@ -247,6 +256,10 @@ func (b *backend) Size() int64 {
return atomic.LoadInt64(&b.size)
}

// SizeInUse returns the number of bytes logically in use in the backend.
// It atomically loads the cached sizeInUse field, which the backend code
// refreshes as size minus (free page count * page size) when a transaction
// is begun or committed and after a defragmentation.
func (b *backend) SizeInUse() int64 {
return atomic.LoadInt64(&b.sizeInUse)
}

func (b *backend) run() {
defer close(b.donec)
t := time.NewTimer(b.batchInterval)
Expand Down Expand Up @@ -344,7 +357,11 @@ func (b *backend) defrag() error {

b.readTx.reset()
b.readTx.tx = b.unsafeBegin(false)
atomic.StoreInt64(&b.size, b.readTx.tx.Size())

size := b.readTx.tx.Size()
db := b.readTx.tx.DB()
atomic.StoreInt64(&b.size, size)
atomic.StoreInt64(&b.sizeInUse, size-(int64(db.Stats().FreePageN)*int64(db.Info().PageSize)))

return nil
}
Expand Down Expand Up @@ -405,7 +422,12 @@ func (b *backend) begin(write bool) *bolt.Tx {
b.mu.RLock()
tx := b.unsafeBegin(write)
b.mu.RUnlock()
atomic.StoreInt64(&b.size, tx.Size())

size := tx.Size()
db := tx.DB()
atomic.StoreInt64(&b.size, size)
atomic.StoreInt64(&b.sizeInUse, size-(int64(db.Stats().FreePageN)*int64(db.Info().PageSize)))

return tx
}

Expand Down
5 changes: 4 additions & 1 deletion internal/mvcc/backend/batch_tx.go
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,10 @@ func (t *batchTx) commit(stop bool) {
// which initializes *bolt.Tx.db and *bolt.Tx.meta as nil; panics t.tx.Size().
// Server must make sure 'batchTx.commit(false)' does not follow
// 'batchTx.commit(true)' (e.g. stopping backend, and inflight Hash call).
atomic.StoreInt64(&t.backend.size, t.tx.Size())
size := t.tx.Size()
db := t.tx.DB()
atomic.StoreInt64(&t.backend.size, size)
atomic.StoreInt64(&t.backend.sizeInUse, size-(int64(db.Stats().FreePageN)*int64(db.Info().PageSize)))
return
}

Expand Down
5 changes: 4 additions & 1 deletion internal/mvcc/kvstore.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,10 +300,13 @@ func (s *store) Restore(b backend.Backend) error {
}

func (s *store) restore() error {
reportDbTotalSizeInBytesMu.Lock()
b := s.b
reportDbTotalSizeInBytesMu.Lock()
reportDbTotalSizeInBytes = func() float64 { return float64(b.Size()) }
reportDbTotalSizeInBytesMu.Unlock()
reportDbTotalSizeInUseInBytesMu.Lock()
reportDbTotalSizeInUseInBytes = func() float64 { return float64(b.SizeInUse()) }
reportDbTotalSizeInUseInBytesMu.Unlock()

min, max := newRevBytes(), newRevBytes()
revToBytes(revision{main: 1}, min)
Expand Down
1 change: 1 addition & 0 deletions internal/mvcc/kvstore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -741,6 +741,7 @@ func (b *fakeBackend) BatchTx() backend.BatchTx
// The fakeBackend methods below are inert test stubs: ReadTx hands back the
// fake's own tx, and every other method returns a zero value or does nothing.

// ReadTx returns the transaction held by the fake backend.
func (b *fakeBackend) ReadTx() backend.ReadTx { return b.tx }
// Hash ignores its argument and always reports a zero hash with no error.
func (b *fakeBackend) Hash(ignores map[backend.IgnoreKey]struct{}) (uint32, error) { return 0, nil }
// Size always reports 0 bytes.
func (b *fakeBackend) Size() int64 { return 0 }
// SizeInUse always reports 0 bytes.
func (b *fakeBackend) SizeInUse() int64 { return 0 }
// Snapshot always returns nil.
func (b *fakeBackend) Snapshot() backend.Snapshot { return nil }
// ForceCommit is a no-op.
func (b *fakeBackend) ForceCommit() {}
// Defrag is a no-op that always succeeds.
func (b *fakeBackend) Defrag() error { return nil }
Expand Down
19 changes: 18 additions & 1 deletion internal/mvcc/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ var (
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "db_total_size_in_bytes",
Help: "Total size of the underlying database in bytes.",
Help: "Total size of the underlying database physically allocated in bytes.",
},
func() float64 {
reportDbTotalSizeInBytesMu.RLock()
Expand All @@ -154,6 +154,22 @@ var (
// overridden by mvcc initialization
reportDbTotalSizeInBytesMu sync.RWMutex
reportDbTotalSizeInBytes func() float64 = func() float64 { return 0 }


// dbTotalSizeInUse is a gauge whose value is computed on demand by calling
// reportDbTotalSizeInUseInBytes while holding the read lock. With the
// namespace, subsystem and name below it is exposed as
// etcd_debugging_mvcc_db_total_size_in_use_in_bytes.
dbTotalSizeInUse = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
Namespace: "etcd_debugging",
Subsystem: "mvcc",
Name: "db_total_size_in_use_in_bytes",
Help: "Total size of the underlying database logically in use in bytes.",
},
func() float64 {
// Hold the read lock so the reporter cannot be swapped mid-call.
reportDbTotalSizeInUseInBytesMu.RLock()
defer reportDbTotalSizeInUseInBytesMu.RUnlock()
return reportDbTotalSizeInUseInBytes()
},
)
// overridden by mvcc initialization
// reportDbTotalSizeInUseInBytesMu guards reportDbTotalSizeInUseInBytes, which
// reports 0 until the store's restore() replaces it (under the write lock)
// with a function that returns the backend's SizeInUse().
reportDbTotalSizeInUseInBytesMu sync.RWMutex
reportDbTotalSizeInUseInBytes func() float64 = func() float64 { return 0 }
)

func init() {
Expand All @@ -172,6 +188,7 @@ func init() {
prometheus.MustRegister(dbCompactionTotalDurations)
prometheus.MustRegister(dbCompactionKeysCounter)
prometheus.MustRegister(dbTotalSize)
prometheus.MustRegister(dbTotalSizeInUse)
}

// ReportEventReceived reports that an event is received.
Expand Down

0 comments on commit 03054b8

Please sign in to comment.