Skip to content
This repository has been archived by the owner on Dec 8, 2021. It is now read-only.

Commit

Permalink
server: check open file ulimit for local backend (#343)
Browse files Browse the repository at this point in the history
* check open file ulimit for local backend

* fix comment and add a test

* fix tests

* remove useless comments

* fix

Co-authored-by: Neil Shen <overvenus@gmail.com>
  • Loading branch information
glorv and overvenus authored Jul 14, 2020
1 parent f38aa6e commit 73e48bb
Show file tree
Hide file tree
Showing 4 changed files with 130 additions and 5 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ require (
github.com/shurcooL/httpgzip v0.0.0-20190720172056-320755c1c1b0
github.com/spaolacci/murmur3 v1.1.0 // indirect
go.uber.org/zap v1.15.0
golang.org/x/net v0.0.0-20200602114024-627f9648deb9 // indirect
golang.org/x/net v0.0.0-20200602114024-627f9648deb9
golang.org/x/sync v0.0.0-20200317015054-43a5402ce75a
golang.org/x/sys v0.0.0-20200615200032-f1bc736245b1 // indirect
golang.org/x/text v0.3.3
Expand Down
4 changes: 3 additions & 1 deletion lightning/backend/local.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ const (
gRPCKeepAliveTime = 10 * time.Second
gRPCKeepAliveTimeout = 3 * time.Second
gRPCBackOffMaxDelay = 3 * time.Second

LocalMemoryTableSize = 512 << 20
)

var (
Expand Down Expand Up @@ -257,7 +259,7 @@ func (local *local) ShouldPostProcess() bool {

func (local *local) openEngineDB(engineUUID uuid.UUID, readOnly bool) (*pebble.DB, error) {
opt := &pebble.Options{
MemTableSize: 512 << 20,
MemTableSize: LocalMemoryTableSize,
MaxConcurrentCompactions: 16,
MinCompactionRate: 1 << 30,
L0CompactionThreshold: math.MaxInt32, // set to max try to disable compaction
Expand Down
67 changes: 64 additions & 3 deletions lightning/lightning.go
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,21 @@ import (
"net/http"
"net/http/pprof"
"os"
"sort"
"strconv"
"strings"
"sync"
"syscall"
"time"

"golang.org/x/net/http/httpproxy"

"github.com/pingcap/errors"
"github.com/pingcap/failpoint"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/shurcooL/httpgzip"
"go.uber.org/zap"
"golang.org/x/net/http/httpproxy"

"github.com/pingcap/tidb-lightning/lightning/backend"
"github.com/pingcap/tidb-lightning/lightning/common"
"github.com/pingcap/tidb-lightning/lightning/config"
"github.com/pingcap/tidb-lightning/lightning/log"
Expand Down Expand Up @@ -210,7 +212,11 @@ func (l *Lightning) run(taskCfg *config.Config) (err error) {
if err != nil {
return errors.Trace(err)
}

err = checkSystemRequirement(taskCfg, mdl.GetDatabases())
if err != nil {
log.L().Error("check system requirements failed", zap.Error(err))
return errors.Trace(err)
}
dbMetas := mdl.GetDatabases()
web.BroadcastInitProgress(dbMetas)

Expand Down Expand Up @@ -529,3 +535,58 @@ func handleResume(w http.ResponseWriter, req *http.Request) {
writeJSONError(w, http.StatusMethodNotAllowed, "only PUT is allowed", nil)
}
}

// checkSystemRequirement verifies that the open-file limit of the current
// process is high enough for the import described by cfg and dbsMeta.
//
// Only the local backend is checked. The number of files it may need is
// estimated as the combined size of the cfg.App.TableConcurrency largest
// tables divided by backend.LocalMemoryTableSize. If the soft RLIMIT_NOFILE is
// below that estimate, the function tries to raise it; an error is returned
// when the limit cannot be read or cannot be raised.
func checkSystemRequirement(cfg *config.Config, dbsMeta []*mydump.MDDatabaseMeta) error {
	// The local backend reads and writes a lot of L0 sst files, which is why
	// the max open files limit matters; other backends are not checked.
	if cfg.TikvImporter.Backend != config.BackendLocal {
		return nil
	}

	// Gather the size of every table across all databases.
	var sizes []int64
	for _, db := range dbsMeta {
		for _, table := range db.Tables {
			sizes = append(sizes, table.TotalSize)
		}
	}
	// Order the sizes from largest to smallest.
	sort.Slice(sizes, func(a, b int) bool {
		return sizes[b] < sizes[a]
	})

	// Sum the top N sizes, where N is the table concurrency.
	var largestSum int64
	for idx, size := range sizes {
		if idx >= cfg.App.TableConcurrency {
			break
		}
		largestSum += size
	}
	required := uint64(largestSum / backend.LocalMemoryTableSize)

	var limits syscall.Rlimit
	err := syscall.Getrlimit(syscall.RLIMIT_NOFILE, &limits)
	// Test hook: replaces whatever Getrlimit reported with an injected limit.
	failpoint.Inject("GetRlimitValue", func(v failpoint.Value) {
		injected := uint64(v.(int))
		limits.Cur = injected
		limits.Max = injected
		err = nil
	})
	if err != nil {
		return errors.Trace(err)
	}
	if required <= limits.Cur {
		// The current soft limit already covers the estimate.
		return nil
	}

	// The soft limit is too low: try to raise it to the estimate.
	previous := limits.Cur
	limits.Cur = required
	if limits.Max < required {
		// If the process is not started by privileged user, this will fail.
		limits.Max = required
	}
	// Test hook: forces the "cannot raise the limit" path without calling Setrlimit.
	failpoint.Inject("SetRlimitError", func(v failpoint.Value) {
		if v.(bool) {
			err = errors.New("Setrlimit Injected Error")
		}
	})
	if err == nil {
		err = syscall.Setrlimit(syscall.RLIMIT_NOFILE, &limits)
	}
	if err != nil {
		return errors.Annotatef(err, "the maximum number of open file descriptors is too small, got %d, expect greater or equal to %d", previous, required)
	}
	return nil
}
62 changes: 62 additions & 0 deletions lightning/lightning_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ import (
"testing"
"time"

"github.com/pingcap/tidb-lightning/lightning/mydump"

. "github.com/pingcap/check"
"github.com/pingcap/failpoint"
"github.com/pingcap/tidb-lightning/lightning/config"
Expand Down Expand Up @@ -399,3 +401,63 @@ func (s *lightningServerSuite) TestHTTPAPIOutsideServerMode(c *C) {
// ... and the task should be canceled now.
c.Assert(<-errCh, Equals, context.Canceled)
}

// TestCheckSystemRequirement exercises checkSystemRequirement for the local
// backend with injected open-file limits: a limit one below the estimate must
// produce an error (raising the limit is forced to fail), and a limit equal to
// the estimate must pass.
func (s *lightningServerSuite) TestCheckSystemRequirement(c *C) {
	const (
		getRlimitFailpoint = "github.com/pingcap/tidb-lightning/lightning/GetRlimitValue"
		setRlimitFailpoint = "github.com/pingcap/tidb-lightning/lightning/SetRlimitError"
	)

	cfg := config.NewConfig()
	cfg.App.TableConcurrency = 4
	cfg.TikvImporter.Backend = config.BackendLocal

	// Sizes are in MiB (<< 20). The four largest tables are 150_800, 150_000,
	// 124_000 and 100_000 MiB, i.e. 524_800 MiB in total; divided by the
	// 512 MiB memory table size this gives an estimate of 1025 open files.
	dbMetas := []*mydump.MDDatabaseMeta{
		{
			Tables: []*mydump.MDTableMeta{
				{
					TotalSize: 500 << 20,
				},
				{
					TotalSize: 150_000 << 20,
				},
			},
		},
		{
			Tables: []*mydump.MDTableMeta{
				{
					TotalSize: 150_800 << 20,
				},
				{
					TotalSize: 35 << 20,
				},
				{
					TotalSize: 100_000 << 20,
				},
			},
		},
		{
			Tables: []*mydump.MDTableMeta{
				{
					TotalSize: 240 << 20,
				},
				{
					TotalSize: 124_000 << 20,
				},
			},
		},
	}

	// With max open files 1024, the largest supported total is 524288 MiB,
	// which is just below what dbMetas requires.
	err := failpoint.Enable(getRlimitFailpoint, "return(1024)")
	c.Assert(err, IsNil)
	// Registered right after the first successful enable so that a failing
	// assertion below cannot leave the failpoint active for other tests.
	defer failpoint.Disable(getRlimitFailpoint)
	err = failpoint.Enable(setRlimitFailpoint, "return(true)")
	c.Assert(err, IsNil)
	defer failpoint.Disable(setRlimitFailpoint)

	// The estimated fds is 1025 and raising the limit is forced to fail, so
	// an error is expected.
	err = checkSystemRequirement(cfg, dbMetas)
	c.Assert(err, NotNil)
	err = failpoint.Disable(getRlimitFailpoint)
	c.Assert(err, IsNil)

	// A limit of exactly 1025 satisfies the estimate without touching Setrlimit.
	err = failpoint.Enable(getRlimitFailpoint, "return(1025)")
	c.Assert(err, IsNil)
	err = checkSystemRequirement(cfg, dbMetas)
	c.Assert(err, IsNil)
}

0 comments on commit 73e48bb

Please sign in to comment.