Skip to content

Commit

Permalink
Limit the number of read-only files the POSIX Env will have open.
Browse files Browse the repository at this point in the history
Background compaction can create an unbounded number of
leveldb::RandomAccessFile instances. On 64-bit systems mmap is used and
file descriptors are only used beyond a certain number of mmap's.
32-bit systems do not use mmap at all. leveldb::RandomAccessFile does not
observe Options.max_open_files so compaction could exhaust the file
descriptor limit.

This change uses getrlimit to determine the maximum number of open
files and limits RandomAccessFile to approximately 20% of that value.

-------------
Created by MOE: https://github.com/google/moe
MOE_MIGRATED_REVID=143505556
  • Loading branch information
cmumford committed Jan 4, 2017
1 parent a2fb086 commit 646c358
Show file tree
Hide file tree
Showing 3 changed files with 204 additions and 58 deletions.
192 changes: 134 additions & 58 deletions util/env_posix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/resource.h>
#include <sys/stat.h>
#include <sys/time.h>
#include <sys/types.h>
Expand All @@ -23,15 +24,70 @@
#include "util/logging.h"
#include "util/mutexlock.h"
#include "util/posix_logger.h"
#include "util/env_posix_test_helper.h"

namespace leveldb {

namespace {

// Cached resource limits for the POSIX Env. Both start at -1, which
// MaxOpenFiles() and MaxMmaps() treat as "not determined yet": on first use
// they compute a value and store it here. EnvPosixTestHelper may store a
// non-negative override here instead, which those functions return as-is.
static int open_read_only_file_limit = -1;
static int mmap_limit = -1;

// Builds an IOError Status for |context| (callers in this file pass the file
// name) whose detail text is the strerror() description of |err_number|.
static Status IOError(const std::string& context, int err_number) {
  const char* const description = strerror(err_number);
  return Status::IOError(context, description);
}

// Helper class to limit resource usage to avoid exhaustion.
// Currently used to limit read-only file descriptors and mmap file usage
// so that we do not end up running out of file descriptors, virtual memory,
// or running into kernel performance problems for very large databases.
//
// The remaining budget lives in an atomic pointer-sized slot so Acquire()
// can turn callers away without taking mu_ when nothing is left. Every
// modification made after construction happens with mu_ held.
class Limiter {
 public:
  // Limit maximum number of resources to |n|.
  // Explicit so that an integer is never silently converted into a Limiter.
  explicit Limiter(intptr_t n) {
    SetAllowed(n);
  }

  // If another resource is available, acquire it and return true.
  // Else return false.
  bool Acquire() {
    // Unlocked pre-check: skip the mutex entirely when the budget is spent.
    if (GetAllowed() <= 0) {
      return false;
    }
    MutexLock l(&mu_);
    // Re-read under the lock; another thread may have taken the last slot
    // between the pre-check above and acquiring mu_.
    const intptr_t x = GetAllowed();
    if (x <= 0) {
      return false;
    }
    SetAllowed(x - 1);
    return true;
  }

  // Release a resource acquired by a previous call to Acquire() that returned
  // true.
  void Release() {
    MutexLock l(&mu_);
    SetAllowed(GetAllowed() + 1);
  }

 private:
  port::Mutex mu_;
  // Number of resources still available, stored as an integer inside the
  // pointer-sized atomic slot.
  port::AtomicPointer allowed_;

  // Returns the number of resources currently available.
  intptr_t GetAllowed() const {
    return reinterpret_cast<intptr_t>(allowed_.Acquire_Load());
  }

  // REQUIRES: mu_ must be held, except during construction where the
  // constructor calls this before the object is shared.
  void SetAllowed(intptr_t v) {
    allowed_.Release_Store(reinterpret_cast<void*>(v));
  }

  // No copying allowed.
  Limiter(const Limiter&);
  void operator=(const Limiter&);
};

class PosixSequentialFile: public SequentialFile {
private:
std::string filename_;
Expand Down Expand Up @@ -69,73 +125,51 @@ class PosixSequentialFile: public SequentialFile {
class PosixRandomAccessFile: public RandomAccessFile {
private:
std::string filename_;
bool temporary_fd_; // If true, fd_ is -1 and we open on every read.
int fd_;
Limiter* limiter_;

public:
PosixRandomAccessFile(const std::string& fname, int fd)
: filename_(fname), fd_(fd) { }
virtual ~PosixRandomAccessFile() { close(fd_); }
PosixRandomAccessFile(const std::string& fname, int fd, Limiter* limiter)
: filename_(fname), fd_(fd), limiter_(limiter) {
temporary_fd_ = !limiter->Acquire();
if (temporary_fd_) {
// Open file on every access.
close(fd_);
fd_ = -1;
}
}

virtual ~PosixRandomAccessFile() {
if (!temporary_fd_) {
close(fd_);
limiter_->Release();
}
}

virtual Status Read(uint64_t offset, size_t n, Slice* result,
char* scratch) const {
int fd = fd_;
if (temporary_fd_) {
fd = open(filename_.c_str(), O_RDONLY);
if (fd < 0) {
return IOError(filename_, errno);
}
}

Status s;
ssize_t r = pread(fd_, scratch, n, static_cast<off_t>(offset));
ssize_t r = pread(fd, scratch, n, static_cast<off_t>(offset));
*result = Slice(scratch, (r < 0) ? 0 : r);
if (r < 0) {
// An error: return a non-ok status
s = IOError(filename_, errno);
}
return s;
}
};

// Helper class to limit mmap file usage so that we do not end up
// running out virtual memory or running into kernel performance
// problems for very large databases.
class MmapLimiter {
public:
// Up to 1000 mmaps for 64-bit binaries; none for smaller pointer sizes.
MmapLimiter() {
SetAllowed(sizeof(void*) >= 8 ? 1000 : 0);
}

// If another mmap slot is available, acquire it and return true.
// Else return false.
bool Acquire() {
if (GetAllowed() <= 0) {
return false;
}
MutexLock l(&mu_);
intptr_t x = GetAllowed();
if (x <= 0) {
return false;
} else {
SetAllowed(x - 1);
return true;
if (temporary_fd_) {
// Close the temporary file descriptor opened earlier.
close(fd);
}
return s;
}

// Release a slot acquired by a previous call to Acquire() that returned true.
void Release() {
MutexLock l(&mu_);
SetAllowed(GetAllowed() + 1);
}

private:
port::Mutex mu_;
port::AtomicPointer allowed_;

intptr_t GetAllowed() const {
return reinterpret_cast<intptr_t>(allowed_.Acquire_Load());
}

// REQUIRES: mu_ must be held
void SetAllowed(intptr_t v) {
allowed_.Release_Store(reinterpret_cast<void*>(v));
}

MmapLimiter(const MmapLimiter&);
void operator=(const MmapLimiter&);
};

// mmap() based random-access
Expand All @@ -144,12 +178,12 @@ class PosixMmapReadableFile: public RandomAccessFile {
std::string filename_;
void* mmapped_region_;
size_t length_;
MmapLimiter* limiter_;
Limiter* limiter_;

public:
// base[0,length-1] contains the mmapped contents of the file.
PosixMmapReadableFile(const std::string& fname, void* base, size_t length,
MmapLimiter* limiter)
Limiter* limiter)
: filename_(fname), mmapped_region_(base), length_(length),
limiter_(limiter) {
}
Expand Down Expand Up @@ -332,7 +366,7 @@ class PosixEnv : public Env {
mmap_limit_.Release();
}
} else {
*result = new PosixRandomAccessFile(fname, fd);
*result = new PosixRandomAccessFile(fname, fd, &fd_limit_);
}
return s;
}
Expand Down Expand Up @@ -532,10 +566,42 @@ class PosixEnv : public Env {
BGQueue queue_;

PosixLockTable locks_;
MmapLimiter mmap_limit_;
Limiter mmap_limit_;
Limiter fd_limit_;
};

PosixEnv::PosixEnv() : started_bgthread_(false) {
// Return the maximum number of concurrent mmaps.
// A non-negative value already stored in mmap_limit is returned unchanged;
// otherwise the default is computed, cached in mmap_limit, and returned.
static int MaxMmaps() {
  if (mmap_limit < 0) {
    // Up to 1000 mmaps for 64-bit binaries; none for smaller pointer sizes.
    const bool has_wide_pointers = sizeof(void*) >= 8;
    mmap_limit = has_wide_pointers ? 1000 : 0;
  }
  return mmap_limit;
}

// Return the maximum number of read-only files to keep open.
//
// A non-negative value already stored in open_read_only_file_limit is
// returned unchanged. Otherwise the limit is derived from the soft
// RLIMIT_NOFILE value, cached in open_read_only_file_limit, and returned:
//   - getrlimit() failure    -> hard-coded default of 50
//   - RLIM_INFINITY          -> INT_MAX
//   - any other soft limit   -> 20% of it, clamped to INT_MAX
static intptr_t MaxOpenFiles() {
  if (open_read_only_file_limit >= 0) {
    return open_read_only_file_limit;
  }
  struct rlimit rlim;
  if (getrlimit(RLIMIT_NOFILE, &rlim)) {
    // getrlimit failed, fallback to hard-coded default.
    open_read_only_file_limit = 50;
  } else if (rlim.rlim_cur == RLIM_INFINITY) {
    open_read_only_file_limit = std::numeric_limits<int>::max();
  } else {
    // Allow use of 20% of available file descriptors for read-only files.
    // rlim_t can be wider than int, so clamp before narrowing: a wrapped
    // (negative) value would defeat the cache check above and leave the
    // limiter with no budget at all.
    const rlim_t kIntMax =
        static_cast<rlim_t>(std::numeric_limits<int>::max());
    const rlim_t share = rlim.rlim_cur / 5;
    open_read_only_file_limit =
        static_cast<int>(share > kIntMax ? kIntMax : share);
  }
  return open_read_only_file_limit;
}

// Constructs the POSIX Env: marks the background thread as not started,
// sizes the mmap and read-only file descriptor limiters from MaxMmaps() and
// MaxOpenFiles(), then initializes the mutex and condition variable with
// default attributes (NULL attribute argument).
// NOTE(review): PthreadCall is defined outside this view; presumably it
// reports a non-zero pthread return code -- confirm.
PosixEnv::PosixEnv()
: started_bgthread_(false),
mmap_limit_(MaxMmaps()),
fd_limit_(MaxOpenFiles()) {
PthreadCall("mutex_init", pthread_mutex_init(&mu_, NULL));
PthreadCall("cvar_init", pthread_cond_init(&bgsignal_, NULL));
}
Expand Down Expand Up @@ -610,6 +676,16 @@ static pthread_once_t once = PTHREAD_ONCE_INIT;
static Env* default_env;
static void InitDefaultEnv() { default_env = new PosixEnv; }

// Overrides the read-only file descriptor limit by storing |limit| in
// open_read_only_file_limit. The assert checks that the default Env has not
// been created yet (default_env is still NULL).
void EnvPosixTestHelper::SetReadOnlyFDLimit(int limit) {
assert(default_env == NULL);
open_read_only_file_limit = limit;
}

// Overrides the mmap limit by storing |limit| in mmap_limit. The assert
// checks that the default Env has not been created yet (default_env is
// still NULL).
void EnvPosixTestHelper::SetReadOnlyMMapLimit(int limit) {
assert(default_env == NULL);
mmap_limit = limit;
}

Env* Env::Default() {
pthread_once(&once, InitDefaultEnv);
return default_env;
Expand Down
28 changes: 28 additions & 0 deletions util/env_posix_test_helper.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
// Copyright 2017 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#ifndef STORAGE_LEVELDB_UTIL_ENV_POSIX_TEST_HELPER_H_
#define STORAGE_LEVELDB_UTIL_ENV_POSIX_TEST_HELPER_H_

namespace leveldb {

class EnvPosixTest;

// A helper for the POSIX Env to facilitate testing.
// Every member is private; only the befriended EnvPosixTest class can call
// the setters below.
class EnvPosixTestHelper {
private:
friend class EnvPosixTest;

// Set the maximum number of read-only files that will be opened.
// Must be called before creating an Env.
static void SetReadOnlyFDLimit(int limit);

// Set the maximum number of read-only files that will be mapped via mmap.
// Must be called before creating an Env.
static void SetReadOnlyMMapLimit(int limit);
};

} // namespace leveldb

#endif // STORAGE_LEVELDB_UTIL_ENV_POSIX_TEST_HELPER_H_
42 changes: 42 additions & 0 deletions util/env_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@

#include "port/port.h"
#include "util/testharness.h"
#include "util/env_posix_test_helper.h"

namespace leveldb {

static const int kDelayMicros = 100000;
static const int kReadOnlyFileLimit = 4;
static const int kMMapLimit = 4;

class EnvPosixTest {
private:
Expand All @@ -19,6 +22,11 @@ class EnvPosixTest {
public:
Env* env_;
EnvPosixTest() : env_(Env::Default()) { }

static void SetFileLimits(int read_only_file_limit, int mmap_limit) {
EnvPosixTestHelper::SetReadOnlyFDLimit(read_only_file_limit);
EnvPosixTestHelper::SetReadOnlyMMapLimit(mmap_limit);
}
};

static void SetBool(void* ptr) {
Expand Down Expand Up @@ -97,8 +105,42 @@ TEST(EnvPosixTest, StartThread) {
ASSERT_EQ(state.val, 3);
}

// Opens the same file more times than the mmap and read-only file descriptor
// limits allow combined, then reads one byte through every handle and checks
// that each read returns the expected character.
TEST(EnvPosixTest, TestOpenOnRead) {
  // Write some test data to a single file that will be opened |n| times.
  std::string test_dir;
  ASSERT_OK(Env::Default()->GetTestDirectory(&test_dir));
  std::string test_file = test_dir + "/open_on_read.txt";

  FILE* f = fopen(test_file.c_str(), "w");
  ASSERT_TRUE(f != NULL);
  const char kFileData[] = "abcdefghijklmnopqrstuvwxyz";
  // Fail here if the fixture could not be written, rather than later with a
  // confusing read mismatch.
  ASSERT_TRUE(fputs(kFileData, f) != EOF);
  ASSERT_EQ(0, fclose(f));

  // Open the test file a number of times above the sum of the two limits to
  // force open-on-read behavior of POSIX Env leveldb::RandomAccessFile.
  const int kNumFiles = kReadOnlyFileLimit + kMMapLimit + 5;
  leveldb::RandomAccessFile* files[kNumFiles] = {0};
  for (int i = 0; i < kNumFiles; i++) {
    ASSERT_OK(Env::Default()->NewRandomAccessFile(test_file, &files[i]));
  }

  // Handle |i| reads the single byte at offset |i|.
  char scratch;
  Slice read_result;
  for (int i = 0; i < kNumFiles; i++) {
    ASSERT_OK(files[i]->Read(i, 1, &read_result, &scratch));
    // A short read would leave the slice empty; check before indexing it.
    ASSERT_TRUE(read_result.size() == 1);
    ASSERT_EQ(kFileData[i], read_result[0]);
  }

  for (int i = 0; i < kNumFiles; i++) {
    delete files[i];
  }
  ASSERT_OK(Env::Default()->DeleteFile(test_file));
}

} // namespace leveldb

// Test entry point: installs the file limits, then runs every registered
// test and returns the runner's result.
int main(int argc, char** argv) {
  // All tests currently run with the same read-only file limits.
  const int fd_limit = leveldb::kReadOnlyFileLimit;
  const int mmap_limit = leveldb::kMMapLimit;
  leveldb::EnvPosixTest::SetFileLimits(fd_limit, mmap_limit);
  return leveldb::test::RunAllTests();
}

0 comments on commit 646c358

Please sign in to comment.