Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve online_delete configuration and DB tuning: #3429

Closed
wants to merge 13 commits into from
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 22 additions & 15 deletions cfg/rippled-example.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -913,13 +913,13 @@
# number of seconds.
# Default is 60.
#
# recovery_buffer_seconds
# recovery_wait_seconds
# The online delete process checks periodically
# that rippled is still in sync with the network,
# and that the validated ledger is less than
# 'age_threshold_seconds' old. By default, if it
# is not, the online delete process aborts and
# tries again later. If 'recovery_buffer_seconds'
# tries again later. If 'recovery_wait_seconds'
# is set and rippled is out of sync, but likely to
# recover quickly, then online delete will wait
# this number of seconds for rippled to get back
Expand Down Expand Up @@ -967,30 +967,37 @@
# <key> '=' <value>
# ...
#
# Example:
# Example 1:
# safety_level=low
#
# Example 2:
# journal_mode=off
# synchronous=off
#
# WARNING: These settings can have significant effects on data integrity,
# particularly in failure scenarios. It is strongly recommended that they
# be left at their defaults unless the server is having performance issues
# during normal operation or during automatic purging (online_delete)
# operations. A warning will be logged on startup if 'ledger_history'
# is configured to store more than 10,000,000 ledgers and any of these
# settings are less safe than the default. This is due to the inordinate
# amount of time and bandwidth it will take to safely rebuild a corrupted
# database from other peers.
# particularly in systemic failure scenarios. It is strongly recommended
# that they be left at their defaults unless the server is having
# performance issues during normal operation or during automatic purging
# (online_delete) operations. A warning will be logged on startup if
# 'ledger_history' is configured to store more than 10,000,000 ledgers and
# any of these settings are less safe than the default. This is due to the
# inordinate amount of time and bandwidth it will take to safely rebuild a
# corrupted database of that size from other peers.
#
# Optional keys:
#
# safety_level Valid values: high, low
# The default is "high", and tunes the SQLite
# databases in the most reliable mode. "low"
# is equivalent to
# The default is "high", which tunes the SQLite
# databases in the most reliable mode, and is
# equivalent to:
# journal_mode=wal
# synchronous=normal
# temp_store=file
# "low" is equivalent to:
# journal_mode=memory
# synchronous=off
# temp_store=memory
# These settings trade speed and reduced I/O
# These "low" settings trade speed and reduced I/O
# for a higher risk of data loss. See the
# individual settings below for more information.
# This setting may not be combined with any of the
Expand Down
2 changes: 1 addition & 1 deletion src/ripple/app/main/Application.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1072,7 +1072,7 @@ class ApplicationImp : public Application, public RootStoppable, public BasicApp
mLedgerDB->setupCheckpointing(m_jobQueue.get(), logs());

// wallet database
setup.noPragma();
setup.useGlobalPragma = false;
mWalletDB = std::make_unique<DatabaseCon>(
setup,
WalletDBName,
Expand Down
16 changes: 4 additions & 12 deletions src/ripple/app/main/DBInit.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,21 +71,13 @@ inline constexpr std::array<char const*, 5> LgrDBInit{
// Transaction database holds transactions and public keys
inline constexpr auto TxDBName{"transaction.db"};

inline constexpr
#if (ULONG_MAX > UINT_MAX) && !defined(NO_SQLITE_MMAP)
std::array<char const*, 4>
TxDBPragma
inline constexpr std::array TxDBPragma
{
{
#else
std::array<char const*, 3> TxDBPragma {{
#endif
"PRAGMA page_size=4096;", "PRAGMA journal_size_limit=1582080;",
"PRAGMA max_page_count=2147483646;",
"PRAGMA page_size=4096;", "PRAGMA journal_size_limit=1582080;",
"PRAGMA max_page_count=2147483646;",
#if (ULONG_MAX > UINT_MAX) && !defined(NO_SQLITE_MMAP)
"PRAGMA mmap_size=17179869184;"
"PRAGMA mmap_size=17179869184;"
#endif
}
};

inline constexpr std::array<char const*, 8> TxDBInit{
Expand Down
25 changes: 13 additions & 12 deletions src/ripple/app/main/Main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -517,33 +517,34 @@ run(int argc, char** argv)
}

using namespace boost::filesystem;
DatabaseCon::Setup dbSetup = setup_DatabaseCon(*config);
DatabaseCon::Setup const dbSetup = setup_DatabaseCon(*config);
path dbPath = dbSetup.dataDir / TxDBName;

try
{
uintmax_t const dbSize = file_size(dbPath);
assert(dbSize != static_cast<uintmax_t>(-1));

if (auto available = space(dbPath.parent_path()).available;
available < dbSize)
{
auto available = space(dbPath.parent_path()).available;
if (available < dbSize)
{
std::cerr
<< "The database filesystem must have at least as "
"much free space as the size of "
<< dbPath.string() << ", which is " << dbSize
<< " bytes. Only " << available
<< " bytes are available.\n";
return -1;
}
std::cerr << "The database filesystem must have at least as "
"much free space as the size of "
<< dbPath.string() << ", which is " << dbSize
<< " bytes. Only " << available
<< " bytes are available.\n";
return -1;
}

auto txnDB = std::make_unique<DatabaseCon>(
dbSetup, TxDBName, TxDBPragma, TxDBInit);
auto& session = txnDB->getSession();
std::uint32_t pageSize;

// Only the most trivial databases will fit in memory on typical
// (recommended) hardware. Force temp files to be written to disk
// regardless of the config settings.
session << boost::format(CommonDBPragmaTemp) % "file";
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The original behavior needs to come back--namely, that the actual temp directory be specified. Otherwise, SQLite will by default use the directory in which the database is.

Alternately, I think that getting rid of this feature altogether makes a lot of sense. If somebody really wants to VACUUM a database, it's easier to use the command line tool.

session << "PRAGMA page_size;", soci::into(pageSize);

std::cout << "VACUUM beginning. page_size: " << pageSize
Expand Down
6 changes: 3 additions & 3 deletions src/ripple/app/misc/NetworkOPs.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2757,12 +2757,12 @@ NetworkOPsImp::getServerInfo(bool human, bool admin, bool counters)
if (std::abs(closeOffset.count()) >= 60)
l[jss::close_time_offset] = closeOffset.count();

constexpr std::chrono::seconds HIGH_AGE_THRESHOLD{1000000};
constexpr std::chrono::seconds highAgeThreshold{1000000};
if (m_ledgerMaster.haveValidated())
{
auto const age = m_ledgerMaster.getValidatedLedgerAge();
l[jss::age] =
Json::UInt(age < HIGH_AGE_THRESHOLD ? age.count() : 0);
Json::UInt(age < highAgeThreshold ? age.count() : 0);
}
else
{
Expand All @@ -2773,7 +2773,7 @@ NetworkOPsImp::getServerInfo(bool human, bool admin, bool counters)
using namespace std::chrono_literals;
auto age = closeTime - lCloseTime;
l[jss::age] =
Json::UInt(age < HIGH_AGE_THRESHOLD ? age.count() : 0);
Json::UInt(age < highAgeThreshold ? age.count() : 0);
}
}
}
Expand Down
11 changes: 5 additions & 6 deletions src/ripple/app/misc/SHAMapStoreImp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,8 +195,8 @@ SHAMapStoreImp::SHAMapStoreImp(
}
if (get_if_exists(section, "age_threshold_seconds", temp))
ageThreshold_ = std::chrono::seconds{temp};
if (get_if_exists(section, "recovery_buffer_seconds", temp))
recoveryBuffer_.emplace(std::chrono::seconds{temp});
if (get_if_exists(section, "recovery_wait_seconds", temp))
recoveryWaitTime_.emplace(std::chrono::seconds{temp});

get_if_exists(section, "advisory_delete", advisoryDelete_);

Expand Down Expand Up @@ -624,7 +624,6 @@ SHAMapStoreImp::clearSql(
return;
}
JLOG(journal_.debug()) << "finished: " << deleteQuery;
return;
}

void
Expand Down Expand Up @@ -700,15 +699,15 @@ SHAMapStoreImp::health()
{
auto age = ledgerMaster_->getValidatedLedgerAge();
OperatingMode mode = netOPs_->getOperatingMode();
if (recoveryBuffer_ && mode == OperatingMode::SYNCING &&
if (recoveryWaitTime_ && mode == OperatingMode::SYNCING &&
age < ageThreshold_)
{
JLOG(journal_.warn())
<< "Waiting " << recoveryBuffer_->count()
<< "Waiting " << recoveryWaitTime_->count()
<< "s for node to get back into sync with network. state: "
<< app_.getOPs().strOperatingMode(mode, false) << ". age "
<< age.count() << 's';
std::this_thread::sleep_for(*recoveryBuffer_);
std::this_thread::sleep_for(*recoveryWaitTime_);

age = ledgerMaster_->getValidatedLedgerAge();
mode = netOPs_->getOperatingMode();
Expand Down
18 changes: 12 additions & 6 deletions src/ripple/app/misc/SHAMapStoreImp.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,12 @@ class SHAMapStoreImp : public SHAMapStore
std::uint32_t deleteBatch_ = 100;
std::chrono::milliseconds backOff_{100};
std::chrono::seconds ageThreshold_{60};
boost::optional<std::chrono::seconds> recoveryBuffer_{};
/// If set, and the node is out of sync during an
/// online_delete health check, sleep the thread
/// for this time and check again so the node can
/// recover.
/// See also: "recovery_wait_seconds" in rippled-example.cfg
boost::optional<std::chrono::seconds> recoveryWaitTime_;

// these do not exist upon SHAMapStore creation, but do exist
// as of onPrepare() or before
Expand Down Expand Up @@ -214,11 +219,9 @@ class SHAMapStoreImp : public SHAMapStore
return false;
}

/** delete from sqlite table in batches to not lock the db excessively
* pause briefly to extend access time to other users
* call with mutex object unlocked
* @return true if any deletable rows were found (though not
* necessarily deleted.
/** delete from sqlite table in batches to not lock the db excessively.
* Pause briefly to extend access time to other users.
* Call with mutex object unlocked.
*/
void
clearSql(
Expand All @@ -238,6 +241,9 @@ class SHAMapStoreImp : public SHAMapStore
// Assume that, once unhealthy, a necessary step has been
// aborted, so the online-delete process needs to restart
// at next ledger.
// If recoveryWaitTime_ is set, this may sleep to give rippled
// time to recover, so never call it from any thread other than
// the main "run()".
Health
health();
//
Expand Down
55 changes: 29 additions & 26 deletions src/ripple/core/DatabaseCon.h
Original file line number Diff line number Diff line change
Expand Up @@ -90,22 +90,19 @@ class DatabaseCon
Config::StartUpType startUp = Config::NORMAL;
bool standAlone = false;
boost::filesystem::path dataDir;
// If unseated, then the `globalPragma` are not used,
// otherwise should point to `globalPragma`
std::shared_ptr<std::vector<std::string> const> commonPragma;
void
noPragma()
{
commonPragma.reset();
}
void
usePragma()
// Indicates whether or not to return the `globalPragma`
// from commonPragma()
bool useGlobalPragma = false;

std::vector<std::string> const*
commonPragma() const
{
assert(globalPragma);
commonPragma = globalPragma;
assert(!useGlobalPragma || globalPragma);
return useGlobalPragma && globalPragma ? globalPragma.get()
: nullptr;
}

static std::shared_ptr<std::vector<std::string> const> globalPragma;
static std::unique_ptr<std::vector<std::string> const> globalPragma;
};

template <std::size_t N, std::size_t M>
Expand All @@ -114,16 +111,18 @@ class DatabaseCon
std::string const& DBName,
std::array<char const*, N> const& pragma,
std::array<char const*, M> const& initSQL)
{
// Use temporary files or regular DB files?
auto const useTempFiles = setup.standAlone &&
setup.startUp != Config::LOAD &&
setup.startUp != Config::LOAD_FILE &&
setup.startUp != Config::REPLAY;
boost::filesystem::path pPath =
useTempFiles ? "" : (setup.dataDir / DBName);

init(pPath, setup.commonPragma, pragma, initSQL);
: DatabaseCon(
{},
setup.standAlone && setup.startUp != Config::LOAD &&
setup.startUp != Config::LOAD_FILE &&
setup.startUp != Config::REPLAY
? ""
: (setup.dataDir / DBName),
setup.commonPragma(),
pragma,
initSQL)
{
}

template <std::size_t N, std::size_t M>
Expand All @@ -132,8 +131,8 @@ class DatabaseCon
std::string const& DBName,
std::array<char const*, N> const& pragma,
std::array<char const*, M> const& initSQL)
: DatabaseCon({}, dataDir / DBName, {}, pragma, initSQL)
seelabs marked this conversation as resolved.
Show resolved Hide resolved
{
init((dataDir / DBName), {}, pragma, initSQL);
}

soci::session&
Expand All @@ -152,11 +151,15 @@ class DatabaseCon
setupCheckpointing(JobQueue*, Logs&);

private:
class Base
{
};

template <std::size_t N, std::size_t M>
void
init(
DatabaseCon(
Base,
seelabs marked this conversation as resolved.
Show resolved Hide resolved
boost::filesystem::path const& pPath,
std::shared_ptr<std::vector<std::string> const> const& commonPragma,
std::vector<std::string> const* commonPragma,
std::array<char const*, N> const& pragma,
std::array<char const*, M> const& initSQL)
{
Expand Down
8 changes: 4 additions & 4 deletions src/ripple/core/impl/DatabaseCon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -56,13 +56,13 @@ setup_DatabaseCon(Config const& c, boost::optional<beast::Journal> j)

if (set(safety_level, "safety_level", sqlite))
{
showRiskWarning = boost::iequals(safety_level, "low");
if (showRiskWarning)
if (boost::iequals(safety_level, "low"))
{
// low safety defaults
journal_mode = "memory";
synchronous = "off";
temp_store = "memory";
showRiskWarning = true;
}
else if (!boost::iequals(safety_level, "high"))
{
Expand Down Expand Up @@ -160,12 +160,12 @@ setup_DatabaseCon(Config const& c, boost::optional<beast::Journal> j)
return result;
}();
}
setup.commonPragma = setup.globalPragma;
setup.useGlobalPragma = true;

return setup;
}

std::shared_ptr<std::vector<std::string> const>
std::unique_ptr<std::vector<std::string> const>
DatabaseCon::Setup::globalPragma;

void
Expand Down
2 changes: 1 addition & 1 deletion src/ripple/net/impl/DatabaseBody.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ DatabaseBody::value_type::open(

auto setup = setup_DatabaseCon(config);
setup.dataDir = path.parent_path();
setup.noPragma();
setup.useGlobalPragma = false;

// Downloader ignores the "CommonPragma"
conn_ = std::make_unique<DatabaseCon>(
Expand Down
Loading