Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix division by zero and directory removal in shards #3343

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 20 additions & 40 deletions src/ripple/nodestore/impl/DatabaseShardImp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,10 +147,10 @@ DatabaseShardImp::init()
return false;

// Remove legacy shard
shard->removeOnDestroy();
JLOG(j_.warn()) <<
"shard " << shardIndex <<
" incompatible legacy shard, removing";
remove_all(shardDir);
" removed, legacy shard";
continue;
}

Expand Down Expand Up @@ -740,21 +740,21 @@ DatabaseShardImp::import(Database& source)

// Create the new shard
app_.shardFamily()->reset();
auto const shardDir {dir_ / std::to_string(shardIndex)};
auto shard {std::make_unique<Shard>(app_, *this, shardIndex, j_)};
if (!shard->open(scheduler_, *ctx_))
continue;

// Create a marker file to signify an import in progress
auto const shardDir {dir_ / std::to_string(shardIndex)};
auto const markerFile {shardDir / importMarker_};
{
std::ofstream ofs {markerFile.string()};
if (!ofs.is_open())
{
JLOG(j_.error()) <<
"shard " << shardIndex <<
" is unable to create temp marker file";
remove_all(shardDir);
" failed to create temp marker file";
shard->removeOnDestroy();
continue;
}
ofs.close();
Expand Down Expand Up @@ -825,14 +825,14 @@ DatabaseShardImp::import(Database& source)
JLOG(j_.error()) <<
"exception " << e.what() <<
" in function " << __func__;
remove_all(shardDir);
shard->removeOnDestroy();
}
}
else
{
JLOG(j_.error()) <<
"shard " << shardIndex << " failed to import";
remove_all(shardDir);
shard->removeOnDestroy();
}
}

Expand Down Expand Up @@ -1239,26 +1239,15 @@ DatabaseShardImp::finalizeShard(
if (isStopping())
return;

// Bad shard, remove it
// Invalid or corrupt shard, remove it
{
std::lock_guard lock(mutex_);
shards_.erase(shardIndex);
updateStatus(lock);

using namespace boost::filesystem;
path const dir {shard->getDir()};
shard.reset();
try
{
remove_all(dir);
}
catch (std::exception const& e)
{
JLOG(j_.error()) <<
"exception " << e.what() << " in function " << __func__;
}
}

shard->removeOnDestroy();
shard.reset();
setFileStats();
return;
}
Expand Down Expand Up @@ -1326,7 +1315,7 @@ DatabaseShardImp::setFileStats()
std::lock_guard lock(mutex_);
fileSz_ = sumSz;
fdRequired_ = sumFd;
avgShardFileSz_ = fileSz_ / numShards;
avgShardFileSz_ = (numShards == 0 ? fileSz_ : fileSz_ / numShards);

if (fileSz_ >= maxFileSz_)
{
Expand Down Expand Up @@ -1404,28 +1393,19 @@ DatabaseShardImp::storeLedgerInShard(

if (!shard->store(ledger))
{
// Shard may be corrupt, remove it
std::lock_guard lock(mutex_);

shards_.erase(shard->index());
if (shard->index() == acquireIndex_)
acquireIndex_ = 0;
// Invalid or corrupt shard, remove it
{
std::lock_guard lock(mutex_);
shards_.erase(shard->index());

updateStatus(lock);
if (shard->index() == acquireIndex_)
acquireIndex_ = 0;

using namespace boost::filesystem;
path const dir {shard->getDir()};
shard.reset();
try
{
remove_all(dir);
}
catch (std::exception const& e)
{
JLOG(j_.error()) <<
"exception " << e.what() << " in function " << __func__;
updateStatus(lock);
}

shard->removeOnDestroy();
shard.reset();
result = false;
}
else if (shard->isBackendComplete())
Expand Down
23 changes: 23 additions & 0 deletions src/ripple/nodestore/impl/Shard.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,29 @@ Shard::Shard(
Throw<std::runtime_error>("Shard: Invalid index");
}

// Destroys the shard. When removal has been requested through
// removeOnDestroy(), the shard's directory is deleted as part of
// destruction; otherwise nothing is touched on disk here.
Shard::~Shard()
{
    // Removal was never requested: leave the directory alone
    if (!removeOnDestroy_)
        return;

    // Drop the backend, both SQLite databases and the acquire info
    // before deleting the directory (presumably so that nothing inside
    // it is still held open when it is removed)
    backend_.reset();
    lgrSQLiteDB_.reset();
    txSQLiteDB_.reset();
    acquireInfo_.reset();

    try
    {
        boost::filesystem::remove_all(dir_);
    }
    catch (std::exception const& e)
    {
        // Report the failure rather than letting it escape the destructor
        JLOG(j_.error()) <<
            "shard " << index_ <<
            " exception " << e.what() <<
            " in function " << __func__;
    }
}

bool
Shard::open(Scheduler& scheduler, nudb::context& ctx)
{
Expand Down
11 changes: 11 additions & 0 deletions src/ripple/nodestore/impl/Shard.h
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,8 @@ class Shard final
std::uint32_t index,
beast::Journal j);

~Shard();

bool
open(Scheduler& scheduler, nudb::context& ctx);

Expand Down Expand Up @@ -125,6 +127,12 @@ class Shard final
/** Raise the stop flag for this shard. */
void
stop()
{
    stop_.store(true);
}

/** Mark this shard so that its directory is deleted
    when the shard object is destroyed.
*/
void
removeOnDestroy()
{
    removeOnDestroy_.store(true);
}

// Current shard version
static constexpr std::uint32_t version {2};

Expand Down Expand Up @@ -202,6 +210,9 @@ class Shard final
// Determines if the shard needs to stop processing for shutdown
std::atomic<bool> stop_ {false};

// Determines if the shard directory should be removed in the destructor
std::atomic<bool> removeOnDestroy_ {false};

// Set the backend cache
// Lock over mutex_ required
void
Expand Down