Skip to content

Commit

Permalink
[BACKPORT 2.14][#13127] docdb: Add field to control waiting for paren…
Browse files Browse the repository at this point in the history
…t deletion in IsTabletSplittingComplete.

Summary:
D17148 (commit 13736e1) introduced a wait for split parents to be deleted before return true from IsTabletSplittingComplete. We do not always need to wait for parents to be deleted when checking if tablet splitting is complete (as of now, we only need to wait for PITR). Adding this field allows other features that do not require parents to be deleted (e.g. backfilling and backups) to return from IsTabletSplittingComplete sooner.

Note that this field is not set in `yb_backup.py` for backwards compatibility, but the default value of false should be correct. (If we switch to a default value of true later, yb_backup would still work, though it might take longer for IsTabletSplittingComplete to return true).

(As part of backport) Fix missing set of wait_for_parent_deletion.

Original commit: 6da6a5b / D18132.

Test Plan: `ybd  --cxx-test integration-tests_tablet-split-itest --gtest_filter AutomaticTabletSplitITest.IsTabletSplittingComplete`

Reviewers: skedia

Reviewed By: skedia

Subscribers: skedia, bogdan

Differential Revision: https://phabricator.dev.yugabyte.com/D18578
  • Loading branch information
SrivastavaAnubhav committed Jul 27, 2022
1 parent de7969e commit f86af13
Show file tree
Hide file tree
Showing 15 changed files with 71 additions and 33 deletions.
6 changes: 3 additions & 3 deletions ent/src/yb/tools/yb-admin_client_ent.cc
Original file line number Diff line number Diff line change
Expand Up @@ -559,9 +559,9 @@ Status ClusterAdminClient::DisableTabletSplitsDuringRestore(CoarseTimePoint dead

while (CoarseMonoClock::Now() < std::min(splitting_disabled_until, deadline)) {
// Wait for existing split operations to complete.
const auto resp =
VERIFY_RESULT_PREPEND(IsTabletSplittingCompleteInternal(),
"Tablet splitting did not complete. Cannot restore.");
const auto resp = VERIFY_RESULT_PREPEND(
IsTabletSplittingCompleteInternal(true /* wait_for_parent_deletion */),
"Tablet splitting did not complete. Cannot restore.");
if (resp.is_tablet_splitting_complete()) {
break;
}
Expand Down
3 changes: 2 additions & 1 deletion src/yb/integration-tests/tablet-split-itest-base.cc
Original file line number Diff line number Diff line change
Expand Up @@ -336,11 +336,12 @@ void TabletSplitITestBase<MiniClusterType>::CheckTableKeysInRange(const size_t n

template <class MiniClusterType>
Result<bool> TabletSplitITestBase<MiniClusterType>::IsSplittingComplete(
yb::master::MasterAdminProxy* master_proxy) {
yb::master::MasterAdminProxy* master_proxy, bool wait_for_parent_deletion) {
rpc::RpcController controller;
controller.set_timeout(kRpcTimeout);
master::IsTabletSplittingCompleteRequestPB is_tablet_splitting_complete_req;
master::IsTabletSplittingCompleteResponsePB is_tablet_splitting_complete_resp;
is_tablet_splitting_complete_req.set_wait_for_parent_deletion(wait_for_parent_deletion);

RETURN_NOT_OK(master_proxy->IsTabletSplittingComplete(is_tablet_splitting_complete_req,
&is_tablet_splitting_complete_resp, &controller));
Expand Down
3 changes: 2 additions & 1 deletion src/yb/integration-tests/tablet-split-itest-base.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,8 @@ class TabletSplitITestBase : public client::TransactionTestBase<MiniClusterType>
// Make sure table contains only keys 1...num_keys without gaps.
void CheckTableKeysInRange(const size_t num_keys);

Result<bool> IsSplittingComplete(yb::master::MasterAdminProxy* master_proxy);
Result<bool> IsSplittingComplete(
yb::master::MasterAdminProxy* master_proxy, bool wait_for_parent_deletion);

protected:
virtual int64_t GetRF() { return 3; }
Expand Down
28 changes: 22 additions & 6 deletions src/yb/integration-tests/tablet-split-itest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1041,6 +1041,7 @@ TEST_F(AutomaticTabletSplitITest, IsTabletSplittingComplete) {
ANNOTATE_UNPROTECTED_WRITE(FLAGS_tablet_split_low_phase_size_threshold_bytes) = 0;
ANNOTATE_UNPROTECTED_WRITE(FLAGS_rocksdb_disable_compactions) = true;
ANNOTATE_UNPROTECTED_WRITE(FLAGS_enable_automatic_tablet_splitting) = false;
ANNOTATE_UNPROTECTED_WRITE(FLAGS_TEST_skip_deleting_split_tablets) = true;

CreateSingleTablet();
ASSERT_OK(WriteRows(1000, 1));
Expand All @@ -1053,27 +1054,42 @@ TEST_F(AutomaticTabletSplitITest, IsTabletSplittingComplete) {
proxy_cache_.get(), client_->GetMasterLeaderAddress());

// No splits at the beginning.
ASSERT_TRUE(ASSERT_RESULT(IsSplittingComplete(master_admin_proxy.get())));
ASSERT_TRUE(ASSERT_RESULT(IsSplittingComplete(master_admin_proxy.get(),
false /* wait_for_parent_deletion */)));

// Create a split task by pausing when trying to get split key. IsTabletSplittingComplete should
// include this ongoing task.
ANNOTATE_UNPROTECTED_WRITE(FLAGS_TEST_pause_tserver_get_split_key) = true;
ANNOTATE_UNPROTECTED_WRITE(FLAGS_enable_automatic_tablet_splitting) = true;
std::this_thread::sleep_for(FLAGS_catalog_manager_bg_task_wait_ms * 2ms);
ASSERT_FALSE(ASSERT_RESULT(IsSplittingComplete(master_admin_proxy.get())));
ASSERT_FALSE(ASSERT_RESULT(IsSplittingComplete(master_admin_proxy.get(),
false /* wait_for_parent_deletion */)));

// Now let the split occur on master but not tserver.
// IsTabletSplittingComplete should include splits that are only complete on master.
ANNOTATE_UNPROTECTED_WRITE(FLAGS_TEST_fail_tablet_split_probability) = 1;
ANNOTATE_UNPROTECTED_WRITE(FLAGS_TEST_pause_tserver_get_split_key) = false;
ASSERT_FALSE(ASSERT_RESULT(IsSplittingComplete(master_admin_proxy.get())));
ASSERT_FALSE(ASSERT_RESULT(IsSplittingComplete(master_admin_proxy.get(),
false /* wait_for_parent_deletion */)));

// Verify that the split finishes, and that IsTabletSplittingComplete returns true even though
// compactions are not done.
// Verify that the split finishes, and that IsTabletSplittingComplete returns true (even though
// compactions are not done) if wait_for_parent_deletion is false .
ANNOTATE_UNPROTECTED_WRITE(FLAGS_TEST_fail_tablet_split_probability) = 0;
ASSERT_OK(WaitForTabletSplitCompletion(2 /* expected_non_split_tablets */,
1 /* expected_split_tablets */));
ASSERT_OK(WaitFor([&]() -> Result<bool> {
return VERIFY_RESULT(IsSplittingComplete(master_admin_proxy.get(),
false /* wait_for_parent_deletion */));
}, MonoDelta::FromMilliseconds(FLAGS_catalog_manager_bg_task_wait_ms * 2),
"IsTabletSplittingComplete did not return true."));

// Re-enable deletion of children and check that IsTabletSplittingComplete returns true if
// wait_for_parent_deletion is true.
ANNOTATE_UNPROTECTED_WRITE(FLAGS_TEST_skip_deleting_split_tablets) = false;
ASSERT_OK(WaitForTabletSplitCompletion(2));
ASSERT_OK(WaitFor([&]() -> Result<bool> {
return VERIFY_RESULT(IsSplittingComplete(master_admin_proxy.get()));
return VERIFY_RESULT(IsSplittingComplete(master_admin_proxy.get(),
true /* wait_for_parent_deletion */));
}, MonoDelta::FromMilliseconds(FLAGS_catalog_manager_bg_task_wait_ms * 2),
"IsTabletSplittingComplete did not return true."));
}
Expand Down
3 changes: 2 additions & 1 deletion src/yb/integration-tests/xcluster-tablet-split-itest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -890,7 +890,8 @@ TEST_F(XClusterAutomaticTabletSplitITest, YB_DISABLE_TEST_IN_TSAN(AutomaticTable
auto master_admin_proxy = std::make_unique<master::MasterAdminProxy>(
proxy_cache_.get(), client_->GetMasterLeaderAddress());
ASSERT_OK(WaitFor(
std::bind(&TabletSplitITestBase::IsSplittingComplete, this, master_admin_proxy.get()),
std::bind(&TabletSplitITestBase::IsSplittingComplete, this, master_admin_proxy.get(),
false /* wait_for_parent_deletion */),
30s, "Wait for tablet splitting to complete."));
}

Expand Down
3 changes: 2 additions & 1 deletion src/yb/master/backfill_index.cc
Original file line number Diff line number Diff line change
Expand Up @@ -863,7 +863,8 @@ Status BackfillTable::WaitForTabletSplitting() {
tablet_split_manager->DisableSplittingForBackfillingTable(indexed_table_->id());
CoarseTimePoint deadline = CoarseMonoClock::Now() +
FLAGS_index_backfill_tablet_split_completion_timeout_sec * 1s;
while (!tablet_split_manager->IsTabletSplittingComplete(*indexed_table_)) {
while (!tablet_split_manager->IsTabletSplittingComplete(*indexed_table_,
false /* wait_for_parent_deletion */)) {
if (CoarseMonoClock::Now() > deadline) {
return STATUS(TimedOut, "Tablet splitting did not complete after being disabled; cannot "
"safely backfill the index.");
Expand Down
10 changes: 8 additions & 2 deletions src/yb/master/catalog_entity_info.cc
Original file line number Diff line number Diff line change
Expand Up @@ -789,7 +789,7 @@ TabletInfos TableInfo::GetTablets(IncludeInactive include_inactive) const {
return result;
}

bool TableInfo::HasOutstandingSplits() const {
bool TableInfo::HasOutstandingSplits(bool wait_for_parent_deletion) const {
SharedLock<decltype(lock_)> l(lock_);
DCHECK(!colocated());
std::unordered_set<TabletId> partitions_tablets;
Expand All @@ -800,8 +800,14 @@ bool TableInfo::HasOutstandingSplits() const {
<< " belonging to table " << id() << " is not yet running";
return true;
}
partitions_tablets.insert(p.second->tablet_id());
if (wait_for_parent_deletion) {
partitions_tablets.insert(p.second->tablet_id());
}
}
if (!wait_for_parent_deletion) {
return false;
}

for (const auto& p : tablets_) {
// If any parents have not been deleted yet, the split is not yet complete.
if (!partitions_tablets.contains(p.second->tablet_id())) {
Expand Down
11 changes: 6 additions & 5 deletions src/yb/master/catalog_entity_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -480,11 +480,12 @@ class TableInfo : public RefCountedThreadSafe<TableInfo>,
// Return whether given partition start keys match partitions_.
bool HasPartitions(const std::vector<PartitionKey> other) const;

// Returns true if all non-active tablets (e.g. split parent) have already been deleted / hidden.
// This function should not be called for colocated tables since the colocated tablet will not be
// deleted / hidden if the table is dropped (since it may still an active tablet of another
// table).
bool HasOutstandingSplits() const;
// Returns true if all active split children are running, and all non-active tablets (e.g. split
// parents) have already been deleted / hidden.
// This function should not be called for colocated tables with wait_for_parent_deletion set to
// true, since colocated tablets are not deleted / hidden if the table is dropped (the tablet may
// be part of another table).
bool HasOutstandingSplits(bool wait_for_parent_deletion) const;

// Get all tablets of the table.
// If include_inactive is true then it also returns inactive tablets along with the active ones.
Expand Down
2 changes: 1 addition & 1 deletion src/yb/master/catalog_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -8686,7 +8686,7 @@ Status CatalogManager::IsTabletSplittingComplete(
}
}
for (const auto& table : tables) {
if (!tablet_split_manager_.IsTabletSplittingComplete(*table)) {
if (!tablet_split_manager_.IsTabletSplittingComplete(*table, req->wait_for_parent_deletion())) {
resp->set_is_tablet_splitting_complete(false);
return Status::OK();
}
Expand Down
1 change: 1 addition & 0 deletions src/yb/master/master_admin.proto
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ message DisableTabletSplittingResponsePB {
}

message IsTabletSplittingCompleteRequestPB {
optional bool wait_for_parent_deletion = 1;
}

message IsTabletSplittingCompleteResponsePB {
Expand Down
5 changes: 3 additions & 2 deletions src/yb/master/tablet_split_manager.cc
Original file line number Diff line number Diff line change
Expand Up @@ -652,7 +652,8 @@ bool TabletSplitManager::IsRunning() {
return is_running_;
}

bool TabletSplitManager::IsTabletSplittingComplete(const TableInfo& table) {
bool TabletSplitManager::IsTabletSplittingComplete(
const TableInfo& table, bool wait_for_parent_deletion) {
// It is important to check that is_running_ is false BEFORE checking for outstanding splits.
// Otherwise, we could have the following order of events:
// 1. Thread A: Tablet split manager enqueues a split for table T.
Expand Down Expand Up @@ -681,7 +682,7 @@ bool TabletSplitManager::IsTabletSplittingComplete(const TableInfo& table) {
}
}

return !table.HasOutstandingSplits();
return !table.HasOutstandingSplits(wait_for_parent_deletion);
}

void TabletSplitManager::DisableSplittingFor(
Expand Down
2 changes: 1 addition & 1 deletion src/yb/master/tablet_split_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class TabletSplitManager {
// not running (to ensure that no splits are started immediately after returning).
// This function should eventually return true if called repeatedly after temporarily disabling
// splitting for the table.
bool IsTabletSplittingComplete(const TableInfo& table);
bool IsTabletSplittingComplete(const TableInfo& table, bool wait_for_parent_deletion);

// Perform one round of tablet splitting. This method is not thread-safe.
void MaybeDoSplitting(const TableInfoMap& table_info_map, const TabletInfoMap& tablet_info_map);
Expand Down
14 changes: 11 additions & 3 deletions src/yb/tools/yb-admin_cli.cc
Original file line number Diff line number Diff line change
Expand Up @@ -860,9 +860,17 @@ void ClusterAdminCli::RegisterCommandHandlers(ClusterAdminClientClass* client) {
});

Register(
"is_tablet_splitting_complete", "",
[client](const CLIArguments&) -> Status {
RETURN_NOT_OK_PREPEND(client->IsTabletSplittingComplete(),
"is_tablet_splitting_complete", " [wait_for_parent_deletion] (default false)",
[client](const CLIArguments& args) -> Status {
bool wait_for_parent_deletion = false;
if (args.size() > 0) {
if (IsEqCaseInsensitive(args[0], "wait_for_parent_deletion")) {
wait_for_parent_deletion = true;
} else {
return ClusterAdminCli::kInvalidArguments;
}
}
RETURN_NOT_OK_PREPEND(client->IsTabletSplittingComplete(wait_for_parent_deletion),
"Unable to check if tablet splitting is complete");
return Status::OK();
});
Expand Down
8 changes: 4 additions & 4 deletions src/yb/tools/yb-admin_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2125,14 +2125,14 @@ Status ClusterAdminClient::DisableTabletSplitting(
}

Result<master::IsTabletSplittingCompleteResponsePB>
ClusterAdminClient::IsTabletSplittingCompleteInternal() {
ClusterAdminClient::IsTabletSplittingCompleteInternal(bool wait_for_parent_deletion) {
master::IsTabletSplittingCompleteRequestPB req;
req.set_wait_for_parent_deletion(wait_for_parent_deletion);
return InvokeRpc(&master::MasterAdminProxy::IsTabletSplittingComplete, *master_admin_proxy_, req);
}

Status ClusterAdminClient::IsTabletSplittingComplete() {
master::IsTabletSplittingCompleteRequestPB req;
const auto resp = VERIFY_RESULT(IsTabletSplittingCompleteInternal());
Status ClusterAdminClient::IsTabletSplittingComplete(bool wait_for_parent_deletion) {
const auto resp = VERIFY_RESULT(IsTabletSplittingCompleteInternal(wait_for_parent_deletion));
std::cout << "Response: " << AsString(resp) << std::endl;
return Status::OK();
}
Expand Down
5 changes: 3 additions & 2 deletions src/yb/tools/yb-admin_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -278,7 +278,7 @@ class ClusterAdminClient {

Status DisableTabletSplitting(int64_t disable_duration_ms, const std::string& feature_name);

Status IsTabletSplittingComplete();
Status IsTabletSplittingComplete(bool wait_for_parent_deletion);

Status CreateTransactionsStatusTable(const std::string& table_name);

Expand Down Expand Up @@ -375,7 +375,8 @@ class ClusterAdminClient {
Result<master::DisableTabletSplittingResponsePB> DisableTabletSplitsInternal(
int64_t disable_duration_ms, const std::string& feature_name);

Result<master::IsTabletSplittingCompleteResponsePB> IsTabletSplittingCompleteInternal();
Result<master::IsTabletSplittingCompleteResponsePB> IsTabletSplittingCompleteInternal(
bool wait_for_parent_deletion);

std::string master_addr_list_;
HostPort init_master_addr_;
Expand Down

0 comments on commit f86af13

Please sign in to comment.