Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Robj/deukyeon deadlock #626

Merged
merged 11 commits into from
May 22, 2024
45 changes: 22 additions & 23 deletions src/trunk.c
Original file line number Diff line number Diff line change
Expand Up @@ -5111,6 +5111,8 @@ trunk_compact_bundle(void *arg, void *scratch_buf)
compact_bundle_scratch *scratch = &task_scratch->compact_bundle;
trunk_handle *spl = req->spl;
threadid tid;
key start_key = key_buffer_key(&req->start_key);
key end_key = key_buffer_key(&req->end_key);

/*
* 1. Acquire node read lock
Expand Down Expand Up @@ -5139,10 +5141,9 @@ trunk_compact_bundle(void *arg, void *scratch_buf)
node.addr,
key_string(trunk_data_config(spl), trunk_min_key(spl, &node)),
key_string(trunk_data_config(spl), trunk_max_key(spl, &node)),
key_string(trunk_data_config(spl), key_buffer_key(&req->start_key)),
key_string(trunk_data_config(spl), key_buffer_key(&req->end_key)),
trunk_key_compare(
spl, trunk_max_key(spl, &node), key_buffer_key(&req->end_key)),
key_string(trunk_data_config(spl), start_key),
key_string(trunk_data_config(spl), end_key),
trunk_key_compare(spl, trunk_max_key(spl, &node), end_key),
req->node_id,
node.hdr->node_id);

Expand All @@ -5155,8 +5156,8 @@ trunk_compact_bundle(void *arg, void *scratch_buf)
trunk_default_log_if_enabled(
spl,
"compact_bundle abort flushed: range %s-%s, height %u, bundle %u\n",
key_string(trunk_data_config(spl), key_buffer_key(&req->start_key)),
key_string(trunk_data_config(spl), key_buffer_key(&req->end_key)),
key_string(trunk_data_config(spl), start_key),
key_string(trunk_data_config(spl), end_key),
req->height,
req->bundle_no);
platform_free(spl->heap_id, req);
Expand Down Expand Up @@ -5195,8 +5196,8 @@ trunk_compact_bundle(void *arg, void *scratch_buf)
&stream,
"compact_bundle starting: addr %lu, range %s-%s, height %u, bundle %u\n",
node.addr,
key_string(trunk_data_config(spl), key_buffer_key(&req->start_key)),
key_string(trunk_data_config(spl), key_buffer_key(&req->end_key)),
key_string(trunk_data_config(spl), start_key),
key_string(trunk_data_config(spl), end_key),
req->height,
req->bundle_no);

Expand Down Expand Up @@ -5311,6 +5312,7 @@ trunk_compact_bundle(void *arg, void *scratch_buf)
uint64 old_root_addr;
trunk_compact_bundle_node_copy_path(spl, req, &node, &old_root_addr);
trunk_log_node_if_enabled(&stream, spl, &node);
key max_key = trunk_max_key(spl, &node);

/*
* 11a. ...unless node is a leaf which has split, in which case discard
Expand All @@ -5327,8 +5329,8 @@ trunk_compact_bundle(void *arg, void *scratch_buf)
spl,
&stream,
"compact_bundle discard split: range %s-%s, height %u, bundle %u\n",
key_string(trunk_data_config(spl), key_buffer_key(&req->start_key)),
key_string(trunk_data_config(spl), key_buffer_key(&req->end_key)),
key_string(trunk_data_config(spl), start_key),
key_string(trunk_data_config(spl), end_key),
req->height,
req->bundle_no);
if (spl->cfg.use_stats) {
Expand Down Expand Up @@ -5377,33 +5379,30 @@ trunk_compact_bundle(void *arg, void *scratch_buf)
}
trunk_log_node_if_enabled(&stream, spl, &node);

should_continue = trunk_compact_bundle_node_has_split(spl, req, &node);
should_continue = trunk_key_compare(spl, max_key, end_key) < 0;
platform_assert(!should_continue
|| trunk_compact_bundle_node_has_split(spl, req, &node));

if (!should_continue && num_replacements != 0 && pack_req.num_tuples != 0)
{
key max_key = trunk_max_key(spl, &node);
trunk_zap_branch_range(
spl, &new_branch, max_key, max_key, PAGE_TYPE_BRANCH);
}

debug_assert(trunk_verify_node(spl, &node));

// garbage collect the old path and bundle
trunk_garbage_collect_bundle(spl, old_root_addr, req);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the garbage collection should happen before we update the start key of the req. Otherwise we will garbage-collect the successor node. Confirmed that, in the original order, this caused a crash.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that makes sense.


if (should_continue) {
debug_assert(height != 0);
key_buffer_copy_key(&req->start_key, trunk_max_key(spl, &node));
key_buffer_copy_key(&req->start_key, max_key);
}

// garbage collect the old path and bundle
trunk_garbage_collect_bundle(spl, old_root_addr, req);

// only release locks on node after the garbage collection is complete
trunk_node_unlock(spl->cc, &node);
trunk_node_unclaim(spl->cc, &node);
trunk_node_unget(spl->cc, &node);

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not needed -- we get the node at the top of the loop.

(In fact, this causes a deterministic lockup.)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeesh

if (should_continue) {
rc = trunk_compact_bundle_node_get(spl, req, &node);
platform_assert_status_ok(rc);
}
}

if (spl->cfg.use_stats) {
Expand Down Expand Up @@ -5442,8 +5441,8 @@ trunk_compact_bundle(void *arg, void *scratch_buf)
spl,
&stream,
"build_filter enqueue: range %s-%s, height %u, bundle %u\n",
key_string(trunk_data_config(spl), key_buffer_key(&req->start_key)),
key_string(trunk_data_config(spl), key_buffer_key(&req->end_key)),
key_string(trunk_data_config(spl), start_key),
key_string(trunk_data_config(spl), end_key),
req->height,
req->bundle_no);
task_enqueue(
Expand Down