feat: replay logs of different tables in parallel #1492
src/analytic_engine/src/instance/wal_replayer.rs

```diff
@@ -29,13 +29,15 @@ use common_types::{
     schema::{IndexInWriterSchema, Schema},
     table::ShardId,
 };
+use dashmap::{mapref::one::RefMut, DashMap};
+use futures::StreamExt;
 use generic_error::BoxError;
 use lazy_static::lazy_static;
 use logger::{debug, error, info, trace, warn};
 use prometheus::{exponential_buckets, register_histogram, Histogram};
 use snafu::ResultExt;
 use table_engine::table::TableId;
-use tokio::sync::MutexGuard;
+use tokio::sync::{Mutex, MutexGuard};
 use wal::{
     log_batch::LogEntry,
     manager::{
```
```diff
@@ -374,31 +376,54 @@ impl RegionBasedReplay {
         // TODO: No `group_by` method in `VecDeque`, so implement it manually here...
         Self::split_log_batch_by_table(log_batch, &mut table_batches);
 
-        // TODO: Replay logs of different tables in parallel.
-        for table_batch in table_batches {
-            // Some tables may have failed in previous replay, ignore them.
-            if failed_tables.contains_key(&table_batch.table_id) {
-                continue;
-            }
-
-            // Replay all log entries of current table.
-            // Some tables may have been moved to other shards or dropped, ignore such logs.
-            if let Some(ctx) = serial_exec_ctxs.get_mut(&table_batch.table_id) {
-                let result = replay_table_log_entries(
-                    &context.flusher,
-                    context.max_retry_flush_limit,
-                    &mut ctx.serial_exec,
-                    &ctx.table_data,
-                    log_batch.range(table_batch.range),
-                )
-                .await;
-
-                // If occur error, mark this table as failed and store the cause.
-                if let Err(e) = result {
-                    failed_tables.insert(table_batch.table_id, e);
-                }
-            }
-        }
-
+        let alter_failed_tables = HashMap::new();
+        let alter_failed_tables_ref = Arc::new(Mutex::new(alter_failed_tables));
+
+        let mut serial_exec_ctxs_dash_map = DashMap::new();
```
This map seems unnecessary, what I think of is like this:

```diff
modified src/analytic_engine/src/instance/wal_replayer.rs
@@ -29,6 +29,7 @@ use common_types::{
     schema::{IndexInWriterSchema, Schema},
     table::ShardId,
 };
+use futures::StreamExt;
 use generic_error::BoxError;
 use lazy_static::lazy_static;
 use logger::{debug, error, info, trace, warn};
@@ -374,6 +375,7 @@ impl RegionBasedReplay {
         // TODO: No `group_by` method in `VecDeque`, so implement it manually here...
         Self::split_log_batch_by_table(log_batch, &mut table_batches);
 
+        let mut replay_tasks = Vec::with_capacity(table_batches.len());
         // TODO: Replay logs of different tables in parallel.
         for table_batch in table_batches {
             // Some tables may have failed in previous replay, ignore them.
@@ -384,22 +386,27 @@ impl RegionBasedReplay {
             // Replay all log entries of current table.
             // Some tables may have been moved to other shards or dropped, ignore such logs.
             if let Some(ctx) = serial_exec_ctxs.get_mut(&table_batch.table_id) {
-                let result = replay_table_log_entries(
+                replay_tasks.push(replay_table_log_entries(
                     &context.flusher,
                     context.max_retry_flush_limit,
                     &mut ctx.serial_exec,
                     &ctx.table_data,
                     log_batch.range(table_batch.range),
-                )
-                .await;
+                ));
 
-                // If occur error, mark this table as failed and store the cause.
-                if let Err(e) = result {
-                    failed_tables.insert(table_batch.table_id, e);
-                }
+                // If occur error, mark this table as failed and store the
+                // cause. if let Err(e) = result {
+                //     failed_tables.insert(table_batch.table_id, e);
+                // }
             }
         }
-
+        for ret in futures::stream::iter(replay_tasks)
+            .buffer_unordered(20)
+            .collect::<Vec<_>>()
+            .await
+        {
+            // insert to failed_tables if there are errors
+        }
         Ok(())
     }
```
But this fails to compile due to the mutable references.

So the first step of this task is to remove those mutable references. The fix should be easy, just define `serial_exec_ctxs` with

I ran into the same compile failure before. Here is my code. Is this what you were expecting? However, my concern is: wouldn't `serial_exec_ctxs.lock().await.get_mut` break the concurrency?

I pushed my commits to your branch, and it compiles OK.

Yes, this step will run serially, but we make
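For readers following the thread, here is a minimal, self-contained sketch of the pattern being suggested: build one replay future per table, drive them with a bounded `buffer_unordered`, then record any failure per table afterwards. The names here (`TableId` as a plain `u64`, `replay_one_table`) are stand-ins for illustration, not the engine's real types or the `replay_table_log_entries` signature.

```rust
use std::collections::HashMap;

use futures::StreamExt;

type TableId = u64;

// Stand-in for the real per-table replay work (`replay_table_log_entries`).
async fn replay_one_table(table_id: TableId) -> Result<(), String> {
    if table_id % 3 == 0 {
        Err(format!("replay failed for table {table_id}"))
    } else {
        Ok(())
    }
}

#[tokio::main]
async fn main() {
    let table_ids: Vec<TableId> = (0..10).collect();
    let mut failed_tables: HashMap<TableId, String> = HashMap::new();

    // Build one future per table; each future reports (table_id, result)
    // so failures can be attributed after the concurrent phase.
    let replay_tasks = table_ids
        .into_iter()
        .map(|table_id| async move { (table_id, replay_one_table(table_id).await) });

    // Drive at most 20 replays at a time, then record any failures,
    // mirroring the `buffer_unordered(20)` suggestion above.
    for (table_id, result) in futures::stream::iter(replay_tasks)
        .buffer_unordered(20)
        .collect::<Vec<_>>()
        .await
    {
        if let Err(e) = result {
            failed_tables.insert(table_id, e);
        }
    }

    println!("failed tables: {failed_tables:?}");
}
```

Returning `(table_id, result)` from each future keeps error bookkeeping out of the concurrent section, so no shared mutable map is needed for the failures themselves. The PR's diff, which instead takes the `DashMap` route, continues below.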
```diff
+        serial_exec_ctxs_dash_map.extend(serial_exec_ctxs);
+        let serial_exec_ctxs_dash_map_ref = Arc::new(serial_exec_ctxs_dash_map);
+
+        // Some tables may have failed in previous replay, ignore them.
+        futures::stream::iter(
+            table_batches
+                .into_iter()
+                .filter(|table_batch| !failed_tables.contains_key(&table_batch.table_id)),
+        )
+        .for_each_concurrent(None, |table_batch| {
+            let alter_failed_tables_ref = Arc::clone(&alter_failed_tables_ref);
+            let serial_exec_ctxs_dash_map_ref = Arc::clone(&serial_exec_ctxs_dash_map_ref);
+            async move {
+                // Replay all log entries of current table.
+                // Some tables may have been moved to other shards or dropped, ignore such logs.
+                if let Some(mut ctx) = serial_exec_ctxs_dash_map_ref.get_mut(&table_batch.table_id)
+                {
+                    let ctx = RefMut::value_mut(&mut ctx);
+
+                    let result = replay_table_log_entries(
+                        &context.flusher,
+                        context.max_retry_flush_limit,
+                        &mut ctx.serial_exec,
+                        &ctx.table_data,
+                        log_batch.range(table_batch.range),
+                    )
+                    .await;
+
+                    // If occur error, mark this table as failed and store the cause.
+                    if let Err(e) = result {
+                        alter_failed_tables_ref
+                            .lock()
+                            .await
+                            .insert(table_batch.table_id, e);
+                    }
+                }
+            }
+        })
+        .await;
+
+        let alter_failed_tables = Arc::try_unwrap(alter_failed_tables_ref)
+            .unwrap()
+            .into_inner();
+        failed_tables.extend(alter_failed_tables);
 
         Ok(())
     }
```
I wonder if this is a required dependency for this task?
If HashMap works, I prefer to stick with it first.
I'm trying to run `replay_table_log_entries` concurrently, but I faced an issue with `serial_exec_ctxs`, which is a mutable reference to a HashMap. I had to wrap it in Arc and Mutex, and then every time I grab a mutable reference to a value from the map, it locks the entire map.
DashMap allows concurrent access to different keys. I wonder if there's an approach to make HashMap work in this case.
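As a rough, hypothetical illustration of the trade-off being discussed (toy `u64` keys and `String` values instead of the real per-table contexts): an `Arc<tokio::sync::Mutex<HashMap<..>>>` serializes every access to the whole map, while `DashMap` shards its keys internally so accesses to different keys can proceed concurrently.

```rust
use std::{collections::HashMap, sync::Arc};

use dashmap::DashMap;
use tokio::sync::Mutex;

#[tokio::main]
async fn main() {
    // Whole-map locking: the guard returned by `lock()` protects every entry,
    // so two tasks mutating different keys still serialize on this one mutex.
    let locked_map: Arc<Mutex<HashMap<u64, String>>> = Arc::new(Mutex::new(HashMap::new()));
    {
        let mut guard = locked_map.lock().await;
        guard.insert(1, "table-1 ctx".to_string());
    } // guard dropped here; only now can another task lock the map

    // Per-entry locking: DashMap shards its keys, so mutating the value for
    // key 1 does not block a concurrent access to key 2.
    let sharded_map: Arc<DashMap<u64, String>> = Arc::new(DashMap::new());
    sharded_map.insert(1, "table-1 ctx".to_string());
    sharded_map.insert(2, "table-2 ctx".to_string());
    if let Some(mut entry) = sharded_map.get_mut(&1) {
        entry.push_str(" (replayed)");
    }

    println!("{:?}", sharded_map.get(&1).map(|e| e.value().clone()));
}
```

One caveat with `DashMap` in async code is that holding one of its `Ref`/`RefMut` guards across an `.await` point can deadlock, which is part of why a plain `Mutex` with short critical sections is often acceptable when the lock is not the bottleneck.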
I think it's fine to use a plain Mutex here, since it is not the bottleneck; `replay_table_log_entries` is the heaviest task in this place.
Also, there is a partitioned lock in our codebase, you can use it if you want to optimize here:
https://github.com/apache/incubator-horaedb/blob/9f166f3daa9a02ef8af1e733c22f956ab97e7aaf/src/components/partitioned_lock/src/lib.rs#L130
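For context, a partitioned (striped) lock hashes each key to one of N independent mutexes, so unrelated keys usually take different locks and contend less than with a single mutex around the whole map. The sketch below is only a minimal illustration of that idea with hypothetical names; it is not the API of the `partitioned_lock` component linked above.

```rust
use std::{
    collections::{hash_map::DefaultHasher, HashMap},
    hash::{Hash, Hasher},
    sync::Mutex,
};

/// Minimal partitioned (striped) lock: one mutex-protected shard per slot.
struct PartitionedMap<K, V> {
    shards: Vec<Mutex<HashMap<K, V>>>,
}

impl<K: Hash + Eq, V> PartitionedMap<K, V> {
    fn new(partitions: usize) -> Self {
        Self {
            shards: (0..partitions).map(|_| Mutex::new(HashMap::new())).collect(),
        }
    }

    // Hash the key to choose which shard (and therefore which lock) to use.
    fn shard_for(&self, key: &K) -> &Mutex<HashMap<K, V>> {
        let mut hasher = DefaultHasher::new();
        key.hash(&mut hasher);
        let idx = (hasher.finish() as usize) % self.shards.len();
        &self.shards[idx]
    }

    fn insert(&self, key: K, value: V) {
        self.shard_for(&key).lock().unwrap().insert(key, value);
    }
}

fn main() {
    let map: PartitionedMap<u64, &str> = PartitionedMap::new(16);
    // Keys hashing to different shards take different locks, reducing contention.
    map.insert(1, "table-1");
    map.insert(2, "table-2");
}
```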
Awesome! I'm gonna check it out. Thanks for the heads up.