feat: replay logs of different tables in parallel #1492

Merged · 10 commits · Mar 11, 2024
Changes from 5 commits
1 change: 1 addition & 0 deletions Cargo.toml
@@ -107,6 +107,7 @@ cluster = { path = "src/cluster" }
criterion = "0.5"
horaedb-client = "1.0.2"
common_types = { path = "src/common_types" }
dashmap = "5.5.3"
Contributor:
I wonder if this dependency is really required for this task?

If `HashMap` works, I'd prefer to stick with it first.

Contributor Author:

I'm trying to run `replay_table_log_entries` concurrently, but I ran into an issue with `serial_exec_ctxs`, which is a HashMap accessed through a mutable reference. I had to wrap it in `Arc` and `Mutex`, and then every time I grab a mutable reference to a value from the map, it locks the entire map.

DashMap allows concurrent access to different keys. I wonder if there's an approach that makes HashMap work in this case.
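For context, a minimal sketch of the difference (toy `u64`/`String` entries standing in for `TableId` and `SerialExecContext`, not the engine's actual types):

```rust
use std::{collections::HashMap, sync::Arc};

use dashmap::DashMap;
use tokio::sync::Mutex;

#[tokio::main]
async fn main() {
    // Arc<Mutex<HashMap>>: any mutable access takes the whole-map lock, so two
    // tasks that touch *different* keys still wait on each other.
    let coarse: Arc<Mutex<HashMap<u64, String>>> = Arc::new(Mutex::new(HashMap::new()));
    {
        let mut guard = coarse.lock().await;
        guard.insert(1, "table-1".to_string());
        guard.insert(2, "table-2".to_string());
    } // the whole map stays locked until this guard is dropped

    // DashMap: internally sharded, so get_mut only locks the shard holding the
    // key; work on keys in other shards can proceed in parallel.
    let sharded: Arc<DashMap<u64, String>> = Arc::new(DashMap::new());
    sharded.insert(1, "table-1".to_string());
    sharded.insert(2, "table-2".to_string());
    if let Some(mut entry) = sharded.get_mut(&1) {
        entry.push_str(" replayed"); // only key 1's shard is locked here
    }
}
```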

Contributor (@jiacai2050), Mar 8, 2024:

> then every time I grab a mutable reference to a value from the map, it locks the entire map.

I think it's fine to use a plain Mutex here, since it's not the bottleneck; `replay_table_log_entries` is the heaviest task in this path.

Also, there is a partitioned lock in our codebase that you can use if you want to optimize here:
https://github.com/apache/incubator-horaedb/blob/9f166f3daa9a02ef8af1e733c22f956ab97e7aaf/src/components/partitioned_lock/src/lib.rs#L130
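For illustration, the partitioned-lock idea sketched generically (this is not the actual API of the linked `partitioned_lock` component, just the routing principle): the key is hashed to one of N independently locked shards, so a mutable access only contends with other keys in the same shard.

```rust
use std::{
    collections::{hash_map::DefaultHasher, HashMap},
    hash::{Hash, Hasher},
    sync::Mutex,
};

/// A toy partitioned map: N independently locked shards, with each key routed
/// to a shard by hash, so contention is limited to one shard at a time.
struct PartitionedMap<K, V> {
    shards: Vec<Mutex<HashMap<K, V>>>,
}

impl<K: Hash + Eq, V> PartitionedMap<K, V> {
    fn new(shard_count: usize) -> Self {
        Self {
            shards: (0..shard_count).map(|_| Mutex::new(HashMap::new())).collect(),
        }
    }

    fn shard_of(&self, key: &K) -> usize {
        let mut hasher = DefaultHasher::new();
        key.hash(&mut hasher);
        (hasher.finish() as usize) % self.shards.len()
    }

    fn insert(&self, key: K, value: V) {
        let idx = self.shard_of(&key);
        // Only this shard is locked; keys that hash elsewhere are unaffected.
        self.shards[idx].lock().unwrap().insert(key, value);
    }

    fn with_mut<R>(&self, key: &K, f: impl FnOnce(Option<&mut V>) -> R) -> R {
        let idx = self.shard_of(key);
        let mut shard = self.shards[idx].lock().unwrap();
        f(shard.get_mut(key))
    }
}

fn main() {
    let map = PartitionedMap::new(16);
    map.insert("table_1", 0u64);
    map.insert("table_2", 0u64);
    // Mutating table_1 only locks the shard that holds it.
    map.with_mut(&"table_1", |count| {
        if let Some(count) = count {
            *count += 1;
        }
    });
}
```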

Contributor Author (@Lethannn), Mar 8, 2024:

Awesome! I'll check it out. Thanks for the heads-up.

datafusion = { git = "https://github.com/CeresDB/arrow-datafusion.git", rev = "e21b03154" }
datafusion-proto = { git = "https://github.com/CeresDB/arrow-datafusion.git", rev = "e21b03154" }
derive_builder = "0.12"
1 change: 1 addition & 0 deletions src/analytic_engine/Cargo.toml
@@ -48,6 +48,7 @@ base64 = { workspace = true }
bytes_ext = { workspace = true }
codec = { workspace = true }
common_types = { workspace = true }
dashmap = { workspace = true }
datafusion = { workspace = true }
future_ext = { workspace = true }
futures = { workspace = true }
73 changes: 49 additions & 24 deletions src/analytic_engine/src/instance/wal_replayer.rs
@@ -29,13 +29,15 @@ use common_types::{
schema::{IndexInWriterSchema, Schema},
table::ShardId,
};
use dashmap::{mapref::one::RefMut, DashMap};
use futures::StreamExt;
use generic_error::BoxError;
use lazy_static::lazy_static;
use logger::{debug, error, info, trace, warn};
use prometheus::{exponential_buckets, register_histogram, Histogram};
use snafu::ResultExt;
use table_engine::table::TableId;
use tokio::sync::MutexGuard;
use tokio::sync::{Mutex, MutexGuard};
use wal::{
log_batch::LogEntry,
manager::{
@@ -374,31 +376,54 @@ impl RegionBasedReplay {
        // TODO: No `group_by` method in `VecDeque`, so implement it manually here...
        Self::split_log_batch_by_table(log_batch, &mut table_batches);

        // TODO: Replay logs of different tables in parallel.
        for table_batch in table_batches {
            // Some tables may have failed in previous replay, ignore them.
            if failed_tables.contains_key(&table_batch.table_id) {
                continue;
            }

            // Replay all log entries of current table.
            // Some tables may have been moved to other shards or dropped, ignore such logs.
            if let Some(ctx) = serial_exec_ctxs.get_mut(&table_batch.table_id) {
                let result = replay_table_log_entries(
                    &context.flusher,
                    context.max_retry_flush_limit,
                    &mut ctx.serial_exec,
                    &ctx.table_data,
                    log_batch.range(table_batch.range),
                )
                .await;

                // If occur error, mark this table as failed and store the cause.
                if let Err(e) = result {
                    failed_tables.insert(table_batch.table_id, e);
        let alter_failed_tables = HashMap::new();
        let alter_failed_tables_ref = Arc::new(Mutex::new(alter_failed_tables));

        let mut serial_exec_ctxs_dash_map = DashMap::new();
Contributor (@jiacai2050), Mar 8, 2024:

This map seems unnecessary; what I have in mind is something like this:

modified   src/analytic_engine/src/instance/wal_replayer.rs
@@ -29,6 +29,7 @@ use common_types::{
     schema::{IndexInWriterSchema, Schema},
     table::ShardId,
 };
+use futures::StreamExt;
 use generic_error::BoxError;
 use lazy_static::lazy_static;
 use logger::{debug, error, info, trace, warn};
@@ -374,6 +375,7 @@ impl RegionBasedReplay {
         // TODO: No `group_by` method in `VecDeque`, so implement it manually here...
         Self::split_log_batch_by_table(log_batch, &mut table_batches);
 
+        let mut replay_tasks = Vec::with_capacity(table_batches.len());
         // TODO: Replay logs of different tables in parallel.
         for table_batch in table_batches {
             // Some tables may have failed in previous replay, ignore them.
@@ -384,22 +386,27 @@ impl RegionBasedReplay {
             // Replay all log entries of current table.
             // Some tables may have been moved to other shards or dropped, ignore such logs.
             if let Some(ctx) = serial_exec_ctxs.get_mut(&table_batch.table_id) {
-                let result = replay_table_log_entries(
+                replay_tasks.push(replay_table_log_entries(
                     &context.flusher,
                     context.max_retry_flush_limit,
                     &mut ctx.serial_exec,
                     &ctx.table_data,
                     log_batch.range(table_batch.range),
-                )
-                .await;
+                ));
 
-                // If occur error, mark this table as failed and store the cause.
-                if let Err(e) = result {
-                    failed_tables.insert(table_batch.table_id, e);
-                }
+                // If occur error, mark this table as failed and store the
+                // cause. if let Err(e) = result {
+                //     failed_tables.insert(table_batch.table_id, e);
+                // }
             }
         }
-
+        for ret in futures::stream::iter(replay_tasks)
+            .buffer_unordered(20)
+            .collect::<Vec<_>>()
+            .await
+        {
+            // insert to failed_tables in there are errors
+        }
         Ok(())
     }

But this fails to compile due to the mutable reference:

error[E0499]: cannot borrow `*serial_exec_ctxs` as mutable more than once at a time
   --> src/analytic_engine/src/instance/wal_replayer.rs:388:32
    |
388 |             if let Some(ctx) = serial_exec_ctxs.get_mut(&table_batch.table_id) {
    |                                ^^^^^^^^^^^^^^^^ `*serial_exec_ctxs` was mutably borrowed here in the previous iteration of the loop
...
403 |         for ret in futures::stream::iter(replay_tasks)
    |                                          ------------ first borrow used here, in later iteration of loop


So the first step of this task is to remove those mutable references.

The fix should be easy: just define `serial_exec_ctxs` with the `Arc<Mutex<HashMap>>` type.
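For readers hitting this for the first time, here is a stripped-down repro of the same E0499 pattern, independent of the project's types (the `Vec<&mut String>` plays the role of the vector of futures that each capture `&mut ctx.serial_exec`); it intentionally does not compile:

```rust
use std::collections::HashMap;

fn main() {
    let mut serial_exec_ctxs: HashMap<u32, String> = HashMap::new();
    serial_exec_ctxs.insert(1, "table-1".into());
    serial_exec_ctxs.insert(2, "table-2".into());

    let mut replay_tasks: Vec<&mut String> = Vec::new();
    for table_id in [1u32, 2] {
        // The `&mut` returned by get_mut is stored in `replay_tasks`, so it has
        // to stay alive past this iteration; the next iteration then needs a
        // second, overlapping mutable borrow of the map.
        if let Some(ctx) = serial_exec_ctxs.get_mut(&table_id) {
            // error[E0499]: cannot borrow `serial_exec_ctxs` as mutable more
            // than once at a time
            replay_tasks.push(ctx);
        }
    }

    for ctx in replay_tasks {
        // the first borrow is still needed here, which keeps it alive across
        // the loop iterations above
        ctx.push_str(" replayed");
    }
}
```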

Contributor Author:

async fn replay_single_batch(
    context: &ReplayContext,
    log_batch:  VecDeque<LogEntry<ReadPayload>>,
    serial_exec_ctxs: Arc<tokio::sync::Mutex<HashMap<TableId, SerialExecContext<'_>>>>,
    failed_tables: &mut FailedTables,
) -> Result<()> {
    let mut table_batches = Vec::new();
    // TODO: No `group_by` method in `VecDeque`, so implement it manually here...
    Self::split_log_batch_by_table(log_batch, &mut table_batches);

    // TODO: Replay logs of different tables in parallel.
    let mut replay_tasks = Vec::with_capacity(table_batches.len());

    for table_batch in table_batches {
        // Some tables may have failed in previous replay, ignore them.
        if failed_tables.contains_key(&table_batch.table_id) {
            continue;
        }

        let serial_exec_ctxs = serial_exec_ctxs.clone();
        replay_tasks.push(async move {
            if let Some(ctx) = serial_exec_ctxs.lock().await.get_mut(&table_batch.table_id) {
                let result = replay_table_log_entries(
                    &context.flusher,
                    context.max_retry_flush_limit,
                    &mut ctx.serial_exec,
                    &ctx.table_data,
                    log_batch.range(table_batch.range),
                )
                    .await;
                (table_batch.table_id, result)
            } else {
                (table_batch.table_id, Ok(()))
            }
        });
    }

    for (table_id, ret) in futures::stream::iter(replay_tasks)
        .buffer_unordered(20)
        .collect::<Vec<_>>()
        .await
    {
        // If occur error, mark this table as failed and store the cause.
        if let Err(e) = ret {
            failed_tables.insert(table_id, e);
        }
    }

    Ok(())
}

I ran into the same compile failure before; the code above is what I came up with. Is this what you were expecting? However, my concern is: wouldn't `serial_exec_ctxs.lock().await.get_mut` break concurrency?

Contributor:

> I ran into the same compile failure before.

I pushed my commits to your branch; it compiles OK.

> wouldn't `serial_exec_ctxs.lock().await.get_mut` break concurrency?

Yes, that step runs serially, but we make `replay_table_log_entries` concurrent, which is what we want.

        serial_exec_ctxs_dash_map.extend(serial_exec_ctxs);
        let serial_exec_ctxs_dash_map_ref = Arc::new(serial_exec_ctxs_dash_map);

        // Some tables may have failed in previous replay, ignore them.
        futures::stream::iter(
            table_batches
                .into_iter()
                .filter(|table_batch| !failed_tables.contains_key(&table_batch.table_id)),
        )
        .for_each_concurrent(None, |table_batch| {
            let alter_failed_tables_ref = Arc::clone(&alter_failed_tables_ref);
            let serial_exec_ctxs_dash_map_ref = Arc::clone(&serial_exec_ctxs_dash_map_ref);
            async move {
                // Replay all log entries of current table.
                // Some tables may have been moved to other shards or dropped, ignore such logs.
                if let Some(mut ctx) = serial_exec_ctxs_dash_map_ref.get_mut(&table_batch.table_id)
                {
                    let ctx = RefMut::value_mut(&mut ctx);

                    let result = replay_table_log_entries(
                        &context.flusher,
                        context.max_retry_flush_limit,
                        &mut ctx.serial_exec,
                        &ctx.table_data,
                        log_batch.range(table_batch.range),
                    )
                    .await;

                    // If occur error, mark this table as failed and store the cause.
                    if let Err(e) = result {
                        alter_failed_tables_ref
                            .lock()
                            .await
                            .insert(table_batch.table_id, e);
                    }
                }
            }
        })
        .await;

        let alter_failed_tables = Arc::try_unwrap(alter_failed_tables_ref)
            .unwrap()
            .into_inner();
        failed_tables.extend(alter_failed_tables);

        Ok(())
    }