-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
165 additions
and
0 deletions.
There are no files selected for viewing
112 changes: 112 additions & 0 deletions
112
content/blog/2024-04-26-ballista-mvp-part5/ballista-mvp-executor.drawio
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
<mxfile host="Electron" modified="2024-04-26T07:45:10.770Z" agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/24.2.5 Chrome/120.0.6099.109 Electron/28.1.0 Safari/537.36" etag="BrpBkrujaqnxWiCiN1Mv" version="24.2.5" type="device"> | ||
<diagram name="第 1 页" id="CD6N5ZRrbqDiyocHICmu"> | ||
<mxGraphModel dx="2901" dy="1196" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="1" pageScale="1" pageWidth="827" pageHeight="1169" math="0" shadow="0"> | ||
<root> | ||
<mxCell id="0" /> | ||
<mxCell id="1" parent="0" /> | ||
<mxCell id="1erlsmTN53r9LNktAl-M-3" value="" style="rounded=1;whiteSpace=wrap;html=1;dashed=1;" vertex="1" parent="1"> | ||
<mxGeometry x="130" y="270" width="580" height="680" as="geometry" /> | ||
</mxCell> | ||
<mxCell id="w1BsYSWlBb3kda8zS0cG-1" value="ExecutorGrpc API" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" parent="1" vertex="1"> | ||
<mxGeometry x="60" y="320" width="110" height="70" as="geometry" /> | ||
</mxCell> | ||
<mxCell id="w1BsYSWlBb3kda8zS0cG-3" value="Flight API" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#d5e8d4;strokeColor=#82b366;" parent="1" vertex="1"> | ||
<mxGeometry x="60" y="840" width="110" height="70" as="geometry" /> | ||
</mxCell> | ||
<mxCell id="w1BsYSWlBb3kda8zS0cG-6" value="Task Runner Pool" style="ellipse;whiteSpace=wrap;html=1;fillColor=#e1d5e7;strokeColor=#9673a6;" parent="1" vertex="1"> | ||
<mxGeometry x="477" y="320" width="120" height="80" as="geometry" /> | ||
</mxCell> | ||
<mxCell id="w1BsYSWlBb3kda8zS0cG-7" value="New Task Queue" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;" parent="1" vertex="1"> | ||
<mxGeometry x="250" y="340" width="150" height="40" as="geometry" /> | ||
</mxCell> | ||
<mxCell id="w1BsYSWlBb3kda8zS0cG-8" value="Task Status Queue" style="rounded=0;whiteSpace=wrap;html=1;fillColor=#fff2cc;strokeColor=#d6b656;" parent="1" vertex="1"> | ||
<mxGeometry x="512" y="450" width="50" height="160" as="geometry" /> | ||
</mxCell> | ||
<mxCell id="w1BsYSWlBb3kda8zS0cG-10" value="" style="endArrow=classic;html=1;rounded=0;" parent="1" edge="1"> | ||
<mxGeometry width="50" height="50" relative="1" as="geometry"> | ||
<mxPoint x="190" y="360" as="sourcePoint" /> | ||
<mxPoint x="240" y="360" as="targetPoint" /> | ||
</mxGeometry> | ||
</mxCell> | ||
<mxCell id="w1BsYSWlBb3kda8zS0cG-11" value="" style="endArrow=classic;html=1;rounded=0;" parent="1" edge="1"> | ||
<mxGeometry width="50" height="50" relative="1" as="geometry"> | ||
<mxPoint x="417" y="359.5" as="sourcePoint" /> | ||
<mxPoint x="467" y="359.5" as="targetPoint" /> | ||
</mxGeometry> | ||
</mxCell> | ||
<mxCell id="w1BsYSWlBb3kda8zS0cG-14" value="" style="endArrow=classic;html=1;rounded=0;" parent="1" edge="1"> | ||
<mxGeometry width="50" height="50" relative="1" as="geometry"> | ||
<mxPoint x="537" y="410" as="sourcePoint" /> | ||
<mxPoint x="537" y="440" as="targetPoint" /> | ||
</mxGeometry> | ||
</mxCell> | ||
<mxCell id="w1BsYSWlBb3kda8zS0cG-15" value="Task Status Reporter" style="ellipse;whiteSpace=wrap;html=1;fillColor=#e1d5e7;strokeColor=#9673a6;" parent="1" vertex="1"> | ||
<mxGeometry x="477" y="660" width="120" height="80" as="geometry" /> | ||
</mxCell> | ||
<mxCell id="w1BsYSWlBb3kda8zS0cG-16" value="" style="endArrow=classic;html=1;rounded=0;" parent="1" edge="1"> | ||
<mxGeometry width="50" height="50" relative="1" as="geometry"> | ||
<mxPoint x="538" y="620" as="sourcePoint" /> | ||
<mxPoint x="538" y="650" as="targetPoint" /> | ||
</mxGeometry> | ||
</mxCell> | ||
<mxCell id="w1BsYSWlBb3kda8zS0cG-17" value="Local Disk" style="rounded=1;whiteSpace=wrap;html=1;" parent="1" vertex="1"> | ||
<mxGeometry x="250" y="845" width="430" height="60" as="geometry" /> | ||
</mxCell> | ||
<mxCell id="w1BsYSWlBb3kda8zS0cG-18" value="" style="endArrow=classic;startArrow=classic;html=1;rounded=0;" parent="1" edge="1"> | ||
<mxGeometry width="50" height="50" relative="1" as="geometry"> | ||
<mxPoint x="180" y="876" as="sourcePoint" /> | ||
<mxPoint x="240" y="876" as="targetPoint" /> | ||
</mxGeometry> | ||
</mxCell> | ||
<mxCell id="w1BsYSWlBb3kda8zS0cG-19" value="" style="endArrow=classic;html=1;rounded=0;" parent="1" edge="1"> | ||
<mxGeometry width="50" height="50" relative="1" as="geometry"> | ||
<mxPoint x="460" y="700" as="sourcePoint" /> | ||
<mxPoint x="40" y="700" as="targetPoint" /> | ||
</mxGeometry> | ||
</mxCell> | ||
<mxCell id="w1BsYSWlBb3kda8zS0cG-21" value="Heartbeater" style="ellipse;whiteSpace=wrap;html=1;fillColor=#e1d5e7;strokeColor=#9673a6;" parent="1" vertex="1"> | ||
<mxGeometry x="250" y="730" width="120" height="80" as="geometry" /> | ||
</mxCell> | ||
<mxCell id="w1BsYSWlBb3kda8zS0cG-23" value="" style="endArrow=classic;html=1;rounded=0;" parent="1" edge="1"> | ||
<mxGeometry width="50" height="50" relative="1" as="geometry"> | ||
<mxPoint x="230" y="769.5" as="sourcePoint" /> | ||
<mxPoint x="40" y="770" as="targetPoint" /> | ||
</mxGeometry> | ||
</mxCell> | ||
<mxCell id="w1BsYSWlBb3kda8zS0cG-24" value="Scheduler" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" parent="1" vertex="1"> | ||
<mxGeometry x="-90" y="310" width="100" height="490" as="geometry" /> | ||
</mxCell> | ||
<mxCell id="w1BsYSWlBb3kda8zS0cG-25" value="" style="endArrow=classic;html=1;rounded=0;" parent="1" edge="1"> | ||
<mxGeometry width="50" height="50" relative="1" as="geometry"> | ||
<mxPoint x="19" y="359" as="sourcePoint" /> | ||
<mxPoint x="49" y="359" as="targetPoint" /> | ||
</mxGeometry> | ||
</mxCell> | ||
<mxCell id="w1BsYSWlBb3kda8zS0cG-26" value="Client" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" parent="1" vertex="1"> | ||
<mxGeometry x="-85" y="830" width="90" height="40" as="geometry" /> | ||
</mxCell> | ||
<mxCell id="w1BsYSWlBb3kda8zS0cG-27" value="Other Executors" style="rounded=1;whiteSpace=wrap;html=1;fillColor=#dae8fc;strokeColor=#6c8ebf;" parent="1" vertex="1"> | ||
<mxGeometry x="-85" y="880" width="90" height="40" as="geometry" /> | ||
</mxCell> | ||
<mxCell id="w1BsYSWlBb3kda8zS0cG-29" value="" style="endArrow=classic;startArrow=classic;html=1;rounded=0;" parent="1" edge="1"> | ||
<mxGeometry width="50" height="50" relative="1" as="geometry"> | ||
<mxPoint x="13" y="876" as="sourcePoint" /> | ||
<mxPoint x="53" y="876" as="targetPoint" /> | ||
</mxGeometry> | ||
</mxCell> | ||
<mxCell id="w1BsYSWlBb3kda8zS0cG-30" value="Executor" style="text;html=1;align=center;verticalAlign=middle;resizable=0;points=[];autosize=1;strokeColor=none;fillColor=none;fontStyle=1" parent="1" vertex="1"> | ||
<mxGeometry x="379" y="240" width="70" height="30" as="geometry" /> | ||
</mxCell> | ||
<mxCell id="1erlsmTN53r9LNktAl-M-2" value="" style="endArrow=classic;html=1;rounded=0;" edge="1" parent="1"> | ||
<mxGeometry width="50" height="50" relative="1" as="geometry"> | ||
<mxPoint x="610" y="360" as="sourcePoint" /> | ||
<mxPoint x="640" y="830" as="targetPoint" /> | ||
<Array as="points"> | ||
<mxPoint x="640" y="360" /> | ||
</Array> | ||
</mxGeometry> | ||
</mxCell> | ||
</root> | ||
</mxGraphModel> | ||
</diagram> | ||
</mxfile> |
Binary file added
BIN
+60.9 KB
content/blog/2024-04-26-ballista-mvp-part5/ballista-mvp-executor.drawio.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
+++ | ||
title = "Ballista 分布式查询引擎 - 计划执行" | ||
date = 2024-04-26 | ||
+++ | ||
|
||
所有计划的执行均由 Executor 节点负责。 | ||
|
||
![ballista-mvp-executor](./ballista-mvp-executor.drawio.png) | ||
|
||
Executor 节点启动时 | ||
1. 启动 ExecutorGrpc 服务,负责接收 Scheduler 节点发送的任务和其他指令。 | ||
2. 启动 Flight 服务,Client 和其他 Executor 通过 Flight 协议读取本机执行结果数据 | ||
3. 向 Scheduler 注册 | ||
4. 启动心跳任务,定时向所有 Scheduler 节点发送心跳 | ||
5. 启动任务执行池,负责轮询队列获取新任务并执行 | ||
6. 启动执行状态上报任务,负责上报任务状态给 Scheduler | ||
|
||
Executor 执行的基本单元是 Task,一个 Task 会执行一个 Job 内的一个 stage 对应的一个 partition。 | ||
```rust | ||
pub struct TaskDefinition { | ||
/// 在执行图中的唯一的(单调递增) | ||
pub task_id: usize, | ||
/// task 执行次数 | ||
pub task_attempt_num: usize, | ||
/// 所属 job | ||
pub job_id: String, | ||
/// 所属 stage | ||
pub stage_id: usize, | ||
/// stage 执行次数 | ||
pub stage_attempt_num: usize, | ||
/// partition | ||
pub partition_id: usize, | ||
/// stage 执行计划 | ||
pub plan: Arc<dyn ExecutionPlan>, | ||
/// Scheduler 发起任务时间 | ||
pub launch_time: u64, | ||
/// 所属会话 | ||
pub session_id: String, | ||
/// 会话设置 | ||
pub props: Arc<HashMap<String, String>>, | ||
} | ||
``` | ||
|
||
**Ballista 运行 CPU 密集型任务** | ||
|
||
Ballista 采用 [Tokio](https://github.com/tokio-rs/tokio) 作为 Rust 异步运行时。Tokio 使用协作式调度,异步任务会在执行到 .await 时切换其他任务执行,使用 Tokio 须遵守 `"Async code should never spend a long time without reaching an .await."`(参考 Tokio maintainer 的[这篇博客](https://ryhl.io/blog/async-what-is-blocking/))。因此 CPU 密集型任务会长时间占用 CPU,导致运行时上的其他任务无法被及时执行(如心跳)。 | ||
|
||
Ballista 解决此问题的办法是 | ||
1. 采用单独的运行时专门跑 CPU 密集型任务 | ||
2. IO 密集型任务运行时与 CPU 密集型任务运行时通过队列通信(创新新任务/任务执行结果通知) | ||
3. 当然 CPU 密集型任务代码仍要遵守 `"Async code should never spend a long time without reaching an .await."` | ||
|
||
更多参考 Datafusion maintainer 的这篇文章:[https://thenewstack.io/using-rustlangs-async-tokio-runtime-for-cpu-bound-tasks/](https://thenewstack.io/using-rustlangs-async-tokio-runtime-for-cpu-bound-tasks/) 。 |