From 984b47d9b12b897f14c617a2020824e5e63de914 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 18 Nov 2024 20:34:39 +0800 Subject: [PATCH] branch-3.0: [fix](external) fix count(*) performance fallback in external table query #44172 (#44193) Cherry-picked from #44172 Co-authored-by: Mingyu Chen (Rayner) --- .../org/apache/doris/datasource/FileQueryScanNode.java | 3 +++ .../main/java/org/apache/doris/planner/OlapScanNode.java | 7 ------- .../src/main/java/org/apache/doris/planner/ScanNode.java | 7 ++++++- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java index e75e2d144eff2e..b1cd799d5c4168 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java @@ -297,6 +297,7 @@ public void createScanRangeLocations() throws UserException { location.setServer(new TNetworkAddress(backend.getHost(), backend.getBePort())); curLocations.addToLocations(location); scanRangeLocations.add(curLocations); + scanBackendIds.add(backendId); return; } } @@ -346,6 +347,7 @@ public void createScanRangeLocations() throws UserException { // However, even one ScanNode instance can provide maximum scanning concurrency. scanRangeLocations.add(curLocations); setLocationPropertiesIfNecessary(backend, locationType, locationProperties); + scanBackendIds.add(backend.getId()); } } else { List inputSplits = getSplits(); @@ -363,6 +365,7 @@ public void createScanRangeLocations() throws UserException { scanRangeLocations.add(splitToScanRange(backend, locationProperties, split, pathPartitionKeys)); totalFileSize += split.getLength(); } + scanBackendIds.add(backend.getId()); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index afdffc748c01a6..41c055062e3e9b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -192,8 +192,6 @@ public class OlapScanNode extends ScanNode { private Set sampleTabletIds = Sets.newHashSet(); private TableSample tableSample; - private HashSet scanBackendIds = new HashSet<>(); - private Map tabletId2BucketSeq = Maps.newHashMap(); // a bucket seq may map to many tablets, and each tablet has a // TScanRangeLocations. @@ -1919,9 +1917,4 @@ public boolean pushDownAggNoGroupingCheckCol(FunctionCallExpr aggExpr, Column co public int getScanRangeNum() { return getScanTabletIds().size(); } - - @Override - public int numScanBackends() { - return scanBackendIds.size(); - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java index b392075be9b938..7a36fbb3b047aa 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/ScanNode.java @@ -110,6 +110,11 @@ public abstract class ScanNode extends PlanNode implements SplitGenerator { protected TableSnapshot tableSnapshot; + // Save the id of backends which this scan node will be executed on. + // This is also important for local shuffle logic. + // Now only OlapScanNode and FileQueryScanNode implement this. + protected HashSet scanBackendIds = new HashSet<>(); + public ScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName, StatisticalType statisticalType) { super(id, desc.getId().asList(), planNodeName, statisticalType); this.desc = desc; @@ -731,7 +736,7 @@ && getScanRangeNum() } public int numScanBackends() { - return 0; + return scanBackendIds.size(); } public int getScanRangeNum() {