Skip to content

Commit

Permalink
orc support
Browse files Browse the repository at this point in the history
  • Loading branch information
zuochunwei committed Apr 27, 2023
1 parent 4262fcb commit d28074b
Show file tree
Hide file tree
Showing 7 changed files with 21 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ import io.glutenproject.GlutenConfig
import io.glutenproject.backendsapi._
import io.glutenproject.expression.WindowFunctionsBuilder
import io.glutenproject.substrait.rel.LocalFilesNode.ReadFileFormat
import io.glutenproject.substrait.rel.LocalFilesNode.ReadFileFormat.{DwrfReadFormat, ParquetReadFormat}
import io.glutenproject.substrait.rel.LocalFilesNode.ReadFileFormat.{OrcReadFormat, DwrfReadFormat, ParquetReadFormat}
import org.apache.spark.sql.catalyst.expressions.aggregate.{AggregateExpression, Count}
import org.apache.spark.sql.catalyst.expressions.{Alias, CumeDist, DenseRank, Literal, NamedExpression, PercentRank, Rank, RowNumber}
import org.apache.spark.sql.catalyst.plans.JoinType
Expand Down Expand Up @@ -72,6 +72,7 @@ object VeloxBackendSettings extends BackendSettings {
format match {
case ParquetReadFormat => validateTypes && validateFilePath
case DwrfReadFormat => true
case OrcReadFormat => true
case _ => false
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -407,4 +407,15 @@ class TestOperator extends WholeStageTransformerSuite {
assert(result.collect()(0).get(0).toString.equals("0.0345678900000000000000000000000000000"))
checkOperatorMatch[GlutenHashAggregateExecTransformer](result)
}

test("orc scan") {
val df = spark.read.format("orc").load("../cpp/velox/benchmarks/data/bm_lineitem/orc/lineitem.orc")
df.createOrReplaceTempView("lineitem_orc")
runQueryAndCompare(
"select l_orderkey from lineitem_orc") { df => {
assert(getExecutedPlan(df).count(plan => {
plan.isInstanceOf[BatchScanExecTransformer]}) == 1)
}
}
}
}
9 changes: 4 additions & 5 deletions cpp/velox/benchmarks/QueryBenchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ int main(int argc, char** argv) {
// The multi-thread performance is not correct.
// BENCHMARK(BM)->ThreadRange(36, 36);

#if 0
const auto& lineitemParquetPath = getFilePath("bm_lineitem/parquet/");
if (argc < 2) {
::benchmark::RegisterBenchmark(
Expand All @@ -80,18 +81,16 @@ int main(int argc, char** argv) {
::benchmark::RegisterBenchmark(
"select", BM, std::vector<std::string>{std::string(argv[1]) + "/"}, "select.json", "parquet");
}

#else
// For ORC debug.
/*
const auto& lineitemOrcPath = getFilePath("bm_lineitem/orc/");
if (argc < 2) {
::benchmark::RegisterBenchmark(
"select", BM, std::vector<std::string>{lineitemOrcPath}, "select.json", "orc");
::benchmark::RegisterBenchmark("select", BM, std::vector<std::string>{lineitemOrcPath}, "select.json", "orc");
} else {
::benchmark::RegisterBenchmark(
"select", BM, std::vector<std::string>{std::string(argv[1]) + "/"}, "select.json", "orc");
}
*/
#endif

::benchmark::RunSpecifiedBenchmarks();
::benchmark::Shutdown();
Expand Down
Binary file not shown.
2 changes: 1 addition & 1 deletion cpp/velox/compute/VeloxBackend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,7 @@ std::shared_ptr<ResultIterator> VeloxBackend::GetResultIterator(
getInfoAndIds(subVeloxPlanConverter_->splitInfos(), veloxPlan_->leafPlanNodeIds(), scanInfos, scanIds, streamIds);

auto veloxPool = AsWrappedVeloxAggregateMemoryPool(allocator, memPoolOptions_);
auto ctxPool = veloxPool->addLeafChild("result_iterator");
auto ctxPool = veloxPool->addAggregateChild("result_iterator");
auto wholestageIter = std::make_unique<WholeStageResultIteratorFirstStage>(
ctxPool, veloxPlan_, scanIds, setScanInfos, streamIds, "/tmp/test-spill", confMap_);
return std::make_shared<ResultIterator>(std::move(wholestageIter), shared_from_this());
Expand Down
1 change: 1 addition & 0 deletions cpp/velox/compute/VeloxInitializer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@ void VeloxInitializer::Init(std::unordered_map<std::string, std::string>& conf)
registerConnector(hiveConnector);
velox::parquet::registerParquetReaderFactory(velox::parquet::ParquetReaderType::NATIVE);
velox::dwrf::registerDwrfReaderFactory();
velox::dwrf::registerOrcReaderFactory();
// Register Velox functions
registerAllFunctions();
if (!facebook::velox::isRegisteredVectorSerde()) {
Expand Down
4 changes: 2 additions & 2 deletions ep/build-velox/src/get_velox.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

set -exu

VELOX_REPO=https://github.com/oap-project/velox.git
VELOX_BRANCH=main
VELOX_REPO=https://github.com/zuochunwei/velox.git
VELOX_BRANCH=orcSupport

#Set on run gluten on HDFS
ENABLE_HDFS=OFF
Expand Down

0 comments on commit d28074b

Please sign in to comment.