Skip to content

Commit

Permalink
Add Substrait-to-Velox conversion (#1048)
Browse files Browse the repository at this point in the history
Summary:
This PR adds the base layout for Substrait-to-Velox conversion. This change establishes the base layout of the conversion, and some Substrait plan representations can now be converted into Velox plans for computing. Support for more conversions will be added in follow-up pull requests.

Based on [Substrait Commit](substrait-io/substrait@9d9805b).

The used function names are based on: [Substrait PR](substrait-io/substrait#147).

Pull Request resolved: #1048

Reviewed By: kagamiori

Differential Revision: D34773052

Pulled By: pedroerp

fbshipit-source-id: c4c56a43486519b567aa1d715a0d64ce02a447d1
  • Loading branch information
rui-mo authored and facebook-github-bot committed Mar 11, 2022
1 parent 41b9b73 commit 2ed40db
Show file tree
Hide file tree
Showing 23 changed files with 4,051 additions and 0 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ option(VELOX_ENABLE_PARSE "Build parser used for unit tests." ON)
option(VELOX_ENABLE_EXAMPLES
"Build examples. This will enable VELOX_ENABLE_EXPRESSION automatically."
ON)
option(VELOX_ENABLE_SUBSTRAIT "Build Substrait-to-Velox converter." OFF)
option(VELOX_ENABLE_BENCHMARKS "Build velox top level benchmarks." OFF)
option(VELOX_ENABLE_S3 "Build S3 Connector" OFF)
option(VELOX_ENABLE_PARQUET "Enable Parquet support" OFF)
Expand All @@ -60,6 +61,7 @@ if(${VELOX_BUILD_MINIMAL})
set(VELOX_ENABLE_SPARK_FUNCTIONS OFF)
set(VELOX_ENABLE_EXAMPLES OFF)
set(VELOX_ENABLE_S3 OFF)
set(VELOX_ENABLE_SUBSTRAIT OFF)
endif()

if(${VELOX_BUILD_TESTING})
Expand Down
5 changes: 5 additions & 0 deletions velox/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,8 @@ endif()
if(${VELOX_CODEGEN_SUPPORT})
add_subdirectory(experimental/codegen)
endif()

# Substrait-to-Velox converter: the substrait subdirectory is only added to the
# build when VELOX_ENABLE_SUBSTRAIT is set.
if(${VELOX_ENABLE_SUBSTRAIT})
add_subdirectory(substrait)
endif()
54 changes: 54 additions & 0 deletions velox/substrait/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Set up Proto.
#
# proto_directory is passed to protoc as the --proto_path root, and
# substrait_proto_directory is where the .proto files are globbed from.
# PROTO_OUTPUT_DIR points back into the source tree, so the generated
# .pb.cc/.pb.h files are written next to the .proto files.
set(proto_directory ${CMAKE_CURRENT_SOURCE_DIR}/proto)
set(substrait_proto_directory ${CMAKE_CURRENT_SOURCE_DIR}/proto/substrait)
set(PROTO_OUTPUT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/proto/")
file(MAKE_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/proto/substrait)
# Collect the top-level Substrait protos and the ones under extensions/.
file(GLOB PROTO_FILES ${substrait_proto_directory}/*.proto
${substrait_proto_directory}/extensions/*.proto)
# For every .proto file, derive the names of the .pb.cc and .pb.h files that
# protoc is expected to produce, keeping the path relative to the substrait
# directory (e.g. the extensions/ prefix).
foreach(PROTO ${PROTO_FILES})
file(RELATIVE_PATH REL_PROTO ${substrait_proto_directory} ${PROTO})
string(REGEX REPLACE "\\.proto" "" PROTO_NAME ${REL_PROTO})
list(APPEND PROTO_SRCS "${PROTO_OUTPUT_DIR}/substrait/${PROTO_NAME}.pb.cc")
list(APPEND PROTO_HDRS "${PROTO_OUTPUT_DIR}/substrait/${PROTO_NAME}.pb.h")
endforeach()
# Mark the outputs as GENERATED since they do not exist at configure time.
set(PROTO_OUTPUT_FILES ${PROTO_HDRS} ${PROTO_SRCS})
set_source_files_properties(${PROTO_OUTPUT_FILES} PROPERTIES GENERATED TRUE)

# NOTE(review): the trailing "/," appears to act only as a throwaway last path
# component that DIRECTORY strips, leaving the substrait proto directory in
# PROTO_DIR - confirm before simplifying.
get_filename_component(PROTO_DIR ${substrait_proto_directory}/, DIRECTORY)

# Generate Substrait headers and sources with protoc. The rule depends on the
# .proto files themselves (rather than on their directory) so that editing any
# of them re-runs the generator.
add_custom_command(
  OUTPUT ${PROTO_OUTPUT_FILES}
  COMMAND protoc --proto_path ${proto_directory}/ --cpp_out ${PROTO_OUTPUT_DIR}
          ${PROTO_FILES}
  DEPENDS ${PROTO_FILES}
  COMMENT "Running PROTO compiler"
  VERBATIM)
add_custom_target(substrait_proto ALL DEPENDS ${PROTO_OUTPUT_FILES})
add_dependencies(substrait_proto protobuf::libprotobuf)

# The converter library compiles the protoc-generated sources together with
# the hand-written conversion code.
set(SRCS ${PROTO_SRCS} SubstraitUtils.cpp SubstraitToVeloxExpr.cpp
SubstraitToVeloxPlan.cpp TypeUtils.cpp)
add_library(velox_substrait_plan_converter ${SRCS})
# PUBLIC so that targets linking against the converter can also include the
# generated headers from PROTO_OUTPUT_DIR.
target_include_directories(velox_substrait_plan_converter
PUBLIC ${PROTO_OUTPUT_DIR})
target_link_libraries(velox_substrait_plan_converter velox_connector
velox_dwio_dwrf_common)

# The tests subdirectory is only added when VELOX_BUILD_TESTING is set.
if(${VELOX_BUILD_TESTING})
add_subdirectory(tests)
endif()
94 changes: 94 additions & 0 deletions velox/substrait/SubstraitToVeloxExpr.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/substrait/SubstraitToVeloxExpr.h"
#include "velox/substrait/TypeUtils.h"

namespace facebook::velox::substrait {

// Converts a Substrait field reference into a Velox field access expression.
//
// Only direct references are handled. The referenced column index is resolved
// through the Substrait parser and turned into a node name that combines
// 'inputPlanNodeId' with the column index. Any other reference kind is
// reported through VELOX_NYI.
std::shared_ptr<const core::FieldAccessTypedExpr>
SubstraitVeloxExprConverter::toVeloxExpr(
    const ::substrait::Expression::FieldReference& sField,
    int32_t inputPlanNodeId) {
  auto typeCase = sField.reference_type_case();
  switch (typeCase) {
    case ::substrait::Expression::FieldReference::ReferenceTypeCase::
        kDirectReference: {
      // Bind by const reference: the protobuf accessor returns a reference to
      // the sub-message owned by 'sField', so a plain 'auto' would deep-copy
      // the message for no benefit.
      const auto& dRef = sField.direct_reference();
      int32_t colIdx = subParser_->parseReferenceSegment(dRef);
      auto fieldName = subParser_->makeNodeName(inputPlanNodeId, colIdx);
      // TODO: Get the input type and support different types here.
      return std::make_shared<const core::FieldAccessTypedExpr>(
          DOUBLE(), fieldName);
    }
    default:
      VELOX_NYI(
          "Substrait conversion not supported for Reference '{}'", typeCase);
  }
}

// Converts a Substrait scalar function call into a Velox call expression.
//
// The arguments are converted first, in order. The function name is then
// looked up in the function map by the Substrait function reference, and the
// declared output type is parsed and mapped to a Velox type.
std::shared_ptr<const core::ITypedExpr>
SubstraitVeloxExprConverter::toVeloxExpr(
    const ::substrait::Expression::ScalarFunction& sFunc,
    int32_t inputPlanNodeId) {
  const auto& substraitArgs = sFunc.args();

  std::vector<std::shared_ptr<const core::ITypedExpr>> veloxArgs;
  veloxArgs.reserve(substraitArgs.size());
  for (const auto& substraitArg : substraitArgs) {
    veloxArgs.emplace_back(toVeloxExpr(substraitArg, inputPlanNodeId));
  }

  // Resolve the Velox function name registered for this function reference.
  auto functionName = subParser_->findVeloxFunction(
      functionMap_, sFunc.function_reference());

  // Map the Substrait output type to the corresponding Velox type.
  auto parsedOutputType = subParser_->parseType(sFunc.output_type());
  auto resultType = toVeloxType(parsedOutputType->type);

  return std::make_shared<const core::CallTypedExpr>(
      resultType, std::move(veloxArgs), functionName);
}

// Converts a Substrait literal into a Velox constant expression.
//
// Only fp64 and boolean literals are handled; every other literal kind is
// reported through VELOX_NYI.
std::shared_ptr<const core::ConstantTypedExpr>
SubstraitVeloxExprConverter::toVeloxExpr(
    const ::substrait::Expression::Literal& sLit) {
  using LiteralKind = ::substrait::Expression_Literal::LiteralTypeCase;

  const auto literalKind = sLit.literal_type_case();
  if (literalKind == LiteralKind::kFp64) {
    return std::make_shared<core::ConstantTypedExpr>(sLit.fp64());
  }
  if (literalKind == LiteralKind::kBoolean) {
    return std::make_shared<core::ConstantTypedExpr>(sLit.boolean());
  }
  VELOX_NYI(
      "Substrait conversion not supported for type case '{}'", literalKind);
}

// Converts a generic Substrait expression into a Velox expression by
// dispatching on the kind of expression it holds.
//
// Literals, scalar functions and selections (field references) are forwarded
// to the matching overload; 'inputPlanNodeId' is passed on to the overloads
// that take it. Any other expression kind is reported through VELOX_NYI.
std::shared_ptr<const core::ITypedExpr>
SubstraitVeloxExprConverter::toVeloxExpr(
    const ::substrait::Expression& sExpr,
    int32_t inputPlanNodeId) {
  auto typeCase = sExpr.rex_type_case();
  switch (typeCase) {
    case ::substrait::Expression::RexTypeCase::kLiteral:
      return toVeloxExpr(sExpr.literal());
    case ::substrait::Expression::RexTypeCase::kScalarFunction:
      return toVeloxExpr(sExpr.scalar_function(), inputPlanNodeId);
    case ::substrait::Expression::RexTypeCase::kSelection:
      return toVeloxExpr(sExpr.selection(), inputPlanNodeId);
    default:
      VELOX_NYI(
          "Substrait conversion not supported for Expression '{}'", typeCase);
  }
}

} // namespace facebook::velox::substrait
65 changes: 65 additions & 0 deletions velox/substrait/SubstraitToVeloxExpr.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include "velox/core/Expressions.h"
#include "velox/substrait/SubstraitUtils.h"

namespace facebook::velox::substrait {

/// Converts Substrait expression messages (field references, scalar
/// functions, literals and generic expressions) into Velox typed expressions.
class SubstraitVeloxExprConverter {
public:
/// Creates a converter that keeps its own copy of both arguments.
/// @param subParser A Substrait parser used to convert Substrait
/// representations into recognizable representations.
/// @param functionMap A pre-constructed map storing the relations between
/// the function id and the function name.
SubstraitVeloxExprConverter(
const std::shared_ptr<SubstraitParser>& subParser,
const std::unordered_map<uint64_t, std::string>& functionMap)
: subParser_(subParser), functionMap_(functionMap) {}

/// Converts a Substrait field reference into a Velox field access
/// expression.
/// @param sField The Substrait field reference to convert.
/// @param inputPlanNodeId Id of the input plan node; used together with the
/// referenced column index to build the field name.
std::shared_ptr<const core::FieldAccessTypedExpr> toVeloxExpr(
const ::substrait::Expression::FieldReference& sField,
int32_t inputPlanNodeId);

/// Converts a Substrait ScalarFunction into a Velox expression.
/// @param sFunc The Substrait scalar function to convert.
/// @param inputPlanNodeId Id of the input plan node, forwarded when
/// converting the function arguments.
std::shared_ptr<const core::ITypedExpr> toVeloxExpr(
const ::substrait::Expression::ScalarFunction& sFunc,
int32_t inputPlanNodeId);

/// Converts a Substrait Literal into a Velox constant expression.
/// @param sLit The Substrait literal to convert.
std::shared_ptr<const core::ConstantTypedExpr> toVeloxExpr(
const ::substrait::Expression::Literal& sLit);

/// Converts a generic Substrait Expression into a Velox expression.
/// @param sExpr The Substrait expression to convert.
/// @param inputPlanNodeId Id of the input plan node, forwarded to the
/// overloads that need it.
std::shared_ptr<const core::ITypedExpr> toVeloxExpr(
const ::substrait::Expression& sExpr,
int32_t inputPlanNodeId);

private:
/// The Substrait parser used to convert Substrait representations into
/// recognizable representations.
std::shared_ptr<SubstraitParser> subParser_;

/// The map storing the relations between the function id and the function
/// name. Held by value, i.e. copied from the constructor argument.
std::unordered_map<uint64_t, std::string> functionMap_;
};

} // namespace facebook::velox::substrait
Loading

0 comments on commit 2ed40db

Please sign in to comment.