Skip to content

Commit

Permalink
solve compiling issues and add a unit test
Browse files Browse the repository at this point in the history
  • Loading branch information
rui-mo committed Feb 22, 2022
1 parent f5b6e5c commit 461b040
Show file tree
Hide file tree
Showing 21 changed files with 1,822 additions and 89 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ option(VELOX_ENABLE_PARSE "Build parser used for unit tests." ON)
option(VELOX_ENABLE_EXAMPLES
"Build examples. This will enable VELOX_ENABLE_EXPRESSION automatically."
ON)
option(VELOX_ENABLE_SUBSTRAIT "Buid Substrait-to-Velox converter." ON)
option(VELOX_ENABLE_BENCHMARKS "Build velox top level benchmarks." OFF)
option(VELOX_ENABLE_S3 "Build S3 Connector" OFF)
option(VELOX_ENABLE_PARQUET "Enable Parquet support" OFF)
Expand All @@ -60,6 +61,7 @@ if(${VELOX_BUILD_MINIMAL})
set(VELOX_ENABLE_SPARK_FUNCTIONS OFF)
set(VELOX_ENABLE_EXAMPLES OFF)
set(VELOX_ENABLE_S3 OFF)
set(VELOX_ENABLE_SUBSTRAIT OFF)
endif()

if(${VELOX_BUILD_TESTING})
Expand Down
5 changes: 5 additions & 0 deletions velox/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,8 @@ endif()
if(${VELOX_CODEGEN_SUPPORT})
add_subdirectory(experimental/codegen)
endif()

# substrait converter
if(${VELOX_ENABLE_SUBSTRAIT})
add_subdirectory(substrait_converter)
endif()
60 changes: 60 additions & 0 deletions velox/substrait_converter/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Set up Proto
set(proto_directory ${CMAKE_CURRENT_SOURCE_DIR}/proto)
set(substrait_proto_directory ${CMAKE_CURRENT_SOURCE_DIR}/proto/substrait)
set(PROTO_OUTPUT_DIR "${CMAKE_CURRENT_SOURCE_DIR}/proto/")
file(MAKE_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/proto/substrait)
file(GLOB PROTO_FILES ${substrait_proto_directory}/*.proto
${substrait_proto_directory}/extensions/*.proto)
FOREACH(PROTO ${PROTO_FILES})
file(RELATIVE_PATH REL_PROTO ${substrait_proto_directory} ${PROTO})
string(REGEX REPLACE "\\.proto" "" PROTO_NAME ${REL_PROTO})
LIST(APPEND PROTO_SRCS "${PROTO_OUTPUT_DIR}/substrait/${PROTO_NAME}.pb.cc")
LIST(APPEND PROTO_HDRS "${PROTO_OUTPUT_DIR}/substrait/${PROTO_NAME}.pb.h")
ENDFOREACH()
set(PROTO_OUTPUT_FILES ${PROTO_HDRS} ${PROTO_SRCS})
set_source_files_properties(${PROTO_OUTPUT_FILES} PROPERTIES GENERATED TRUE)

get_filename_component(PROTO_DIR ${substrait_proto_directory}/,
DIRECTORY)

# Generate Substrait hearders
add_custom_command(OUTPUT ${PROTO_OUTPUT_FILES}
COMMAND protoc
--proto_path
${proto_directory}/
--cpp_out
${PROTO_OUTPUT_DIR}
${PROTO_FILES}
DEPENDS ${PROTO_DIR}
COMMENT "Running PROTO compiler"
VERBATIM)
add_custom_target(substrait_proto ALL DEPENDS ${PROTO_OUTPUT_FILES})
add_dependencies(substrait_proto protobuf::libprotobuf)

add_library(velox_substrait_utils SubstraitUtils.cpp)
target_include_directories(velox_substrait_utils PUBLIC ${PROTO_OUTPUT_DIR})

add_library(velox_substrait_expr_converter SubstraitToVeloxExpr.cpp)
target_link_libraries(velox_substrait_expr_converter velox_substrait_utils velox_connector velox_dwio_dwrf_common)

add_library(velox_substrait_plan_converter SubstraitToVeloxPlan.cpp)
target_link_libraries(velox_substrait_plan_converter velox_substrait_expr_converter)

# Disable tests temporarily for some linking issues are not resolved.
#if(${VELOX_BUILD_TESTING})
# add_subdirectory(tests)
#endif()
Original file line number Diff line number Diff line change
Expand Up @@ -14,14 +14,14 @@
* limitations under the License.
*/

#include "substrait_to_velox_expr.h"
#include "SubstraitToVeloxExpr.h"

using namespace facebook::velox;
using namespace facebook::velox::exec;
using namespace facebook::velox::connector;
using namespace facebook::velox::dwio::common;

namespace facebook::velox::substrait {
namespace facebook::velox::substraitconverter {

SubstraitVeloxExprConverter::SubstraitVeloxExprConverter(
const std::shared_ptr<SubstraitParser>& sub_parser,
Expand All @@ -35,15 +35,18 @@ SubstraitVeloxExprConverter::toVeloxExpr(
const substrait::Expression::FieldReference& sfield,
const int32_t& input_plan_node_id) {
switch (sfield.reference_type_case()) {
case substrait::Expression::FieldReference::ReferenceTypeCase::kDirectReference: {
case substrait::Expression::FieldReference::ReferenceTypeCase::
kDirectReference: {
auto dref = sfield.direct_reference();
int32_t col_idx = parseReferenceSegment(dref);
auto field_name = sub_parser_->makeNodeName(input_plan_node_id, col_idx);
// FIXME: get the input type
return std::make_shared<const core::FieldAccessTypedExpr>(DOUBLE(), field_name);
return std::make_shared<const core::FieldAccessTypedExpr>(
DOUBLE(), field_name);
break;
}
case substrait::Expression::FieldReference::ReferenceTypeCase::kMaskedReference: {
case substrait::Expression::FieldReference::ReferenceTypeCase::
kMaskedReference: {
throw new std::runtime_error("not supported");
break;
}
Expand All @@ -53,7 +56,8 @@ SubstraitVeloxExprConverter::toVeloxExpr(
}
}

std::shared_ptr<const core::ITypedExpr> SubstraitVeloxExprConverter::toVeloxExpr(
std::shared_ptr<const core::ITypedExpr>
SubstraitVeloxExprConverter::toVeloxExpr(
const substrait::Expression::ScalarFunction& sfunc,
const int32_t& input_plan_node_id) {
std::vector<std::shared_ptr<const core::ITypedExpr>> params;
Expand All @@ -63,14 +67,16 @@ std::shared_ptr<const core::ITypedExpr> SubstraitVeloxExprConverter::toVeloxExpr
}
auto function_id = sfunc.function_reference();
auto function_name = sub_parser_->findFunction(functions_map_, function_id);
auto velox_function = sub_parser_->substrait_velox_function_map[function_name];
auto velox_function =
sub_parser_->substrait_velox_function_map[function_name];
auto sub_type = sub_parser_->parseType(sfunc.output_type());
auto velox_type = getVeloxType(sub_type->type);
return std::make_shared<const core::CallTypedExpr>(velox_type, std::move(params),
velox_function);
return std::make_shared<const core::CallTypedExpr>(
velox_type, std::move(params), velox_function);
}

std::shared_ptr<const core::ConstantTypedExpr> SubstraitVeloxExprConverter::toVeloxExpr(
std::shared_ptr<const core::ConstantTypedExpr>
SubstraitVeloxExprConverter::toVeloxExpr(
const substrait::Expression::Literal& slit) {
switch (slit.literal_type_case()) {
case substrait::Expression_Literal::LiteralTypeCase::kFp64: {
Expand All @@ -89,8 +95,10 @@ std::shared_ptr<const core::ConstantTypedExpr> SubstraitVeloxExprConverter::toVe
}
}

std::shared_ptr<const core::ITypedExpr> SubstraitVeloxExprConverter::toVeloxExpr(
const substrait::Expression& sexpr, const int32_t& input_plan_node_id) {
std::shared_ptr<const core::ITypedExpr>
SubstraitVeloxExprConverter::toVeloxExpr(
const substrait::Expression& sexpr,
const int32_t& input_plan_node_id) {
std::shared_ptr<const core::ITypedExpr> velox_expr;
switch (sexpr.rex_type_case()) {
case substrait::Expression::RexTypeCase::kLiteral: {
Expand Down Expand Up @@ -130,7 +138,8 @@ TypePtr SubstraitVeloxExprConverter::getVeloxType(std::string type_name) {
int32_t SubstraitVeloxExprConverter::parseReferenceSegment(
const substrait::Expression::ReferenceSegment& sref) {
switch (sref.reference_type_case()) {
case substrait::Expression::ReferenceSegment::ReferenceTypeCase::kStructField: {
case substrait::Expression::ReferenceSegment::ReferenceTypeCase::
kStructField: {
auto sfield = sref.struct_field();
auto field_id = sfield.field();
return field_id;
Expand Down Expand Up @@ -166,7 +175,9 @@ class SubstraitVeloxExprConverter::FilterInfo {
is_initialized_ = true;
}
}
bool isInitialized() { return is_initialized_ ? true : false; }
bool isInitialized() {
return is_initialized_ ? true : false;
}

std::optional<double> left_ = std::nullopt;
std::optional<double> right_ = std::nullopt;
Expand All @@ -193,13 +204,18 @@ void SubstraitVeloxExprConverter::getFlatConditions(
} else {
(*scalar_functions).push_back(sfunc);
}
break;
}
default:
throw new std::runtime_error("Not supported in getFlatConditions.");
break;
}
}

hive::SubfieldFilters SubstraitVeloxExprConverter::toVeloxFilter(
const std::vector<std::string>& input_name_list,
const std::vector<TypePtr>& input_type_list, const substrait::Expression& sfilter) {
const std::vector<TypePtr>& input_type_list,
const substrait::Expression& sfilter) {
hive::SubfieldFilters filters;
std::unordered_map<int, std::shared_ptr<FilterInfo>> col_info_map;
for (int idx = 0; idx < input_name_list.size(); idx++) {
Expand All @@ -209,8 +225,8 @@ hive::SubfieldFilters SubstraitVeloxExprConverter::toVeloxFilter(
std::vector<substrait::Expression_ScalarFunction> scalar_functions;
getFlatConditions(sfilter, &scalar_functions);
for (auto& scalar_function : scalar_functions) {
auto filter_name =
sub_parser_->findFunction(functions_map_, scalar_function.function_reference());
auto filter_name = sub_parser_->findFunction(
functions_map_, scalar_function.function_reference());
int32_t col_idx;
// FIXME: different type support
double val;
Expand Down Expand Up @@ -270,11 +286,16 @@ hive::SubfieldFilters SubstraitVeloxExprConverter::toVeloxFilter(
bool null_allowed = filter_info->null_allowed_;
filters[common::Subfield(input_name_list[idx])] =
std::make_unique<common::DoubleRange>(
left_bound, left_unbounded, left_exclusive, right_bound, right_unbounded,
right_exclusive, null_allowed);
left_bound,
left_unbounded,
left_exclusive,
right_bound,
right_unbounded,
right_exclusive,
null_allowed);
}
}
return filters;
}

} // namespace facebook::velox::substrait
} // namespace facebook::velox::substraitconverter
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

#include <folly/executors/IOThreadPoolExecutor.h>

#include "substrait_utils.h"
#include "SubstraitUtils.h"
#include "velox/common/caching/DataCache.h"
#include "velox/common/file/FileSystems.h"
#include "velox/connectors/hive/FileHandle.h"
Expand All @@ -44,7 +44,7 @@
using namespace facebook::velox;
using namespace facebook::velox::exec;

namespace facebook::velox::substrait {
namespace facebook::velox::substraitconverter {

// This class is used to convert Substrait representations to Velox expressions.
class SubstraitVeloxExprConverter {
Expand Down Expand Up @@ -77,4 +77,4 @@ class SubstraitVeloxExprConverter {
class FilterInfo;
};

} // namespace facebook::velox::substrait
} // namespace facebook::velox::substraitconverter
Loading

0 comments on commit 461b040

Please sign in to comment.