Skip to content
This repository has been archived by the owner on Sep 18, 2023. It is now read-only.

[NSE-581] Improve GetArrayItem(Split()) performance #933

Merged
merged 8 commits into from
May 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -387,6 +387,26 @@ object ColumnarExpressionConverter extends Logging {
r.scale,
convertBoundRefToAttrRef = convertBoundRefToAttrRef),
expr)
case getArrayItem: GetArrayItem =>
getArrayItem.child match {
case strSplit: StringSplit =>
ColumnarTernaryOperator.create(
replaceWithColumnarExpression(
strSplit.str,
attributeSeq,
convertBoundRefToAttrRef = convertBoundRefToAttrRef),
replaceWithColumnarExpression(
strSplit.regex,
convertBoundRefToAttrRef = convertBoundRefToAttrRef),
replaceWithColumnarExpression(
getArrayItem.ordinal,
convertBoundRefToAttrRef = convertBoundRefToAttrRef),
new StringSplit(strSplit.str, strSplit.regex, getArrayItem.ordinal))
case other =>
throw new UnsupportedOperationException(
s" --> ${other.getClass} | ${other} is not currently" +
s" supported as child of GetArrayItem.")
}
case b: BinaryExpression =>
logInfo(s"${expr.getClass} ${expr} is supported, no_cal is $check_if_no_calculation.")
ColumnarBinaryExpression.create(
Expand Down Expand Up @@ -441,6 +461,15 @@ object ColumnarExpressionConverter extends Logging {
return true
case c: Concat =>
c.children.map(containsSubquery).exists(_ == true)
case getArrayItem: GetArrayItem =>
getArrayItem.child match {
case strSplit: StringSplit =>
strSplit.children.map(containsSubquery).exists(_ == true)
case other =>
throw new UnsupportedOperationException(
s" --> ${other.getClass} | ${other} is not currently" +
s" supported as child of GetArrayItem.")
}
case b: BinaryExpression =>
containsSubquery(b.left) || containsSubquery(b.right)
case s: String2TrimExpression =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ class ColumnarSubString(str: Expression, pos: Expression, len: Expression, origi
}

// StringSplit: not functionally ready; needs array type support.
class ColumnarStringSplit(child: Expression, regex: Expression,
class ColumnarStringSplitPart(child: Expression, regex: Expression,
limit: Expression, original: Expression)
extends StringSplit(child: Expression,
regex: Expression, limit: Expression)
Expand All @@ -86,11 +86,12 @@ class ColumnarStringSplit(child: Expression, regex: Expression,
val supportedTypes = List(
StringType
)
if (supportedTypes.indexOf(child.dataType) == -1) {
if (supportedTypes.indexOf(dataType) == -1) {
throw new UnsupportedOperationException(
s"${child.dataType} is not supported in ColumnarStringSplit.")
s"${child} | ${child.dataType} is not supported in ColumnarStringSplitPart.")
}
}
override def dataType: DataType = StringType

override def doColumnarCodeGen(args: java.lang.Object)
: (TreeNode, ArrowType) = {
Expand All @@ -101,7 +102,7 @@ class ColumnarStringSplit(child: Expression, regex: Expression,
val (limit_node, limitType): (TreeNode, ArrowType) =
limit.asInstanceOf[ColumnarExpression].doColumnarCodeGen(args)

val resultType = new ArrowType.Bool()
val resultType = new ArrowType.Utf8()
val funcNode =
TreeBuilder.makeFunction(
"split_part", Lists.newArrayList(child_node, regex_node,
Expand Down Expand Up @@ -271,8 +272,8 @@ object ColumnarTernaryOperator {
case ss: Substring =>
new ColumnarSubString(src, arg1, arg2, ss)
// Currently not supported.
// case a: StringSplit =>
// new ColumnarStringSplit(str, a.regex, a.limit, a)
case ssp: StringSplit =>
new ColumnarStringSplitPart(src, arg1, arg2, ssp)
case st: StringTranslate =>
new ColumnarStringTranslate(src, arg1, arg2, st)
case sl: StringLocate =>
Expand Down