Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SPARK-33890][SQL] Improve the implement of trim/trimleft/trimright #30905

Closed
wants to merge 47 commits into from
Closed
Changes from 42 commits
Commits
Show all changes
47 commits
Select commit Hold shift + click to select a range
4a6f903
Reuse completeNextStageWithFetchFailure
beliefer Jun 19, 2020
96456e2
Merge remote-tracking branch 'upstream/master'
beliefer Jul 1, 2020
4314005
Merge remote-tracking branch 'upstream/master'
beliefer Jul 3, 2020
d6af4a7
Merge remote-tracking branch 'upstream/master'
beliefer Jul 9, 2020
f69094f
Merge remote-tracking branch 'upstream/master'
beliefer Jul 16, 2020
b86a42d
Merge remote-tracking branch 'upstream/master'
beliefer Jul 25, 2020
2ac5159
Merge branch 'master' of github.com:beliefer/spark
beliefer Jul 25, 2020
9021d6c
Merge remote-tracking branch 'upstream/master'
beliefer Jul 28, 2020
74a2ef4
Merge branch 'master' of github.com:beliefer/spark
beliefer Jul 28, 2020
9828158
Merge remote-tracking branch 'upstream/master'
beliefer Jul 31, 2020
9cd1aaf
Merge remote-tracking branch 'upstream/master'
beliefer Aug 5, 2020
abfcbb9
Merge remote-tracking branch 'upstream/master'
beliefer Aug 26, 2020
07c6c81
Merge remote-tracking branch 'upstream/master'
beliefer Sep 1, 2020
580130b
Merge remote-tracking branch 'upstream/master'
beliefer Sep 2, 2020
3712808
Merge branch 'master' of github.com:beliefer/spark
beliefer Sep 11, 2020
6107413
Merge remote-tracking branch 'upstream/master'
beliefer Sep 11, 2020
4b799b4
Merge remote-tracking branch 'upstream/master'
beliefer Sep 14, 2020
ee0ecbf
Merge remote-tracking branch 'upstream/master'
beliefer Sep 18, 2020
596bc61
Merge remote-tracking branch 'upstream/master'
beliefer Sep 24, 2020
0164e2f
Merge remote-tracking branch 'upstream/master'
beliefer Sep 27, 2020
90b79fc
Merge remote-tracking branch 'upstream/master'
beliefer Sep 29, 2020
2cef3a9
Merge remote-tracking branch 'upstream/master'
beliefer Oct 13, 2020
c26b64f
Merge remote-tracking branch 'upstream/master'
beliefer Oct 19, 2020
2e02cd2
Merge remote-tracking branch 'upstream/master'
beliefer Oct 22, 2020
a6d0741
Merge remote-tracking branch 'upstream/master'
beliefer Oct 28, 2020
82e5b2c
Merge remote-tracking branch 'upstream/master'
beliefer Nov 4, 2020
70bbf5d
Merge remote-tracking branch 'upstream/master'
beliefer Nov 6, 2020
126a51e
Merge remote-tracking branch 'upstream/master'
beliefer Nov 13, 2020
f2ceacd
Merge remote-tracking branch 'upstream/master'
beliefer Nov 19, 2020
5ad208f
Merge remote-tracking branch 'upstream/master'
beliefer Nov 23, 2020
970917e
Merge remote-tracking branch 'upstream/master'
beliefer Dec 1, 2020
ddc1b8b
Merge remote-tracking branch 'upstream/master'
beliefer Dec 3, 2020
2b1ed0b
Merge remote-tracking branch 'upstream/master'
beliefer Dec 4, 2020
a7d3729
Merge remote-tracking branch 'upstream/master'
beliefer Dec 7, 2020
17ef8fc
Merge remote-tracking branch 'upstream/master'
beliefer Dec 10, 2020
f7a2902
Merge remote-tracking branch 'upstream/master'
beliefer Dec 11, 2020
a803c9b
Merge remote-tracking branch 'upstream/master'
beliefer Dec 21, 2020
9d79697
Merge remote-tracking branch 'upstream/master'
beliefer Dec 21, 2020
7127f5e
Merge remote-tracking branch 'upstream/master'
beliefer Dec 21, 2020
21fb9b9
Improve implement of trim
beliefer Dec 23, 2020
7d32023
Optimize code
beliefer Dec 28, 2020
ba91af5
Fix Can not interpolate scala.collection.immutable.$colon$colon into …
beliefer Dec 28, 2020
42f75f3
Adjust code
beliefer Dec 29, 2020
b8ad0cb
Optimize code
beliefer Dec 29, 2020
8f15837
Optimize code
beliefer Dec 29, 2020
fb13fe4
Merge branch 'master' into SPARK-33890
beliefer Dec 29, 2020
3331b48
fix indentation
cloud-fan Dec 29, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -756,6 +756,54 @@ trait String2TrimExpression extends Expression with ImplicitCastInputTypes {
override def nullable: Boolean = children.exists(_.nullable)
override def foldable: Boolean = children.forall(_.foldable)

protected def doEval(srcString: UTF8String): Any
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These will definitely return UTF8String right?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. I updated it.

protected def doEval(srcString: UTF8String, trimString: UTF8String): Any

override def eval(input: InternalRow): Any = {
val srcString = srcStr.eval(input).asInstanceOf[UTF8String]
if (srcString == null) {
null
} else if (trimStr.isDefined) {
doEval(srcString, trimStr.get.eval(input).asInstanceOf[UTF8String])
} else {
doEval(srcString)
}
}

protected val trimMethod: String

override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val evals = children.map(_.genCode(ctx))
val srcString = evals(0)

if (evals.length == 1) {
ev.copy(code = evals.map(_.code) :+
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit:

ev.copy(code = code"""
  |${evals.head.code}
  |...""".stripMargin 

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK

code"""
|boolean ${ev.isNull} = false;
|UTF8String ${ev.value} = null;
|if (${srcString.isNull}) {
| ${ev.isNull} = true;
|} else {
| ${ev.value} = ${srcString.value}.$trimMethod();
|}
""")
} else {
val trimString = evals(1)
ev.copy(code = evals.map(_.code) :+
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can skip evaluating trim string if possible

ev.copy(code = code"""
  |${evals.head.code}
  |if (${srcString.isNull}) {
  | ...
  |} else {
  |  ${trimString.code}
  |  if (${trimString.isNull}) ...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good idea.

code"""
|boolean ${ev.isNull} = false;
|UTF8String ${ev.value} = null;
|if (${srcString.isNull}) {
| ${ev.isNull} = true;
|} else if (${trimString.isNull}) {
| ${ev.isNull} = true;
|} else {
| ${ev.value} = ${srcString.value}.$trimMethod(${trimString.value});
|}
""")
}
}

override def sql: String = if (trimStr.isDefined) {
s"TRIM($direction ${trimStr.get.sql} FROM ${srcStr.sql})"
} else {
Expand Down Expand Up @@ -844,51 +892,12 @@ case class StringTrim(

override protected def direction: String = "BOTH"

override def eval(input: InternalRow): Any = {
val srcString = srcStr.eval(input).asInstanceOf[UTF8String]
if (srcString == null) {
null
} else {
if (trimStr.isDefined) {
srcString.trim(trimStr.get.eval(input).asInstanceOf[UTF8String])
} else {
srcString.trim()
}
}
}
override def doEval(srcString: UTF8String): Any = srcString.trim()

override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val evals = children.map(_.genCode(ctx))
val srcString = evals(0)
override def doEval(srcString: UTF8String, trimString: UTF8String): Any =
srcString.trim(trimString)

if (evals.length == 1) {
ev.copy(evals.map(_.code) :+ code"""
boolean ${ev.isNull} = false;
UTF8String ${ev.value} = null;
if (${srcString.isNull}) {
${ev.isNull} = true;
} else {
${ev.value} = ${srcString.value}.trim();
}""")
} else {
val trimString = evals(1)
val getTrimFunction =
s"""
if (${trimString.isNull}) {
${ev.isNull} = true;
} else {
${ev.value} = ${srcString.value}.trim(${trimString.value});
}"""
ev.copy(evals.map(_.code) :+ code"""
boolean ${ev.isNull} = false;
UTF8String ${ev.value} = null;
if (${srcString.isNull}) {
${ev.isNull} = true;
} else {
$getTrimFunction
}""")
}
}
override val trimMethod: String = "trim"
}

object StringTrimLeft {
Expand Down Expand Up @@ -937,51 +946,12 @@ case class StringTrimLeft(

override protected def direction: String = "LEADING"

override def eval(input: InternalRow): Any = {
val srcString = srcStr.eval(input).asInstanceOf[UTF8String]
if (srcString == null) {
null
} else {
if (trimStr.isDefined) {
srcString.trimLeft(trimStr.get.eval(input).asInstanceOf[UTF8String])
} else {
srcString.trimLeft()
}
}
}
override def doEval(srcString: UTF8String): Any = srcString.trimLeft()

override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val evals = children.map(_.genCode(ctx))
val srcString = evals(0)
override def doEval(srcString: UTF8String, trimString: UTF8String): Any =
srcString.trimLeft(trimString)

if (evals.length == 1) {
ev.copy(evals.map(_.code) :+ code"""
boolean ${ev.isNull} = false;
UTF8String ${ev.value} = null;
if (${srcString.isNull}) {
${ev.isNull} = true;
} else {
${ev.value} = ${srcString.value}.trimLeft();
}""")
} else {
val trimString = evals(1)
val getTrimLeftFunction =
s"""
if (${trimString.isNull}) {
${ev.isNull} = true;
} else {
${ev.value} = ${srcString.value}.trimLeft(${trimString.value});
}"""
ev.copy(evals.map(_.code) :+ code"""
boolean ${ev.isNull} = false;
UTF8String ${ev.value} = null;
if (${srcString.isNull}) {
${ev.isNull} = true;
} else {
$getTrimLeftFunction
}""")
}
}
override val trimMethod: String = "trimLeft"
}

object StringTrimRight {
Expand Down Expand Up @@ -1032,51 +1002,12 @@ case class StringTrimRight(

override protected def direction: String = "TRAILING"

override def eval(input: InternalRow): Any = {
val srcString = srcStr.eval(input).asInstanceOf[UTF8String]
if (srcString == null) {
null
} else {
if (trimStr.isDefined) {
srcString.trimRight(trimStr.get.eval(input).asInstanceOf[UTF8String])
} else {
srcString.trimRight()
}
}
}
override def doEval(srcString: UTF8String): Any = srcString.trimRight()

override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val evals = children.map(_.genCode(ctx))
val srcString = evals(0)
override def doEval(srcString: UTF8String, trimString: UTF8String): Any =
srcString.trimRight(trimString)

if (evals.length == 1) {
ev.copy(evals.map(_.code) :+ code"""
boolean ${ev.isNull} = false;
UTF8String ${ev.value} = null;
if (${srcString.isNull}) {
${ev.isNull} = true;
} else {
${ev.value} = ${srcString.value}.trimRight();
}""")
} else {
val trimString = evals(1)
val getTrimRightFunction =
s"""
if (${trimString.isNull}) {
${ev.isNull} = true;
} else {
${ev.value} = ${srcString.value}.trimRight(${trimString.value});
}"""
ev.copy(evals.map(_.code) :+ code"""
boolean ${ev.isNull} = false;
UTF8String ${ev.value} = null;
if (${srcString.isNull}) {
${ev.isNull} = true;
} else {
$getTrimRightFunction
}""")
}
}
override val trimMethod: String = "trimRight"
}

/**
Expand Down