-
Notifications
You must be signed in to change notification settings - Fork 28.7k
[SPARK-4244] [SQL] Support Hive Generic UDFs with constant object inspector parameters #3109
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,7 +21,7 @@ import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ConversionHelper | |
|
||
import scala.collection.mutable.ArrayBuffer | ||
|
||
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector | ||
import org.apache.hadoop.hive.serde2.objectinspector.{ObjectInspector, ConstantObjectInspector} | ||
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions | ||
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory | ||
import org.apache.hadoop.hive.ql.exec.{UDF, UDAF} | ||
|
@@ -108,9 +108,7 @@ private[hive] case class HiveSimpleUdf(functionClassName: String, children: Seq[ | |
udfType != null && udfType.deterministic() | ||
} | ||
|
||
override def foldable = { | ||
isUDFDeterministic && children.foldLeft(true)((prev, n) => prev && n.foldable) | ||
} | ||
override def foldable = isUDFDeterministic && children.forall(_.foldable) | ||
|
||
// Create parameter converters | ||
@transient | ||
|
@@ -154,17 +152,17 @@ private[hive] case class HiveGenericUdf(functionClassName: String, children: Seq | |
protected lazy val argumentInspectors = children.map(toInspector) | ||
|
||
@transient | ||
protected lazy val returnInspector = function.initialize(argumentInspectors.toArray) | ||
protected lazy val returnInspector = | ||
function.initializeAndFoldConstants(argumentInspectors.toArray) | ||
|
||
@transient | ||
protected lazy val isUDFDeterministic = { | ||
val udfType = function.getClass().getAnnotation(classOf[HiveUDFType]) | ||
(udfType != null && udfType.deterministic()) | ||
} | ||
|
||
override def foldable = { | ||
isUDFDeterministic && children.foldLeft(true)((prev, n) => prev && n.foldable) | ||
} | ||
override def foldable = | ||
isUDFDeterministic && returnInspector.isInstanceOf[ConstantObjectInspector] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't really understand all the contracts here, so please correct me if I'm missing something, but why does the return type have to be a Constant? It seems like if a UDF is deterministic it should be safe to fold as long as its children are foldable too, independent of the type of inspector it returns. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The key change here is that we need to get the folded result via the Hive method `initializeAndFoldConstants`. |
||
|
||
@transient | ||
protected lazy val deferedObjects = | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"aa":"10","aaaaaa":"11","aaaaaa":"12","bb12":"13","s14s14":"14"} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we really want to throw an error here? Why not just skip creating a constant object inspector?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We should enumerate all of the possible constant data types in this function; this actually gives us a chance to check whether we have missed one. Previously, we did miss all of the constant types by specifying the data type in the pattern match (see #3114).