
Commit 1fcbf13

change metadata type in StructField for Scala/Java

1 parent 60cc131

11 files changed (+74 -57 lines)

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Expression.scala

Lines changed: 2 additions & 1 deletion

@@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.errors.TreeNodeException
 import org.apache.spark.sql.catalyst.trees
 import org.apache.spark.sql.catalyst.trees.TreeNode
 import org.apache.spark.sql.catalyst.types.{DataType, FractionalType, IntegralType, NumericType, NativeType}
+import org.apache.spark.sql.catalyst.util.Metadata

 abstract class Expression extends TreeNode[Expression] {
   self: Product =>
@@ -44,7 +45,7 @@ abstract class Expression extends TreeNode[Expression] {
   def references: AttributeSet = AttributeSet(children.flatMap(_.references.iterator))

   /** Returns the metadata when an expression is a reference to another expression with metadata. */
-  def metadata: Map[String, Any] = Map.empty
+  def metadata: Metadata = Metadata.empty

   /** Returns the result of evaluating this expression on a given input Row */
   def eval(input: Row = null): EvaluatedType

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/namedExpressions.scala

Lines changed: 3 additions & 2 deletions

@@ -21,6 +21,7 @@ import org.apache.spark.sql.catalyst.trees
 import org.apache.spark.sql.catalyst.analysis.UnresolvedAttribute
 import org.apache.spark.sql.catalyst.errors.TreeNodeException
 import org.apache.spark.sql.catalyst.types._
+import org.apache.spark.sql.catalyst.util.Metadata

 object NamedExpression {
   private val curId = new java.util.concurrent.atomic.AtomicLong()
@@ -86,7 +87,7 @@ case class Alias(child: Expression, name: String)

   override def dataType = child.dataType
   override def nullable = child.nullable
-  override def metadata: Map[String, Any] = child.metadata
+  override def metadata: Metadata = child.metadata

   override def toAttribute = {
     if (resolved) {
@@ -118,7 +119,7 @@ case class AttributeReference(
     name: String,
     dataType: DataType,
     nullable: Boolean = true,
-    override val metadata: Map[String, Any] = Map.empty)(
+    override val metadata: Metadata = Metadata.empty)(
     val exprId: ExprId = NamedExpression.newExprId,
     val qualifiers: Seq[String] = Nil) extends Attribute with trees.LeafNode[Expression] {

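For illustration, a minimal sketch of what the new signatures allow, using only APIs visible in this diff (the column name and the "doc" key are invented):

    import org.apache.spark.sql.catalyst.expressions.AttributeReference
    import org.apache.spark.sql.catalyst.types.StringType
    import org.apache.spark.sql.catalyst.util.MetadataBuilder

    // Build an annotation and attach it to an attribute; the second
    // (curried) parameter list keeps its defaults for exprId/qualifiers.
    val md = new MetadataBuilder().putString("doc", "first name").build()
    val attr = AttributeReference("name", StringType, nullable = true, md)()

    // Alias.metadata now forwards the child's Metadata, so wrapping the
    // reference in an alias leaves the annotation observable.
    assert(attr.metadata.getString("doc") == "first name")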

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/types/dataTypes.scala

Lines changed: 5 additions & 3 deletions

@@ -19,6 +19,8 @@ package org.apache.spark.sql.catalyst.types

 import java.sql.Timestamp

+import org.apache.spark.sql.catalyst.util.Metadata
+
 import scala.math.Numeric.{BigDecimalAsIfIntegral, DoubleAsIfIntegral, FloatAsIfIntegral}
 import scala.reflect.ClassTag
 import scala.reflect.runtime.universe.{TypeTag, runtimeMirror, typeTag}
@@ -74,7 +76,7 @@ object DataType {
       ("name", JString(name)),
       ("nullable", JBool(nullable)),
       ("type", dataType: JValue)) =>
-      StructField(name, parseDataType(dataType), nullable, metadata.values)
+      StructField(name, parseDataType(dataType), nullable, Metadata.fromJObject(metadata))
   }

   @deprecated("Use DataType.fromJson instead")
@@ -386,7 +388,7 @@ case class StructField(
     name: String,
     dataType: DataType,
     nullable: Boolean,
-    metadata: Map[String, Any] = Map.empty) {
+    metadata: Metadata = Metadata.empty) {

   private[sql] def buildFormattedString(prefix: String, builder: StringBuilder): Unit = {
     builder.append(s"$prefix-- $name: ${dataType.typeName} (nullable = $nullable)\n")
@@ -402,7 +404,7 @@ case class StructField(
     ("name" -> name) ~
     ("type" -> dataType.jsonValue) ~
     ("nullable" -> nullable) ~
-    ("metadata" -> Extraction.decompose(metadata)(DefaultFormats))
+    ("metadata" -> metadata.jsonValue)
   }
 }
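Taken together, the JSON path is now symmetric: StructField.jsonValue emits metadata.jsonValue, and DataType.fromJson feeds the parsed JObject back through Metadata.fromJObject. A minimal round-trip sketch, assuming DataType's json/fromJson pair from this code base (the field names and "doc" key are invented):

    import org.apache.spark.sql.catalyst.types._
    import org.apache.spark.sql.catalyst.util.MetadataBuilder

    val md = new MetadataBuilder().putString("doc", "age in years").build()
    val schema = StructType(Seq(
      StructField("id", IntegerType, nullable = false),
      StructField("age", DoubleType, nullable = true, md)))

    // Metadata overrides equals, so parsing the rendered JSON should
    // compare equal to the original schema, metadata included.
    assert(DataType.fromJson(schema.json) == schema)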

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/Metadata.scala

Lines changed: 29 additions & 30 deletions

@@ -65,11 +65,9 @@ sealed class Metadata private[util] (private[util] val map: Map[String, Any]) ex
   def getMetadataArray(key: String): Array[Metadata] = get(key)

   /** Converts to its JSON representation. */
-  def toJson: String = {
-    compact(render(Metadata.toJValue(this)))
-  }
+  def json: String = compact(render(jsonValue))

-  override def toString: String = toJson
+  override def toString: String = json

   override def equals(obj: Any): Boolean = {
     obj match {
@@ -96,6 +94,8 @@ sealed class Metadata private[util] (private[util] val map: Map[String, Any]) ex
   private def get[T](key: String): T = {
     map(key).asInstanceOf[T]
   }
+
+  private[sql] def jsonValue: JValue = Metadata.toJsonValue(this)
 }

 object Metadata {
@@ -105,41 +105,40 @@ object Metadata {

   /** Creates a Metadata instance from JSON. */
   def fromJson(json: String): Metadata = {
-    val map = parse(json).values.asInstanceOf[Map[String, Any]]
-    fromMap(map.toMap)
+    fromJObject(parse(json).asInstanceOf[JObject])
   }

-  /** Creates a Metadata instance from Map[String, Any]. */
-  private def fromMap(map: Map[String, Any]): Metadata = {
+  /** Creates a Metadata instance from JSON AST. */
+  private[sql] def fromJObject(jObj: JObject): Metadata = {
     val builder = new MetadataBuilder
-    map.foreach {
-      case (key, value: BigInt) =>
+    jObj.obj.foreach {
+      case (key, JInt(value)) =>
         builder.putLong(key, value.toLong)
-      case (key, value: Double) =>
+      case (key, JDouble(value)) =>
         builder.putDouble(key, value)
-      case (key, value: Boolean) =>
+      case (key, JBool(value)) =>
         builder.putBoolean(key, value)
-      case (key, value: String) =>
+      case (key, JString(value)) =>
         builder.putString(key, value)
-      case (key, value: Map[_, _]) =>
-        builder.putMetadata(key, fromMap(value.asInstanceOf[Map[String, Any]]))
-      case (key, value: Seq[_]) =>
+      case (key, o: JObject) =>
+        builder.putMetadata(key, fromJObject(o))
+      case (key, JArray(value)) =>
         if (value.isEmpty) {
           // If it is an empty array, we cannot infer its element type. We put an empty Array[Long].
           builder.putLongArray(key, Array.empty)
         } else {
           value.head match {
-            case _: BigInt =>
-              builder.putLongArray(key, value.asInstanceOf[Seq[BigInt]].map(_.toLong).toArray)
-            case _: Double =>
-              builder.putDoubleArray(key, value.asInstanceOf[Seq[Double]].toArray)
-            case _: Boolean =>
-              builder.putBooleanArray(key, value.asInstanceOf[Seq[Boolean]].toArray)
-            case _: String =>
-              builder.putStringArray(key, value.asInstanceOf[Seq[String]].toSeq.toArray)
-            case _: Map[_, _] =>
+            case _: JInt =>
+              builder.putLongArray(key, value.asInstanceOf[List[JInt]].map(_.num.toLong).toArray)
+            case _: JDouble =>
+              builder.putDoubleArray(key, value.asInstanceOf[List[JDouble]].map(_.num).toArray)
+            case _: JBool =>
+              builder.putBooleanArray(key, value.asInstanceOf[List[JBool]].map(_.value).toArray)
+            case _: JString =>
+              builder.putStringArray(key, value.asInstanceOf[List[JString]].map(_.s).toArray)
+            case _: JObject =>
               builder.putMetadataArray(
-                key, value.asInstanceOf[Seq[Map[String, Any]]].map(fromMap).toArray)
+                key, value.asInstanceOf[List[JObject]].map(fromJObject).toArray)
             case other =>
              throw new RuntimeException(s"Do not support array of type ${other.getClass}.")
           }
@@ -151,13 +150,13 @@ object Metadata {
   }

   /** Converts to JSON AST. */
-  private def toJValue(obj: Any): JValue = {
+  private def toJsonValue(obj: Any): JValue = {
     obj match {
       case map: Map[_, _] =>
-        val fields = map.toList.map { case (k: String, v) => (k, toJValue(v))}
+        val fields = map.toList.map { case (k: String, v) => (k, toJsonValue(v))}
         JObject(fields)
       case arr: Array[_] =>
-        val values = arr.toList.map(toJValue)
+        val values = arr.toList.map(toJsonValue)
         JArray(values)
       case x: Long =>
         JInt(x)
@@ -168,7 +167,7 @@ object Metadata {
       case x: String =>
         JString(x)
       case x: Metadata =>
-        toJValue(x.map)
+        toJsonValue(x.map)
       case other =>
         throw new RuntimeException(s"Do not support type ${other.getClass}.")
     }
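In short, json renders the AST that jsonValue builds, and fromJson parses back through fromJObject, matching on JInt/JDouble/JBool/JString nodes instead of the loosely typed values that parse(json).values used to produce. A small round-trip sketch using only the builder methods shown above (keys and values are invented):

    import org.apache.spark.sql.catalyst.util.{Metadata, MetadataBuilder}

    val md = new MetadataBuilder()
      .putLong("maxLength", 128)
      .putBoolean("indexed", true)
      .putStringArray("tags", Array("pii", "name"))
      .build()

    // equals is overridden on Metadata, so the round trip is checkable.
    assert(Metadata.fromJson(md.json) == md)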

sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/MetadataSuite.scala

Lines changed: 1 addition & 1 deletion

@@ -71,7 +71,7 @@ class MetadataSuite extends FunSuite {
   }

   test("metadata json conversion") {
-    val json = metadata.toJson
+    val json = metadata.json
     withClue("toJson must produce a valid JSON string") {
       parse(json)
     }

sql/core/src/main/java/org/apache/spark/sql/api/java/DataType.java

Lines changed: 5 additions & 3 deletions

@@ -17,6 +17,8 @@

 package org.apache.spark.sql.api.java;

+import org.apache.spark.sql.catalyst.util.Metadata;
+
 import java.util.*;

 /**
@@ -148,7 +150,7 @@ public static StructField createStructField(
       String name,
       DataType dataType,
       boolean nullable,
-      Map<String, Object> metadata) {
+      Metadata metadata) {
     if (name == null) {
       throw new IllegalArgumentException("name should not be null.");
     }
@@ -165,10 +167,10 @@ public static StructField createStructField(
   /**
    * Creates a StructField with empty metadata.
    *
-   * @see #createStructField(String, DataType, boolean, java.util.Map)
+   * @see #createStructField(String, DataType, boolean, Metadata)
    */
   public static StructField createStructField(String name, DataType dataType, boolean nullable) {
-    return createStructField(name, dataType, nullable, new HashMap<String, Object>());
+    return createStructField(name, dataType, nullable, Metadata.empty());
   }

   /**
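Since the Java API now shares the catalyst Metadata class instead of a raw java.util.Map, one builder serves both language bindings. A hedged sketch of calling the Java-side factories, written in Scala for consistency (JDataType.IntegerType refers to the Java API's static field, and the "doc" annotation is invented):

    import org.apache.spark.sql.api.java.{DataType => JDataType}
    import org.apache.spark.sql.catalyst.util.MetadataBuilder

    val md = new MetadataBuilder().putString("doc", "primary key").build()

    // The four-argument overload takes Metadata directly; the three-argument
    // overload now defaults to Metadata.empty() rather than an empty HashMap.
    val annotated = JDataType.createStructField("id", JDataType.IntegerType, false, md)
    val bare = JDataType.createStructField("id", JDataType.IntegerType, false)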

sql/core/src/main/java/org/apache/spark/sql/api/java/StructField.java

Lines changed: 5 additions & 3 deletions

@@ -17,6 +17,8 @@

 package org.apache.spark.sql.api.java;

+import org.apache.spark.sql.catalyst.util.Metadata;
+
 import java.util.Map;

 /**
@@ -37,13 +39,13 @@ public class StructField {
   private String name;
   private DataType dataType;
   private boolean nullable;
-  private Map<String, Object> metadata;
+  private Metadata metadata;

   protected StructField(
       String name,
       DataType dataType,
       boolean nullable,
-      Map<String, Object> metadata) {
+      Metadata metadata) {
     this.name = name;
     this.dataType = dataType;
     this.nullable = nullable;
@@ -62,7 +64,7 @@ public boolean isNullable() {
     return nullable;
   }

-  public Map<String, Object> getMetadata() {
+  public Metadata getMetadata() {
     return metadata;
   }


sql/core/src/main/scala/org/apache/spark/sql/types/util/DataTypeConversions.scala

Lines changed: 2 additions & 2 deletions

@@ -32,7 +32,7 @@ protected[sql] object DataTypeConversions {
       scalaStructField.name,
       asJavaDataType(scalaStructField.dataType),
       scalaStructField.nullable,
-      scalaStructField.metadata.asJava.asInstanceOf[java.util.Map[String, Object]])
+      scalaStructField.metadata)
   }

   /**
@@ -69,7 +69,7 @@ protected[sql] object DataTypeConversions {
       javaStructField.getName,
       asScalaDataType(javaStructField.getDataType),
       javaStructField.isNullable,
-      javaStructField.getMetadata.asScala.toMap)
+      javaStructField.getMetadata)
   }

   /**

sql/core/src/test/scala/org/apache/spark/sql/DataTypeSuite.scala

Lines changed: 5 additions & 1 deletion

@@ -17,6 +17,7 @@

 package org.apache.spark.sql

+import org.apache.spark.sql.catalyst.util.MetadataBuilder
 import org.scalatest.FunSuite

 import org.apache.spark.sql.catalyst.types.DataType
@@ -79,9 +80,12 @@ class DataTypeSuite extends FunSuite {
     checkDataTypeJsonRepr(ArrayType(StringType, false))
     checkDataTypeJsonRepr(MapType(IntegerType, StringType, true))
     checkDataTypeJsonRepr(MapType(IntegerType, ArrayType(DoubleType), false))
+    val metadata = new MetadataBuilder()
+      .putString("name", "age")
+      .build()
     checkDataTypeJsonRepr(
       StructType(Seq(
         StructField("a", IntegerType, nullable = true),
         StructField("b", ArrayType(DoubleType), nullable = false),
-        StructField("c", DoubleType, nullable = false, metadata = Map("name" -> "age")))))
+        StructField("c", DoubleType, nullable = false, metadata))))
   }

sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala

Lines changed: 12 additions & 10 deletions

@@ -17,16 +17,15 @@

 package org.apache.spark.sql

-import org.apache.spark.sql.catalyst.errors.TreeNodeException
-import org.apache.spark.sql.catalyst.expressions._
-import org.apache.spark.sql.execution.joins.BroadcastHashJoin
-import org.apache.spark.sql.test._
-import org.scalatest.BeforeAndAfterAll
 import java.util.TimeZone

-/* Implicits */
-import TestSQLContext._
-import TestData._
+import org.scalatest.BeforeAndAfterAll
+
+import org.apache.spark.sql.TestData._
+import org.apache.spark.sql.catalyst.errors.TreeNodeException
+import org.apache.spark.sql.catalyst.expressions._
+import org.apache.spark.sql.catalyst.util.MetadataBuilder
+import org.apache.spark.sql.test.TestSQLContext._

 class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
   // Make sure the tables are loaded.
@@ -684,11 +683,14 @@ class SQLQuerySuite extends QueryTest with BeforeAndAfterAll {
     val schema = person.schema
     val docKey = "doc"
     val docValue = "first name"
+    val metadata = new MetadataBuilder()
+      .putString(docKey, docValue)
+      .build()
     val schemaWithMeta = new StructType(Seq(
-      schema("id"), schema("name").copy(metadata = Map(docKey -> docValue)), schema("age")))
+      schema("id"), schema("name").copy(metadata = metadata), schema("age")))
     val personWithMeta = applySchema(person, schemaWithMeta)
     def validateMetadata(rdd: SchemaRDD): Unit = {
-      assert(rdd.schema("name").metadata(docKey) === docValue)
+      assert(rdd.schema("name").metadata.getString(docKey) == docValue)
     }
     personWithMeta.registerTempTable("personWithMeta")
     validateMetadata(personWithMeta.select('name))
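The updated test reads the annotation back with the typed getter. If Alias carries metadata through to its output attribute, as the Alias.metadata override above suggests, the annotation should also survive a rename; a speculative sketch against the same test fixture:

    // docKey, docValue, and personWithMeta come from the test above.
    val renamed = personWithMeta.select('name as 'firstName)
    assert(renamed.schema("firstName").metadata.getString(docKey) == docValue)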
