Skip to content

Commit 60cc131

Browse files
committed
add doc and header
1 parent 60614c7 commit 60cc131

File tree

2 files changed

+178
-63
lines changed

2 files changed

+178
-63
lines changed

sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/util/Metadata.scala

Lines changed: 131 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -1,65 +1,120 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
118
package org.apache.spark.sql.catalyst.util
219

320
import scala.collection.mutable
421

522
import org.json4s._
623
import org.json4s.jackson.JsonMethods._
724

8-
import scala.reflect.ClassTag
9-
10-
sealed class Metadata private[util] (val map: Map[String, Any]) extends Serializable {
11-
12-
def getInt(key: String): Int = get(key)
13-
25+
/**
26+
* Metadata is a wrapper over Map[String, Any] that limits the value type to simple ones: Boolean,
27+
* Long, Double, String, Metadata, Array[Boolean], Array[Long], Array[Double], Array[String], and
28+
* Array[Metadata]. JSON is used for serialization.
29+
*
30+
* The default constructor is private. Users should use either [[MetadataBuilder]] or
31+
* [[Metadata$#fromJson]] to create Metadata instances.
32+
*
33+
* @param map an immutable map that stores the data
34+
*/
35+
sealed class Metadata private[util] (private[util] val map: Map[String, Any]) extends Serializable {
36+
37+
/** Gets a Long. */
38+
def getLong(key: String): Long = get(key)
39+
40+
/** Gets a Double. */
1441
def getDouble(key: String): Double = get(key)
1542

43+
/** Gets a Boolean. */
1644
def getBoolean(key: String): Boolean = get(key)
1745

46+
/** Gets a String. */
1847
def getString(key: String): String = get(key)
1948

49+
/** Gets a Metadata. */
2050
def getMetadata(key: String): Metadata = get(key)
2151

22-
def getIntArray(key: String): Array[Int] = getArray(key)
52+
/** Gets a Long array. */
53+
def getLongArray(key: String): Array[Long] = get(key)
2354

24-
def getDoubleArray(key: String): Array[Double] = getArray(key)
55+
/** Gets a Double array. */
56+
def getDoubleArray(key: String): Array[Double] = get(key)
2557

26-
def getBooleanArray(key: String): Array[Boolean] = getArray(key)
58+
/** Gets a Boolean array. */
59+
def getBooleanArray(key: String): Array[Boolean] = get(key)
2760

28-
def getStringArray(key: String): Array[String] = getArray(key)
61+
/** Gets a String array. */
62+
def getStringArray(key: String): Array[String] = get(key)
2963

30-
def getMetadataArray(key: String): Array[Metadata] = getArray(key)
64+
/** Gets a Metadata array. */
65+
def getMetadataArray(key: String): Array[Metadata] = get(key)
3166

67+
/** Converts to its JSON representation. */
3268
def toJson: String = {
3369
compact(render(Metadata.toJValue(this)))
3470
}
3571

36-
private def get[T](key: String): T = {
37-
map(key).asInstanceOf[T]
38-
}
72+
override def toString: String = toJson
3973

40-
private def getArray[T: ClassTag](key: String): Array[T] = {
41-
map(key).asInstanceOf[Seq[T]].toArray
74+
override def equals(obj: Any): Boolean = {
75+
obj match {
76+
case that: Metadata =>
77+
if (map.keySet == that.map.keySet) {
78+
map.keys.forall { k =>
79+
(map(k), that.map(k)) match {
80+
case (v0: Array[_], v1: Array[_]) =>
81+
v0.view == v1.view
82+
case (v0, v1) =>
83+
v0 == v1
84+
}
85+
}
86+
} else {
87+
false
88+
}
89+
case other =>
90+
false
91+
}
4292
}
4393

44-
override def toString: String = toJson
94+
override def hashCode: Int = Metadata.hash(this)
95+
96+
private def get[T](key: String): T = {
97+
map(key).asInstanceOf[T]
98+
}
4599
}
46100

47101
object Metadata {
48102

103+
/** Returns an empty Metadata. */
49104
def empty: Metadata = new Metadata(Map.empty)
50105

106+
/** Creates a Metadata instance from JSON. */
51107
def fromJson(json: String): Metadata = {
52108
val map = parse(json).values.asInstanceOf[Map[String, Any]]
53109
fromMap(map.toMap)
54110
}
55111

112+
/** Creates a Metadata instance from Map[String, Any]. */
56113
private def fromMap(map: Map[String, Any]): Metadata = {
57114
val builder = new MetadataBuilder
58115
map.foreach {
59-
case (key, value: Int) =>
60-
builder.putInt(key, value)
61116
case (key, value: BigInt) =>
62-
builder.putInt(key, value.toInt)
117+
builder.putLong(key, value.toLong)
63118
case (key, value: Double) =>
64119
builder.putDouble(key, value)
65120
case (key, value: Boolean) =>
@@ -70,22 +125,21 @@ object Metadata {
70125
builder.putMetadata(key, fromMap(value.asInstanceOf[Map[String, Any]]))
71126
case (key, value: Seq[_]) =>
72127
if (value.isEmpty) {
73-
builder.putIntArray(key, Seq.empty)
128+
// An empty array carries no element type information, so we default to an empty Array[Long].
129+
builder.putLongArray(key, Array.empty)
74130
} else {
75131
value.head match {
76-
case _: Int =>
77-
builder.putIntArray(key, value.asInstanceOf[Seq[Int]].toSeq)
78132
case _: BigInt =>
79-
builder.putIntArray(key, value.asInstanceOf[Seq[BigInt]].map(_.toInt).toSeq)
133+
builder.putLongArray(key, value.asInstanceOf[Seq[BigInt]].map(_.toLong).toArray)
80134
case _: Double =>
81-
builder.putDoubleArray(key, value.asInstanceOf[Seq[Double]].toSeq)
135+
builder.putDoubleArray(key, value.asInstanceOf[Seq[Double]].toArray)
82136
case _: Boolean =>
83-
builder.putBooleanArray(key, value.asInstanceOf[Seq[Boolean]].toSeq)
137+
builder.putBooleanArray(key, value.asInstanceOf[Seq[Boolean]].toArray)
84138
case _: String =>
85-
builder.putStringArray(key, value.asInstanceOf[Seq[String]].toSeq)
86-
case _: Map[String, Any] =>
139+
builder.putStringArray(key, value.asInstanceOf[Seq[String]].toSeq.toArray)
140+
case _: Map[_, _] =>
87141
builder.putMetadataArray(
88-
key, value.asInstanceOf[Seq[Map[String, Any]]].map(fromMap).toSeq)
142+
key, value.asInstanceOf[Seq[Map[String, Any]]].map(fromMap).toArray)
89143
case other =>
90144
throw new RuntimeException(s"Do not support array of type ${other.getClass}.")
91145
}
@@ -96,15 +150,16 @@ object Metadata {
96150
builder.build()
97151
}
98152

153+
/** Converts to JSON AST. */
99154
private def toJValue(obj: Any): JValue = {
100155
obj match {
101156
case map: Map[_, _] =>
102-
val fields = map.toList.map { case (k: String, v) => (k, toJValue(v)) }
157+
val fields = map.toList.map { case (k: String, v) => (k, toJValue(v))}
103158
JObject(fields)
104-
case arr: Seq[_] =>
159+
case arr: Array[_] =>
105160
val values = arr.toList.map(toJValue)
106161
JArray(values)
107-
case x: Int =>
162+
case x: Long =>
108163
JInt(x)
109164
case x: Double =>
110165
JDouble(x)
@@ -118,37 +173,75 @@ object Metadata {
118173
throw new RuntimeException(s"Do not support type ${other.getClass}.")
119174
}
120175
}
176+
177+
/** Computes the hash code for the types we support. */
178+
private def hash(obj: Any): Int = {
179+
obj match {
180+
case map: Map[_, _] =>
181+
map.mapValues(hash).##
182+
case arr: Array[_] =>
183+
// Seq.empty[T] has the same hashCode regardless of T.
184+
arr.toSeq.map(hash).##
185+
case x: Long =>
186+
x.##
187+
case x: Double =>
188+
x.##
189+
case x: Boolean =>
190+
x.##
191+
case x: String =>
192+
x.##
193+
case x: Metadata =>
194+
hash(x.map)
195+
case other =>
196+
throw new RuntimeException(s"Do not support type ${other.getClass}.")
197+
}
198+
}
121199
}
122200

201+
/**
202+
* Builder for [[Metadata]]. If there is a key collision, the latter will overwrite the former.
203+
*/
123204
class MetadataBuilder {
124205

125206
private val map: mutable.Map[String, Any] = mutable.Map.empty
126207

208+
/** Includes the content of an existing [[Metadata]] instance. */
127209
def withMetadata(metadata: Metadata): this.type = {
128210
map ++= metadata.map
129211
this
130212
}
131213

132-
def putInt(key: String, value: Int): this.type = put(key, value)
214+
/** Puts a Long. */
215+
def putLong(key: String, value: Long): this.type = put(key, value)
133216

217+
/** Puts a Double. */
134218
def putDouble(key: String, value: Double): this.type = put(key, value)
135219

220+
/** Puts a Boolean. */
136221
def putBoolean(key: String, value: Boolean): this.type = put(key, value)
137222

223+
/** Puts a String. */
138224
def putString(key: String, value: String): this.type = put(key, value)
139225

226+
/** Puts a [[Metadata]]. */
140227
def putMetadata(key: String, value: Metadata): this.type = put(key, value)
141228

142-
def putIntArray(key: String, value: Seq[Int]): this.type = put(key, value)
229+
/** Puts a Long array. */
230+
def putLongArray(key: String, value: Array[Long]): this.type = put(key, value)
143231

144-
def putDoubleArray(key: String, value: Seq[Double]): this.type = put(key, value)
232+
/** Puts a Double array. */
233+
def putDoubleArray(key: String, value: Array[Double]): this.type = put(key, value)
145234

146-
def putBooleanArray(key: String, value: Seq[Boolean]): this.type = put(key, value)
235+
/** Puts a Boolean array. */
236+
def putBooleanArray(key: String, value: Array[Boolean]): this.type = put(key, value)
147237

148-
def putStringArray(key: String, value: Seq[String]): this.type = put(key, value)
238+
/** Puts a String array. */
239+
def putStringArray(key: String, value: Array[String]): this.type = put(key, value)
149240

150-
def putMetadataArray(key: String, value: Seq[Metadata]): this.type = put(key, value)
241+
/** Puts a [[Metadata]] array. */
242+
def putMetadataArray(key: String, value: Array[Metadata]): this.type = put(key, value)
151243

244+
/** Builds the [[Metadata]] instance. */
152245
def build(): Metadata = {
153246
new Metadata(map.toMap)
154247
}
Lines changed: 47 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -1,49 +1,69 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one or more
3+
* contributor license agreements. See the NOTICE file distributed with
4+
* this work for additional information regarding copyright ownership.
5+
* The ASF licenses this file to You under the Apache License, Version 2.0
6+
* (the "License"); you may not use this file except in compliance with
7+
* the License. You may obtain a copy of the License at
8+
*
9+
* http://www.apache.org/licenses/LICENSE-2.0
10+
*
11+
* Unless required by applicable law or agreed to in writing, software
12+
* distributed under the License is distributed on an "AS IS" BASIS,
13+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
* See the License for the specific language governing permissions and
15+
* limitations under the License.
16+
*/
17+
118
package org.apache.spark.sql.catalyst.util
219

3-
import org.json4s.jackson.JsonMethods._
20+
import org.json4s.jackson.JsonMethods.parse
421
import org.scalatest.FunSuite
522

623
class MetadataSuite extends FunSuite {
724

825
val baseMetadata = new MetadataBuilder()
9-
.putString("purpose", "ml")
10-
.build()
26+
.putString("purpose", "ml")
27+
.putBoolean("isBase", true)
28+
.build()
1129

1230
val summary = new MetadataBuilder()
13-
.putInt("numFeatures", 10)
14-
.build()
31+
.putLong("numFeatures", 10L)
32+
.build()
1533

1634
val age = new MetadataBuilder()
17-
.putString("name", "age")
18-
.putInt("index", 1)
19-
.putBoolean("categorical", false)
20-
.putDouble("average", 45.0)
21-
.build()
35+
.putString("name", "age")
36+
.putLong("index", 1L)
37+
.putBoolean("categorical", false)
38+
.putDouble("average", 45.0)
39+
.build()
2240

2341
val gender = new MetadataBuilder()
24-
.putString("name", "gender")
25-
.putInt("index", 5)
26-
.putBoolean("categorical", true)
27-
.putStringArray("categories", Seq("male", "female"))
28-
.build()
42+
.putString("name", "gender")
43+
.putLong("index", 5)
44+
.putBoolean("categorical", true)
45+
.putStringArray("categories", Array("male", "female"))
46+
.build()
2947

3048
val metadata = new MetadataBuilder()
31-
.withMetadata(baseMetadata)
32-
.putMetadata("summary", summary)
33-
.putIntArray("int[]", Seq(0, 1))
34-
.putDoubleArray("double[]", Seq(3.0, 4.0))
35-
.putBooleanArray("boolean[]", Seq(true, false))
36-
.putMetadataArray("features", Seq(age, gender))
37-
.build()
49+
.withMetadata(baseMetadata)
50+
.putBoolean("isBase", false) // overwrite an existing key
51+
.putMetadata("summary", summary)
52+
.putLongArray("long[]", Array(0L, 1L))
53+
.putDoubleArray("double[]", Array(3.0, 4.0))
54+
.putBooleanArray("boolean[]", Array(true, false))
55+
.putMetadataArray("features", Array(age, gender))
56+
.build()
3857

3958
test("metadata builder and getters") {
40-
assert(age.getInt("index") === 1)
59+
assert(age.getLong("index") === 1L)
4160
assert(age.getDouble("average") === 45.0)
4261
assert(age.getBoolean("categorical") === false)
4362
assert(age.getString("name") === "age")
4463
assert(metadata.getString("purpose") === "ml")
64+
assert(metadata.getBoolean("isBase") === false)
4565
assert(metadata.getMetadata("summary") === summary)
46-
assert(metadata.getIntArray("int[]").toSeq === Seq(0, 1))
66+
assert(metadata.getLongArray("long[]").toSeq === Seq(0L, 1L))
4767
assert(metadata.getDoubleArray("double[]").toSeq === Seq(3.0, 4.0))
4868
assert(metadata.getBooleanArray("boolean[]").toSeq === Seq(true, false))
4969
assert(gender.getStringArray("categories").toSeq === Seq("male", "female"))
@@ -55,6 +75,8 @@ class MetadataSuite extends FunSuite {
5575
withClue("toJson must produce a valid JSON string") {
5676
parse(json)
5777
}
58-
assert(Metadata.fromJson(json) === metadata)
78+
val parsed = Metadata.fromJson(json)
79+
assert(parsed === metadata)
80+
assert(parsed.## === metadata.##)
5981
}
6082
}

0 commit comments

Comments
 (0)