Skip to content

Commit 5185389

Browse files
committed
[SPARK-8148] Do not use FloatType in partition column inference.
Use DoubleType instead to be more stable and robust.

Author: Reynold Xin <[email protected]>

Closes #6692 from rxin/SPARK-8148 and squashes the following commits:

6742ecc [Reynold Xin] [SPARK-8148] Do not use FloatType in partition column inference.
1 parent fe7669d commit 5185389

File tree

2 files changed

+15
-13
lines changed

2 files changed

+15
-13
lines changed

sql/core/src/main/scala/org/apache/spark/sql/sources/PartitioningUtils.scala

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717

1818
package org.apache.spark.sql.sources
1919

20-
import java.lang.{Double => JDouble, Float => JFloat, Long => JLong}
20+
import java.lang.{Double => JDouble, Float => JFloat, Integer => JInteger, Long => JLong}
2121
import java.math.{BigDecimal => JBigDecimal}
2222

2323
import scala.collection.mutable.ArrayBuffer
@@ -178,7 +178,7 @@ private[sql] object PartitioningUtils {
178178
* {{{
179179
* NullType ->
180180
* IntegerType -> LongType ->
181-
* FloatType -> DoubleType -> DecimalType.Unlimited ->
181+
* DoubleType -> DecimalType.Unlimited ->
182182
* StringType
183183
* }}}
184184
*/
@@ -208,8 +208,8 @@ private[sql] object PartitioningUtils {
208208
}
209209

210210
/**
211-
* Converts a string to a `Literal` with automatic type inference. Currently only supports
212-
* [[IntegerType]], [[LongType]], [[FloatType]], [[DoubleType]], [[DecimalType.Unlimited]], and
211+
* Converts a string to a [[Literal]] with automatic type inference. Currently only supports
212+
* [[IntegerType]], [[LongType]], [[DoubleType]], [[DecimalType.Unlimited]], and
213213
* [[StringType]].
214214
*/
215215
private[sql] def inferPartitionColumnValue(
@@ -221,13 +221,15 @@ private[sql] object PartitioningUtils {
221221
Try(Literal.create(Integer.parseInt(raw), IntegerType))
222222
.orElse(Try(Literal.create(JLong.parseLong(raw), LongType)))
223223
// Then falls back to fractional types
224-
.orElse(Try(Literal.create(JFloat.parseFloat(raw), FloatType)))
225224
.orElse(Try(Literal.create(JDouble.parseDouble(raw), DoubleType)))
226225
.orElse(Try(Literal.create(new JBigDecimal(raw), DecimalType.Unlimited)))
227226
// Then falls back to string
228227
.getOrElse {
229-
if (raw == defaultPartitionName) Literal.create(null, NullType)
230-
else Literal.create(unescapePathName(raw), StringType)
228+
if (raw == defaultPartitionName) {
229+
Literal.create(null, NullType)
230+
} else {
231+
Literal.create(unescapePathName(raw), StringType)
232+
}
231233
}
232234
} else {
233235
if (raw == defaultPartitionName) {

sql/core/src/test/scala/org/apache/spark/sql/parquet/ParquetPartitionDiscoverySuite.scala

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
5353

5454
check("10", Literal.create(10, IntegerType))
5555
check("1000000000000000", Literal.create(1000000000000000L, LongType))
56-
check("1.5", Literal.create(1.5f, FloatType))
56+
check("1.5", Literal.create(1.5, DoubleType))
5757
check("hello", Literal.create("hello", StringType))
5858
check(defaultPartitionName, Literal.create(null, NullType))
5959
}
@@ -83,13 +83,13 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
8383
ArrayBuffer(
8484
Literal.create(10, IntegerType),
8585
Literal.create("hello", StringType),
86-
Literal.create(1.5f, FloatType)))
86+
Literal.create(1.5, DoubleType)))
8787
})
8888

8989
check("file://path/a=10/b_hello/c=1.5", Some {
9090
PartitionValues(
9191
ArrayBuffer("c"),
92-
ArrayBuffer(Literal.create(1.5f, FloatType)))
92+
ArrayBuffer(Literal.create(1.5, DoubleType)))
9393
})
9494

9595
check("file:///", None)
@@ -121,7 +121,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
121121
"hdfs://host:9000/path/a=10.5/b=hello"),
122122
PartitionSpec(
123123
StructType(Seq(
124-
StructField("a", FloatType),
124+
StructField("a", DoubleType),
125125
StructField("b", StringType))),
126126
Seq(
127127
Partition(Row(10, "20"), "hdfs://host:9000/path/a=10/b=20"),
@@ -140,7 +140,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
140140
"hdfs://host:9000/path/a=10.5/b=world/_temporary/path"),
141141
PartitionSpec(
142142
StructType(Seq(
143-
StructField("a", FloatType),
143+
StructField("a", DoubleType),
144144
StructField("b", StringType))),
145145
Seq(
146146
Partition(Row(10, "20"), "hdfs://host:9000/path/a=10/b=20"),
@@ -162,7 +162,7 @@ class ParquetPartitionDiscoverySuite extends QueryTest with ParquetTest {
162162
s"hdfs://host:9000/path/a=10.5/b=$defaultPartitionName"),
163163
PartitionSpec(
164164
StructType(Seq(
165-
StructField("a", FloatType),
165+
StructField("a", DoubleType),
166166
StructField("b", StringType))),
167167
Seq(
168168
Partition(Row(10, null), s"hdfs://host:9000/path/a=10/b=$defaultPartitionName"),

0 commit comments

Comments
 (0)