Skip to content

Commit c1abc2b

Browse files
committed
[SPARK-8525][MLLIB] Fix LabeledPoint parser when whitespace appears at specific positions
1 parent 47c1d56 commit c1abc2b

File tree

3 files changed

+16
-0
lines changed

3 files changed

+16
-0
lines changed

mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ package org.apache.spark.mllib.util
1919

2020
import java.util.StringTokenizer
2121

22+
import org.apache.commons.lang.StringUtils.isBlank
23+
2224
import scala.collection.mutable.{ArrayBuilder, ListBuffer}
2325

2426
import org.apache.spark.SparkException
@@ -98,6 +100,8 @@ private[mllib] object NumericParser {
98100
}
99101
} else if (token == ")") {
100102
parsing = false
103+
} else if (isBlank(token)){
104+
// ignore whitespaces between delim chars, e.g. ", ["
101105
} else {
102106
// expecting a number
103107
items.append(parseDouble(token))

mllib/src/test/scala/org/apache/spark/mllib/regression/LabeledPointSuite.scala

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@ class LabeledPointSuite extends SparkFunSuite {
3131
}
3232
}
3333

34+
test("parse labeled points with whitespaces") {
35+
val point = LabeledPoint.parse("(0.0, [1.0, 2.0])")
36+
assert(point === LabeledPoint(0.0, Vectors.dense(1.0, 2.0)))
37+
}
38+
3439
test("parse labeled points with v0.9 format") {
3540
val point = LabeledPoint.parse("1.0,1.0 0.0 -2.0")
3641
assert(point === LabeledPoint(1.0, Vectors.dense(1.0, 0.0, -2.0)))

mllib/src/test/scala/org/apache/spark/mllib/util/NumericParserSuite.scala

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,4 +37,11 @@ class NumericParserSuite extends SparkFunSuite {
3737
}
3838
}
3939
}
40+
41+
test("parser with whitespaces") {
42+
val s = "(0.0, [1.0, 2.0])"
43+
val parsed = NumericParser.parse(s).asInstanceOf[Seq[_]]
44+
assert(parsed(0).asInstanceOf[Double] === 0.0)
45+
assert(parsed(1).asInstanceOf[Array[Double]] === Array(1.0, 2.0))
46+
}
4047
}

0 commit comments

Comments
 (0)