Skip to content

Commit a803118

Browse files
fe2s authored and mengxr committed
[SPARK-8525] [MLLIB] fix LabeledPoint parser when there is a whitespace between label and features vector
Fix LabeledPoint parser when there is a whitespace between label and features vector, e.g. (y, [x1, x2, x3]).

Author: Oleksiy Dyagilev <[email protected]>

Closes #6954 from fe2s/SPARK-8525 and squashes the following commits:

0755b9d [Oleksiy Dyagilev] [SPARK-8525][MLLIB] addressing comment, removing dep on commons-lang
c1abc2b [Oleksiy Dyagilev] [SPARK-8525][MLLIB] fix LabeledPoint parser when there is a whitespace on specific position
1 parent f2fb028 commit a803118

File tree

3 files changed

+14
-0
lines changed

3 files changed

+14
-0
lines changed

mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -98,6 +98,8 @@ private[mllib] object NumericParser {
9898
}
9999
} else if (token == ")") {
100100
parsing = false
101+
} else if (token.trim.isEmpty){
102+
// ignore whitespaces between delim chars, e.g. ", ["
101103
} else {
102104
// expecting a number
103105
items.append(parseDouble(token))

mllib/src/test/scala/org/apache/spark/mllib/regression/LabeledPointSuite.scala

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,11 @@ class LabeledPointSuite extends SparkFunSuite {
3131
}
3232
}
3333

34+
test("parse labeled points with whitespaces") {
35+
val point = LabeledPoint.parse("(0.0, [1.0, 2.0])")
36+
assert(point === LabeledPoint(0.0, Vectors.dense(1.0, 2.0)))
37+
}
38+
3439
test("parse labeled points with v0.9 format") {
3540
val point = LabeledPoint.parse("1.0,1.0 0.0 -2.0")
3641
assert(point === LabeledPoint(1.0, Vectors.dense(1.0, 0.0, -2.0)))

mllib/src/test/scala/org/apache/spark/mllib/util/NumericParserSuite.scala

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -37,4 +37,11 @@ class NumericParserSuite extends SparkFunSuite {
3737
}
3838
}
3939
}
40+
41+
test("parser with whitespaces") {
42+
val s = "(0.0, [1.0, 2.0])"
43+
val parsed = NumericParser.parse(s).asInstanceOf[Seq[_]]
44+
assert(parsed(0).asInstanceOf[Double] === 0.0)
45+
assert(parsed(1).asInstanceOf[Array[Double]] === Array(1.0, 2.0))
46+
}
4047
}

0 commit comments

Comments
 (0)