Skip to content

Commit 24c2c58

Browse files
fe2s authored and mengxr committed
[SPARK-8525] [MLLIB] fix LabeledPoint parser when there is a whitespace between label and features vector
fix LabeledPoint parser when there is a whitespace between label and features vector, e.g. (y, [x1, x2, x3]) Author: Oleksiy Dyagilev <[email protected]> Closes #6954 from fe2s/SPARK-8525 and squashes the following commits: 0755b9d [Oleksiy Dyagilev] [SPARK-8525][MLLIB] addressing comment, removing dep on commons-lang c1abc2b [Oleksiy Dyagilev] [SPARK-8525][MLLIB] fix LabeledPoint parser when there is a whitespace on specific position (cherry picked from commit a803118) Signed-off-by: Xiangrui Meng <[email protected]>
1 parent 30789f6 commit 24c2c58

File tree

3 files changed

+14
-0
lines changed

3 files changed

+14
-0
lines changed

mllib/src/main/scala/org/apache/spark/mllib/util/NumericParser.scala

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -98,6 +98,8 @@ private[mllib] object NumericParser {
9898
}
9999
} else if (token == ")") {
100100
parsing = false
101+
} else if (token.trim.isEmpty){
102+
// ignore whitespaces between delim chars, e.g. ", ["
101103
} else {
102104
// expecting a number
103105
items.append(parseDouble(token))

mllib/src/test/scala/org/apache/spark/mllib/regression/LabeledPointSuite.scala

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -32,6 +32,11 @@ class LabeledPointSuite extends FunSuite {
3232
}
3333
}
3434

35+
test("parse labeled points with whitespaces") {
36+
val point = LabeledPoint.parse("(0.0, [1.0, 2.0])")
37+
assert(point === LabeledPoint(0.0, Vectors.dense(1.0, 2.0)))
38+
}
39+
3540
test("parse labeled points with v0.9 format") {
3641
val point = LabeledPoint.parse("1.0,1.0 0.0 -2.0")
3742
assert(point === LabeledPoint(1.0, Vectors.dense(1.0, 0.0, -2.0)))

mllib/src/test/scala/org/apache/spark/mllib/util/NumericParserSuite.scala

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -39,4 +39,11 @@ class NumericParserSuite extends FunSuite {
3939
}
4040
}
4141
}
42+
43+
test("parser with whitespaces") {
44+
val s = "(0.0, [1.0, 2.0])"
45+
val parsed = NumericParser.parse(s).asInstanceOf[Seq[_]]
46+
assert(parsed(0).asInstanceOf[Double] === 0.0)
47+
assert(parsed(1).asInstanceOf[Array[Double]] === Array(1.0, 2.0))
48+
}
4249
}

0 commit comments

Comments
 (0)