Skip to content
Snippets Groups Projects
Commit a8031183 authored by Oleksiy Dyagilev's avatar Oleksiy Dyagilev Committed by Xiangrui Meng
Browse files

[SPARK-8525] [MLLIB] fix LabeledPoint parser when there is a whitespace...

[SPARK-8525] [MLLIB] fix LabeledPoint parser when there is a whitespace between label and features vector

Fix the LabeledPoint parser when there is whitespace between the label and the features vector, e.g.
(y, [x1, x2, x3])

Author: Oleksiy Dyagilev <oleksiy_dyagilev@epam.com>

Closes #6954 from fe2s/SPARK-8525 and squashes the following commits:

0755b9d [Oleksiy Dyagilev] [SPARK-8525][MLLIB] addressing comment, removing dep on commons-lang
c1abc2b [Oleksiy Dyagilev] [SPARK-8525][MLLIB] fix LabeledPoint parser when there is a whitespace on specific position
parent f2fb0285
No related branches found
No related tags found
No related merge requests found
...@@ -98,6 +98,8 @@ private[mllib] object NumericParser { ...@@ -98,6 +98,8 @@ private[mllib] object NumericParser {
} }
} else if (token == ")") { } else if (token == ")") {
parsing = false parsing = false
} else if (token.trim.isEmpty){
// ignore whitespaces between delim chars, e.g. ", ["
} else { } else {
// expecting a number // expecting a number
items.append(parseDouble(token)) items.append(parseDouble(token))
......
...@@ -31,6 +31,11 @@ class LabeledPointSuite extends SparkFunSuite { ...@@ -31,6 +31,11 @@ class LabeledPointSuite extends SparkFunSuite {
} }
} }
// Regression test for SPARK-8525: the input has a space after the comma that separates
// the label from the features vector, and after the comma inside the vector.
test("parse labeled points with whitespaces") {
val point = LabeledPoint.parse("(0.0, [1.0, 2.0])")
assert(point === LabeledPoint(0.0, Vectors.dense(1.0, 2.0)))
}
test("parse labeled points with v0.9 format") { test("parse labeled points with v0.9 format") {
val point = LabeledPoint.parse("1.0,1.0 0.0 -2.0") val point = LabeledPoint.parse("1.0,1.0 0.0 -2.0")
assert(point === LabeledPoint(1.0, Vectors.dense(1.0, 0.0, -2.0))) assert(point === LabeledPoint(1.0, Vectors.dense(1.0, 0.0, -2.0)))
......
...@@ -37,4 +37,11 @@ class NumericParserSuite extends SparkFunSuite { ...@@ -37,4 +37,11 @@ class NumericParserSuite extends SparkFunSuite {
} }
} }
} }
// Regression test for SPARK-8525: parses a tuple-style string that contains a space
// after each comma and checks both the scalar element and the array element.
test("parser with whitespaces") {
val s = "(0.0, [1.0, 2.0])"
// The result is cast to Seq[_] so its elements can be inspected by position below.
val parsed = NumericParser.parse(s).asInstanceOf[Seq[_]]
assert(parsed(0).asInstanceOf[Double] === 0.0)
assert(parsed(1).asInstanceOf[Array[Double]] === Array(1.0, 2.0))
}
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment