Skip to content
Snippets Groups Projects
Commit 78b39c7e authored by Xiangrui Meng
Browse files

[SPARK-7115] [MLLIB] skip the very first 1 in poly expansion

yinxusen

Author: Xiangrui Meng <meng@databricks.com>

Closes #5681 from mengxr/SPARK-7115 and squashes the following commits:

9ac27cd [Xiangrui Meng] skip the very first 1 in poly expansion
parent 8509519d
No related branches found
No related tags found
No related merge requests found
......@@ -87,7 +87,9 @@ object PolynomialExpansion {
if (multiplier == 0.0) {
// do nothing
} else if (degree == 0 || lastIdx < 0) {
polyValues(curPolyIdx) = multiplier
if (curPolyIdx >= 0) { // skip the very first 1
polyValues(curPolyIdx) = multiplier
}
} else {
val v = values(lastIdx)
val lastIdx1 = lastIdx - 1
......@@ -116,8 +118,10 @@ object PolynomialExpansion {
if (multiplier == 0.0) {
// do nothing
} else if (degree == 0 || lastIdx < 0) {
polyIndices += curPolyIdx
polyValues += multiplier
if (curPolyIdx >= 0) { // skip the very first 1
polyIndices += curPolyIdx
polyValues += multiplier
}
} else {
// Skip all zeros at the tail.
val v = values(lastIdx)
......@@ -139,8 +143,8 @@ object PolynomialExpansion {
private def expand(dv: DenseVector, degree: Int): DenseVector = {
val n = dv.size
val polySize = getPolySize(n, degree)
val polyValues = new Array[Double](polySize)
expandDense(dv.values, n - 1, degree, 1.0, polyValues, 0)
val polyValues = new Array[Double](polySize - 1)
expandDense(dv.values, n - 1, degree, 1.0, polyValues, -1)
new DenseVector(polyValues)
}
......@@ -149,12 +153,12 @@ object PolynomialExpansion {
val nnz = sv.values.length
val nnzPolySize = getPolySize(nnz, degree)
val polyIndices = mutable.ArrayBuilder.make[Int]
polyIndices.sizeHint(nnzPolySize)
polyIndices.sizeHint(nnzPolySize - 1)
val polyValues = mutable.ArrayBuilder.make[Double]
polyValues.sizeHint(nnzPolySize)
polyValues.sizeHint(nnzPolySize - 1)
expandSparse(
sv.indices, sv.values, nnz - 1, sv.size - 1, degree, 1.0, polyIndices, polyValues, 0)
new SparseVector(polySize, polyIndices.result(), polyValues.result())
sv.indices, sv.values, nnz - 1, sv.size - 1, degree, 1.0, polyIndices, polyValues, -1)
new SparseVector(polySize - 1, polyIndices.result(), polyValues.result())
}
def expand(v: Vector, degree: Int): Vector = {
......
......@@ -44,11 +44,11 @@ class PolynomialExpansionSuite extends FunSuite with MLlibTestSparkContext {
)
val twoDegreeExpansion: Array[Vector] = Array(
Vectors.sparse(10, Array(0, 1, 2, 3, 4, 5), Array(1.0, -2.0, 4.0, 2.3, -4.6, 5.29)),
Vectors.dense(1.0, -2.0, 4.0, 2.3, -4.6, 5.29),
Vectors.dense(Array(1.0) ++ Array.fill[Double](9)(0.0)),
Vectors.dense(1.0, 0.6, 0.36, -1.1, -0.66, 1.21, -3.0, -1.8, 3.3, 9.0),
Vectors.sparse(10, Array(0), Array(1.0)))
Vectors.sparse(9, Array(0, 1, 2, 3, 4), Array(-2.0, 4.0, 2.3, -4.6, 5.29)),
Vectors.dense(-2.0, 4.0, 2.3, -4.6, 5.29),
Vectors.dense(new Array[Double](9)),
Vectors.dense(0.6, 0.36, -1.1, -0.66, 1.21, -3.0, -1.8, 3.3, 9.0),
Vectors.sparse(9, Array.empty, Array.empty))
val df = sqlContext.createDataFrame(data.zip(twoDegreeExpansion)).toDF("features", "expected")
......@@ -76,13 +76,13 @@ class PolynomialExpansionSuite extends FunSuite with MLlibTestSparkContext {
)
val threeDegreeExpansion: Array[Vector] = Array(
Vectors.sparse(20, Array(0, 1, 2, 3, 4, 5, 6, 7, 8, 9),
Array(1.0, -2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17)),
Vectors.dense(1.0, -2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17),
Vectors.dense(Array(1.0) ++ Array.fill[Double](19)(0.0)),
Vectors.dense(1.0, 0.6, 0.36, 0.216, -1.1, -0.66, -0.396, 1.21, 0.726, -1.331, -3.0, -1.8,
Vectors.sparse(19, Array(0, 1, 2, 3, 4, 5, 6, 7, 8),
Array(-2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17)),
Vectors.dense(-2.0, 4.0, -8.0, 2.3, -4.6, 9.2, 5.29, -10.58, 12.17),
Vectors.dense(new Array[Double](19)),
Vectors.dense(0.6, 0.36, 0.216, -1.1, -0.66, -0.396, 1.21, 0.726, -1.331, -3.0, -1.8,
-1.08, 3.3, 1.98, -3.63, 9.0, 5.4, -9.9, -27.0),
Vectors.sparse(20, Array(0), Array(1.0)))
Vectors.sparse(19, Array.empty, Array.empty))
val df = sqlContext.createDataFrame(data.zip(threeDegreeExpansion)).toDF("features", "expected")
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment