diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
index a09bc65cf3c3defab729ccda68189db47d4c67f5..6e571fe35a75cb791e09ae43b42af456305f21b9 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Matrices.scala
@@ -940,8 +940,17 @@ object Matrices {
       case dm: BDM[Double] =>
         new DenseMatrix(dm.rows, dm.cols, dm.data, dm.isTranspose)
       case sm: BSM[Double] =>
+        // SPARK-11507: work around Breeze issue 479, where colPtrs.last != data.length.
+        // Compact a copy, not the caller's matrix, so the arrays are consistent.
+        val mat = if (sm.colPtrs.last != sm.data.length) {
+          val matCopy = sm.copy
+          matCopy.compact()
+          matCopy
+        } else {
+          sm
+        }
         // There is no isTranspose flag for sparse matrices in Breeze
-        new SparseMatrix(sm.rows, sm.cols, sm.colPtrs, sm.rowIndices, sm.data)
+        new SparseMatrix(mat.rows, mat.cols, mat.colPtrs, mat.rowIndices, mat.data)
       case _ =>
         throw new UnsupportedOperationException(
           s"Do not support conversion from type ${breeze.getClass.getName}.")
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala
index 57907f415cd0fd7ffd7d434d702e19af32988649..e289724cdaa3c83a6fc9a365a12d848be06ed386 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/MatricesSuite.scala
@@ -19,6 +19,7 @@ package org.apache.spark.mllib.linalg
 
 import java.util.Random
 
+import breeze.linalg.{CSCMatrix, Matrix => BM}
 import org.mockito.Mockito.when
 import org.scalatest.mock.MockitoSugar._
 import scala.collection.mutable.{Map => MutableMap}
@@ -499,6 +500,19 @@ class MatricesSuite extends SparkFunSuite {
     assert(sm1.numActives === 3)
   }
 
+  test("fromBreeze with sparse matrix") {
+    // colPtrs.last does NOT always equal data.length in a Breeze CSCMatrix, so
+    // compact() may be needed before conversion. Refer to SPARK-11507.
+    val bm1: BM[Double] = new CSCMatrix[Double](
+      Array(1.0, 1, 1), 3, 3, Array(0, 1, 2, 3), Array(0, 1, 2))
+    val bm2: BM[Double] = new CSCMatrix[Double](
+      Array(1.0, 2, 2, 4), 3, 3, Array(0, 0, 2, 4), Array(1, 2, 1, 2))
+    val sum = bm1 + bm2
+    val mat = Matrices.fromBreeze(sum)
+    assert(mat.numRows === 3 && mat.numCols === 3)
+    assert(mat.toArray === Array(1.0, 0.0, 0.0, 0.0, 2.0, 2.0, 0.0, 2.0, 5.0))
+  }
+
   test("row/col iterator") {
     val dm = new DenseMatrix(3, 2, Array(0, 1, 2, 3, 4, 0))
     val sm = dm.toSparse