diff --git a/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala b/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala index 4374e99631560ddc4947528d5a44eeb458b9f29a..d7eaa5a9268ff85ad7bc6ec5572c796aed06f360 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/optim/WeightedLeastSquares.scala @@ -17,12 +17,8 @@ package org.apache.spark.ml.optim -import com.github.fommil.netlib.LAPACK.{getInstance => lapack} -import org.netlib.util.intW - import org.apache.spark.Logging import org.apache.spark.mllib.linalg._ -import org.apache.spark.mllib.linalg.distributed.RowMatrix import org.apache.spark.rdd.RDD /** @@ -110,7 +106,7 @@ private[ml] class WeightedLeastSquares( j += 1 } - val x = choleskySolve(aaBar.values, abBar) + val x = new DenseVector(CholeskyDecomposition.solve(aaBar.values, abBar.values)) // compute intercept val intercept = if (fitIntercept) { @@ -121,23 +117,6 @@ private[ml] class WeightedLeastSquares( new WeightedLeastSquaresModel(x, intercept) } - - /** - * Solves a symmetric positive definite linear system via Cholesky factorization. - * The input arguments are modified in-place to store the factorization and the solution. 
- * @param A the upper triangular part of A - * @param bx right-hand side - * @return the solution vector - */ - // TODO: SPARK-10490 - consolidate this and the Cholesky solver in ALS - private def choleskySolve(A: Array[Double], bx: DenseVector): DenseVector = { - val k = bx.size - val info = new intW(0) - lapack.dppsv("U", k, 1, A, bx.values, k, info) - val code = info.`val` - assert(code == 0, s"lapack.dpotrs returned $code.") - bx - } } private[ml] object WeightedLeastSquares { diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala index f6f5281f71a5fcdc142ec1e2b0ae5c7c52c8b9d0..535f266b9a9446a7dd57bbe1eaad977cda06acf6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala @@ -26,9 +26,7 @@ import scala.util.Sorting import scala.util.hashing.byteswap64 import com.github.fommil.netlib.BLAS.{getInstance => blas} -import com.github.fommil.netlib.LAPACK.{getInstance => lapack} import org.apache.hadoop.fs.{FileSystem, Path} -import org.netlib.util.intW import org.apache.spark.{Logging, Partitioner} import org.apache.spark.annotation.{DeveloperApi, Experimental} @@ -36,6 +34,7 @@ import org.apache.spark.ml.{Estimator, Model} import org.apache.spark.ml.param._ import org.apache.spark.ml.param.shared._ import org.apache.spark.ml.util.{Identifiable, SchemaUtils} +import org.apache.spark.mllib.linalg.CholeskyDecomposition import org.apache.spark.mllib.optimization.NNLS import org.apache.spark.rdd.RDD import org.apache.spark.sql.DataFrame @@ -366,8 +365,6 @@ object ALS extends Logging { /** Cholesky solver for least square problems. 
*/ private[recommendation] class CholeskySolver extends LeastSquaresNESolver { - private val upper = "U" - /** * Solves a least squares problem with L2 regularization: * @@ -387,10 +384,7 @@ object ALS extends Logging { i += j j += 1 } - val info = new intW(0) - lapack.dppsv(upper, k, 1, ne.ata, ne.atb, k, info) - val code = info.`val` - assert(code == 0, s"lapack.dppsv returned $code.") + CholeskyDecomposition.solve(ne.ata, ne.atb) val x = new Array[Float](k) i = 0 while (i < k) { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala new file mode 100644 index 0000000000000000000000000000000000000000..66eb40b6f4a69f4c01b45cafd01aabcedbd35deb --- /dev/null +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/CholeskyDecomposition.scala @@ -0,0 +1,43 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.mllib.linalg + +import com.github.fommil.netlib.LAPACK.{getInstance => lapack} +import org.netlib.util.intW + +/** + * Compute Cholesky decomposition. 
+ */ +private[spark] object CholeskyDecomposition { + + /** + * Solves a symmetric positive definite linear system via Cholesky factorization. + * The input arguments are modified in-place to store the factorization and the solution. + * @param A the upper triangular part of A + * @param bx right-hand side + * @return the solution array + */ + def solve(A: Array[Double], bx: Array[Double]): Array[Double] = { + val k = bx.size + val info = new intW(0) + lapack.dppsv("U", k, 1, A, bx, k, info) + val code = info.`val` + assert(code == 0, s"lapack.dppsv returned $code.") + bx + } +} diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala index ae3ba3099c87835fc77d5ba8ce9eef875b79ed14..863abe86d38d7343077ea88be1186b30f7abb723 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/EigenValueDecomposition.scala @@ -21,13 +21,9 @@ import breeze.linalg.{DenseMatrix => BDM, DenseVector => BDV} import com.github.fommil.netlib.ARPACK import org.netlib.util.{intW, doubleW} -import org.apache.spark.annotation.Experimental - /** - * :: Experimental :: * Compute eigen-decomposition. */ -@Experimental private[mllib] object EigenValueDecomposition { /** * Compute the leading k eigenvalues and eigenvectors on a symmetric square matrix using ARPACK. @@ -46,7 +42,7 @@ private[mllib] object EigenValueDecomposition { * for more details). The maximum number of Arnoldi update iterations is set to 300 in this * function. */ - private[mllib] def symmetricEigs( + def symmetricEigs( mul: BDV[Double] => BDV[Double], n: Int, k: Int,