Skip to content
Snippets Groups Projects
Commit 8b292b19 authored by Reza Zadeh, committed by Xiangrui Meng
Browse files

[SPARK-10654][MLLIB] Add columnSimilarities to IndexedRowMatrix

Add columnSimilarities to IndexedRowMatrix by delegating to functionality already in RowMatrix.

With a test.

Author: Reza Zadeh <reza@databricks.com>

Closes #8792 from rezazadeh/colsims.
parent 3cac6614
No related branches found
No related tags found
No related merge requests found
......@@ -68,6 +68,19 @@ class IndexedRowMatrix @Since("1.0.0") (
nRows
}
/**
 * Computes the cosine similarity between every pair of columns of this matrix.
 *
 * The work is delegated to the row-matrix view of this matrix (obtained via
 * `toRowMatrix()`), which uses the brute-force approach of computing normalized
 * dot products.
 *
 * @return An n x n sparse upper-triangular matrix of cosine similarities between
 *         columns of this matrix.
 */
@Since("1.6.0")
def columnSimilarities(): CoordinateMatrix =
  toRowMatrix().columnSimilarities()
/**
* Drops row indices and converts this matrix to a
* [[org.apache.spark.mllib.linalg.distributed.RowMatrix]].
......
......@@ -153,6 +153,18 @@ class IndexedRowMatrixSuite extends SparkFunSuite with MLlibTestSparkContext {
}
}
test("similar columns") {
  val matrix = new IndexedRowMatrix(indexedRows)
  // Reference values are derived from the Gramian: entry (i, j) is the dot product of
  // columns i and j, and the diagonal holds the squared column norms.
  val gramian = matrix.computeGramianMatrix().toBreeze.toDenseMatrix
  val similarities = matrix.columnSimilarities().toBreeze()
  // Only entries strictly above the diagonal (j > i) are compared.
  for {
    i <- 0 until n
    j <- i + 1 until n
  } {
    val expected = gramian(i, j) / scala.math.sqrt(gramian(i, i) * gramian(j, j))
    assert(math.abs(similarities(i, j) - expected) < 1e-6)
  }
}
/**
 * Checks whether a dense matrix is numerically zero.
 *
 * @param G matrix whose entries are inspected
 * @return true when the sum of the absolute values of all entries is below 1e-6
 */
def closeToZero(G: BDM[Double]): Boolean = {
  val absoluteTotal = G.valuesIterator.foldLeft(0.0) { (acc, entry) =>
    acc + math.abs(entry)
  }
  absoluteTotal < 1e-6
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment