Skip to content
Snippets Groups Projects
Commit 0b18bfba authored by Reynold Xin's avatar Reynold Xin
Browse files

Updated doc for PageRank.

parent 9317286b
No related branches found
No related tags found
No related merge requests found
......@@ -5,7 +5,42 @@ import scala.reflect.ClassTag
import org.apache.spark.Logging
import org.apache.spark.graphx._
/** PageRank algorithm implementation. */
/**
* PageRank algorithm implementation. There are two implementations of PageRank implemented.
*
* The first implementation uses the [[Pregel]] interface and runs PageRank for a fixed number
* of iterations:
* {{{
* var PR = Array.fill(n)( 1.0 )
* val oldPR = Array.fill(n)( 1.0 )
* for( iter <- 0 until numIter ) {
* swap(oldPR, PR)
* for( i <- 0 until n ) {
* PR[i] = alpha + (1 - alpha) * inNbrs[i].map(j => oldPR[j] / outDeg[j]).sum
* }
* }
* }}}
*
* The second implementation uses the standalone [[Graph]] interface and runs PageRank until
* convergence:
*
* {{{
* var PR = Array.fill(n)( 1.0 )
* val oldPR = Array.fill(n)( 0.0 )
* while( max(abs(PR - oldPr)) > tol ) {
* swap(oldPR, PR)
* for( i <- 0 until n if abs(PR[i] - oldPR[i]) > tol ) {
* PR[i] = alpha + (1 - \alpha) * inNbrs[i].map(j => oldPR[j] / outDeg[j]).sum
* }
* }
* }}}
*
* `alpha` is the random reset probability (typically 0.15), `inNbrs[i]` is the set of
* neighbors whick link to `i` and `outDeg[j]` is the out degree of vertex `j`.
*
* Note that this is not the "normalized" PageRank and as a consequence pages that have no
* inlinks will have a PageRank of alpha.
*/
object PageRank extends Logging {
/**
......@@ -13,26 +48,6 @@ object PageRank extends Logging {
* with vertex attributes containing the PageRank and edge
* attributes the normalized edge weight.
*
* The following PageRank fixed point is computed for each vertex.
*
* {{{
* var PR = Array.fill(n)( 1.0 )
* val oldPR = Array.fill(n)( 1.0 )
* for( iter <- 0 until numIter ) {
* swap(oldPR, PR)
* for( i <- 0 until n ) {
* PR[i] = alpha + (1 - alpha) * inNbrs[i].map(j => oldPR[j] / outDeg[j]).sum
* }
* }
* }}}
*
* where `alpha` is the random reset probability (typically 0.15),
* `inNbrs[i]` is the set of neighbors whick link to `i` and
* `outDeg[j]` is the out degree of vertex `j`.
*
* Note that this is not the "normalized" PageRank and as a consequence pages that have no
* inlinks will have a PageRank of alpha.
*
* @tparam VD the original vertex attribute (not used)
* @tparam ED the original edge attribute (not used)
*
......@@ -47,16 +62,11 @@ object PageRank extends Logging {
def run[VD: ClassTag, ED: ClassTag](
graph: Graph[VD, ED], numIter: Int, resetProb: Double = 0.15): Graph[Double, Double] =
{
/**
* Initialize the pagerankGraph with each edge attribute having
* weight 1/outDegree and each vertex with attribute 1.0.
*/
// Initialize the pagerankGraph with each edge attribute having
// weight 1/outDegree and each vertex with attribute 1.0.
val pagerankGraph: Graph[Double, Double] = graph
// Associate the degree with each vertex
.outerJoinVertices(graph.outDegrees){
(vid, vdata, deg) => deg.getOrElse(0)
}
.outerJoinVertices(graph.outDegrees) { (vid, vdata, deg) => deg.getOrElse(0) }
// Set the weight on the edges based on the degree
.mapTriplets( e => 1.0 / e.srcAttr )
// Set the vertex attributes to the initial pagerank values
......@@ -85,23 +95,6 @@ object PageRank extends Logging {
* Run a dynamic version of PageRank returning a graph with vertex attributes containing the
* PageRank and edge attributes containing the normalized edge weight.
*
* {{{
* var PR = Array.fill(n)( 1.0 )
* val oldPR = Array.fill(n)( 0.0 )
* while( max(abs(PR - oldPr)) > tol ) {
* swap(oldPR, PR)
* for( i <- 0 until n if abs(PR[i] - oldPR[i]) > tol ) {
* PR[i] = alpha + (1 - \alpha) * inNbrs[i].map(j => oldPR[j] / outDeg[j]).sum
* }
* }
* }}}
*
* where `alpha` is the random reset probability (typically 0.15), `inNbrs[i]` is the set of
* neighbors whick link to `i` and `outDeg[j]` is the out degree of vertex `j`.
*
* Note that this is not the "normalized" PageRank and as a consequence pages that have no
* inlinks will have a PageRank of alpha.
*
* @tparam VD the original vertex attribute (not used)
* @tparam ED the original edge attribute (not used)
*
......@@ -157,5 +150,4 @@ object PageRank extends Logging {
vertexProgram, sendMessage, messageCombiner)
.mapVertices((vid, attr) => attr._1)
} // end of deltaPageRank
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment