From 37611e57f6ca1768529e4038198de56f85f90665 Mon Sep 17 00:00:00 2001 From: Ankur Dave <ankurdave@gmail.com> Date: Fri, 10 Jan 2014 12:37:03 -0800 Subject: [PATCH] Improve docs for EdgeRDD, EdgeTriplet, and GraphLab --- .../org/apache/spark/graphx/EdgeRDD.scala | 12 +++---- .../org/apache/spark/graphx/EdgeTriplet.scala | 13 +++---- .../org/apache/spark/graphx/GraphLab.scala | 34 +++++++++---------- 3 files changed, 24 insertions(+), 35 deletions(-) diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala index 61228e9628..05d3dbe337 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeRDD.scala @@ -9,7 +9,7 @@ import org.apache.spark.storage.StorageLevel /** * `EdgeRDD[ED]` extends `RDD[Edge[ED]]` by storing the edges in columnar format on each partition - * for performance. It is constructed using [[org.apache.spark.graphx.impl.EdgePartitionBuilder]]. + * for performance. It is constructed using [[impl.EdgePartitionBuilder]]. */ class EdgeRDD[@specialized ED: ClassTag]( val partitionsRDD: RDD[(PartitionID, EdgePartition[ED])]) @@ -20,9 +20,9 @@ class EdgeRDD[@specialized ED: ClassTag]( override protected def getPartitions: Array[Partition] = partitionsRDD.partitions /** - * If partitionsRDD already has a partitioner, use it. Otherwise assume that the PartitionIDs in - * partitionsRDD correspond to the actual partitions and create a new partitioner that allows - * co-partitioning with partitionsRDD. + * If `partitionsRDD` already has a partitioner, use it. Otherwise assume that the + * [[PartitionID]]s in `partitionsRDD` correspond to the actual partitions and create a new + * partitioner that allows co-partitioning with `partitionsRDD`. */ override val partitioner = partitionsRDD.partitioner.orElse(Some(Partitioner.defaultPartitioner(partitionsRDD))) @@ -33,9 +33,6 @@ class EdgeRDD[@specialized ED: ClassTag]( override def collect(): Array[Edge[ED]] = this.map(_.copy()).collect() - /** - * Caching a VertexRDD causes the index and values to be cached separately. - */ override def persist(newLevel: StorageLevel): EdgeRDD[ED] = { partitionsRDD.persist(newLevel) this @@ -76,5 +73,4 @@ class EdgeRDD[@specialized ED: ClassTag]( def collectVertexIDs(): RDD[VertexID] = { partitionsRDD.flatMap { case (_, p) => Array.concat(p.srcIds, p.dstIds) } } - } diff --git a/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala b/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala index 5e2528925f..057d63a0ac 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/EdgeTriplet.scala @@ -3,15 +3,10 @@ package org.apache.spark.graphx import org.apache.spark.graphx.impl.VertexPartition /** - * An edge triplet represents two vertices and edge along with their - * attributes. + * An edge triplet represents an edge along with the vertex attributes of its neighboring vertices. * * @tparam VD the type of the vertex attribute. * @tparam ED the type of the edge attribute - * - * @todo specialize edge triplet for basic types, though when I last - * tried specializing I got a warning about inherenting from a type - * that is not a trait. */ class EdgeTriplet[VD, ED] extends Edge[ED] { /** @@ -37,8 +32,8 @@ class EdgeTriplet[VD, ED] extends Edge[ED] { /** * Given one vertex in the edge return the other vertex. * - * @param vid the id one of the two vertices on the edge. - * @return the attribute for the other vertex on the edge. + * @param vid the id one of the two vertices on the edge + * @return the attribute for the other vertex on the edge */ def otherVertexAttr(vid: VertexID): VD = if (srcId == vid) dstAttr else { assert(dstId == vid); srcAttr } @@ -47,7 +42,7 @@ class EdgeTriplet[VD, ED] extends Edge[ED] { * Get the vertex object for the given vertex in the edge. * * @param vid the id of one of the two vertices on the edge - * @return the attr for the vertex with that id. + * @return the attr for the vertex with that id */ def vertexAttr(vid: VertexID): VD = if (srcId == vid) srcAttr else { assert(dstId == vid); dstAttr } diff --git a/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala b/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala index 7efc69c64e..016bab5d02 100644 --- a/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala +++ b/graphx/src/main/scala/org/apache/spark/graphx/GraphLab.scala @@ -7,35 +7,33 @@ import scala.collection.JavaConversions._ import org.apache.spark.rdd.RDD /** - * This object implements the GraphLab gather-apply-scatter api. + * Implements the GraphLab gather-apply-scatter API. */ object GraphLab extends Logging { /** - * Execute the GraphLab Gather-Apply-Scatter API + * Executes the GraphLab Gather-Apply-Scatter API. * - * @todo finish documenting GraphLab Gather-Apply-Scatter API - * - * @param graph The graph on which to execute the GraphLab API - * @param gatherFunc The gather function is executed on each edge triplet - * adjacent to a vertex and returns an accumulator which + * @param graph the graph on which to execute the GraphLab API + * @param gatherFunc executed on each edge triplet + * adjacent to a vertex. Returns an accumulator which * is then merged using the merge function. - * @param mergeFunc An accumulative associative operation on the result of + * @param mergeFunc an accumulative associative operation on the result of * the gather type. - * @param applyFunc Takes a vertex and the final result of the merge operations + * @param applyFunc takes a vertex and the final result of the merge operations * on the adjacent edges and returns a new vertex value. - * @param scatterFunc Executed after the apply function the scatter function takes + * @param scatterFunc executed after the apply function. Takes * a triplet and signals whether the neighboring vertex program * must be recomputed. - * @param startVertices predicate to determine which vertices to start the computation on. - * these will be the active vertices in the first iteration. - * @param numIter The maximum number of iterations to run. - * @param gatherDirection The direction of edges to consider during the gather phase - * @param scatterDirection The direction of edges to consider during the scatter phase + * @param startVertices a predicate to determine which vertices to start the computation on. + * These will be the active vertices in the first iteration. + * @param numIter the maximum number of iterations to run + * @param gatherDirection the direction of edges to consider during the gather phase + * @param scatterDirection the direction of edges to consider during the scatter phase * - * @tparam VD The graph vertex attribute type - * @tparam ED The graph edge attribute type - * @tparam A The type accumulated during the gather phase + * @tparam VD the graph vertex attribute type + * @tparam ED the graph edge attribute type + * @tparam A the type accumulated during the gather phase * @return the resulting graph after the algorithm converges */ def apply[VD: ClassTag, ED: ClassTag, A: ClassTag] -- GitLab