From c84d5a10e8dbdeeeb54bc0d3f3dfb62ff0ca4fc1 Mon Sep 17 00:00:00 2001
From: Sean Owen <sowen@cloudera.com>
Date: Sat, 31 Jan 2015 10:40:42 -0800
Subject: [PATCH] SPARK-3359 [CORE] [DOCS] `sbt/sbt unidoc` doesn't work with
 Java 8

These are more `javadoc` 8-related changes I spotted while investigating. They
should be helpful in any event, but they do not come close to resolving
SPARK-3359, which may never be feasible while using `unidoc` and `javadoc` 8.

Author: Sean Owen <sowen@cloudera.com>

Closes #4193 from srowen/SPARK-3359 and squashes the following commits:

5b33f66 [Sean Owen] Additional scaladoc fixes for javadoc 8; still not going to be javadoc 8 compatible
---
 core/src/main/scala/org/apache/spark/rdd/RDD.scala | 14 +++++++-------
 .../main/scala/org/apache/spark/graphx/Graph.scala |  2 +-
 .../main/scala/org/apache/spark/ml/Pipeline.scala  | 10 +++++-----
 .../spark/mllib/linalg/distributed/RowMatrix.scala |  8 ++++----
 .../mllib/tree/impl/DecisionTreeMetadata.scala     |  2 +-
 .../org/apache/spark/mllib/tree/loss/Loss.scala    |  2 +-
 .../spark/mllib/util/LinearDataGenerator.scala     |  2 +-
 7 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 5f39384975..97aee58bdd 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -604,8 +604,8 @@ abstract class RDD[T: ClassTag](
    * print line function (like out.println()) as the 2nd parameter.
    * An example of pipe the RDD data of groupBy() in a streaming way,
    * instead of constructing a huge String to concat all the elements:
-   * def printRDDElement(record:(String, Seq[String]), f:String=>Unit) =
-   *   for (e <- record._2){f(e)}
+   * def printRDDElement(record:(String, Seq[String]), f:String=>Unit) =
+   *   for (e <- record._2){f(e)}
    * @param separateWorkingDir Use separate working directories for each task.
    * @return the result RDD
    */
@@ -841,7 +841,7 @@ abstract class RDD[T: ClassTag](
    * Return an RDD with the elements from `this` that are not in `other`.
    *
    * Uses `this` partitioner/partition size, because even if `other` is huge, the resulting
-   * RDD will be <= us.
+   * RDD will be <= us.
    */
   def subtract(other: RDD[T]): RDD[T] =
     subtract(other, partitioner.getOrElse(new HashPartitioner(partitions.size)))
@@ -1027,7 +1027,7 @@ abstract class RDD[T: ClassTag](
    *
    * Note that this method should only be used if the resulting map is expected to be small, as
    * the whole thing is loaded into the driver's memory.
-   * To handle very large results, consider using rdd.map(x => (x, 1L)).reduceByKey(_ + _), which
+   * To handle very large results, consider using rdd.map(x => (x, 1L)).reduceByKey(_ + _), which
    * returns an RDD[T, Long] instead of a map.
    */
   def countByValue()(implicit ord: Ordering[T] = null): Map[T, Long] = {
@@ -1065,7 +1065,7 @@ abstract class RDD[T: ClassTag](
    * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
    * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
    *
-   * The relative accuracy is approximately `1.054 / sqrt(2^p)`. Setting a nonzero `sp > p`
+   * The relative accuracy is approximately `1.054 / sqrt(2^p)`. Setting a nonzero `sp > p`
    * would trigger sparse representation of registers, which may reduce the memory consumption
    * and increase accuracy when the cardinality is small.
    *
@@ -1383,7 +1383,7 @@ abstract class RDD[T: ClassTag](
 
   /**
    * Private API for changing an RDD's ClassTag.
-   * Used for internal Java <-> Scala API compatibility.
+   * Used for internal Java-Scala API compatibility.
    */
   private[spark] def retag(cls: Class[T]): RDD[T] = {
     val classTag: ClassTag[T] = ClassTag.apply(cls)
@@ -1392,7 +1392,7 @@ abstract class RDD[T: ClassTag](
 
   /**
    * Private API for changing an RDD's ClassTag.
-   * Used for internal Java <-> Scala API compatibility.
+   * Used for internal Java-Scala API compatibility.
    */
   private[spark] def retag(implicit classTag: ClassTag[T]): RDD[T] = {
     this.mapPartitions(identity, preservesPartitioning = true)(classTag)
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
index 84b72b390c..ab56580a3a 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
@@ -55,7 +55,7 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected () extends Serializab
    * @return an RDD containing the edges in this graph
    *
    * @see [[Edge]] for the edge type.
-   * @see [[triplets]] to get an RDD which contains all the edges
+   * @see [[Graph#triplets]] to get an RDD which contains all the edges
    * along with their vertex data.
    *
    */
diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
index fe39cd1bc0..bb291e6e1f 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
@@ -58,11 +58,11 @@ abstract class PipelineStage extends Serializable with Logging {
 /**
  * :: AlphaComponent ::
  * A simple pipeline, which acts as an estimator. A Pipeline consists of a sequence of stages, each
- * of which is either an [[Estimator]] or a [[Transformer]]. When [[Pipeline.fit]] is called, the
- * stages are executed in order. If a stage is an [[Estimator]], its [[Estimator.fit]] method will
+ * of which is either an [[Estimator]] or a [[Transformer]]. When [[Pipeline#fit]] is called, the
+ * stages are executed in order. If a stage is an [[Estimator]], its [[Estimator#fit]] method will
  * be called on the input dataset to fit a model. Then the model, which is a transformer, will be
  * used to transform the dataset as the input to the next stage. If a stage is a [[Transformer]],
- * its [[Transformer.transform]] method will be called to produce the dataset for the next stage.
+ * its [[Transformer#transform]] method will be called to produce the dataset for the next stage.
  * The fitted model from a [[Pipeline]] is an [[PipelineModel]], which consists of fitted models and
  * transformers, corresponding to the pipeline stages. If there are no stages, the pipeline acts as
  * an identity transformer.
@@ -77,9 +77,9 @@ class Pipeline extends Estimator[PipelineModel] {
 
   /**
    * Fits the pipeline to the input dataset with additional parameters. If a stage is an
-   * [[Estimator]], its [[Estimator.fit]] method will be called on the input dataset to fit a model.
+   * [[Estimator]], its [[Estimator#fit]] method will be called on the input dataset to fit a model.
    * Then the model, which is a transformer, will be used to transform the dataset as the input to
-   * the next stage. If a stage is a [[Transformer]], its [[Transformer.transform]] method will be
+   * the next stage. If a stage is a [[Transformer]], its [[Transformer#transform]] method will be
    * called to produce the dataset for the next stage. The fitted model from a [[Pipeline]] is an
    * [[PipelineModel]], which consists of fitted models and transformers, corresponding to the
    * pipeline stages. If there are no stages, the output model acts as an identity transformer.
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
index ddca30c3c0..53b7970470 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
@@ -151,10 +151,10 @@ class RowMatrix(
    * storing the right singular vectors, is computed via matrix multiplication as
    * U = A * (V * S^-1^), if requested by user. The actual method to use is determined
    * automatically based on the cost:
-   * - If n is small (n < 100) or k is large compared with n (k > n / 2), we compute the Gramian
-   *   matrix first and then compute its top eigenvalues and eigenvectors locally on the driver.
-   *   This requires a single pass with O(n^2^) storage on each executor and on the driver, and
-   *   O(n^2^ k) time on the driver.
+   * - If n is small (n < 100) or k is large compared with n (k > n / 2), we compute
+   *   the Gramian matrix first and then compute its top eigenvalues and eigenvectors locally
+   *   on the driver. This requires a single pass with O(n^2^) storage on each executor and
+   *   on the driver, and O(n^2^ k) time on the driver.
    * - Otherwise, we compute (A' * A) * v in a distributive way and send it to ARPACK's DSAUPD to
    *   compute (A' * A)'s top eigenvalues and eigenvectors on the driver node. This requires O(k)
    *   passes, O(n) storage on each executor, and O(n k) storage on the driver.
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/DecisionTreeMetadata.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/DecisionTreeMetadata.scala
index 951733fada..f1a6ed2301 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/DecisionTreeMetadata.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/DecisionTreeMetadata.scala
@@ -183,7 +183,7 @@ private[tree] object DecisionTreeMetadata extends Logging {
   }
 
   /**
-   * Version of [[buildMetadata()]] for DecisionTree.
+   * Version of [[DecisionTreeMetadata#buildMetadata]] for DecisionTree.
    */
   def buildMetadata(
       input: RDD[LabeledPoint],
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala
index 4bca9039eb..e1169d9f66 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala
@@ -45,7 +45,7 @@ trait Loss extends Serializable {
    * purposes.
    * @param model Model of the weak learner.
    * @param data Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]].
-   * @return
+   * @return Measure of model error on data
    */
   def computeError(model: TreeEnsembleModel, data: RDD[LabeledPoint]): Double
 
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
index 69299c2198..97f54aa62d 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
@@ -62,7 +62,7 @@ object LinearDataGenerator {
    * @param nPoints Number of points in sample.
    * @param seed Random seed
    * @param eps Epsilon scaling factor.
-   * @return
+   * @return Seq of input.
    */
   def generateLinearInput(
       intercept: Double,
--
GitLab
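
For reference, the countByValue() scaladoc touched in the RDD.scala hunk above
points to rdd.map(x => (x, 1L)).reduceByKey(_ + _) as the scalable alternative.
A minimal Scala sketch of that pattern follows; the SparkContext handle `sc`
and the toy data are assumptions for illustration, not part of the patch:

  import org.apache.spark.SparkContext._ // pair-RDD implicits; built in from Spark 1.3 on
  import org.apache.spark.rdd.RDD

  // Toy input; assumes an already-constructed SparkContext named `sc`.
  val words: RDD[String] = sc.parallelize(Seq("a", "b", "a", "c", "b", "a"))

  // countByValue() loads the entire result map into the driver's memory.
  val asMap = words.countByValue()

  // The recommended alternative keeps the counts distributed as an RDD of
  // (value, count) pairs, so only what you explicitly fetch reaches the driver.
  val counts: RDD[(String, Long)] = words.map(x => (x, 1L)).reduceByKey(_ + _)
  counts.collect().foreach(println) // (a,3), (b,2), (c,1), in some order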