diff --git a/core/src/main/scala/org/apache/spark/rdd/RDD.scala b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
index 5f39384975f9b0f96360359e4168ca16bddab232..97aee58bddbf14b15e8131935e1ca99b1cc3767f 100644
--- a/core/src/main/scala/org/apache/spark/rdd/RDD.scala
+++ b/core/src/main/scala/org/apache/spark/rdd/RDD.scala
@@ -604,8 +604,8 @@ abstract class RDD[T: ClassTag](
    *                        print line function (like out.println()) as the 2nd parameter.
    *                        An example of pipe the RDD data of groupBy() in a streaming way,
    *                        instead of constructing a huge String to concat all the elements:
-   *                        def printRDDElement(record:(String, Seq[String]), f:String=>Unit) =
-   *                          for (e <- record._2){f(e)}
+   *                        def printRDDElement(record:(String, Seq[String]), f:String=&gt;Unit) =
+   *                          for (e &lt;- record._2){f(e)}
    * @param separateWorkingDir Use separate working directories for each task.
    * @return the result RDD
    */
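A minimal sketch of the streaming `printRDDElement` usage this doc comment describes, assuming an existing SparkContext `sc` and piping through `cat` purely for illustration:

```scala
import org.apache.spark.SparkContext._   // pair-RDD implicits (groupByKey) on Spark 1.x

// Grouped data: each record is (key, Seq[String]).
val grouped = sc.parallelize(Seq("a" -> "1", "a" -> "2", "b" -> "3"))
  .groupByKey()
  .mapValues(_.toSeq)

// Stream each element of a group to the external process one line at a time,
// instead of concatenating the whole group into one huge String.
def printRDDElement(record: (String, Seq[String]), f: String => Unit): Unit =
  for (e <- record._2) f(e)

val piped = grouped.pipe(command = Seq("cat"), printRDDElement = printRDDElement)
piped.collect().foreach(println)
```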
@@ -841,7 +841,7 @@ abstract class RDD[T: ClassTag](
    * Return an RDD with the elements from `this` that are not in `other`.
    *
    * Uses `this` partitioner/partition size, because even if `other` is huge, the resulting
-   * RDD will be <= us.
+   * RDD will be &lt;= us.
    */
   def subtract(other: RDD[T]): RDD[T] =
     subtract(other, partitioner.getOrElse(new HashPartitioner(partitions.size)))
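For reference, a small usage sketch (assuming an existing SparkContext `sc`): the result keeps `this` RDD's partitioning, so it never grows beyond `this` even when `other` is much larger.

```scala
val small = sc.parallelize(1 to 10, numSlices = 4)   // 4 partitions
val huge  = sc.parallelize(5 to 1000000)             // much larger RDD
val left  = small.subtract(huge)                     // 1, 2, 3, 4 -- still 4 partitions
```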
@@ -1027,7 +1027,7 @@ abstract class RDD[T: ClassTag](
    *
    * Note that this method should only be used if the resulting map is expected to be small, as
    * the whole thing is loaded into the driver's memory.
-   * To handle very large results, consider using rdd.map(x => (x, 1L)).reduceByKey(_ + _), which
+   * To handle very large results, consider using rdd.map(x =&gt; (x, 1L)).reduceByKey(_ + _), which
    * returns an RDD[T, Long] instead of a map.
    */
   def countByValue()(implicit ord: Ordering[T] = null): Map[T, Long] = {
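The alternative mentioned above, side by side with `countByValue`, assuming an existing SparkContext `sc`:

```scala
import org.apache.spark.SparkContext._   // pair-RDD implicits (reduceByKey) on Spark 1.x

val words = sc.parallelize(Seq("a", "b", "a", "c", "a"))

// Small result set: collecting a Map onto the driver is fine.
val asMap = words.countByValue()                        // Map(a -> 3, b -> 1, c -> 1)

// Very large result set: keep the (value, count) pairs as a distributed RDD instead.
val asRdd = words.map(x => (x, 1L)).reduceByKey(_ + _)
```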
@@ -1065,7 +1065,7 @@ abstract class RDD[T: ClassTag](
    * Algorithmic Engineering of a State of The Art Cardinality Estimation Algorithm", available
    * <a href="http://dx.doi.org/10.1145/2452376.2452456">here</a>.
    *
-   * The relative accuracy is approximately `1.054 / sqrt(2^p)`. Setting a nonzero `sp > p`
+   * The relative accuracy is approximately `1.054 / sqrt(2^p)`. Setting a nonzero `sp &gt; p`
    * would trigger sparse representation of registers, which may reduce the memory consumption
    * and increase accuracy when the cardinality is small.
    *
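A short illustration of the trade-off described above (assuming an existing SparkContext `sc`): with `p = 12`, the relative error is roughly `1.054 / sqrt(2^12) = 1.054 / 64`, i.e. about 1.6%.

```scala
val ids = sc.parallelize(1 to 1000000).map(_ % 50000)

// Dense registers only (sp = 0); expected relative error ~1.6% for p = 12.
val approxDense  = ids.countApproxDistinct(p = 12, sp = 0)

// sp > p enables the sparse representation, which can save memory and improve
// accuracy when the true cardinality is small.
val approxSparse = ids.countApproxDistinct(p = 12, sp = 20)
```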
@@ -1383,7 +1383,7 @@ abstract class RDD[T: ClassTag](
 
   /**
    * Private API for changing an RDD's ClassTag.
-   * Used for internal Java <-> Scala API compatibility.
+   * Used for internal Java-Scala API compatibility.
    */
   private[spark] def retag(cls: Class[T]): RDD[T] = {
     val classTag: ClassTag[T] = ClassTag.apply(cls)
@@ -1392,7 +1392,7 @@ abstract class RDD[T: ClassTag](
 
   /**
    * Private API for changing an RDD's ClassTag.
-   * Used for internal Java <-> Scala API compatibility.
+   * Used for internal Java-Scala API compatibility.
    */
   private[spark] def retag(implicit classTag: ClassTag[T]): RDD[T] = {
     this.mapPartitions(identity, preservesPartitioning = true)(classTag)
diff --git a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
index 84b72b390ca35fa6b112bce6c6a03a4071484631..ab56580a3abc87d7b8c81c6051a9b7c61f866596 100644
--- a/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
+++ b/graphx/src/main/scala/org/apache/spark/graphx/Graph.scala
@@ -55,7 +55,7 @@ abstract class Graph[VD: ClassTag, ED: ClassTag] protected () extends Serializab
    * @return an RDD containing the edges in this graph
    *
    * @see [[Edge]] for the edge type.
-   * @see [[triplets]] to get an RDD which contains all the edges
+   * @see [[Graph#triplets]] to get an RDD which contains all the edges
    * along with their vertex data.
    *
    */
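A small GraphX sketch contrasting `edges` with `triplets` (assuming an existing SparkContext `sc`); vertex attributes here are names and edge attributes are weights:

```scala
import org.apache.spark.graphx.{Edge, Graph}

val vertices = sc.parallelize(Seq((1L, "alice"), (2L, "bob")))
val edges    = sc.parallelize(Seq(Edge(1L, 2L, 0.5)))
val graph    = Graph(vertices, edges)

// edges: only srcId, dstId and the edge attribute.
graph.edges.collect().foreach(e => println(s"${e.srcId} -> ${e.dstId}: ${e.attr}"))

// triplets: the same edges, but with the vertex data on both endpoints attached.
graph.triplets.collect().foreach(t => println(s"${t.srcAttr} -> ${t.dstAttr}: ${t.attr}"))
```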
diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
index fe39cd1bc0bd29c04e3c92f30845c3249d79e2df..bb291e6e1fd7d48a0592ad45647c6cff69479029 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
@@ -58,11 +58,11 @@ abstract class PipelineStage extends Serializable with Logging {
 /**
  * :: AlphaComponent ::
  * A simple pipeline, which acts as an estimator. A Pipeline consists of a sequence of stages, each
- * of which is either an [[Estimator]] or a [[Transformer]]. When [[Pipeline.fit]] is called, the
- * stages are executed in order. If a stage is an [[Estimator]], its [[Estimator.fit]] method will
+ * of which is either an [[Estimator]] or a [[Transformer]]. When [[Pipeline#fit]] is called, the
+ * stages are executed in order. If a stage is an [[Estimator]], its [[Estimator#fit]] method will
  * be called on the input dataset to fit a model. Then the model, which is a transformer, will be
  * used to transform the dataset as the input to the next stage. If a stage is a [[Transformer]],
- * its [[Transformer.transform]] method will be called to produce the dataset for the next stage.
+ * its [[Transformer#transform]] method will be called to produce the dataset for the next stage.
  * The fitted model from a [[Pipeline]] is an [[PipelineModel]], which consists of fitted models and
  * transformers, corresponding to the pipeline stages. If there are no stages, the pipeline acts as
  * an identity transformer.
@@ -77,9 +77,9 @@ class Pipeline extends Estimator[PipelineModel] {
 
   /**
    * Fits the pipeline to the input dataset with additional parameters. If a stage is an
-   * [[Estimator]], its [[Estimator.fit]] method will be called on the input dataset to fit a model.
+   * [[Estimator]], its [[Estimator#fit]] method will be called on the input dataset to fit a model.
    * Then the model, which is a transformer, will be used to transform the dataset as the input to
-   * the next stage. If a stage is a [[Transformer]], its [[Transformer.transform]] method will be
+   * the next stage. If a stage is a [[Transformer]], its [[Transformer#transform]] method will be
    * called to produce the dataset for the next stage. The fitted model from a [[Pipeline]] is an
    * [[PipelineModel]], which consists of fitted models and transformers, corresponding to the
    * pipeline stages. If there are no stages, the output model acts as an identity transformer.
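A hypothetical three-stage pipeline matching the description above (Tokenizer and HashingTF as Transformers, LogisticRegression as the Estimator); `training` and `test` are assumed, pre-built datasets with `text` and `label` columns:

```scala
import org.apache.spark.ml.Pipeline
import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.feature.{HashingTF, Tokenizer}

val tokenizer = new Tokenizer().setInputCol("text").setOutputCol("words")
val hashingTF = new HashingTF().setInputCol("words").setOutputCol("features")
val lr        = new LogisticRegression().setMaxIter(10)

val pipeline = new Pipeline().setStages(Array(tokenizer, hashingTF, lr))

// fit() walks the stages in order: the Transformers transform the dataset, the Estimator
// is fit on the transformed data, and the result is a PipelineModel of fitted stages.
val model = pipeline.fit(training)
val predictions = model.transform(test)
```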
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
index ddca30c3c01c81b057f461eabe639033b243bc73..53b79704703ce01e8ed5eb4aabc8610179af0890 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/distributed/RowMatrix.scala
@@ -151,10 +151,10 @@ class RowMatrix(
    * storing the right singular vectors, is computed via matrix multiplication as
    * U = A * (V * S^-1^), if requested by user. The actual method to use is determined
    * automatically based on the cost:
-   *  - If n is small (n &lt; 100) or k is large compared with n (k > n / 2), we compute the Gramian
-   *    matrix first and then compute its top eigenvalues and eigenvectors locally on the driver.
-   *    This requires a single pass with O(n^2^) storage on each executor and on the driver, and
-   *    O(n^2^ k) time on the driver.
+   *  - If n is small (n &lt; 100) or k is large compared with n (k &gt; n / 2), we compute
+   *    the Gramian matrix first and then compute its top eigenvalues and eigenvectors locally
+   *    on the driver. This requires a single pass with O(n^2^) storage on each executor and
+   *    on the driver, and O(n^2^ k) time on the driver.
    *  - Otherwise, we compute (A' * A) * v in a distributive way and send it to ARPACK's DSAUPD to
    *    compute (A' * A)'s top eigenvalues and eigenvectors on the driver node. This requires O(k)
    *    passes, O(n) storage on each executor, and O(n k) storage on the driver.
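A compact example of the SVD entry point whose strategy is described above (assuming an existing SparkContext `sc`); with n = 3 columns and k = 2, the Gramian-based local path is chosen automatically:

```scala
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.linalg.distributed.RowMatrix

val rows = sc.parallelize(Seq(
  Vectors.dense(1.0, 0.0, 7.0),
  Vectors.dense(2.0, 3.0, 5.0),
  Vectors.dense(4.0, 6.0, 8.0)))
val mat = new RowMatrix(rows)

// Top k = 2 singular values/vectors; computeU = true also builds U = A * (V * S^-1).
val svd = mat.computeSVD(2, computeU = true)
println(svd.s)                         // singular values
println(svd.V)                         // right singular vectors (local matrix)
svd.U.rows.collect().foreach(println)  // left singular vectors (distributed)
```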
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/DecisionTreeMetadata.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/DecisionTreeMetadata.scala
index 951733fada6be41d0ac8e1a30d50be40b5ced03e..f1a6ed230186ee975929a89641912874d3dd6824 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/DecisionTreeMetadata.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/impl/DecisionTreeMetadata.scala
@@ -183,7 +183,7 @@ private[tree] object DecisionTreeMetadata extends Logging {
   }
 
   /**
-   * Version of [[buildMetadata()]] for DecisionTree.
+   * Version of [[DecisionTreeMetadata#buildMetadata]] for DecisionTree.
    */
   def buildMetadata(
       input: RDD[LabeledPoint],
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala
index 4bca9039ebe1d2150fd2bb18df6797a7d0523a22..e1169d9f66ea44613ebb4a0f70f8b39607e67241 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/loss/Loss.scala
@@ -45,7 +45,7 @@ trait Loss extends Serializable {
    * purposes.
    * @param model Model of the weak learner.
    * @param data Training dataset: RDD of [[org.apache.spark.mllib.regression.LabeledPoint]].
-   * @return
+   * @return Measure of model error on the data.
    */
   def computeError(model: TreeEnsembleModel, data: RDD[LabeledPoint]): Double
 
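An illustrative (not authoritative) squared-error style implementation of `computeError`, averaging the per-point error over the dataset; a concrete `Loss` such as `SquaredError` may differ in details:

```scala
import org.apache.spark.SparkContext._   // double-RDD implicits (mean) on Spark 1.x
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.tree.model.TreeEnsembleModel
import org.apache.spark.rdd.RDD

object SquaredErrorSketch {
  /** Mean squared error of the ensemble's predictions over the data. */
  def computeError(model: TreeEnsembleModel, data: RDD[LabeledPoint]): Double =
    data.map { point =>
      val err = model.predict(point.features) - point.label
      err * err
    }.mean()
}
```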
diff --git a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
index 69299c219878cb9afa0839c80dfb3531cf8d324a..97f54aa62d31cb4bd11ef721f588440379060358 100644
--- a/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
+++ b/mllib/src/main/scala/org/apache/spark/mllib/util/LinearDataGenerator.scala
@@ -62,7 +62,7 @@ object LinearDataGenerator {
    * @param nPoints Number of points in sample.
    * @param seed Random seed
    * @param eps Epsilon scaling factor.
-   * @return
+   * @return Seq of generated input data points (LabeledPoints).
    */
   def generateLinearInput(
       intercept: Double,
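A usage sketch for the documented parameters, assuming the `(intercept, weights, nPoints, seed, eps)` overload shown here; it generates 100 noisy points around a known linear model:

```scala
import org.apache.spark.mllib.util.LinearDataGenerator

// y = 2.0 + 0.5 * x1 - 1.5 * x2 + Gaussian noise scaled by eps.
val points = LinearDataGenerator.generateLinearInput(
  intercept = 2.0,
  weights   = Array(0.5, -1.5),
  nPoints   = 100,
  seed      = 42,
  eps       = 0.1)

points.take(3).foreach(println)   // Seq of LabeledPoints
```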