diff --git a/core/src/main/scala/org/apache/spark/SparkConf.scala b/core/src/main/scala/org/apache/spark/SparkConf.scala index 308a1ed5fa963e71ad9aefe7a24b8e4bd0cb873d..fe912e639bcbc349f976b7acf01f17e9d46782d8 100644 --- a/core/src/main/scala/org/apache/spark/SparkConf.scala +++ b/core/src/main/scala/org/apache/spark/SparkConf.scala @@ -262,7 +262,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria /** * Get a time parameter as seconds; throws a NoSuchElementException if it's not set. If no * suffix is provided then seconds are assumed. - * @throws java.util.NoSuchElementException + * @throws java.util.NoSuchElementException If the time parameter is not set */ def getTimeAsSeconds(key: String): Long = { Utils.timeStringAsSeconds(get(key)) @@ -279,7 +279,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria /** * Get a time parameter as milliseconds; throws a NoSuchElementException if it's not set. If no * suffix is provided then milliseconds are assumed. - * @throws java.util.NoSuchElementException + * @throws java.util.NoSuchElementException If the time parameter is not set */ def getTimeAsMs(key: String): Long = { Utils.timeStringAsMs(get(key)) @@ -296,7 +296,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria /** * Get a size parameter as bytes; throws a NoSuchElementException if it's not set. If no * suffix is provided then bytes are assumed. - * @throws java.util.NoSuchElementException + * @throws java.util.NoSuchElementException If the size parameter is not set */ def getSizeAsBytes(key: String): Long = { Utils.byteStringAsBytes(get(key)) @@ -320,7 +320,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria /** * Get a size parameter as Kibibytes; throws a NoSuchElementException if it's not set. If no * suffix is provided then Kibibytes are assumed. - * @throws java.util.NoSuchElementException + * @throws java.util.NoSuchElementException If the size parameter is not set */ def getSizeAsKb(key: String): Long = { Utils.byteStringAsKb(get(key)) @@ -337,7 +337,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria /** * Get a size parameter as Mebibytes; throws a NoSuchElementException if it's not set. If no * suffix is provided then Mebibytes are assumed. - * @throws java.util.NoSuchElementException + * @throws java.util.NoSuchElementException If the size parameter is not set */ def getSizeAsMb(key: String): Long = { Utils.byteStringAsMb(get(key)) @@ -354,7 +354,7 @@ class SparkConf(loadDefaults: Boolean) extends Cloneable with Logging with Seria /** * Get a size parameter as Gibibytes; throws a NoSuchElementException if it's not set. If no * suffix is provided then Gibibytes are assumed. - * @throws java.util.NoSuchElementException + * @throws java.util.NoSuchElementException If the size parameter is not set */ def getSizeAsGb(key: String): Long = { Utils.byteStringAsGb(get(key)) diff --git a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala index d7bfdbad8442f1975b3e6663828ed19df2ff116b..41093bdb858c0e98b6b3f8d1124d50e76ba782e0 100644 --- a/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala +++ b/core/src/main/scala/org/apache/spark/rdd/PairRDDFunctions.scala @@ -496,7 +496,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * or `PairRDDFunctions.reduceByKey` will provide much better performance. 
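A minimal sketch of how the SparkConf time/size getters documented in the hunks above behave; the configuration keys and values here are illustrative, and an unset key with no default raises the documented NoSuchElementException.

import org.apache.spark.SparkConf

val conf = new SparkConf()
  .set("spark.network.timeout", "120s")
  .set("spark.driver.maxResultSize", "2g")

conf.getTimeAsSeconds("spark.network.timeout")   // 120 (suffix parsed as seconds)
conf.getTimeAsMs("spark.network.timeout")        // 120000
conf.getSizeAsMb("spark.driver.maxResultSize")   // 2048
// conf.getTimeAsMs("spark.some.unset.key")      // would throw java.util.NoSuchElementException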
* * @note As currently implemented, groupByKey must be able to hold all the key-value pairs for any - * key in memory. If a key has too many values, it can result in an [[OutOfMemoryError]]. + * key in memory. If a key has too many values, it can result in an `OutOfMemoryError`. */ def groupByKey(partitioner: Partitioner): RDD[(K, Iterable[V])] = self.withScope { // groupByKey shouldn't use map side combine because map side combine does not @@ -520,7 +520,7 @@ class PairRDDFunctions[K, V](self: RDD[(K, V)]) * or `PairRDDFunctions.reduceByKey` will provide much better performance. * * @note As currently implemented, groupByKey must be able to hold all the key-value pairs for any - * key in memory. If a key has too many values, it can result in an [[OutOfMemoryError]]. + * key in memory. If a key has too many values, it can result in an `OutOfMemoryError`. */ def groupByKey(numPartitions: Int): RDD[(K, Iterable[V])] = self.withScope { groupByKey(new HashPartitioner(numPartitions)) diff --git a/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala index 1b12af75864eaf9a9369d80ad46c25f1d1acf9b4..4331addb44172ba5e2b421e5fabca28084c25062 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/SparkListener.scala @@ -294,7 +294,7 @@ private[spark] trait SparkListenerInterface { /** * :: DeveloperApi :: - * A default implementation for [[SparkListenerInterface]] that has no-op implementations for + * A default implementation for `SparkListenerInterface` that has no-op implementations for * all callbacks. * * Note that this is an internal interface which might change in different Spark releases. diff --git a/core/src/main/scala/org/apache/spark/scheduler/package.scala b/core/src/main/scala/org/apache/spark/scheduler/package.scala index f0dbfc2ac5f48b96975368246c57f814f63b555e..4847c41710b2bc7baca2093280ffbe72063282f8 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/package.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/package.scala @@ -18,7 +18,7 @@ package org.apache.spark /** - * Spark's scheduling components. This includes the [[org.apache.spark.scheduler.DAGScheduler]] and - * lower level [[org.apache.spark.scheduler.TaskScheduler]]. + * Spark's scheduling components. This includes the `org.apache.spark.scheduler.DAGScheduler` and + * lower level `org.apache.spark.scheduler.TaskScheduler`. */ package object scheduler diff --git a/core/src/main/scala/org/apache/spark/storage/TopologyMapper.scala b/core/src/main/scala/org/apache/spark/storage/TopologyMapper.scala index a0f0fdef8e9484b2f9b828f7665f1e91e1b8929b..a150a8e3636e41bfafc053e701fd85b74227a74d 100644 --- a/core/src/main/scala/org/apache/spark/storage/TopologyMapper.scala +++ b/core/src/main/scala/org/apache/spark/storage/TopologyMapper.scala @@ -60,7 +60,7 @@ class DefaultTopologyMapper(conf: SparkConf) extends TopologyMapper(conf) with L /** * A simple file based topology mapper. This expects topology information provided as a - * [[java.util.Properties]] file. The name of the file is obtained from SparkConf property + * `java.util.Properties` file. The name of the file is obtained from SparkConf property * `spark.storage.replication.topologyFile`. 
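A short sketch of the memory point in the groupByKey note above: reduceByKey combines values on the map side, while groupByKey must buffer every value for a key. Here `sc` is an assumed SparkContext and the data is illustrative.

val pairs = sc.parallelize(Seq(("a", 1), ("b", 2), ("a", 3)))

// Buffers all values per key; a very hot key can lead to OutOfMemoryError.
val summedViaGroup = pairs.groupByKey().mapValues(_.sum)

// Same result, but aggregates map-side and is far more memory-friendly.
val summedViaReduce = pairs.reduceByKey(_ + _)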
To use this topology mapper, set the * `spark.storage.replication.topologyMapper` property to * [[org.apache.spark.storage.FileBasedTopologyMapper]] diff --git a/docs/js/api-docs.js b/docs/js/api-docs.js index 96c63cc12716f5ac2e1640688c8c4d650bfe3665..13514e11b93324facae15eb814678694d3f28540 100644 --- a/docs/js/api-docs.js +++ b/docs/js/api-docs.js @@ -50,7 +50,7 @@ $(document).ready(function() { MathJax.Hub.Config({ displayAlign: "left", tex2jax: { - inlineMath: [ ["$", "$"], ["\\\\(","\\\\)"] ], + inlineMath: [ ["$", "$"], ["\\(","\\)"] ], displayMath: [ ["$$","$$"], ["\\[", "\\]"] ], processEscapes: true, skipTags: ['script', 'noscript', 'style', 'textarea', 'pre', 'a'] diff --git a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala index 22e4ec693b1f7ac4e30823d6929da7b79a86ee46..8e166ba0ff51a3f4fa896acef2e8aa471b683fd1 100644 --- a/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala +++ b/mllib-local/src/main/scala/org/apache/spark/ml/linalg/Vectors.scala @@ -169,7 +169,7 @@ sealed trait Vector extends Serializable { /** * Factory methods for [[org.apache.spark.ml.linalg.Vector]]. * We don't use the name `Vector` because Scala imports - * [[scala.collection.immutable.Vector]] by default. + * `scala.collection.immutable.Vector` by default. */ @Since("2.0.0") object Vectors { diff --git a/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala index 215f9d86f1be12c151a469317ea9b87cc497b1fe..08b0cb9b8f6a5f1525ba00170e24c7cb3aa733c4 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/Predictor.scala @@ -40,7 +40,7 @@ private[ml] trait PredictorParams extends Params * @param schema input schema * @param fitting whether this is in fitting * @param featuresDataType SQL DataType for FeaturesType. - * E.g., [[VectorUDT]] for vector features. + * E.g., `VectorUDT` for vector features. * @return output schema */ protected def validateAndTransformSchema( @@ -72,7 +72,7 @@ private[ml] trait PredictorParams extends Params * in `fit()`. * * @tparam FeaturesType Type of features. - * E.g., [[VectorUDT]] for vector features. + * E.g., `VectorUDT` for vector features. * @tparam Learner Specialization of this class. If you subclass this type, use this type * parameter to specify the concrete type. * @tparam M Specialization of [[PredictionModel]]. If you subclass this type, use this type @@ -122,7 +122,7 @@ abstract class Predictor[ /** * Train a model using the given dataset and parameters. - * Developers can implement this instead of [[fit()]] to avoid dealing with schema validation + * Developers can implement this instead of `fit()` to avoid dealing with schema validation * and copying parameters into the model. * * @param dataset Training dataset @@ -133,7 +133,7 @@ abstract class Predictor[ /** * Returns the SQL DataType corresponding to the FeaturesType type parameter. * - * This is used by [[validateAndTransformSchema()]]. + * This is used by `validateAndTransformSchema()`. * This workaround is needed since SQL has different APIs for Scala and Java. * * The default value is VectorUDT, but it may be overridden if FeaturesType is not Vector. @@ -160,7 +160,7 @@ abstract class Predictor[ * Abstraction for a model for prediction tasks (regression and classification). * * @tparam FeaturesType Type of features. - * E.g., [[VectorUDT]] for vector features. 
+ * E.g., `VectorUDT` for vector features. * @tparam M Specialization of [[PredictionModel]]. If you subclass this type, use this type * parameter to specify the concrete type for the corresponding model. */ @@ -181,7 +181,7 @@ abstract class PredictionModel[FeaturesType, M <: PredictionModel[FeaturesType, /** * Returns the SQL DataType corresponding to the FeaturesType type parameter. * - * This is used by [[validateAndTransformSchema()]]. + * This is used by `validateAndTransformSchema()`. * This workaround is needed since SQL has different APIs for Scala and Java. * * The default value is VectorUDT, but it may be overridden if FeaturesType is not Vector. @@ -197,7 +197,7 @@ abstract class PredictionModel[FeaturesType, M <: PredictionModel[FeaturesType, * the predictions as a new column [[predictionCol]]. * * @param dataset input dataset - * @return transformed dataset with [[predictionCol]] of type [[Double]] + * @return transformed dataset with [[predictionCol]] of type `Double` */ override def transform(dataset: Dataset[_]): DataFrame = { transformSchema(dataset.schema, logging = true) @@ -219,7 +219,7 @@ abstract class PredictionModel[FeaturesType, M <: PredictionModel[FeaturesType, /** * Predict label for the given features. - * This internal method is used to implement [[transform()]] and output [[predictionCol]]. + * This internal method is used to implement `transform()` and output [[predictionCol]]. */ protected def predict(features: FeaturesType): Double } diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala index 7fbfee75e96a9866d5f8261b6820d0fac27c257d..1cd2b1ad84092f1bd3da9482ae3c0c44be868f49 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/attributes.scala @@ -126,7 +126,7 @@ private[attribute] trait AttributeFactory { private[attribute] def fromMetadata(metadata: Metadata): Attribute /** - * Creates an [[Attribute]] from a [[StructField]] instance, optionally preserving name. + * Creates an [[Attribute]] from a `StructField` instance, optionally preserving name. */ private[ml] def decodeStructField(field: StructField, preserveName: Boolean): Attribute = { require(field.dataType.isInstanceOf[NumericType]) @@ -145,7 +145,7 @@ private[attribute] trait AttributeFactory { } /** - * Creates an [[Attribute]] from a [[StructField]] instance. + * Creates an [[Attribute]] from a `StructField` instance. */ def fromStructField(field: StructField): Attribute = decodeStructField(field, false) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala b/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala index f6964054db8396968bd58b2052c370a19f26d893..25ce0282b12749ffd6c8739e7ea34de2e29708af 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/attribute/package.scala @@ -23,7 +23,7 @@ import org.apache.spark.sql.DataFrame /** * ==ML attributes== * - * The ML pipeline API uses [[DataFrame]]s as ML datasets. + * The ML pipeline API uses `DataFrame`s as ML datasets. * Each dataset consists of typed columns, e.g., string, double, vector, etc. * However, knowing only the column type may not be sufficient to handle the data properly. * For instance, a double column with values 0.0, 1.0, 2.0, ... 
may represent some label indices, diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala index 8fd6b70d4ddb46f0b16e8645b3d0f600588ce229..d8608d885d6f177156f4488dbd981c0c9807fe32 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/Classifier.scala @@ -71,7 +71,7 @@ abstract class Classifier[ * and put it in an RDD with strong types. * * @param dataset DataFrame with columns for labels ([[org.apache.spark.sql.types.NumericType]]) - * and features ([[Vector]]). + * and features (`Vector`). * @param numClasses Number of classes label can take. Labels must be integers in the range * [0, numClasses). * @throws SparkException if any label is not an integer >= 0 @@ -94,7 +94,7 @@ abstract class Classifier[ * by finding the maximum label value. * * Label validation (ensuring all labels are integers >= 0) needs to be handled elsewhere, - * such as in [[extractLabeledPoints()]]. + * such as in `extractLabeledPoints()`. * * @param dataset Dataset which contains a column [[labelCol]] * @param maxNumClasses Maximum number of classes allowed when inferred from data. If numClasses @@ -150,7 +150,7 @@ abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[Featur /** * Transforms dataset by reading from [[featuresCol]], and appending new columns as specified by * parameters: - * - predicted labels as [[predictionCol]] of type [[Double]] + * - predicted labels as [[predictionCol]] of type `Double` * - raw predictions (confidences) as [[rawPredictionCol]] of type `Vector`. * * @param dataset input dataset @@ -192,10 +192,10 @@ abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[Featur /** * Predict label for the given features. - * This internal method is used to implement [[transform()]] and output [[predictionCol]]. + * This internal method is used to implement `transform()` and output [[predictionCol]]. * * This default implementation for classification predicts the index of the maximum value - * from [[predictRaw()]]. + * from `predictRaw()`. */ override protected def predict(features: FeaturesType): Double = { raw2prediction(predictRaw(features)) @@ -205,7 +205,7 @@ abstract class ClassificationModel[FeaturesType, M <: ClassificationModel[Featur * Raw prediction for each possible label. * The meaning of a "raw" prediction may vary between algorithms, but it intuitively gives * a measure of confidence in each possible label (where larger = more confident). - * This internal method is used to implement [[transform()]] and output [[rawPredictionCol]]. + * This internal method is used to implement `transform()` and output [[rawPredictionCol]]. * * @return vector where element i is the raw prediction for label i. 
* This raw prediction may be any real number, where a larger value indicates greater diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala index 93cc1e6f09727befb59ed50c4a697500cebfc52d..95c1337ed5608179b48c34507f321f3b65e3bd54 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/MultilayerPerceptronClassifier.scala @@ -225,7 +225,7 @@ class MultilayerPerceptronClassifier @Since("1.5.0") ( /** * Train a model using the given dataset and parameters. - * Developers can implement this instead of [[fit()]] to avoid dealing with schema validation + * Developers can implement this instead of `fit()` to avoid dealing with schema validation * and copying parameters into the model. * * @param dataset Training dataset @@ -321,7 +321,7 @@ class MultilayerPerceptronClassificationModel private[ml] ( /** * Predict label for the given features. - * This internal method is used to implement [[transform()]] and output [[predictionCol]]. + * This internal method is used to implement `transform()` and output [[predictionCol]]. */ override protected def predict(features: Vector): Double = { LabelConverter.decodeLabel(mlpModel.predict(features)) diff --git a/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala b/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala index e89da6ff8bdd72b071611aedfdc7cb1f339dca8b..ef08134809915578daf44545b6190e25d167ed83 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/classification/ProbabilisticClassifier.scala @@ -93,7 +93,7 @@ abstract class ProbabilisticClassificationModel[ /** * Transforms dataset by reading from [[featuresCol]], and appending new columns as specified by * parameters: - * - predicted labels as [[predictionCol]] of type [[Double]] + * - predicted labels as [[predictionCol]] of type `Double` * - raw predictions (confidences) as [[rawPredictionCol]] of type `Vector` * - probability of each class as [[probabilityCol]] of type `Vector`. * @@ -158,13 +158,15 @@ abstract class ProbabilisticClassificationModel[ * doing the computation in-place. * These predictions are also called class conditional probabilities. * - * This internal method is used to implement [[transform()]] and output [[probabilityCol]]. + * This internal method is used to implement `transform()` and output [[probabilityCol]]. * * @return Estimated class conditional probabilities (modified input vector) */ protected def raw2probabilityInPlace(rawPrediction: Vector): Vector - /** Non-in-place version of [[raw2probabilityInPlace()]] */ + /** + * Non-in-place version of `raw2probabilityInPlace()` + */ protected def raw2probability(rawPrediction: Vector): Vector = { val probs = rawPrediction.copy raw2probabilityInPlace(probs) @@ -182,7 +184,7 @@ abstract class ProbabilisticClassificationModel[ * Predict the probability of each class given the features. * These predictions are also called class conditional probabilities. * - * This internal method is used to implement [[transform()]] and output [[probabilityCol]]. + * This internal method is used to implement `transform()` and output [[probabilityCol]]. 
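As a rough illustration of the transform output described above for probabilistic classifiers; `trainingDF` and `testDF` are assumed DataFrames with the default `label`/`features` columns.

import org.apache.spark.ml.classification.LogisticRegression

val model = new LogisticRegression().fit(trainingDF)
model.transform(testDF)
  .select("rawPrediction", "probability", "prediction")  // Vector, Vector, Double
  .show()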
* * @return Estimated class conditional probabilities */ diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala index 19978c97d2cfdcdf1b57217fbb93e2a9a26d5d9c..f648deced54cdcfdf16b277f034b434595c2e745 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/MinMaxScaler.scala @@ -84,7 +84,7 @@ private[feature] trait MinMaxScalerParams extends Params with HasInputCol with H * $$ * </blockquote> * - * For the case $E_{max} == E_{min}$, $Rescaled(e_i) = 0.5 * (max + min)$. + * For the case \(E_{max} == E_{min}\), \(Rescaled(e_i) = 0.5 * (max + min)\). * * @note Since zero values will probably be transformed to non-zero values, output of the * transformer will be DenseVector even for sparse input. diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala index 5dd648aecc95c3b0855a450f57b5300f71c43fa7..d75a6dc9377aea3e660a9bf6403cef4e9a4147c6 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/package.scala @@ -25,13 +25,13 @@ import org.apache.spark.sql.DataFrame * * The `ml.feature` package provides common feature transformers that help convert raw data or * features into more suitable forms for model fitting. - * Most feature transformers are implemented as [[Transformer]]s, which transform one [[DataFrame]] + * Most feature transformers are implemented as [[Transformer]]s, which transform one `DataFrame` * into another, e.g., [[HashingTF]]. * Some feature transformers are implemented as [[Estimator]]s, because the transformation requires * some aggregated information of the dataset, e.g., document frequencies in [[IDF]]. - * For those feature transformers, calling [[Estimator!.fit]] is required to obtain the model first, + * For those feature transformers, calling `Estimator.fit` is required to obtain the model first, * e.g., [[IDFModel]], in order to apply transformation. - * The transformation is usually done by appending new columns to the input [[DataFrame]], so all + * The transformation is usually done by appending new columns to the input `DataFrame`, so all * input columns are carried over. * * We try to make each transformer minimal, so it becomes flexible to assemble feature diff --git a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala index ab0620ca7553ac1b64665c3bb6a19c9388600ec8..12ad8002064636ef3e3040854a7260ba19909b45 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/param/params.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/param/params.scala @@ -446,7 +446,7 @@ class StringArrayParam(parent: Params, name: String, doc: String, isValid: Array def this(parent: Params, name: String, doc: String) = this(parent, name, doc, ParamValidators.alwaysTrue) - /** Creates a param pair with a [[java.util.List]] of values (for Java and Python). */ + /** Creates a param pair with a `java.util.List` of values (for Java and Python). 
*/ def w(value: java.util.List[String]): ParamPair[Array[String]] = w(value.asScala.toArray) override def jsonEncode(value: Array[String]): String = { @@ -471,7 +471,7 @@ class DoubleArrayParam(parent: Params, name: String, doc: String, isValid: Array def this(parent: Params, name: String, doc: String) = this(parent, name, doc, ParamValidators.alwaysTrue) - /** Creates a param pair with a [[java.util.List]] of values (for Java and Python). */ + /** Creates a param pair with a `java.util.List` of values (for Java and Python). */ def w(value: java.util.List[java.lang.Double]): ParamPair[Array[Double]] = w(value.asScala.map(_.asInstanceOf[Double]).toArray) @@ -501,7 +501,7 @@ class IntArrayParam(parent: Params, name: String, doc: String, isValid: Array[In def this(parent: Params, name: String, doc: String) = this(parent, name, doc, ParamValidators.alwaysTrue) - /** Creates a param pair with a [[java.util.List]] of values (for Java and Python). */ + /** Creates a param pair with a `java.util.List` of values (for Java and Python). */ def w(value: java.util.List[java.lang.Integer]): ParamPair[Array[Int]] = w(value.asScala.map(_.asInstanceOf[Int]).toArray) @@ -652,7 +652,9 @@ trait Params extends Identifiable with Serializable { throw new NoSuchElementException(s"Failed to find a default value for ${param.name}")) } - /** An alias for [[getOrDefault()]]. */ + /** + * An alias for `getOrDefault()`. + */ protected final def $[T](param: Param[T]): T = getOrDefault(param) /** @@ -749,14 +751,14 @@ trait Params extends Identifiable with Serializable { * Copies param values from this instance to another instance for params shared by them. * * This handles default Params and explicitly set Params separately. - * Default Params are copied from and to [[defaultParamMap]], and explicitly set Params are - * copied from and to [[paramMap]]. + * Default Params are copied from and to `defaultParamMap`, and explicitly set Params are + * copied from and to `paramMap`. * Warning: This implicitly assumes that this [[Params]] instance and the target instance * share the same set of default Params. * * @param to the target instance, which should work with the same set of default Params as this * source instance - * @param extra extra params to be copied to the target's [[paramMap]] + * @param extra extra params to be copied to the target's `paramMap` * @return the target instance with param values copied */ protected def copyValues[T <: Params](to: T, extra: ParamMap = ParamMap.empty): T = { @@ -822,7 +824,7 @@ final class ParamMap private[ml] (private val map: mutable.Map[Param[Any], Any]) this } - /** Put param pairs with a [[java.util.List]] of values for Python. */ + /** Put param pairs with a `java.util.List` of values for Python. */ private[ml] def put(paramPairs: JList[ParamPair[_]]): this.type = { put(paramPairs.asScala: _*) } diff --git a/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala b/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala index be356575ca09a39bf9a6366b84664aef33c1b961..c0a1683d3cb6f5ae2efd7ec7b0e1d0ed75b51b8c 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/regression/Regressor.scala @@ -40,7 +40,7 @@ private[spark] abstract class Regressor[ /** * :: DeveloperApi :: * - * Model produced by a [[Regressor]]. + * Model produced by a `Regressor`. * * @tparam FeaturesType Type of input features. E.g., [[org.apache.spark.mllib.linalg.Vector]] * @tparam M Concrete Model type. 
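A small sketch of how explicitly set params, defaults, and extra ParamMaps interact, per the params.scala docs above; the estimator choice is illustrative, and inside Params subclasses `$(param)` is simply shorthand for `getOrDefault(param)`.

import org.apache.spark.ml.classification.LogisticRegression
import org.apache.spark.ml.param.ParamMap

val lr = new LogisticRegression().setMaxIter(5)   // explicitly set, stored in the param map
val extra = ParamMap(lr.regParam -> 0.1)          // extra values, e.g. passed to fit(df, extra)

lr.getMaxIter                        // 5, the explicitly set value
lr.getOrDefault(lr.elasticNetParam)  // 0.0, falls back to the default
lr.extractParamMap(extra)            // defaults ++ explicitly set ++ extra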
diff --git a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala index c0e3801499818b10c38d3cb1d2daa137a7391ccc..09bddcdb810bb95ba0b5185efb9366309b31994e 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/util/ReadWrite.scala @@ -37,7 +37,7 @@ import org.apache.spark.sql.{SparkSession, SQLContext} import org.apache.spark.util.Utils /** - * Trait for [[MLWriter]] and [[MLReader]]. + * Trait for `MLWriter` and `MLReader`. */ private[util] sealed trait BaseReadWrite { private var optionSparkSession: Option[SparkSession] = None @@ -112,7 +112,7 @@ abstract class MLWriter extends BaseReadWrite with Logging { } /** - * [[save()]] handles overwriting and then calls this method. Subclasses should override this + * `save()` handles overwriting and then calls this method. Subclasses should override this * method to implement the actual saving of the instance. */ @Since("1.6.0") @@ -135,13 +135,13 @@ abstract class MLWriter extends BaseReadWrite with Logging { } /** - * Trait for classes that provide [[MLWriter]]. + * Trait for classes that provide `MLWriter`. */ @Since("1.6.0") trait MLWritable { /** - * Returns an [[MLWriter]] instance for this ML instance. + * Returns an `MLWriter` instance for this ML instance. */ @Since("1.6.0") def write: MLWriter @@ -193,7 +193,7 @@ abstract class MLReader[T] extends BaseReadWrite { } /** - * Trait for objects that provide [[MLReader]]. + * Trait for objects that provide `MLReader`. * * @tparam T ML instance type */ @@ -201,7 +201,7 @@ abstract class MLReader[T] extends BaseReadWrite { trait MLReadable[T] { /** - * Returns an [[MLReader]] instance for this class. + * Returns an `MLReader` instance for this class. */ @Since("1.6.0") def read: MLReader[T] @@ -235,7 +235,7 @@ trait DefaultParamsReadable[T] extends MLReadable[T] { } /** - * Default [[MLWriter]] implementation for transformers and estimators that contain basic + * Default `MLWriter` implementation for transformers and estimators that contain basic * (json4s-serializable) params and no data. This will not handle more complex params or types with * data (e.g., models with coefficients). * @@ -309,7 +309,7 @@ private[ml] object DefaultParamsWriter { } /** - * Default [[MLReader]] implementation for transformers and estimators that contain basic + * Default `MLReader` implementation for transformers and estimators that contain basic * (json4s-serializable) params and no data. This will not handle more complex params or types with * data (e.g., models with coefficients). * diff --git a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala index 4cb9200030293d0d26bb104faec2797a1fcb73b3..6c5f529fb8bfd03bd8f3b99bdf6428f5c4a3452b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/clustering/LDA.scala @@ -91,7 +91,7 @@ class LDA private ( * distributions over topics ("theta"). * * This method assumes the Dirichlet distribution is symmetric and can be described by a single - * [[Double]] parameter. It should fail if docConcentration is asymmetric. + * `Double` parameter. It should fail if docConcentration is asymmetric. */ @Since("1.3.0") def getDocConcentration: Double = { @@ -113,7 +113,7 @@ class LDA private ( * * If set to a singleton vector Vector(-1), then docConcentration is set automatically. 
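For the MLWriter/MLReader pair documented above, the usual round trip looks roughly like this; the transformer, column names, and path are illustrative.

import org.apache.spark.ml.feature.Binarizer

val binarizer = new Binarizer()
  .setInputCol("value")
  .setOutputCol("flag")
  .setThreshold(0.5)

binarizer.write.overwrite().save("/tmp/example-binarizer")  // MLWritable.write returns an MLWriter
val restored = Binarizer.load("/tmp/example-binarizer")     // shorthand for Binarizer.read.load(...)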
If set to * singleton vector Vector(t) where t != -1, then t is replicated to a vector of length k during - * `LDAOptimizer.initialize()`. Otherwise, the [[docConcentration]] vector must be length k. + * `LDAOptimizer.initialize()`. Otherwise, the `docConcentration` vector must be length k. * (default = Vector(-1) = automatic) * * Optimizer-specific parameter settings: @@ -137,7 +137,7 @@ class LDA private ( } /** - * Replicates a [[Double]] docConcentration to create a symmetric prior. + * Replicates a `Double` docConcentration to create a symmetric prior. */ @Since("1.3.0") def setDocConcentration(docConcentration: Double): this.type = { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala index 85a90fa959f938347de85dca3a115468b232ac79..acb83ac31affdc1368bac96fcbaeb0b2d934f079 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/AssociationRules.scala @@ -54,7 +54,7 @@ class AssociationRules private[fpm] ( } /** - * Computes the association rules with confidence above [[minConfidence]]. + * Computes the association rules with confidence above `minConfidence`. * @param freqItemsets frequent itemset model obtained from [[FPGrowth]] * @return a `Set[Rule[Item]]` containing the association rules. * diff --git a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala index 635da00b6990e197e9563709d91749252ef67cab..f6b1143272d165dea66569a16393e45385c80f4b 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/fpm/FPGrowth.scala @@ -52,7 +52,7 @@ class FPGrowthModel[Item: ClassTag] @Since("1.3.0") ( @Since("1.3.0") val freqItemsets: RDD[FreqItemset[Item]]) extends Saveable with Serializable { /** - * Generates association rules for the [[Item]]s in [[freqItemsets]]. + * Generates association rules for the `Item`s in [[freqItemsets]]. * @param confidence minimal confidence of the rules produced */ @Since("1.5.0") diff --git a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala index 63ea9d3264b0f35ee02a61c8a7c13050e0e3bf69..723addc7150ddf8f91e479793e1d0abfe8dff502 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/linalg/Vectors.scala @@ -273,7 +273,7 @@ class VectorUDT extends UserDefinedType[Vector] { /** * Factory methods for [[org.apache.spark.mllib.linalg.Vector]]. * We don't use the name `Vector` because Scala imports - * [[scala.collection.immutable.Vector]] by default. + * `scala.collection.immutable.Vector` by default. */ @Since("1.0.0") object Vectors { diff --git a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala index 0efce3c76f15afa953f19960a0a2f0c644ce24bf..88c73241fb555ee106a3db15b6503b4ae15ef510 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/optimization/Gradient.scala @@ -78,7 +78,7 @@ abstract class Gradient extends Serializable { * * for K classes multiclass classification problem. 
* - * The model weights $w = (w_1, w_2, ..., w_{K-1})^T$ becomes a matrix which has dimension of + * The model weights \(w = (w_1, w_2, ..., w_{K-1})^T\) becomes a matrix which has dimension of * (K-1) * (N+1) if the intercepts are added. If the intercepts are not added, the dimension * will be (K-1) * N. * @@ -93,9 +93,9 @@ abstract class Gradient extends Serializable { * $$ * </blockquote> * - * where $\alpha(i) = 1$ if $i \ne 0$, and - * $\alpha(i) = 0$ if $i == 0$, - * $margins_i = x w_i$. + * where $\alpha(i) = 1$ if \(i \ne 0\), and + * $\alpha(i) = 0$ if \(i == 0\), + * \(margins_i = x w_i\). * * For optimization, we have to calculate the first derivative of the loss function, and * a simple calculation shows that @@ -110,18 +110,19 @@ abstract class Gradient extends Serializable { * $$ * </blockquote> * - * where $\delta_{i, j} = 1$ if $i == j$, - * $\delta_{i, j} = 0$ if $i != j$, and + * where $\delta_{i, j} = 1$ if \(i == j\), + * $\delta_{i, j} = 0$ if \(i != j\), and * multiplier = * $\exp(margins_i) / (1 + \sum_k^{K-1} \exp(margins_i)) - (1-\alpha(y)\delta_{y, i+1})$ * * If any of margins is larger than 709.78, the numerical computation of multiplier and loss * function will be suffered from arithmetic overflow. This issue occurs when there are outliers * in data which are far away from hyperplane, and this will cause the failing of training once - * infinity / infinity is introduced. Note that this is only a concern when max(margins) > 0. + * infinity / infinity is introduced. Note that this is only a concern when max(margins) + * {@literal >} 0. * - * Fortunately, when max(margins) = maxMargin > 0, the loss function and the multiplier can be - * easily rewritten into the following equivalent numerically stable formula. + * Fortunately, when max(margins) = maxMargin {@literal >} 0, the loss function and the multiplier + * can be easily rewritten into the following equivalent numerically stable formula. * * <blockquote> * $$ @@ -133,7 +134,7 @@ abstract class Gradient extends Serializable { * \end{align} * $$ * </blockquote> - + * * where sum = $\exp(-maxMargin) + \sum_i^{K-1}\exp(margins_i - maxMargin) - 1$. * * Note that each term, $(margins_i - maxMargin)$ in $\exp$ is smaller than zero; as a result, diff --git a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala index d680237bf687fa6c54a77889aa562c04102dd7ea..551ea357950bab19665933e45b96b1756561acb5 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/stat/test/StreamingTest.scala @@ -54,7 +54,7 @@ case class BinarySample @Since("1.6.0") ( * cumulative processing, using all batches seen so far. * * Different tests may be used for assessing statistical significance depending on assumptions - * satisfied by data. For more details, see [[StreamingTestMethod]]. The `testMethod` specifies + * satisfied by data. For more details, see `StreamingTestMethod`. The `testMethod` specifies * which test will be used. 
* * Use a builder pattern to construct a streaming test in an application, for example: diff --git a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala index 299950785e420320cf4f573a525f2558a0e1eedf..fc1d4125a56490faa6c3b3e9afb36e1d8a64af62 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/tree/model/treeEnsembleModels.scala @@ -341,7 +341,7 @@ private[tree] sealed class TreeEnsembleModel( def predict(features: RDD[Vector]): RDD[Double] = features.map(x => predict(x)) /** - * Java-friendly version of [[org.apache.spark.mllib.tree.model.TreeEnsembleModel#predict]]. + * Java-friendly version of `org.apache.spark.mllib.tree.model.TreeEnsembleModel.predict`. */ def predict(features: JavaRDD[Vector]): JavaRDD[java.lang.Double] = { predict(features.rdd).toJavaRDD().asInstanceOf[JavaRDD[java.lang.Double]] diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala index c362104b26ffbcb49f6e7fd3a8b728664b33eb81..180c2d130074e32e81e82d389362edf8ddda9c2e 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/Row.scala @@ -48,7 +48,7 @@ object Row { def apply(values: Any*): Row = new GenericRow(values.toArray) /** - * This method can be used to construct a [[Row]] from a [[Seq]] of values. + * This method can be used to construct a [[Row]] from a `Seq` of values. */ def fromSeq(values: Seq[Any]): Row = new GenericRow(values.toArray) @@ -283,7 +283,7 @@ trait Row extends Serializable { def getSeq[T](i: Int): Seq[T] = getAs[Seq[T]](i) /** - * Returns the value at position i of array type as [[java.util.List]]. + * Returns the value at position i of array type as `java.util.List`. * * @throws ClassCastException when data type does not match. */ @@ -298,7 +298,7 @@ trait Row extends Serializable { def getMap[K, V](i: Int): scala.collection.Map[K, V] = getAs[Map[K, V]](i) /** - * Returns the value at position i of array type as a [[java.util.Map]]. + * Returns the value at position i of array type as a `java.util.Map`. * * @throws ClassCastException when data type does not match. */ diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala index da5775bc3753a55552aabc4e018e98e3baf39798..1d54ff5825c2e4586cc56373fb1ceb91ef350817 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/AbstractDataType.scala @@ -80,7 +80,7 @@ private[sql] object TypeCollection { /** * Types that can be ordered/compared. In the long run we should probably make this a trait - * that can be mixed into each data type, and perhaps create an [[AbstractDataType]]. + * that can be mixed into each data type, and perhaps create an `AbstractDataType`. */ // TODO: Should we consolidate this with RowOrdering.isOrderable? val Ordered = TypeCollection( @@ -106,7 +106,7 @@ private[sql] object TypeCollection { /** - * An [[AbstractDataType]] that matches any concrete data types. + * An `AbstractDataType` that matches any concrete data types. 
*/ protected[sql] object AnyDataType extends AbstractDataType { diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala index 98efba199ad472edb6903a8b2b6d0032fc6713ee..38c40482fa4d9d738dc5aa8d9ba0c311f750f72f 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/types/ArrayType.scala @@ -49,7 +49,7 @@ object ArrayType extends AbstractDataType { * The data type for collections of multiple values. * Internally these are represented as columns that contain a ``scala.collection.Seq``. * - * Please use [[DataTypes.createArrayType()]] to create a specific instance. + * Please use `DataTypes.createArrayType()` to create a specific instance. * * An [[ArrayType]] object comprises two fields, `elementType: [[DataType]]` and * `containsNull: Boolean`. The field of `elementType` is used to specify the type of diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala index fe34d597dbf1022fd2bf37c33173ca4150be8d1f..a787d5a9a94380a7d5365930cbf328c7c614b8e0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala @@ -310,7 +310,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * Lines text format or newline-delimited JSON</a>) and returns the result as * a `DataFrame`. * - * Unless the schema is specified using [[schema]] function, this function goes through the + * Unless the schema is specified using `schema` function, this function goes through the * input once to determine the input schema. * * @param jsonRDD input RDD with one JSON object per record @@ -322,7 +322,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * Loads an `RDD[String]` storing JSON objects (<a href="http://jsonlines.org/">JSON Lines * text format or newline-delimited JSON</a>) and returns the result as a `DataFrame`. * - * Unless the schema is specified using [[schema]] function, this function goes through the + * Unless the schema is specified using `schema` function, this function goes through the * input once to determine the input schema. * * @param jsonRDD input RDD with one JSON object per record @@ -365,7 +365,7 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging { * * This function will go through the input once to determine the input schema if `inferSchema` * is enabled. To avoid going through the entire data once, disable `inferSchema` option or - * specify the schema explicitly using [[schema]]. + * specify the schema explicitly using `schema`. * * You can set the following CSV-specific options to deal with CSV files: * <ul> diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala index 89c3a74f4f0676d905c62cc8866fdf4712e99ff2..72945320614bf5a745060fae0bcb4be99ad2fee9 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameStatFunctions.scala @@ -152,7 +152,7 @@ final class DataFrameStatFunctions private[sql](df: DataFrame) { * The number of distinct values for each column should be less than 1e4. At most 1e6 non-zero * pair frequencies will be returned. 
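As the reader docs above note, supplying a schema up front avoids the extra pass over the data used for inference. A rough sketch, with an assumed SparkSession `spark` and illustrative paths and fields:

import org.apache.spark.sql.types.{LongType, StringType, StructField, StructType}

val schema = StructType(Seq(
  StructField("id", LongType, nullable = false),
  StructField("name", StringType)))

val fromJson = spark.read.schema(schema).json("/data/people.json")
val fromCsv  = spark.read.schema(schema).option("header", "true").csv("/data/people.csv")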
* The first column of each row will be the distinct values of `col1` and the column names will - * be the distinct values of `col2`. The name of the first column will be `$col1_$col2`. Counts + * be the distinct values of `col2`. The name of the first column will be `col1_col2`. Counts * will be returned as `Long`s. Pairs that have no occurrences will have zero as their counts. * Null elements will be replaced by "null", and back ticks will be dropped from elements if they * exist. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala index 391c34f1285ed84e7a9227f3d6e68a2e4591532f..ce6e8be8b0ab1390782841f92b1bebf6025bd563 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala @@ -2520,7 +2520,7 @@ class Dataset[T] private[sql]( def unpersist(): this.type = unpersist(blocking = false) /** - * Represents the content of the Dataset as an `RDD` of [[T]]. + * Represents the content of the Dataset as an `RDD` of `T`. * * @group basic * @since 1.6.0 @@ -2534,14 +2534,14 @@ class Dataset[T] private[sql]( } /** - * Returns the content of the Dataset as a `JavaRDD` of [[T]]s. + * Returns the content of the Dataset as a `JavaRDD` of `T`s. * @group basic * @since 1.6.0 */ def toJavaRDD: JavaRDD[T] = rdd.toJavaRDD() /** - * Returns the content of the Dataset as a `JavaRDD` of [[T]]s. + * Returns the content of the Dataset as a `JavaRDD` of `T`s. * @group basic * @since 1.6.0 */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala index 1a7fd689a04d211b3c63aa334bfd79ec9cbb69b1..ea465e2c834d072768cb20b70990981891771021 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SQLContext.scala @@ -298,7 +298,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) } /** - * Convert a [[BaseRelation]] created for external data sources into a `DataFrame`. + * Convert a `BaseRelation` created for external data sources into a `DataFrame`. * * @group dataframes * @since 1.3.0 @@ -309,7 +309,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) /** * :: DeveloperApi :: - * Creates a `DataFrame` from an [[RDD]] containing [[Row]]s using the given schema. + * Creates a `DataFrame` from an `RDD` containing [[Row]]s using the given schema. * It is important to make sure that the structure of every [[Row]] of the provided RDD matches * the provided schema. Otherwise, there will be runtime exception. * Example: @@ -406,7 +406,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) /** * :: Experimental :: - * Creates a [[Dataset]] from a [[java.util.List]] of a given type. This method requires an + * Creates a [[Dataset]] from a `java.util.List` of a given type. This method requires an * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation) * that is generally created automatically through implicits from a `SparkSession`, or can be * created explicitly by calling static methods on [[Encoders]]. @@ -438,7 +438,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) /** * :: DeveloperApi :: - * Creates a `DataFrame` from a [[JavaRDD]] containing [[Row]]s using the given schema. + * Creates a `DataFrame` from a `JavaRDD` containing [[Row]]s using the given schema. 
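A minimal sketch of the row-based createDataFrame variants documented here; `sc` and `spark` are an assumed SparkContext and SparkSession, and every Row must match the schema exactly.

import org.apache.spark.sql.Row
import org.apache.spark.sql.types.{IntegerType, StringType, StructField, StructType}

val rowRDD = sc.parallelize(Seq(Row(1, "alice"), Row(2, "bob")))
val rowSchema = StructType(Seq(
  StructField("id", IntegerType),
  StructField("name", StringType)))

val people = spark.createDataFrame(rowRDD, rowSchema)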
* It is important to make sure that the structure of every [[Row]] of the provided RDD matches * the provided schema. Otherwise, there will be runtime exception. * @@ -453,7 +453,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) /** * :: DeveloperApi :: - * Creates a `DataFrame` from a [[java.util.List]] containing [[Row]]s using the given schema. + * Creates a `DataFrame` from a `java.util.List` containing [[Row]]s using the given schema. * It is important to make sure that the structure of every [[Row]] of the provided List matches * the provided schema. Otherwise, there will be runtime exception. * @@ -518,7 +518,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) /** * :: Experimental :: - * Returns a [[DataStreamReader]] that can be used to read streaming data in as a `DataFrame`. + * Returns a `DataStreamReader` that can be used to read streaming data in as a `DataFrame`. * {{{ * sparkSession.readStream.parquet("/path/to/directory/of/parquet/files") * sparkSession.readStream.schema(schema).json("/path/to/directory/of/json/files") @@ -638,7 +638,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) /** * :: Experimental :: - * Creates a `DataFrame` with a single [[LongType]] column named `id`, containing elements + * Creates a `DataFrame` with a single `LongType` column named `id`, containing elements * in a range from 0 to `end` (exclusive) with step value 1. * * @since 1.4.1 @@ -650,7 +650,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) /** * :: Experimental :: - * Creates a `DataFrame` with a single [[LongType]] column named `id`, containing elements + * Creates a `DataFrame` with a single `LongType` column named `id`, containing elements * in a range from `start` to `end` (exclusive) with step value 1. * * @since 1.4.0 @@ -662,7 +662,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) /** * :: Experimental :: - * Creates a `DataFrame` with a single [[LongType]] column named `id`, containing elements + * Creates a `DataFrame` with a single `LongType` column named `id`, containing elements * in a range from `start` to `end` (exclusive) with a step value. * * @since 2.0.0 @@ -676,7 +676,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) /** * :: Experimental :: - * Creates a `DataFrame` with a single [[LongType]] column named `id`, containing elements + * Creates a `DataFrame` with a single `LongType` column named `id`, containing elements * in an range from `start` to `end` (exclusive) with an step value, with partition number * specified. * @@ -733,7 +733,7 @@ class SQLContext private[sql](val sparkSession: SparkSession) } /** - * Returns a [[StreamingQueryManager]] that allows managing all the + * Returns a `StreamingQueryManager` that allows managing all the * [[org.apache.spark.sql.streaming.StreamingQuery StreamingQueries]] active on `this` context. * * @since 2.0.0 diff --git a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala index f3dde480eabe03223616693479a543c0cf5e704a..e1fdb2f2876b9f9d82344dfd6f65c7d799a45d8a 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/SparkSession.scala @@ -323,7 +323,7 @@ class SparkSession private( /** * :: DeveloperApi :: - * Creates a `DataFrame` from a [[java.util.List]] containing [[Row]]s using the given schema. 
+ * Creates a `DataFrame` from a `java.util.List` containing [[Row]]s using the given schema. * It is important to make sure that the structure of every [[Row]] of the provided List matches * the provided schema. Otherwise, there will be runtime exception. * @@ -448,7 +448,7 @@ class SparkSession private( /** * :: Experimental :: - * Creates a [[Dataset]] from a [[java.util.List]] of a given type. This method requires an + * Creates a [[Dataset]] from a `java.util.List` of a given type. This method requires an * encoder (to convert a JVM object of type `T` to and from the internal Spark SQL representation) * that is generally created automatically through implicits from a `SparkSession`, or can be * created explicitly by calling static methods on [[Encoders]]. diff --git a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala index c86ae5be9ef62f440d0a9d2f99eef838ab111eaa..5e27484c110473a81f1eb03b669ba0824449c493 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/functions.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/functions.scala @@ -1200,8 +1200,8 @@ object functions { * Creates a new struct column. * If the input column is a column in a `DataFrame`, or a derived column expression * that is named (i.e. aliased), its name would be remained as the StructField's name, - * otherwise, the newly generated StructField's name would be auto generated as col${index + 1}, - * i.e. col1, col2, col3, ... + * otherwise, the newly generated StructField's name would be auto generated as + * `col` with a suffix `index + 1`, i.e. col1, col2, col3, ... * * @group normal_funcs * @since 1.4.0 @@ -2482,7 +2482,7 @@ object functions { * format given by the second argument. * * A pattern could be for instance `dd.MM.yyyy` and could return a string like '18.03.1993'. All - * pattern letters of [[java.text.SimpleDateFormat]] can be used. + * pattern letters of `java.text.SimpleDateFormat` can be used. * * @note Use when ever possible specialized functions like [[year]]. These benefit from a * specialized implementation. @@ -2732,14 +2732,14 @@ object functions { * @param timeColumn The column or the expression to use as the timestamp for windowing by time. * The time column must be of TimestampType. * @param windowDuration A string specifying the width of the window, e.g. `10 minutes`, - * `1 second`. Check [[org.apache.spark.unsafe.types.CalendarInterval]] for + * `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for * valid duration identifiers. Note that the duration is a fixed length of * time, and does not vary over time according to a calendar. For example, * `1 day` always means 86,400,000 milliseconds, not a calendar day. * @param slideDuration A string specifying the sliding interval of the window, e.g. `1 minute`. * A new window will be generated every `slideDuration`. Must be less than * or equal to the `windowDuration`. Check - * [[org.apache.spark.unsafe.types.CalendarInterval]] for valid duration + * `org.apache.spark.unsafe.types.CalendarInterval` for valid duration * identifiers. This duration is likewise absolute, and does not vary * according to a calendar. * @param startTime The offset with respect to 1970-01-01 00:00:00 UTC with which to start @@ -2790,14 +2790,14 @@ object functions { * @param timeColumn The column or the expression to use as the timestamp for windowing by time. * The time column must be of TimestampType. 
* @param windowDuration A string specifying the width of the window, e.g. `10 minutes`, - * `1 second`. Check [[org.apache.spark.unsafe.types.CalendarInterval]] for + * `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for * valid duration identifiers. Note that the duration is a fixed length of * time, and does not vary over time according to a calendar. For example, * `1 day` always means 86,400,000 milliseconds, not a calendar day. * @param slideDuration A string specifying the sliding interval of the window, e.g. `1 minute`. * A new window will be generated every `slideDuration`. Must be less than * or equal to the `windowDuration`. Check - * [[org.apache.spark.unsafe.types.CalendarInterval]] for valid duration + * `org.apache.spark.unsafe.types.CalendarInterval` for valid duration * identifiers. This duration is likewise absolute, and does not vary * according to a calendar. * @@ -2837,7 +2837,7 @@ object functions { * @param timeColumn The column or the expression to use as the timestamp for windowing by time. * The time column must be of TimestampType. * @param windowDuration A string specifying the width of the window, e.g. `10 minutes`, - * `1 second`. Check [[org.apache.spark.unsafe.types.CalendarInterval]] for + * `1 second`. Check `org.apache.spark.unsafe.types.CalendarInterval` for * valid duration identifiers. * * @group datetime_funcs diff --git a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala index f288ad61410f7cd97060a2fb0376b88ea38cd1fc..ff8b15b3ff3fff1dfdc42988aca7453af1f471d2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/sources/interfaces.scala @@ -305,7 +305,7 @@ trait InsertableRelation { * ::Experimental:: * An interface for experimenting with a more direct connection to the query planner. Compared to * [[PrunedFilteredScan]], this operator receives the raw expressions from the - * [[org.apache.spark.sql.catalyst.plans.logical.LogicalPlan]]. Unlike the other APIs this + * `org.apache.spark.sql.catalyst.plans.logical.LogicalPlan`. Unlike the other APIs this * interface is NOT designed to be binary compatible across releases and thus should only be used * for experimentation. * diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala index 6d2cede6f5a23d4d1454dea7db434a09912487fe..b7ffb3cddb472ce8d1d2bbcdefaa112841a8a556 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala @@ -192,7 +192,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo * * This function will go through the input once to determine the input schema if `inferSchema` * is enabled. To avoid going through the entire data once, disable `inferSchema` option or - * specify the schema explicitly using [[schema]]. + * specify the schema explicitly using `schema`. 
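A brief sketch of the simplest (tumbling-window) form of the `window` function documented above, grouping an assumed DataFrame `events` by 10-minute windows of its `timestamp` column:

import org.apache.spark.sql.functions.{col, window}

val counts = events
  .groupBy(window(col("timestamp"), "10 minutes"), col("word"))
  .count()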
* * You can set the following CSV-specific options to deal with CSV files: * <ul> diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQuery.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQuery.scala index 9c00259f73e2ea94fd75b692fa8cd46644a8ff2e..12a1bb1db5779c9c70a08816ee5d5d85d00a4aa8 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQuery.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQuery.scala @@ -137,7 +137,7 @@ trait StreamingQuery { * Blocks until all available data in the source has been processed and committed to the sink. * This method is intended for testing. Note that in the case of continually arriving data, this * method may block forever. Additionally, this method is only guaranteed to block until data that - * has been synchronously appended data to a [[org.apache.spark.sql.execution.streaming.Source]] + * has been synchronously appended data to a `org.apache.spark.sql.execution.streaming.Source` * prior to invocation. (i.e. `getOffset` must immediately reflect the addition). * @since 2.0.0 */ diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala index 6b871b1fe68577f0d4f0f3d8a5f38c9778c189db..c376913516ef73765f4b5ae3816a9504f5de25b0 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/StreamingQueryListener.scala @@ -52,7 +52,7 @@ abstract class StreamingQueryListener { * @note This method is asynchronous. The status in [[StreamingQuery]] will always be * latest no matter when this method is called. Therefore, the status of [[StreamingQuery]] * may be changed before/when you process the event. E.g., you may find [[StreamingQuery]] - * is terminated when you are processing [[QueryProgressEvent]]. + * is terminated when you are processing `QueryProgressEvent`. * @since 2.0.0 */ def onQueryProgress(event: QueryProgressEvent): Unit diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala index 52aa1088acd4aa1b3c47c941b59bfefd1a422527..21b729bf29ea6bfb9e5ce23f25ebaba5f336f53c 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveInspectors.scala @@ -772,7 +772,7 @@ private[hive] trait HiveInspectors { /** * Map the catalyst expression to ObjectInspector, however, - * if the expression is [[Literal]] or foldable, a constant writable object inspector returns; + * if the expression is `Literal` or foldable, a constant writable object inspector returns; * Otherwise, we always get the object inspector according to its data type(in catalyst) * @param expr Catalyst expression to be mapped * @return Hive java objectinspector (recursively). 
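For the listener callback documented above, registration looks roughly like this; Spark 2.1-style events are assumed, `spark` is an assumed SparkSession, and the handler bodies are placeholders.

import org.apache.spark.sql.streaming.StreamingQueryListener
import org.apache.spark.sql.streaming.StreamingQueryListener._

spark.streams.addListener(new StreamingQueryListener {
  override def onQueryStarted(event: QueryStartedEvent): Unit = {}
  override def onQueryProgress(event: QueryProgressEvent): Unit = {
    // Asynchronous: the query may have moved on (or terminated) by the time this runs.
    println(event.progress)
  }
  override def onQueryTerminated(event: QueryTerminatedEvent): Unit = {}
})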
diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala index badccae16837bc0d306aebd18c59986ff40ecdda..39be41770d8edb566c0cb4deb5ae024e441a86e6 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/HiveStrategies.scala @@ -80,7 +80,7 @@ class DetermineHiveSerde(conf: SQLConf) extends Rule[LogicalPlan] { /** * Replaces generic operations with specific variants that are designed to work with Hive. * - * Note that, this rule must be run after [[PreprocessTableInsertion]]. + * Note that, this rule must be run after `PreprocessTableInsertion`. */ class HiveAnalysis(session: SparkSession) extends Rule[LogicalPlan] { override def apply(plan: LogicalPlan): LogicalPlan = plan resolveOperators { diff --git a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala index ce418ae135dd9d09b4d45f373174ce92b7ebc0ea..3e654d8eeb3554101fe5ce4d511ff1259ad47d46 100644 --- a/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala +++ b/sql/hive/src/main/scala/org/apache/spark/sql/hive/execution/InsertIntoHiveTable.scala @@ -50,10 +50,10 @@ import org.apache.spark.SparkException * blame Reynold for this! He was just moving code around! * * In the future we should converge the write path for Hive with the normal data source write path, - * as defined in [[org.apache.spark.sql.execution.datasources.FileFormatWriter]]. + * as defined in `org.apache.spark.sql.execution.datasources.FileFormatWriter`. * * @param table the logical plan representing the table. In the future this should be a - * [[org.apache.spark.sql.catalyst.catalog.CatalogTable]] once we converge Hive tables + * `org.apache.spark.sql.catalyst.catalog.CatalogTable` once we converge Hive tables * and data source tables. * @param partition a map from the partition key to the partition value (optional). If the partition * value is optional, dynamic partition insert will be performed. diff --git a/streaming/src/main/scala/org/apache/spark/streaming/State.scala b/streaming/src/main/scala/org/apache/spark/streaming/State.scala index 23cf48eb0673839a38fe22ed2ffbeadbdce65a64..734c6ef42696ea931bc799fe61661b6d4ec52c0b 100644 --- a/streaming/src/main/scala/org/apache/spark/streaming/State.scala +++ b/streaming/src/main/scala/org/apache/spark/streaming/State.scala @@ -120,7 +120,7 @@ sealed abstract class State[S] { def isTimingOut(): Boolean /** - * Get the state as a [[scala.Option]]. It will be `Some(state)` if it exists, otherwise `None`. + * Get the state as a `scala.Option`. It will be `Some(state)` if it exists, otherwise `None`. */ @inline final def getOption(): Option[S] = if (exists) Some(get()) else None
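Finally, a minimal sketch of `getOption` inside a `mapWithState` update function; the key/value types and the update logic are illustrative.

import org.apache.spark.streaming.State

// Used as: pairDStream.mapWithState(StateSpec.function(updateCount _))
def updateCount(key: String, value: Option[Int], state: State[Int]): (String, Int) = {
  val previous = state.getOption().getOrElse(0)  // Some(previous) if state exists, otherwise None
  val total = previous + value.getOrElse(0)
  state.update(total)
  (key, total)
}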