diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala index 118a6e3e6ad4463976e175bb6f02f3374db7fa62..626e97efb47c64161d2e4d2f70ac040e1d629da8 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/StandardScaler.scala @@ -66,6 +66,11 @@ private[feature] trait StandardScalerParams extends Params with HasInputCol with * :: Experimental :: * Standardizes features by removing the mean and scaling to unit variance using column summary * statistics on the samples in the training set. + * + * The "unit variance" scaling is computed using the + * [[https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation + * corrected sample standard deviation]], + * which is computed as the square root of the unbiased sample variance. */ @Experimental class StandardScaler(override val uid: String) extends Estimator[StandardScalerModel] diff --git a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala index 5c35e1b91c9bf71db72b5ca7acc736935b1b6b48..ee97045f34dc8a16987584241e1d7a23f370bbb8 100644 --- a/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala +++ b/mllib/src/main/scala/org/apache/spark/mllib/feature/StandardScaler.scala @@ -27,6 +27,11 @@ import org.apache.spark.rdd.RDD * Standardizes features by removing the mean and scaling to unit std using column summary * statistics on the samples in the training set. * + * The "unit std" is computed using the + * [[https://en.wikipedia.org/wiki/Standard_deviation#Corrected_sample_standard_deviation + * corrected sample standard deviation]], + * which is computed as the square root of the unbiased sample variance. + * * @param withMean False by default. Centers the data with mean before scaling. 
It will build a * dense output, so this does not work on sparse input and will raise an exception. * @param withStd True by default. Scales the data to unit standard deviation.