Skip to content
Snippets Groups Projects
Commit ae4b91d1 authored by Joseph K. Bradley
Browse files

[SPARK-20039][ML] rename ChiSquare to ChiSquareTest

## What changes were proposed in this pull request?

Because ChiSquare lives in the package stat, its name alone does not make clear whether it refers to the hypothesis test, the distribution, or something else. This PR renames it to ChiSquareTest to remove that ambiguity.

## How was this patch tested?

Existing unit tests (the test suite is renamed from ChiSquareSuite to ChiSquareTestSuite to match).

Author: Joseph K. Bradley <joseph@databricks.com>

Closes #17368 from jkbradley/SPARK-20039.
parent 4c0ff5f5
No related branches found
No related tags found
No related merge requests found
......@@ -37,7 +37,7 @@ import org.apache.spark.sql.functions.col
*/
@Experimental
@Since("2.2.0")
object ChiSquare {
object ChiSquareTest {
/** Used to construct output schema of tests */
private case class ChiSquareResult(
......
......@@ -27,7 +27,7 @@ import org.apache.spark.ml.util.TestingUtils._
import org.apache.spark.mllib.stat.test.ChiSqTest
import org.apache.spark.mllib.util.MLlibTestSparkContext
class ChiSquareSuite
class ChiSquareTestSuite
extends SparkFunSuite with MLlibTestSparkContext with DefaultReadWriteTest {
import testImplicits._
......@@ -45,7 +45,7 @@ class ChiSquareSuite
LabeledPoint(1.0, Vectors.dense(3.5, 40.0)))
for (numParts <- List(2, 4, 6, 8)) {
val df = spark.createDataFrame(sc.parallelize(data, numParts))
val chi = ChiSquare.test(df, "features", "label")
val chi = ChiSquareTest.test(df, "features", "label")
val (pValues: Vector, degreesOfFreedom: Array[Int], statistics: Vector) =
chi.select("pValues", "degreesOfFreedom", "statistics")
.as[(Vector, Array[Int], Vector)].head()
......@@ -62,7 +62,7 @@ class ChiSquareSuite
LabeledPoint(0.0, Vectors.sparse(numCols, Seq((100, 2.0)))),
LabeledPoint(0.1, Vectors.sparse(numCols, Seq((200, 1.0)))))
val df = spark.createDataFrame(sparseData)
val chi = ChiSquare.test(df, "features", "label")
val chi = ChiSquareTest.test(df, "features", "label")
val (pValues: Vector, degreesOfFreedom: Array[Int], statistics: Vector) =
chi.select("pValues", "degreesOfFreedom", "statistics")
.as[(Vector, Array[Int], Vector)].head()
......@@ -83,7 +83,7 @@ class ChiSquareSuite
withClue("ChiSquare should throw an exception when given a continuous-valued label") {
intercept[SparkException] {
val df = spark.createDataFrame(continuousLabel)
ChiSquare.test(df, "features", "label")
ChiSquareTest.test(df, "features", "label")
}
}
val continuousFeature = Seq.fill(tooManyCategories)(
......@@ -91,7 +91,7 @@ class ChiSquareSuite
withClue("ChiSquare should throw an exception when given continuous-valued features") {
intercept[SparkException] {
val df = spark.createDataFrame(continuousFeature)
ChiSquare.test(df, "features", "label")
ChiSquareTest.test(df, "features", "label")
}
}
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment