Skip to content
Snippets Groups Projects
Commit f89808b0 authored by WeichenXu's avatar WeichenXu Committed by Felix Cheung
Browse files

[SPARK-17499][SPARKR][ML][MLLIB] make the default params in sparkR spark.mlp...

[SPARK-17499][SPARKR][ML][MLLIB] make the default params in sparkR spark.mlp consistent with MultilayerPerceptronClassifier

## What changes were proposed in this pull request?

update `MultilayerPerceptronClassifierWrapper.fit` parameter types:
`layers: Array[Int]`
`seed: String`

update several default params in sparkR `spark.mlp`:
`tol` --> 1e-6
`stepSize` --> 0.03
`seed` --> NULL (when seed == NULL, the Scala-side wrapper treats it as a `null` value and the default seed is used)
The R-side `seed` only supports 32-bit integers.

remove the default value of `layers`, and move it in front of the parameters that have default values.
add `layers` parameter validation check.

## How was this patch tested?

tests added.

Author: WeichenXu <WeichenXu123@outlook.com>

Closes #15051 from WeichenXu123/update_py_mlp_default.
parent 90d57542
No related branches found
No related tags found
No related merge requests found
...@@ -694,12 +694,19 @@ setMethod("predict", signature(object = "KMeansModel"), ...@@ -694,12 +694,19 @@ setMethod("predict", signature(object = "KMeansModel"),
#' } #' }
#' @note spark.mlp since 2.1.0 #' @note spark.mlp since 2.1.0
setMethod("spark.mlp", signature(data = "SparkDataFrame"), setMethod("spark.mlp", signature(data = "SparkDataFrame"),
function(data, blockSize = 128, layers = c(3, 5, 2), solver = "l-bfgs", maxIter = 100, function(data, layers, blockSize = 128, solver = "l-bfgs", maxIter = 100,
tol = 0.5, stepSize = 1, seed = 1) { tol = 1E-6, stepSize = 0.03, seed = NULL) {
layers <- as.integer(na.omit(layers))
if (length(layers) <= 1) {
stop ("layers must be a integer vector with length > 1.")
}
if (!is.null(seed)) {
seed <- as.character(as.integer(seed))
}
jobj <- callJStatic("org.apache.spark.ml.r.MultilayerPerceptronClassifierWrapper", jobj <- callJStatic("org.apache.spark.ml.r.MultilayerPerceptronClassifierWrapper",
"fit", data@sdf, as.integer(blockSize), as.array(layers), "fit", data@sdf, as.integer(blockSize), as.array(layers),
as.character(solver), as.integer(maxIter), as.numeric(tol), as.character(solver), as.integer(maxIter), as.numeric(tol),
as.numeric(stepSize), as.integer(seed)) as.numeric(stepSize), seed)
new("MultilayerPerceptronClassificationModel", jobj = jobj) new("MultilayerPerceptronClassificationModel", jobj = jobj)
}) })
......
...@@ -391,6 +391,25 @@ test_that("spark.mlp", { ...@@ -391,6 +391,25 @@ test_that("spark.mlp", {
unlink(modelPath) unlink(modelPath)
# Test default parameter
model <- spark.mlp(df, layers = c(4, 5, 4, 3))
mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
expect_equal(head(mlpPredictions$prediction, 10), c(1, 1, 1, 1, 0, 1, 2, 2, 1, 0))
# Test illegal parameter
expect_error(spark.mlp(df, layers = NULL), "layers must be a integer vector with length > 1.")
expect_error(spark.mlp(df, layers = c()), "layers must be a integer vector with length > 1.")
expect_error(spark.mlp(df, layers = c(3)), "layers must be a integer vector with length > 1.")
# Test random seed
# default seed
model <- spark.mlp(df, layers = c(4, 5, 4, 3), maxIter = 10)
mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 0, 1, 2, 2, 1, 2, 0, 1))
# seed equals 10
model <- spark.mlp(df, layers = c(4, 5, 4, 3), maxIter = 10, seed = 10)
mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 2, 1, 2, 2, 1, 0, 0, 1))
}) })
test_that("spark.naiveBayes", { test_that("spark.naiveBayes", {
......
...@@ -53,26 +53,26 @@ private[r] object MultilayerPerceptronClassifierWrapper ...@@ -53,26 +53,26 @@ private[r] object MultilayerPerceptronClassifierWrapper
def fit( def fit(
data: DataFrame, data: DataFrame,
blockSize: Int, blockSize: Int,
layers: Array[Double], layers: Array[Int],
solver: String, solver: String,
maxIter: Int, maxIter: Int,
tol: Double, tol: Double,
stepSize: Double, stepSize: Double,
seed: Int seed: String
): MultilayerPerceptronClassifierWrapper = { ): MultilayerPerceptronClassifierWrapper = {
// get labels and feature names from output schema // get labels and feature names from output schema
val schema = data.schema val schema = data.schema
// assemble and fit the pipeline // assemble and fit the pipeline
val mlp = new MultilayerPerceptronClassifier() val mlp = new MultilayerPerceptronClassifier()
.setLayers(layers.map(_.toInt)) .setLayers(layers)
.setBlockSize(blockSize) .setBlockSize(blockSize)
.setSolver(solver) .setSolver(solver)
.setMaxIter(maxIter) .setMaxIter(maxIter)
.setTol(tol) .setTol(tol)
.setStepSize(stepSize) .setStepSize(stepSize)
.setSeed(seed)
.setPredictionCol(PREDICTED_LABEL_COL) .setPredictionCol(PREDICTED_LABEL_COL)
if (seed != null && seed.length > 0) mlp.setSeed(seed.toInt)
val pipeline = new Pipeline() val pipeline = new Pipeline()
.setStages(Array(mlp)) .setStages(Array(mlp))
.fit(data) .fit(data)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment