Skip to content
Snippets Groups Projects
Commit 12b3e8d2 authored by WeichenXu's avatar WeichenXu Committed by Felix Cheung
Browse files

[SPARK-18007][SPARKR][ML] update SparkR MLP - add initialWeights parameter

## What changes were proposed in this pull request?

update SparkR MLP, add initialWeights parameter.

## How was this patch tested?

test added.

Author: WeichenXu <WeichenXu123@outlook.com>

Closes #15552 from WeichenXu123/mlp_r_add_initialWeight_param.
parent c329a568
No related branches found
No related tags found
No related merge requests found
...@@ -665,6 +665,8 @@ setMethod("predict", signature(object = "KMeansModel"), ...@@ -665,6 +665,8 @@ setMethod("predict", signature(object = "KMeansModel"),
#' @param tol convergence tolerance of iterations. #' @param tol convergence tolerance of iterations.
#' @param stepSize stepSize parameter. #' @param stepSize stepSize parameter.
#' @param seed seed parameter for weights initialization. #' @param seed seed parameter for weights initialization.
#' @param initialWeights initialWeights parameter for weights initialization, it should be a
#' numeric vector.
#' @param ... additional arguments passed to the method. #' @param ... additional arguments passed to the method.
#' @return \code{spark.mlp} returns a fitted Multilayer Perceptron Classification Model. #' @return \code{spark.mlp} returns a fitted Multilayer Perceptron Classification Model.
#' @rdname spark.mlp #' @rdname spark.mlp
...@@ -677,8 +679,9 @@ setMethod("predict", signature(object = "KMeansModel"), ...@@ -677,8 +679,9 @@ setMethod("predict", signature(object = "KMeansModel"),
#' df <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm") #' df <- read.df("data/mllib/sample_multiclass_classification_data.txt", source = "libsvm")
#' #'
#' # fit a Multilayer Perceptron Classification Model #' # fit a Multilayer Perceptron Classification Model
#' model <- spark.mlp(df, blockSize = 128, layers = c(4, 5, 4, 3), solver = "l-bfgs", #' model <- spark.mlp(df, blockSize = 128, layers = c(4, 3), solver = "l-bfgs",
#' maxIter = 100, tol = 0.5, stepSize = 1, seed = 1) #' maxIter = 100, tol = 0.5, stepSize = 1, seed = 1,
#' initialWeights = c(0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9))
#' #'
#' # get the summary of the model #' # get the summary of the model
#' summary(model) #' summary(model)
...@@ -695,7 +698,7 @@ setMethod("predict", signature(object = "KMeansModel"), ...@@ -695,7 +698,7 @@ setMethod("predict", signature(object = "KMeansModel"),
#' @note spark.mlp since 2.1.0 #' @note spark.mlp since 2.1.0
setMethod("spark.mlp", signature(data = "SparkDataFrame"), setMethod("spark.mlp", signature(data = "SparkDataFrame"),
function(data, layers, blockSize = 128, solver = "l-bfgs", maxIter = 100, function(data, layers, blockSize = 128, solver = "l-bfgs", maxIter = 100,
tol = 1E-6, stepSize = 0.03, seed = NULL) { tol = 1E-6, stepSize = 0.03, seed = NULL, initialWeights = NULL) {
if (is.null(layers)) { if (is.null(layers)) {
stop ("layers must be a integer vector with length > 1.") stop ("layers must be a integer vector with length > 1.")
} }
...@@ -706,10 +709,13 @@ setMethod("spark.mlp", signature(data = "SparkDataFrame"), ...@@ -706,10 +709,13 @@ setMethod("spark.mlp", signature(data = "SparkDataFrame"),
if (!is.null(seed)) { if (!is.null(seed)) {
seed <- as.character(as.integer(seed)) seed <- as.character(as.integer(seed))
} }
if (!is.null(initialWeights)) {
initialWeights <- as.array(as.numeric(na.omit(initialWeights)))
}
jobj <- callJStatic("org.apache.spark.ml.r.MultilayerPerceptronClassifierWrapper", jobj <- callJStatic("org.apache.spark.ml.r.MultilayerPerceptronClassifierWrapper",
"fit", data@sdf, as.integer(blockSize), as.array(layers), "fit", data@sdf, as.integer(blockSize), as.array(layers),
as.character(solver), as.integer(maxIter), as.numeric(tol), as.character(solver), as.integer(maxIter), as.numeric(tol),
as.numeric(stepSize), seed) as.numeric(stepSize), seed, initialWeights)
new("MultilayerPerceptronClassificationModel", jobj = jobj) new("MultilayerPerceptronClassificationModel", jobj = jobj)
}) })
......
...@@ -410,6 +410,21 @@ test_that("spark.mlp", { ...@@ -410,6 +410,21 @@ test_that("spark.mlp", {
model <- spark.mlp(df, layers = c(4, 5, 4, 3), maxIter = 10, seed = 10) model <- spark.mlp(df, layers = c(4, 5, 4, 3), maxIter = 10, seed = 10)
mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction")) mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 2, 1, 2, 2, 1, 0, 0, 1)) expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 2, 1, 2, 2, 1, 0, 0, 1))
# test initialWeights
model <- spark.mlp(df, layers = c(4, 3), maxIter = 2, initialWeights =
c(0, 0, 0, 0, 0, 5, 5, 5, 5, 5, 9, 9, 9, 9, 9))
mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1))
model <- spark.mlp(df, layers = c(4, 3), maxIter = 2, initialWeights =
c(0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 5.0, 5.0, 5.0, 5.0, 9.0, 9.0, 9.0, 9.0, 9.0))
mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1))
model <- spark.mlp(df, layers = c(4, 3), maxIter = 2)
mlpPredictions <- collect(select(predict(model, mlpTestDF), "prediction"))
expect_equal(head(mlpPredictions$prediction, 12), c(1, 1, 1, 1, 0, 1, 0, 2, 1, 0, 0, 1))
}) })
test_that("spark.naiveBayes", { test_that("spark.naiveBayes", {
......
...@@ -24,6 +24,7 @@ import org.json4s.jackson.JsonMethods._ ...@@ -24,6 +24,7 @@ import org.json4s.jackson.JsonMethods._
import org.apache.spark.ml.{Pipeline, PipelineModel} import org.apache.spark.ml.{Pipeline, PipelineModel}
import org.apache.spark.ml.classification.{MultilayerPerceptronClassificationModel, MultilayerPerceptronClassifier} import org.apache.spark.ml.classification.{MultilayerPerceptronClassificationModel, MultilayerPerceptronClassifier}
import org.apache.spark.ml.linalg.Vectors
import org.apache.spark.ml.util.{MLReadable, MLReader, MLWritable, MLWriter} import org.apache.spark.ml.util.{MLReadable, MLReader, MLWritable, MLWriter}
import org.apache.spark.sql.{DataFrame, Dataset} import org.apache.spark.sql.{DataFrame, Dataset}
...@@ -58,7 +59,8 @@ private[r] object MultilayerPerceptronClassifierWrapper ...@@ -58,7 +59,8 @@ private[r] object MultilayerPerceptronClassifierWrapper
maxIter: Int, maxIter: Int,
tol: Double, tol: Double,
stepSize: Double, stepSize: Double,
seed: String seed: String,
initialWeights: Array[Double]
): MultilayerPerceptronClassifierWrapper = { ): MultilayerPerceptronClassifierWrapper = {
// get labels and feature names from output schema // get labels and feature names from output schema
val schema = data.schema val schema = data.schema
...@@ -73,6 +75,11 @@ private[r] object MultilayerPerceptronClassifierWrapper ...@@ -73,6 +75,11 @@ private[r] object MultilayerPerceptronClassifierWrapper
.setStepSize(stepSize) .setStepSize(stepSize)
.setPredictionCol(PREDICTED_LABEL_COL) .setPredictionCol(PREDICTED_LABEL_COL)
if (seed != null && seed.length > 0) mlp.setSeed(seed.toInt) if (seed != null && seed.length > 0) mlp.setSeed(seed.toInt)
if (initialWeights != null) {
require(initialWeights.length > 0)
mlp.setInitialWeights(Vectors.dense(initialWeights))
}
val pipeline = new Pipeline() val pipeline = new Pipeline()
.setStages(Array(mlp)) .setStages(Array(mlp))
.fit(data) .fit(data)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment