Skip to content
Snippets Groups Projects
Commit 7da67485 authored by Sean Owen
Browse files

[SPARK-11988][ML][MLLIB] Update JPMML to 1.2.7

Update JPMML pmml-model to 1.2.7

Author: Sean Owen <sowen@cloudera.com>

Closes #9972 from srowen/SPARK-11988.
parent e9c9ae22
No related branches found
No related tags found
No related merge requests found
Apache License Apache License
Version 2.0, January 2004 Version 2.0, January 2004
http://www.apache.org/licenses/ http://www.apache.org/licenses/
...@@ -237,7 +236,7 @@ The following components are provided under a BSD-style license. See project lin ...@@ -237,7 +236,7 @@ The following components are provided under a BSD-style license. See project lin
The text of each license is also included at licenses/LICENSE-[project].txt. The text of each license is also included at licenses/LICENSE-[project].txt.
(BSD 3 Clause) netlib core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core) (BSD 3 Clause) netlib core (com.github.fommil.netlib:core:1.1.2 - https://github.com/fommil/netlib-java/core)
(BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.1.15 - https://github.com/jpmml/jpmml-model) (BSD 3 Clause) JPMML-Model (org.jpmml:pmml-model:1.2.7 - https://github.com/jpmml/jpmml-model)
(BSD 3-clause style license) jblas (org.jblas:jblas:1.2.4 - http://jblas.org/) (BSD 3-clause style license) jblas (org.jblas:jblas:1.2.4 - http://jblas.org/)
(BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/) (BSD License) AntLR Parser Generator (antlr:antlr:2.7.7 - http://www.antlr.org/)
(BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org) (BSD licence) ANTLR ST4 4.0.4 (org.antlr:ST4:4.0.4 - http://www.stringtemplate.org)
......
...@@ -109,7 +109,7 @@ ...@@ -109,7 +109,7 @@
<dependency> <dependency>
<groupId>org.jpmml</groupId> <groupId>org.jpmml</groupId>
<artifactId>pmml-model</artifactId> <artifactId>pmml-model</artifactId>
<version>1.1.15</version> <version>1.2.7</version>
<exclusions> <exclusions>
<exclusion> <exclusion>
<groupId>com.sun.xml.fastinfoset</groupId> <groupId>com.sun.xml.fastinfoset</groupId>
......
...@@ -45,7 +45,7 @@ private[mllib] class BinaryClassificationPMMLModelExport( ...@@ -45,7 +45,7 @@ private[mllib] class BinaryClassificationPMMLModelExport(
val fields = new SArray[FieldName](model.weights.size) val fields = new SArray[FieldName](model.weights.size)
val dataDictionary = new DataDictionary val dataDictionary = new DataDictionary
val miningSchema = new MiningSchema val miningSchema = new MiningSchema
val regressionTableYES = new RegressionTable(model.intercept).withTargetCategory("1") val regressionTableYES = new RegressionTable(model.intercept).setTargetCategory("1")
var interceptNO = threshold var interceptNO = threshold
if (RegressionNormalizationMethodType.LOGIT == normalizationMethod) { if (RegressionNormalizationMethodType.LOGIT == normalizationMethod) {
if (threshold <= 0) { if (threshold <= 0) {
...@@ -56,35 +56,35 @@ private[mllib] class BinaryClassificationPMMLModelExport( ...@@ -56,35 +56,35 @@ private[mllib] class BinaryClassificationPMMLModelExport(
interceptNO = -math.log(1 / threshold - 1) interceptNO = -math.log(1 / threshold - 1)
} }
} }
val regressionTableNO = new RegressionTable(interceptNO).withTargetCategory("0") val regressionTableNO = new RegressionTable(interceptNO).setTargetCategory("0")
val regressionModel = new RegressionModel() val regressionModel = new RegressionModel()
.withFunctionName(MiningFunctionType.CLASSIFICATION) .setFunctionName(MiningFunctionType.CLASSIFICATION)
.withMiningSchema(miningSchema) .setMiningSchema(miningSchema)
.withModelName(description) .setModelName(description)
.withNormalizationMethod(normalizationMethod) .setNormalizationMethod(normalizationMethod)
.withRegressionTables(regressionTableYES, regressionTableNO) .addRegressionTables(regressionTableYES, regressionTableNO)
for (i <- 0 until model.weights.size) { for (i <- 0 until model.weights.size) {
fields(i) = FieldName.create("field_" + i) fields(i) = FieldName.create("field_" + i)
dataDictionary.withDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE)) dataDictionary.addDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
miningSchema miningSchema
.withMiningFields(new MiningField(fields(i)) .addMiningFields(new MiningField(fields(i))
.withUsageType(FieldUsageType.ACTIVE)) .setUsageType(FieldUsageType.ACTIVE))
regressionTableYES.withNumericPredictors(new NumericPredictor(fields(i), model.weights(i))) regressionTableYES.addNumericPredictors(new NumericPredictor(fields(i), model.weights(i)))
} }
// add target field // add target field
val targetField = FieldName.create("target") val targetField = FieldName.create("target")
dataDictionary dataDictionary
.withDataFields(new DataField(targetField, OpType.CATEGORICAL, DataType.STRING)) .addDataFields(new DataField(targetField, OpType.CATEGORICAL, DataType.STRING))
miningSchema miningSchema
.withMiningFields(new MiningField(targetField) .addMiningFields(new MiningField(targetField)
.withUsageType(FieldUsageType.TARGET)) .setUsageType(FieldUsageType.TARGET))
dataDictionary.withNumberOfFields(dataDictionary.getDataFields.size) dataDictionary.setNumberOfFields(dataDictionary.getDataFields.size)
pmml.setDataDictionary(dataDictionary) pmml.setDataDictionary(dataDictionary)
pmml.withModels(regressionModel) pmml.addModels(regressionModel)
} }
} }
} }
...@@ -45,31 +45,31 @@ private[mllib] class GeneralizedLinearPMMLModelExport( ...@@ -45,31 +45,31 @@ private[mllib] class GeneralizedLinearPMMLModelExport(
val miningSchema = new MiningSchema val miningSchema = new MiningSchema
val regressionTable = new RegressionTable(model.intercept) val regressionTable = new RegressionTable(model.intercept)
val regressionModel = new RegressionModel() val regressionModel = new RegressionModel()
.withFunctionName(MiningFunctionType.REGRESSION) .setFunctionName(MiningFunctionType.REGRESSION)
.withMiningSchema(miningSchema) .setMiningSchema(miningSchema)
.withModelName(description) .setModelName(description)
.withRegressionTables(regressionTable) .addRegressionTables(regressionTable)
for (i <- 0 until model.weights.size) { for (i <- 0 until model.weights.size) {
fields(i) = FieldName.create("field_" + i) fields(i) = FieldName.create("field_" + i)
dataDictionary.withDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE)) dataDictionary.addDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
miningSchema miningSchema
.withMiningFields(new MiningField(fields(i)) .addMiningFields(new MiningField(fields(i))
.withUsageType(FieldUsageType.ACTIVE)) .setUsageType(FieldUsageType.ACTIVE))
regressionTable.withNumericPredictors(new NumericPredictor(fields(i), model.weights(i))) regressionTable.addNumericPredictors(new NumericPredictor(fields(i), model.weights(i)))
} }
// for completeness add target field // for completeness add target field
val targetField = FieldName.create("target") val targetField = FieldName.create("target")
dataDictionary.withDataFields(new DataField(targetField, OpType.CONTINUOUS, DataType.DOUBLE)) dataDictionary.addDataFields(new DataField(targetField, OpType.CONTINUOUS, DataType.DOUBLE))
miningSchema miningSchema
.withMiningFields(new MiningField(targetField) .addMiningFields(new MiningField(targetField)
.withUsageType(FieldUsageType.TARGET)) .setUsageType(FieldUsageType.TARGET))
dataDictionary.withNumberOfFields(dataDictionary.getDataFields.size) dataDictionary.setNumberOfFields(dataDictionary.getDataFields.size)
pmml.setDataDictionary(dataDictionary) pmml.setDataDictionary(dataDictionary)
pmml.withModels(regressionModel) pmml.addModels(regressionModel)
} }
} }
} }
...@@ -42,42 +42,42 @@ private[mllib] class KMeansPMMLModelExport(model : KMeansModel) extends PMMLMode ...@@ -42,42 +42,42 @@ private[mllib] class KMeansPMMLModelExport(model : KMeansModel) extends PMMLMode
val dataDictionary = new DataDictionary val dataDictionary = new DataDictionary
val miningSchema = new MiningSchema val miningSchema = new MiningSchema
val comparisonMeasure = new ComparisonMeasure() val comparisonMeasure = new ComparisonMeasure()
.withKind(ComparisonMeasure.Kind.DISTANCE) .setKind(ComparisonMeasure.Kind.DISTANCE)
.withMeasure(new SquaredEuclidean()) .setMeasure(new SquaredEuclidean())
val clusteringModel = new ClusteringModel() val clusteringModel = new ClusteringModel()
.withModelName("k-means") .setModelName("k-means")
.withMiningSchema(miningSchema) .setMiningSchema(miningSchema)
.withComparisonMeasure(comparisonMeasure) .setComparisonMeasure(comparisonMeasure)
.withFunctionName(MiningFunctionType.CLUSTERING) .setFunctionName(MiningFunctionType.CLUSTERING)
.withModelClass(ClusteringModel.ModelClass.CENTER_BASED) .setModelClass(ClusteringModel.ModelClass.CENTER_BASED)
.withNumberOfClusters(model.clusterCenters.length) .setNumberOfClusters(model.clusterCenters.length)
for (i <- 0 until clusterCenter.size) { for (i <- 0 until clusterCenter.size) {
fields(i) = FieldName.create("field_" + i) fields(i) = FieldName.create("field_" + i)
dataDictionary.withDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE)) dataDictionary.addDataFields(new DataField(fields(i), OpType.CONTINUOUS, DataType.DOUBLE))
miningSchema miningSchema
.withMiningFields(new MiningField(fields(i)) .addMiningFields(new MiningField(fields(i))
.withUsageType(FieldUsageType.ACTIVE)) .setUsageType(FieldUsageType.ACTIVE))
clusteringModel.withClusteringFields( clusteringModel.addClusteringFields(
new ClusteringField(fields(i)).withCompareFunction(CompareFunctionType.ABS_DIFF)) new ClusteringField(fields(i)).setCompareFunction(CompareFunctionType.ABS_DIFF))
} }
dataDictionary.withNumberOfFields(dataDictionary.getDataFields.size) dataDictionary.setNumberOfFields(dataDictionary.getDataFields.size)
for (i <- 0 until model.clusterCenters.length) { for (i <- model.clusterCenters.indices) {
val cluster = new Cluster() val cluster = new Cluster()
.withName("cluster_" + i) .setName("cluster_" + i)
.withArray(new org.dmg.pmml.Array() .setArray(new org.dmg.pmml.Array()
.withType(Array.Type.REAL) .setType(Array.Type.REAL)
.withN(clusterCenter.size) .setN(clusterCenter.size)
.withValue(model.clusterCenters(i).toArray.mkString(" "))) .setValue(model.clusterCenters(i).toArray.mkString(" ")))
// we don't have the size of the single cluster but only the centroids (withValue) // we don't have the size of the single cluster but only the centroids (withValue)
// .withSize(value) // .withSize(value)
clusteringModel.withClusters(cluster) clusteringModel.addClusters(cluster)
} }
pmml.setDataDictionary(dataDictionary) pmml.setDataDictionary(dataDictionary)
pmml.withModels(clusteringModel) pmml.addModels(clusteringModel)
} }
} }
} }
...@@ -30,19 +30,14 @@ private[mllib] trait PMMLModelExport { ...@@ -30,19 +30,14 @@ private[mllib] trait PMMLModelExport {
* Holder of the exported model in PMML format * Holder of the exported model in PMML format
*/ */
@BeanProperty @BeanProperty
val pmml: PMML = new PMML val pmml: PMML = {
pmml.setVersion("4.2")
setHeader(pmml)
private def setHeader(pmml: PMML): Unit = {
val version = getClass.getPackage.getImplementationVersion val version = getClass.getPackage.getImplementationVersion
val app = new Application().withName("Apache Spark MLlib").withVersion(version) val app = new Application("Apache Spark MLlib").setVersion(version)
val timestamp = new Timestamp() val timestamp = new Timestamp()
.withContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date())) .addContent(new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss").format(new Date()))
val header = new Header() val header = new Header()
.withApplication(app) .setApplication(app)
.withTimestamp(timestamp) .setTimestamp(timestamp)
pmml.setHeader(header) new PMML("4.2", header, null)
} }
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment