From c78a12c4cc4d4312c4ee1069d3b218882d32d678 Mon Sep 17 00:00:00 2001
From: Peter Rudenko <petro.rudenko@gmail.com>
Date: Sun, 15 Feb 2015 20:51:32 -0800
Subject: [PATCH] [Ml] SPARK-5796 Don't transform data on the last estimator
 in Pipeline

If a stage is the last estimator in the Pipeline, there is no need to transform the data after fitting it, since no later stage would consume the transformed data.

Author: Peter Rudenko <petro.rudenko@gmail.com>

Closes #4590 from petro-rudenko/patch-1 and squashes the following commits:

d13ec33 [Peter Rudenko] [Ml] SPARK-5796 Don't transform data on a last estimator in Pipeline
---
 mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
index bb291e6e1f..5607ed21af 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/Pipeline.scala
@@ -114,7 +114,9 @@ class Pipeline extends Estimator[PipelineModel] {
             throw new IllegalArgumentException(
               s"Do not support stage $stage of type ${stage.getClass}")
         }
-        curDataset = transformer.transform(curDataset, paramMap)
+        if (index < indexOfLastEstimator) {
+          curDataset = transformer.transform(curDataset, paramMap)
+        }
         transformers += transformer
       } else {
         transformers += stage.asInstanceOf[Transformer]
-- 
GitLab