From a5257048d74359c3fa7810009be1d60d370e2896 Mon Sep 17 00:00:00 2001 From: Liu Xiang <lxmtlab@gmail.com> Date: Thu, 11 Feb 2016 17:28:37 -0800 Subject: [PATCH] [SPARK-12765][ML][COUNTVECTORIZER] fix CountVectorizer.transform's lost transformSchema https://issues.apache.org/jira/browse/SPARK-12765 Author: Liu Xiang <lxmtlab@gmail.com> Closes #10720 from sloth2012/sloth. --- .../main/scala/org/apache/spark/ml/feature/CountVectorizer.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala index 10dcda2382..d5cb05f29b 100644 --- a/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala +++ b/mllib/src/main/scala/org/apache/spark/ml/feature/CountVectorizer.scala @@ -210,6 +210,7 @@ class CountVectorizerModel(override val uid: String, val vocabulary: Array[Strin private var broadcastDict: Option[Broadcast[Map[String, Int]]] = None override def transform(dataset: DataFrame): DataFrame = { + transformSchema(dataset.schema, logging = true) if (broadcastDict.isEmpty) { val dict = vocabulary.zipWithIndex.toMap broadcastDict = Some(dataset.sqlContext.sparkContext.broadcast(dict)) -- GitLab