Skip to content
Snippets Groups Projects
Commit e6f8d368 authored by Vinod K C, committed by Xiangrui Meng
Browse files

[SPARK-10468] [MLLIB] Verify schema before DataFrame select API call

Loader.checkSchema was being called to verify the schema only after dataframe.select(...) had already been executed.
Schema verification should be done before dataframe.select(...) is called.

Author: Vinod K C <vinod.kc@huawei.com>

Closes #8636 from vinodkc/fix_GaussianMixtureModel_load_verification.
parent 7a9dcbc9
No related branches found
No related tags found
No related merge requests found
...@@ -168,10 +168,9 @@ object GaussianMixtureModel extends Loader[GaussianMixtureModel] { ...@@ -168,10 +168,9 @@ object GaussianMixtureModel extends Loader[GaussianMixtureModel] {
val dataPath = Loader.dataPath(path) val dataPath = Loader.dataPath(path)
val sqlContext = new SQLContext(sc) val sqlContext = new SQLContext(sc)
val dataFrame = sqlContext.read.parquet(dataPath) val dataFrame = sqlContext.read.parquet(dataPath)
val dataArray = dataFrame.select("weight", "mu", "sigma").collect()
// Check schema explicitly since erasure makes it hard to use match-case for checking. // Check schema explicitly since erasure makes it hard to use match-case for checking.
Loader.checkSchema[Data](dataFrame.schema) Loader.checkSchema[Data](dataFrame.schema)
val dataArray = dataFrame.select("weight", "mu", "sigma").collect()
val (weights, gaussians) = dataArray.map { val (weights, gaussians) = dataArray.map {
case Row(weight: Double, mu: Vector, sigma: Matrix) => case Row(weight: Double, mu: Vector, sigma: Matrix) =>
......
...@@ -590,12 +590,10 @@ object Word2VecModel extends Loader[Word2VecModel] { ...@@ -590,12 +590,10 @@ object Word2VecModel extends Loader[Word2VecModel] {
val dataPath = Loader.dataPath(path) val dataPath = Loader.dataPath(path)
val sqlContext = new SQLContext(sc) val sqlContext = new SQLContext(sc)
val dataFrame = sqlContext.read.parquet(dataPath) val dataFrame = sqlContext.read.parquet(dataPath)
val dataArray = dataFrame.select("word", "vector").collect()
// Check schema explicitly since erasure makes it hard to use match-case for checking. // Check schema explicitly since erasure makes it hard to use match-case for checking.
Loader.checkSchema[Data](dataFrame.schema) Loader.checkSchema[Data](dataFrame.schema)
val dataArray = dataFrame.select("word", "vector").collect()
val word2VecMap = dataArray.map(i => (i.getString(0), i.getSeq[Float](1).toArray)).toMap val word2VecMap = dataArray.map(i => (i.getString(0), i.getSeq[Float](1).toArray)).toMap
new Word2VecModel(word2VecMap) new Word2VecModel(word2VecMap)
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment