Skip to content
Snippets Groups Projects
Commit 84b80944 authored by tedyu's avatar tedyu Committed by Michael Armbrust
Browse files

[SPARK-11884] Drop multiple columns in the DataFrame API

See the thread Ben started:
http://search-hadoop.com/m/q3RTtveEuhjsr7g/

This PR adds a drop() method to DataFrame that accepts multiple column names.

Author: tedyu <yuzhihong@gmail.com>

Closes #9862 from ted-yu/master.
parent 871e85d9
No related branches found
No related tags found
No related merge requests found
......@@ -1261,16 +1261,24 @@ class DataFrame private[sql](
* @since 1.4.0
*/
def drop(colName: String): DataFrame = {
  // Wrap the single name and hand it to the varargs overload of drop.
  val single = Seq(colName)
  drop(single: _*)
}
/**
 * Returns a new [[DataFrame]] with columns dropped.
 * This is a no-op if schema doesn't contain column name(s).
 *
 * Column names are matched against the schema's field names using the analyzer's resolver.
 *
 * @param colNames names of the columns to drop
 * @return this [[DataFrame]] unchanged when no field matched any of the given names,
 *         otherwise a new [[DataFrame]] selecting only the fields that matched none of them
 * @group dfops
 * @since 1.6.0
 */
@scala.annotation.varargs
def drop(colNames: String*): DataFrame = {
  val resolver = sqlContext.analyzer.resolver
  // Keep a field only if it resolves to none of the requested names.
  val remainingCols =
    schema.filter(f => colNames.forall(n => !resolver(f.name, n))).map(f => Column(f.name))
  if (remainingCols.size == this.schema.size) {
    // Nothing was filtered out, so avoid building a redundant projection.
    this
  } else {
    this.select(remainingCols: _*)
  }
}
......
......@@ -378,6 +378,13 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
assert(df.schema.map(_.name) === Seq("value"))
}
test("drop columns using drop") {
  // Dropping "a" and "b" from a three-column frame should leave only column "c".
  val remaining = Seq((0, 2, 3)).toDF("a", "b", "c").drop("a", "b")
  checkAnswer(remaining, Row(3))
  val remainingNames = remaining.schema.map(_.name)
  assert(remainingNames === Seq("c"))
}
test("drop unknown column (no-op)") {
val df = testData.drop("random")
checkAnswer(
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment