Skip to content
Snippets Groups Projects
Commit d9e30c59 authored by Wenchen Fan's avatar Wenchen Fan Committed by Yin Huai
Browse files

[SPARK-10656][SQL] completely support special chars in DataFrame

The main problem is that DataFrame interprets column names with special handling of `.`. This enables us to write something like `df("a.b")` to get the field `b` of `a`. However, we don't need this feature in `DataFrame.apply("*")` or `DataFrame.withColumnRenamed`. In these two cases the column name is already the final name, so no extra processing is needed to interpret it.

The solution is simple: use `queryExecution.analyzed.output` to get the resolved columns directly, instead of using `DataFrame.resolve`.

close https://github.com/apache/spark/pull/8811

Author: Wenchen Fan <wenchen@databricks.com>

Closes #9462 from cloud-fan/special-chars.
parent b9455d1f
No related branches found
No related tags found
No related merge requests found
...@@ -698,7 +698,7 @@ class DataFrame private[sql]( ...@@ -698,7 +698,7 @@ class DataFrame private[sql](
*/ */
def col(colName: String): Column = colName match { def col(colName: String): Column = colName match {
case "*" => case "*" =>
Column(ResolvedStar(schema.fieldNames.map(resolve))) Column(ResolvedStar(queryExecution.analyzed.output))
case _ => case _ =>
val expr = resolve(colName) val expr = resolve(colName)
Column(expr) Column(expr)
...@@ -1259,13 +1259,17 @@ class DataFrame private[sql]( ...@@ -1259,13 +1259,17 @@ class DataFrame private[sql](
*/ */
def withColumnRenamed(existingName: String, newName: String): DataFrame = { def withColumnRenamed(existingName: String, newName: String): DataFrame = {
val resolver = sqlContext.analyzer.resolver val resolver = sqlContext.analyzer.resolver
val shouldRename = schema.exists(f => resolver(f.name, existingName)) val output = queryExecution.analyzed.output
val shouldRename = output.exists(f => resolver(f.name, existingName))
if (shouldRename) { if (shouldRename) {
val colNames = schema.map { field => val columns = output.map { col =>
val name = field.name if (resolver(col.name, existingName)) {
if (resolver(name, existingName)) Column(name).as(newName) else Column(name) Column(col).as(newName)
} else {
Column(col)
}
} }
select(colNames : _*) select(columns : _*)
} else { } else {
this this
} }
......
...@@ -1128,4 +1128,10 @@ class DataFrameSuite extends QueryTest with SharedSQLContext { ...@@ -1128,4 +1128,10 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
} }
} }
} }
// Regression test for SPARK-10656: column names containing special characters
// must work with `df("*")` and with `withColumnRenamed`.
test("SPARK-10656: completely support special chars") {
// Both column names deliberately contain characters such as `$`, `.`, `^` and `'`.
val df = Seq(1 -> "a").toDF("i_$.a", "d^'a.")
// Selecting the star column must return the single row with both values.
checkAnswer(df.select(df("*")), Row(1, "a"))
// Renaming the column by its exact existing name must keep the row intact.
checkAnswer(df.withColumnRenamed("d^'a.", "a"), Row(1, "a"))
}
} }
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment