Skip to content
Snippets Groups Projects
Commit b60aab8a authored by Frank Rosner, committed by Shivaram Venkataraman
Browse files

[SPARK-11258] Converting a Spark DataFrame into an R data.frame is slow / requires a lot of memory

https://issues.apache.org/jira/browse/SPARK-11258

I was not able to locate an existing unit test for this function so I wrote one.

Author: Frank Rosner <frank@fam-rosner.de>

Closes #9222 from FRosner/master.
parent 3689beb9
No related branches found
No related tags found
No related merge requests found
...@@ -130,16 +130,18 @@ private[r] object SQLUtils { ...@@ -130,16 +130,18 @@ private[r] object SQLUtils {
} }
def dfToCols(df: DataFrame): Array[Array[Any]] = { def dfToCols(df: DataFrame): Array[Array[Any]] = {
// localDF is Array[Row] val localDF: Array[Row] = df.collect()
val localDF = df.collect()
val numCols = df.columns.length val numCols = df.columns.length
val numRows = localDF.length
// result is Array[Array[Any]] val colArray = new Array[Array[Any]](numCols)
(0 until numCols).map { colIdx => for (colNo <- 0 until numCols) {
localDF.map { row => colArray(colNo) = new Array[Any](numRows)
row(colIdx) for (rowNo <- 0 until numRows) {
colArray(colNo)(rowNo) = localDF(rowNo)(colNo)
} }
}.toArray }
colArray
} }
def saveMode(mode: String): SaveMode = { def saveMode(mode: String): SaveMode = {
......
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.api.r
import org.apache.spark.sql.test.SharedSQLContext
/**
 * Test suite for [[SQLUtils]].
 *
 * Currently covers `dfToCols`, checking that a small data frame built from
 * tuples comes back as one array per column.
 */
class SQLUtilsSuite extends SharedSQLContext {
  import testImplicits._

  test("dfToCols should collect and transpose a data frame") {
    // Fixture: two rows, each with three integer fields.
    val input = Seq((1, 2, 3), (4, 5, 6)).toDF

    // Expected layout: one inner array per column, values listed in row order.
    val expectedCols = Array(
      Array(1, 4),
      Array(2, 5),
      Array(3, 6)
    )

    val actualCols = SQLUtils.dfToCols(input)
    assert(actualCols === expectedCols)
  }
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment