From 94e2064fa1b04c05c805d9175c7c78bf583db5c6 Mon Sep 17 00:00:00 2001
From: Forest Fang <forest.fang@outlook.com>
Date: Thu, 22 Oct 2015 09:34:07 -0700
Subject: [PATCH] [SPARK-11244][SPARKR] sparkR.stop() should remove SQLContext

SparkR should remove `.sparkRSQLsc` and `.sparkRHivesc` when `sparkR.stop()` is called. Otherwise even when SparkContext is reinitialized, `sparkRSQL.init` returns the stale copy of the object and complains:

```r
sc <- sparkR.init("local")
sqlContext <- sparkRSQL.init(sc)
sparkR.stop()
sc <- sparkR.init("local")
sqlContext <- sparkRSQL.init(sc)
sqlContext
```
producing
```r
Error in callJMethod(x, "getClass") :
  Invalid jobj 1. If SparkR was restarted, Spark operations need to be re-executed.
```

I have added the check and removal only when SparkContext itself is initialized. I have also added corresponding test for this fix. Let me know if you want me to move the test to SQL test suite instead.

p.s. I tried lint-r but ended up with a lot of errors on the existing code.

Author: Forest Fang <forest.fang@outlook.com>

Closes #9205 from saurfang/sparkR.stop.
---
 R/pkg/R/sparkR.R                |  8 ++++++++
 R/pkg/inst/tests/test_context.R | 10 ++++++++++
 2 files changed, 18 insertions(+)

diff --git a/R/pkg/R/sparkR.R b/R/pkg/R/sparkR.R
index 9cf2f1a361..043b0057bd 100644
--- a/R/pkg/R/sparkR.R
+++ b/R/pkg/R/sparkR.R
@@ -39,6 +39,14 @@ sparkR.stop <- function() {
       sc <- get(".sparkRjsc", envir = env)
       callJMethod(sc, "stop")
       rm(".sparkRjsc", envir = env)
+
+      if (exists(".sparkRSQLsc", envir = env)) {
+        rm(".sparkRSQLsc", envir = env)
+      }
+
+      if (exists(".sparkRHivesc", envir = env)) {
+        rm(".sparkRHivesc", envir = env)
+      }
     }
 
     if (exists(".backendLaunched", envir = env)) {
diff --git a/R/pkg/inst/tests/test_context.R b/R/pkg/inst/tests/test_context.R
index 513bbc8e62..e99815ed15 100644
--- a/R/pkg/inst/tests/test_context.R
+++ b/R/pkg/inst/tests/test_context.R
@@ -26,6 +26,16 @@ test_that("repeatedly starting and stopping SparkR", {
   }
 })
 
+test_that("repeatedly starting and stopping SparkR SQL", {
+  for (i in 1:4) {
+    sc <- sparkR.init()
+    sqlContext <- sparkRSQL.init(sc)
+    df <- createDataFrame(sqlContext, data.frame(a = 1:20))
+    expect_equal(count(df), 20)
+    sparkR.stop()
+  }
+})
+
 test_that("rdd GC across sparkR.stop", {
   sparkR.stop()
   sc <- sparkR.init() # sc should get id 0
-- 
GitLab