From e0d49ad229e2047bd6dda8e66341aff8f2a122a2 Mon Sep 17 00:00:00 2001
From: CodingCat <zhunansjtu@gmail.com>
Date: Mon, 17 Feb 2014 15:12:52 -0800
Subject: [PATCH] [SPARK-1090] improvement on spark_shell (help information,
 configure memory)

https://spark-project.atlassian.net/browse/SPARK-1090

spark-shell should print help information about its parameters and should allow the user to configure executor memory.
There is currently no documentation on how to set --cores/-c in spark-shell.

Users should also be able to set executor memory through command-line options.
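
For example, once these options are in place the shell can be started as follows (an illustrative invocation, run from the Spark home directory):

    bin/spark-shell --cores 4 --execmem 2g --drivermem 1g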

In this PR I also validate the format of the options passed by the user.
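
For instance, a memory value must be a number followed by one of m/M/g/G, so a malformed value is rejected before the shell starts:

    bin/spark-shell --execmem 512m     # accepted
    bin/spark-shell --execmem 512mb    # prints "ERROR: wrong format for --execmem/-em" and exits 1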

Author: CodingCat <zhunansjtu@gmail.com>

Closes #599 from CodingCat/spark_shell_improve and squashes the following commits:

de5aa38 [CodingCat] add parameter to set driver memory
915cbf8 [CodingCat] improvement on spark_shell (help information, configure memory)
---
 bin/spark-shell                               | 48 ++++++++++++++++---
 .../org/apache/spark/repl/SparkILoop.scala    |  2 +-
 2 files changed, 43 insertions(+), 7 deletions(-)

diff --git a/bin/spark-shell b/bin/spark-shell
index 05a46ee0ca..2bff06cf70 100755
--- a/bin/spark-shell
+++ b/bin/spark-shell
@@ -21,8 +21,6 @@
 # Shell script for starting the Spark Shell REPL
 # Note that it will set MASTER to spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}
 # if those two env vars are set in spark-env.sh but MASTER is not.
-# Options:
-#    -c <cores>    Set the number of cores for REPL to use
 
 cygwin=false
 case "`uname`" in
@@ -32,14 +30,52 @@ esac
 # Enter posix mode for bash
 set -o posix
 
+CORE_PATTERN="^[0-9]+$"
+MEM_PATTERN="^[0-9]+[mgMG]$"
+
 FWDIR="$(cd `dirname $0`/..; pwd)"
 
+if [ "$1" = "--help" ] || [ "$1" = "-h" ]; then
+	echo "Usage: spark-shell [OPTIONS]"
+	echo "OPTIONS:"
+	echo "-c --cores num, the maximum number of cores to be used by the spark shell"
+	echo "-em --execmem num[m|g], the memory used by each executor of spark shell"
+	echo "-dm --drivermem num[m|g], the memory used by the spark shell and driver"
+	echo "-h --help, print this help information" 
+	exit
+fi
+
+SPARK_SHELL_OPTS=""
+
 for o in "$@"; do
   if [ "$1" = "-c" -o "$1" = "--cores" ]; then
     shift
-    if [ -n "$1" ]; then
-      OPTIONS="-Dspark.cores.max=$1"
+    if [[ "$1" =~ $CORE_PATTERN ]]; then
+      SPARK_SHELL_OPTS="$SPARK_SHELL_OPTS -Dspark.cores.max=$1"
       shift
+    else
+      echo "ERROR: wrong format for -c/--cores"
+      exit 1
+    fi
+  fi
+  if [ "$1" = "-em" -o "$1" = "--execmem" ]; then
+    shift
+    if [[ $1 =~ $MEM_PATTERN ]]; then
+      SPARK_SHELL_OPTS="$SPARK_SHELL_OPTS -Dspark.executor.memory=$1"
+      shift
+    else
+      echo "ERROR: wrong format for --execmem/-em"
+      exit 1
+    fi
+  fi
+  if [ "$1" = "-dm" -o "$1" = "--drivermem" ]; then
+    shift
+    if [[ $1 =~ $MEM_PATTERN ]]; then
+      export SPARK_MEM=$1
+      shift
+    else
+      echo "ERROR: wrong format for --drivermem/-dm"
+      exit 1
     fi
   fi
 done
@@ -95,10 +131,10 @@ if $cygwin; then
     # "Backspace sends ^H" setting in "Keys" section of the Mintty options
     # (see https://github.com/sbt/sbt/issues/562).
     stty -icanon min 1 -echo > /dev/null 2>&1
-    $FWDIR/bin/spark-class -Djline.terminal=unix $OPTIONS org.apache.spark.repl.Main "$@"
+    $FWDIR/bin/spark-class -Djline.terminal=unix $SPARK_SHELL_OPTS org.apache.spark.repl.Main "$@"
     stty icanon echo > /dev/null 2>&1
 else
-    $FWDIR/bin/spark-class $OPTIONS org.apache.spark.repl.Main "$@"
+    $FWDIR/bin/spark-class $SPARK_SHELL_OPTS org.apache.spark.repl.Main "$@"
 fi
 
 # record the exit status lest it be overwritten:
diff --git a/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala b/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
index bc25b50a4e..013cea07d4 100644
--- a/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
@@ -954,7 +954,7 @@ class SparkILoop(in0: Option[BufferedReader], protected val out: JPrintWriter,
       conf.setSparkHome(System.getenv("SPARK_HOME"))
     }
     sparkContext = new SparkContext(conf)
-    echo("Created spark context..")
+    logInfo("Created spark context.")
     sparkContext
   }
 
-- 
GitLab