#!/usr/bin/env bash
#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# Shell script for starting the Spark Shell REPL
# Note that it will set MASTER to spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}
# if those two env vars are set in spark-env.sh but MASTER is not.
# Record whether the kernel name reported by uname identifies a Cygwin
# environment; main consults this flag before launching the REPL.
case "$(uname)" in
  CYGWIN*) cygwin=true ;;
  *)       cygwin=false ;;
esac

# Enter posix mode for bash
set -o posix
## Global script variables

# Root of the Spark installation; main and resolve_spark_master run helpers
# from "$FWDIR/bin". A value already present in the environment is kept.
# NOTE(review): the fallback assumes this script itself lives in "$FWDIR/bin",
# next to the helpers it invokes -- confirm against the real layout.
FWDIR="${FWDIR:-$(cd "$(dirname "$0")/.." && pwd)}"

# JVM options accumulated by the set_* helpers; a pre-set value is preserved.
SPARK_REPL_OPTS="${SPARK_REPL_OPTS:-""}"
# Master used when neither the options nor the environment provide one.
DEFAULT_MASTER="local[*]"
MASTER=${MASTER:-""}

# 1 enables log_info output (switched on by --log-conf), 0 silences it.
info_log=0

#CLI Color Templates
# tput diagnostics are discarded so that an unset or unknown TERM simply yields
# empty (colourless) templates instead of noise on stderr.
txtund=$(tput sgr 0 1 2>/dev/null)          # Underline
txtbld=$(tput bold 2>/dev/null)             # Bold
bldred=${txtbld}$(tput setaf 1 2>/dev/null) # red
bldyel=${txtbld}$(tput setaf 3 2>/dev/null) # yellow
bldblu=${txtbld}$(tput setaf 4 2>/dev/null) # blue
bldwht=${txtbld}$(tput setaf 7 2>/dev/null) # white
txtrst=$(tput sgr0 2>/dev/null)             # Reset
info="${bldwht}*${txtrst}"                  # Feedback
pass="${bldblu}*${txtrst}"
warn="${bldred}*${txtrst}"
ques="${bldblu}?${txtrst}"
# Helper function to describe the script usage.
# Writes the help text to stdout; the bold markers come from the txtbld and
# txtrst colour templates and are empty when those are unset.
function usage() {
  cat << EOF
${txtbld}Usage${txtrst}: spark-shell [OPTIONS]

${txtbld}OPTIONS${txtrst}:
    -h  --help              : Print this help information.
    -c  --cores             : The maximum number of cores to be used by the Spark Shell.
    -em --executor-memory   : The memory used by each executor of the Spark Shell, the number
                              is followed by m for megabytes or g for gigabytes, e.g. "1g".
    -dm --driver-memory     : The memory used by the Spark Shell, the number is followed
                              by m for megabytes or g for gigabytes, e.g. "1g".
    -m  --master            : A full string that describes the Spark Master, defaults to "local[*]"
                              e.g. "spark://localhost:7077".
    --log-conf              : Enables logging of the supplied SparkConf as INFO at start of the
                              Spark Context.

e.g.
    spark-shell -m spark://localhost:7077 -c 4 -dm 512m -em 2g

EOF
}
# Report a fatal usage error: print the message with an ERROR prefix, show the
# usage text and terminate the script with status 1.
# Arguments: $1 - error description
function out_error(){
  local message="$1"
  echo -e "${txtund}${bldred}ERROR${txtrst}: ${message}"
  usage
  exit 1
}
# Print an INFO message, but only when info logging is enabled.
# Globals:   info_log (read) - output is produced only when it equals 1;
#            an unset or empty value is treated as 0.
# Arguments: $1 - message text
# Returns:   0 whether or not the message was printed, so that a silenced log
#            call never looks like a failure to its caller.
function log_info(){
  if [ "${info_log:-0}" -eq 1 ]; then
    echo -e "${bldyel}INFO${txtrst}: $1"
  fi
}
# Print a warning with a WARN prefix; unlike log_info this is unconditional.
# Arguments: $1 - warning text
function log_warn(){
  local warning_text="$1"
  echo -e "${txtund}${bldyel}WARN${txtrst}: ${warning_text}"
}
# PATTERNS used to validate more than one optional arg.
ARG_FLAG_PATTERN="^-"
# Digits followed by exactly one unit letter. Inside a bracket expression "|"
# is a literal character, not alternation, so it must not be listed.
MEM_PATTERN="^[0-9]+[mgMG]$"
NUM_PATTERN="^[0-9]+$"
PORT_PATTERN="^[0-9]+$"
# Setters for optional args.
# Validate the requested core count and add it to the REPL options.
# Globals:   SPARK_REPL_OPTS (appended with -Dspark.cores.max=<n>)
# Arguments: $1 - core count; must consist of digits only
#            $2 - option name, used in the error message
# Exits through out_error when $1 is not made of digits.
function set_cores(){
  local -r core_pattern="^[0-9]+$"
  if [[ "$1" =~ $core_pattern ]]; then
    SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.cores.max=$1"
  else
    out_error "wrong format for $2"
  fi
}
# Validate the executor memory size and add it to the REPL options.
# Globals:   MEM_PATTERN (read), SPARK_REPL_OPTS (appended with
#            -Dspark.executor.memory=<size>)
# Arguments: $1 - memory size, must match MEM_PATTERN (e.g. "2g")
#            $2 - option name, used in the error message
# Exits through out_error when $1 does not match MEM_PATTERN.
function set_em(){
  if [[ "$1" =~ $MEM_PATTERN ]]; then
    SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Dspark.executor.memory=$1"
  else
    out_error "wrong format for $2"
  fi
}
# Validate the driver memory size and export it for the launcher.
# Globals:   MEM_PATTERN (read), SPARK_DRIVER_MEMORY (exported)
# Arguments: $1 - memory size, must match MEM_PATTERN (e.g. "512m")
#            $2 - option name, used in the error message
# Exits through out_error when $1 does not match MEM_PATTERN.
function set_dm(){
  if [[ "$1" =~ $MEM_PATTERN ]]; then
    export SPARK_DRIVER_MEMORY="$1"
  else
    out_error "wrong format for $2"
  fi
}
# Add the spark.logConf system property to the REPL options.
# Globals:   SPARK_REPL_OPTS (appended)
# Arguments: $1 - value given to -Dspark.logConf
function set_spark_log_conf(){
  local log_conf_opt="-Dspark.logConf=$1"
  SPARK_REPL_OPTS="${SPARK_REPL_OPTS} ${log_conf_opt}"
}
# Export the Spark master URL supplied on the command line.
# Globals:   ARG_FLAG_PATTERN (read), MASTER (exported)
# Arguments: $1 - master string, e.g. "spark://localhost:7077"
#            $2 - option name, used in the error message
# Exits through out_error when $1 is empty or looks like another option
# (matches ARG_FLAG_PATTERN), which means the value was left out.
function set_spark_master(){
  if [[ -n "$1" ]] && ! [[ "$1" =~ $ARG_FLAG_PATTERN ]]; then
    export MASTER="$1"
  else
    out_error "wrong format for $2"
  fi
}
# Decide which Spark master to use and export it as MASTER.
# Order of precedence:
#   1. a MASTER value that is already set (environment or -m/--master);
#   2. spark://SPARK_MASTER_IP:SPARK_MASTER_PORT when load-spark-env.sh (or the
#      environment) provides SPARK_MASTER_IP, the port defaulting to 7077;
#   3. DEFAULT_MASTER.
# Globals: FWDIR, DEFAULT_MASTER, SPARK_MASTER_IP (read);
#          SPARK_MASTER_PORT (defaulted), MASTER (exported)
function resolve_spark_master(){
  # Set MASTER from spark-env if possible
  local DEFAULT_SPARK_MASTER_PORT=7077

  if [ -z "$MASTER" ]; then
    # Quoted so that an installation path containing spaces still resolves.
    . "$FWDIR/bin/load-spark-env.sh"
    if [ -n "$SPARK_MASTER_IP" ]; then
      SPARK_MASTER_PORT="${SPARK_MASTER_PORT:-"$DEFAULT_SPARK_MASTER_PORT"}"
      export MASTER="spark://${SPARK_MASTER_IP}:${SPARK_MASTER_PORT}"
    fi
  fi

  if [ -z "$MASTER" ]; then
    export MASTER="$DEFAULT_MASTER"
  fi
}
# Resolve the master, then launch the Spark REPL through bin/spark-class.
# Globals:   FWDIR, cygwin, MASTER (read); SPARK_REPL_OPTS (exported, and
#            extended with -Djline.terminal=unix under Cygwin)
# Arguments: forwarded unchanged to org.apache.spark.repl.Main
# Returns:   the exit status of spark-class, in both branches.
function main(){
  local repl_status

  log_info "Base Directory set to $FWDIR"
  resolve_spark_master
  log_info "Spark Master is $MASTER"
  log_info "Spark REPL options $SPARK_REPL_OPTS"

  if [[ "${cygwin:-false}" == "true" ]]; then
    # Workaround for issue involving JLine and Cygwin
    # (see http://sourceforge.net/p/jline/bugs/40/).
    # If you're using the Mintty terminal emulator in Cygwin, may need to set the
    # "Backspace sends ^H" setting in "Keys" section of the Mintty options
    # (see https://github.com/sbt/sbt/issues/562).
    stty -icanon min 1 -echo > /dev/null 2>&1
    export SPARK_REPL_OPTS="$SPARK_REPL_OPTS -Djline.terminal=unix"
    "$FWDIR/bin/spark-class" org.apache.spark.repl.Main "$@"
    # Remember the REPL's status before stty overwrites $?.
    repl_status=$?
    stty icanon echo > /dev/null 2>&1
  else
    export SPARK_REPL_OPTS
    "$FWDIR/bin/spark-class" org.apache.spark.repl.Main "$@"
    repl_status=$?
  fi

  return "$repl_status"
}
# Parse the command-line options and hand each value to its setter.
# The argument list is consumed front to back, so an option always reads the
# word that directly follows it, even when unrecognised arguments appear
# earlier on the command line.
# Globals:   info_log (set to 1 by --log-conf)
# Arguments: the script's command-line arguments
# Exits with status 1 after printing the usage text for -h/--help, and through
# out_error when an option that needs a value is the last argument.
function parse_options(){
  while [ $# -gt 0 ]; do
    case "$1" in
      -h | --help)
        usage
        exit 1
        ;;
      -c | --cores)
        [ $# -ge 2 ] || out_error "wrong format for -c/--cores"
        set_cores "$2" "-c/--cores"
        shift 2
        ;;
      -em | --executor-memory)
        [ $# -ge 2 ] || out_error "wrong format for -em/--executor-memory"
        set_em "$2" "-em/--executor-memory"
        shift 2
        ;;
      -dm | --driver-memory)
        [ $# -ge 2 ] || out_error "wrong format for -dm/--driver-memory"
        set_dm "$2" "-dm/--driver-memory"
        shift 2
        ;;
      -m | --master)
        [ $# -ge 2 ] || out_error "wrong format for -m/--master"
        set_spark_master "$2" "-m/--master"
        shift 2
        ;;
      --log-conf)
        set_spark_log_conf "true"
        info_log=1
        shift
        ;;
      *)
        # Unrecognised arguments are skipped without comment.
        shift
        ;;
    esac
  done
}

parse_options "$@"
# The restore-TTY-on-exit helpers are copied from the Scala script so that
# spark-shell exits properly even in a binary distribution of Spark where Scala
# is not installed.
saved_stty=""     # terminal settings to restore on exit; empty means nothing to restore
exit_status=127   # status onExit exits with until the script records main's status
# Re-apply the terminal settings held in saved_stty (echo in particular), then
# clear saved_stty.
function restoreSttySettings() {
  local previous_settings="$saved_stty"
  stty $previous_settings
  saved_stty=""
}
# Exit handler: restore the terminal when settings were saved, then leave the
# script with the status held in exit_status.
function onExit() {
  [[ -z "$saved_stty" ]] || restoreSttySettings
  exit $exit_status
}
# to reenable echo if we are interrupted before completing.
trap onExit INT

# Save the terminal settings. When stty fails (for example because stdin is
# not a terminal) clear the value so that nothing is restored later. The status
# is tested directly: "[[ ! $? ]]" only checks whether the text of $? is empty,
# which it never is, so that form could never detect a failure.
if ! saved_stty=$(stty -g 2>/dev/null); then
  saved_stty=""
fi
main

# record the exit status lest it be overwritten:
# then reenable echo and propagate the code.
# Nothing may run between main and this assignment, otherwise $? would hold
# that command's status instead of main's.
exit_status=$?
onExit