From a0bcbc159e89be868ccc96175dbf1439461557e1 Mon Sep 17 00:00:00 2001
From: "Allan Douglas R. de Oliveira" <allan@chaordicsystems.com>
Date: Sun, 3 Aug 2014 10:25:59 -0700
Subject: [PATCH] SPARK-2246: Add user-data option to EC2 scripts

Author: Allan Douglas R. de Oliveira <allan@chaordicsystems.com>

Closes #1186 from douglaz/spark_ec2_user_data and squashes the following commits:

94a36f9 [Allan Douglas R. de Oliveira] Added user data option to EC2 script
---
 ec2/spark_ec2.py | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/ec2/spark_ec2.py b/ec2/spark_ec2.py
index 02cfe4ec39..0c2f85a386 100755
--- a/ec2/spark_ec2.py
+++ b/ec2/spark_ec2.py
@@ -135,6 +135,10 @@ def parse_args():
         "--master-opts", type="string", default="",
         help="Extra options to give to master through SPARK_MASTER_OPTS variable " +
              "(e.g -Dspark.worker.timeout=180)")
+    parser.add_option(
+        "--user-data", type="string", default="",
+        help="Path to a user-data file (most AMI's interpret this as an initialization script)")
+
 
     (opts, args) = parser.parse_args()
     if len(args) != 2:
@@ -274,6 +278,12 @@ def launch_cluster(conn, opts, cluster_name):
     if opts.key_pair is None:
         print >> stderr, "ERROR: Must provide a key pair name (-k) to use on instances."
         sys.exit(1)
+
+    user_data_content = None
+    if opts.user_data:
+        with open(opts.user_data) as user_data_file:
+            user_data_content = user_data_file.read()
+
     print "Setting up security groups..."
     master_group = get_or_make_group(conn, cluster_name + "-master")
     slave_group = get_or_make_group(conn, cluster_name + "-slaves")
@@ -347,7 +357,8 @@ def launch_cluster(conn, opts, cluster_name):
                 key_name=opts.key_pair,
                 security_groups=[slave_group],
                 instance_type=opts.instance_type,
-                block_device_map=block_map)
+                block_device_map=block_map,
+                user_data=user_data_content)
             my_req_ids += [req.id for req in slave_reqs]
             i += 1
 
@@ -398,7 +409,8 @@ def launch_cluster(conn, opts, cluster_name):
                                       placement=zone,
                                       min_count=num_slaves_this_zone,
                                       max_count=num_slaves_this_zone,
-                                      block_device_map=block_map)
+                                      block_device_map=block_map,
+                                      user_data=user_data_content)
                 slave_nodes += slave_res.instances
                 print "Launched %d slaves in %s, regid = %s" % (num_slaves_this_zone,
                                                                 zone, slave_res.id)
-- 
GitLab