@@ -138,7 +138,7 @@ def parse_args():
138
138
help = "The SSH user you want to connect as (default: %default)" )
139
139
parser .add_option (
140
140
"--delete-groups" , action = "store_true" , default = False ,
141
- help = "When destroying a cluster, delete the security groups that were created. " )
141
+ help = "When destroying a cluster, delete the security groups that were created" )
142
142
parser .add_option (
143
143
"--use-existing-master" , action = "store_true" , default = False ,
144
144
help = "Launch fresh slaves, but use an existing stopped master if possible" )
@@ -152,9 +152,6 @@ def parse_args():
152
152
parser .add_option (
153
153
"--user-data" , type = "string" , default = "" ,
154
154
help = "Path to a user-data file (most AMI's interpret this as an initialization script)" )
155
- parser .add_option (
156
- "--security-group-prefix" , type = "string" , default = None ,
157
- help = "Use this prefix for the security group rather than the cluster name." )
158
155
parser .add_option (
159
156
"--authorized-address" , type = "string" , default = "0.0.0.0/0" ,
160
157
help = "Address to authorize on created security groups (default: %default)" )
@@ -305,12 +302,8 @@ def launch_cluster(conn, opts, cluster_name):
305
302
user_data_content = user_data_file .read ()
306
303
307
304
print "Setting up security groups..."
308
- if opts .security_group_prefix is None :
309
- master_group = get_or_make_group (conn , cluster_name + "-master" )
310
- slave_group = get_or_make_group (conn , cluster_name + "-slaves" )
311
- else :
312
- master_group = get_or_make_group (conn , opts .security_group_prefix + "-master" )
313
- slave_group = get_or_make_group (conn , opts .security_group_prefix + "-slaves" )
305
+ master_group = get_or_make_group (conn , cluster_name + "-master" )
306
+ slave_group = get_or_make_group (conn , cluster_name + "-slaves" )
314
307
authorized_address = opts .authorized_address
315
308
if master_group .rules == []: # Group was just now created
316
309
master_group .authorize (src_group = master_group )
@@ -335,11 +328,12 @@ def launch_cluster(conn, opts, cluster_name):
335
328
slave_group .authorize ('tcp' , 60060 , 60060 , authorized_address )
336
329
slave_group .authorize ('tcp' , 60075 , 60075 , authorized_address )
337
330
338
- # Check if instances are already running with the cluster name
331
+ # Check if instances are already running in our groups
339
332
existing_masters , existing_slaves = get_existing_cluster (conn , opts , cluster_name ,
340
333
die_on_error = False )
341
334
if existing_slaves or (existing_masters and not opts .use_existing_master ):
342
- print >> stderr , ("ERROR: There are already instances for name: %s " % cluster_name )
335
+ print >> stderr , ("ERROR: There are already instances running in " +
336
+ "group %s or %s" % (master_group .name , slave_group .name ))
343
337
sys .exit (1 )
344
338
345
339
# Figure out Spark AMI
@@ -413,13 +407,9 @@ def launch_cluster(conn, opts, cluster_name):
413
407
for r in reqs :
414
408
id_to_req [r .id ] = r
415
409
active_instance_ids = []
416
- outstanding_request_ids = []
417
410
for i in my_req_ids :
418
- if i in id_to_req :
419
- if id_to_req [i ].state == "active" :
420
- active_instance_ids .append (id_to_req [i ].instance_id )
421
- else :
422
- outstanding_request_ids .append (i )
411
+ if i in id_to_req and id_to_req [i ].state == "active" :
412
+ active_instance_ids .append (id_to_req [i ].instance_id )
423
413
if len (active_instance_ids ) == opts .slaves :
424
414
print "All %d slaves granted" % opts .slaves
425
415
reservations = conn .get_all_instances (active_instance_ids )
@@ -428,8 +418,8 @@ def launch_cluster(conn, opts, cluster_name):
428
418
slave_nodes += r .instances
429
419
break
430
420
else :
431
- print "%d of %d slaves granted, waiting longer for request ids including %s " % (
432
- len (active_instance_ids ), opts .slaves , outstanding_request_ids [ 0 : 10 ] )
421
+ print "%d of %d slaves granted, waiting longer" % (
422
+ len (active_instance_ids ), opts .slaves )
433
423
except :
434
424
print "Canceling spot instance requests"
435
425
conn .cancel_spot_instance_requests (my_req_ids )
@@ -488,72 +478,47 @@ def launch_cluster(conn, opts, cluster_name):
488
478
489
479
# Give the instances descriptive names
490
480
for master in master_nodes :
491
- name = '{cn}- master-{iid}' . format ( cn = cluster_name , iid = master . id )
492
- tag_instance ( master , name )
493
-
481
+ master . add_tag (
482
+ key = 'Name' ,
483
+ value = '{cn}-master-{iid}' . format ( cn = cluster_name , iid = master . id ))
494
484
for slave in slave_nodes :
495
- name = '{cn}-slave-{iid}' .format (cn = cluster_name , iid = slave .id )
496
- tag_instance (slave , name )
485
+ slave .add_tag (
486
+ key = 'Name' ,
487
+ value = '{cn}-slave-{iid}' .format (cn = cluster_name , iid = slave .id ))
497
488
498
489
# Return all the instances
499
490
return (master_nodes , slave_nodes )
500
491
501
492
502
- def tag_instance (instance , name ):
503
- for i in range (0 , 5 ):
504
- try :
505
- instance .add_tag (key = 'Name' , value = name )
506
- break
507
- except :
508
- print "Failed attempt %i of 5 to tag %s" % ((i + 1 ), name )
509
- if i == 5 :
510
- raise "Error - failed max attempts to add name tag"
511
- time .sleep (5 )
512
-
513
493
# Get the EC2 instances in an existing cluster if available.
514
494
# Returns a tuple of lists of EC2 instance objects for the masters and slaves
515
495
516
496
517
497
def get_existing_cluster(conn, opts, cluster_name, die_on_error=True):
    """Find the active master and slave instances of an existing cluster.

    Instances are classified by security-group membership: an active
    instance whose only group is ``<cluster_name>-master`` is a master, and
    one whose only group is ``<cluster_name>-slaves`` is a slave.

    Args:
        conn: Connection object providing ``get_all_instances()``
            (presumably a boto EC2 connection -- confirm against caller).
        opts: Parsed command-line options; not read by this function.
        cluster_name: Name used as the prefix of both security groups.
        die_on_error: When true, exit the process with status 1 if no
            master instance is found.

    Returns:
        A ``(master_nodes, slave_nodes)`` tuple of instance lists.  Both
        lists may be empty when ``die_on_error`` is false.
    """
    # Parenthesised single-argument print and sys.stderr.write produce the
    # same output under Python 2 and Python 3.
    print("Searching for existing cluster " + cluster_name + "...")

    master_group = cluster_name + "-master"
    slave_group = cluster_name + "-slaves"
    master_nodes = []
    slave_nodes = []

    for res in conn.get_all_instances():
        for inst in res.instances:
            # is_active is defined elsewhere in this file.
            if not is_active(inst):
                continue
            group_names = [g.name for g in inst.groups]
            # NOTE(review): exact list equality means an instance that also
            # belongs to any other security group is not matched here.
            if group_names == [master_group]:
                master_nodes.append(inst)
            elif group_names == [slave_group]:
                slave_nodes.append(inst)

    if master_nodes or slave_nodes:
        print("Found %d master(s), %d slaves" % (len(master_nodes), len(slave_nodes)))

    # A cluster without a master is only acceptable when the caller opted
    # out of the hard failure.
    if master_nodes or not die_on_error:
        return (master_nodes, slave_nodes)

    if slave_nodes:
        sys.stderr.write("ERROR: Could not find master in group " + master_group + "\n")
    else:
        sys.stderr.write("ERROR: Could not find any existing cluster\n")
    sys.exit(1)
556
520
521
+
557
522
# Deploy configuration files and run setup scripts on a newly launched
558
523
# or started EC2 cluster.
559
524
@@ -984,11 +949,7 @@ def real_main():
984
949
# Delete security groups as well
985
950
if opts .delete_groups :
986
951
print "Deleting security groups (this will take some time)..."
987
- if opts .security_group_prefix is None :
988
- group_names = [cluster_name + "-master" , cluster_name + "-slaves" ]
989
- else :
990
- group_names = [opts .security_group_prefix + "-master" ,
991
- opts .security_group_prefix + "-slaves" ]
952
+ group_names = [cluster_name + "-master" , cluster_name + "-slaves" ]
992
953
wait_for_cluster_state (
993
954
cluster_instances = (master_nodes + slave_nodes ),
994
955
cluster_state = 'terminated' ,
0 commit comments