pax_global_header00006660000000000000000000000064130151235750014514gustar00rootroot0000000000000052 comment=26b4208b55ed8c8cd82299ae292d3679bc151e4e cgcloud-releases-1.6.0/000077500000000000000000000000001301512357500147415ustar00rootroot00000000000000cgcloud-releases-1.6.0/.gitignore000066400000000000000000000001021301512357500167220ustar00rootroot00000000000000/.idea *.pyc *.egg .eggs/ nosetests.xml /venv/ .cache __pycache__ cgcloud-releases-1.6.0/LICENSE000066400000000000000000000011711301512357500157460ustar00rootroot00000000000000Copyright (C) 2011-15 by UCSC Computational Genomics Lab Contributors: Hannes Schmidt, Christopher Ketchum Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. cgcloud-releases-1.6.0/MANIFEST.in000066400000000000000000000001261301512357500164760ustar00rootroot00000000000000include version.py # http://bugs.python.org/issue12885 (I think) include ./version.py cgcloud-releases-1.6.0/Makefile000066400000000000000000000116301301512357500164020ustar00rootroot00000000000000# Copyright (C) 2015 UCSC Computational Genomics Lab # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. define help Supported targets: 'develop', 'sdist', 'clean', 'test' and 'pypi' The 'develop' target creates an editable install (aka develop mode). The 'sdist' target creates source distributions for each of the subprojects. The 'clean' target undoes the effect of 'sdist' and 'develop'. The 'test' target runs the unit tests. The 'pypi' target publishes the current commit of the project to PyPI after asserting that it is being invoked on a continuous integration server, that the working copy and the index are clean and ensuring . 
endef export help .PHONY: help help: @echo "$$help" python=python2.7 tests=src develop_projects=lib core jenkins spark mesos toil sdist_projects=lib agent spark-tools mesos-tools all_projects=lib core agent jenkins spark spark-tools mesos mesos-tools toil green=\033[0;32m normal=\033[0m red=\033[0;31m .SUFFIXES: define _develop .PHONY: develop_$1 develop_$1: _check_venv $1/version.py $1/MANIFEST.in cd $1 && $(python) setup.py egg_info develop endef $(foreach project,$(develop_projects),$(eval $(call _develop,$(project)))) .PHONY: develop develop: $(foreach project,$(develop_projects),develop_$(project)) # Mirrors the intra-project dependencies declared in each setup.py develop_agent: develop_lib develop_core: develop_lib develop_jenkins: develop_lib develop_core develop_mesos: develop_lib develop_core develop_spark: develop_lib develop_core develop_toil: develop_lib develop_core develop_mesos define _sdist .PHONY: sdist_$1 sdist_$1: _check_venv $1/version.py $1/MANIFEST.in cd $1 && $(python) setup.py sdist endef $(foreach project,$(sdist_projects),$(eval $(call _sdist,$(project)))) .PHONY: sdist sdist: $(foreach project,$(sdist_projects),sdist_$(project)) define _pypi .PHONY: pypi_$1 pypi_$1: _check_venv _check_running_on_jenkins _check_clean_working_copy $1/version.py $1/MANIFEST.in test "$$$$ghprbActualCommit" \ && echo "We're building a PR, skipping PyPI." || ( \ cd $1 && $(python) setup.py egg_info sdist bdist_egg upload ) endef $(foreach project,$(all_projects),$(eval $(call _pypi,$(project)))) .PHONY: pypi pypi: $(foreach project,$(all_projects),pypi_$(project)) define _clean .PHONY: clean_$1 # clean depends on version.py since it invokes setup.py clean_$1: _check_venv $1/version.py cd $1 && $(python) setup.py clean --all && rm -rf dist src/*.egg-info MANIFEST.in version.py version.pyc endef $(foreach project,$(all_projects),$(eval $(call _clean,$(project)))) .PHONY: clean clean: $(foreach project,$(all_projects),clean_$(project)) define _undevelop .PHONY: undevelop_$1 # develop depends on version.py since it invokes setup.py undevelop_$1: _check_venv $1/version.py cd $1 && $(python) setup.py develop -u endef $(foreach project,$(all_projects),$(eval $(call _undevelop,$(project)))) .PHONY: undevelop undevelop: $(foreach project,$(develop_projects),undevelop_$(project)) define _test .PHONY: test_$1 test_$1: _check_venv _check_pytest cd $1 && $(python) ../run_tests.py "$$(tests)" @echo "$(green)Tests succeeded.$(normal)" endef $(foreach project,$(develop_projects),$(eval $(call _test,$(project)))) .PHONY: test test: $(foreach project,$(develop_projects),test_$(project)) .PHONY: _check_venv _check_venv: @$(python) -c 'import sys; sys.exit( int( not hasattr(sys, "real_prefix") ) )' \ || ( echo "$(red)A virtualenv must be active.$(normal)" ; false ) .PHONY: _check_pytest _check_pytest: _check_venv $(python) -c 'import pytest' \ || ( echo "$(red)The 'pytest' distribution must be installed.$(normal)" ; false ) .PHONY: _check_clean_working_copy _check_clean_working_copy: @echo "$(green)Checking if your working copy is clean ...$(normal)" @git diff --exit-code > /dev/null \ || ( echo "$(red)Your working copy looks dirty.$(normal)" ; false ) @git diff --cached --exit-code > /dev/null \ || ( echo "$(red)Your index looks dirty.$(normal)" ; false ) @test -z "$$(git ls-files --other --exclude-standard --directory)" \ || ( echo "$(red)You have are untracked files:$(normal)" \ ; git ls-files --other --exclude-standard --directory \ ; false ) .PHONY: _check_running_on_jenkins 
_check_running_on_jenkins:
	@echo "$(green)Checking if running on Jenkins ...$(normal)"
	test -n "$$BUILD_NUMBER" \
		|| ( echo "$(red)This target should only be invoked on Jenkins.$(normal)" ; false )

%/version.py: version.py
	$(python) $< > $@

%/MANIFEST.in: MANIFEST.in
	cp $< $@

cgcloud-releases-1.6.0/README.md

CGCloud lets you automate the creation, management and provisioning of VMs and clusters of VMs
in Amazon EC2. While allowing for easy programmatic customization of VMs in development, it also
provides rock-solid reproducibility in production.

Features
========

* Works with base images of all actively supported releases of Ubuntu and Fedora, and some
  releases of CentOS

* Lets you share VMs between multiple users, keeping the set of authorized SSH keys synchronized
  on all VMs in real time as users/keypairs are added or removed from AWS.

* Offers isolation between users, teams and deployments via namespaces

* Lets you stand up a distributed continuous integration infrastructure using one long-running
  Jenkins master and multiple on-demand Jenkins slaves

* Lets you create an HDFS-backed Apache Spark cluster of any number of nodes in just three
  minutes, independently of the number of nodes, with or without attached EBS volumes

* Lets you create a Mesos cluster of any number of nodes

* Supports running Spark, Mesos and Toil workers on the spot market

* Is easily extensible via a simple plugin architecture

* VMs created by CGCloud optionally report memory and disk utilization as custom CloudWatch
  metrics

So what does it not offer? What are its limitations? First and foremost, it is strictly tied to
AWS and EC2. Other cloud providers are not supported and probably will not be in the near future.
It does not have a GUI. It is written in Python and if you want to customize it, you will need to
know Python. It makes extreme use of inheritance, multiple inheritance actually. Some people
frown at that since it makes it likely that your own customizations will break between releases
of CGCloud. While allowing CGCloud to be extremely
[DRY](https://en.wikipedia.org/wiki/Don%27t_repeat_yourself), multiple inheritance also increases
the complexity and steepens the learning curve.

Where to go from here?
======================

If you are a (potential) **user** of CGCloud, head on over to the
[CGCloud Core README](core/README.rst) and then move on to

* [CGCloud Jenkins](jenkins/README.rst)
* [CGCloud Spark](spark/README.rst)
* [CGCloud Mesos](mesos/README.rst)
* [CGCloud Toil](toil/README.rst)

If you are a **developer**, make sure you have pip and virtualenv, clone this repository and
perform the following steps from the project root::

    virtualenv venv
    source venv/bin/activate
    make develop sdist

That will set up the project in development mode inside a virtualenv and create source
distributions (aka sdists) for those components that are to be installed on remote boxes. In
development mode, these components are not installed from PyPI but are instead directly uploaded
to the box in sdist form and then installed from the sdist.

After pulling changes from the remote, you need to run `make develop sdist` again. This step is
easy to forget because you often get by without it. Specifically, `make develop` is necessary
after any of the setup.py or version.py files have changed. And `make sdist` is necessary after
changes to the agent, spark-tools or mesos-tools subprojects.
Otherwise, `cgcloud create` will install a stale version of these on the remote box. To run the unittests, `pip install pytest` and then do `make test`. cgcloud-releases-1.6.0/agent/000077500000000000000000000000001301512357500160375ustar00rootroot00000000000000cgcloud-releases-1.6.0/agent/.gitignore000066400000000000000000000000671301512357500200320ustar00rootroot00000000000000/build /dist *.egg-info *.pyc /MANIFEST.in /version.py cgcloud-releases-1.6.0/agent/main.py000066400000000000000000000003131301512357500173320ustar00rootroot00000000000000#!/usr/bin/env python2.7 import os import sys sys.path.append( os.path.join( os.path.dirname( __file__ ), 'src', 'main' ) ) from cgcloud.agent.cli import main if __name__ == "__main__": main( ) cgcloud-releases-1.6.0/agent/setup.cfg000066400000000000000000000002251301512357500176570ustar00rootroot00000000000000[pytest] # Look for any python file, the default of test_*.py wouldn't work for us python_files=*.py # Also run doctests addopts = --doctest-modules cgcloud-releases-1.6.0/agent/setup.py000066400000000000000000000016531301512357500175560ustar00rootroot00000000000000from __future__ import absolute_import import sys from setuptools import setup, find_packages from version import cgcloud_version, bd2k_python_lib_dep, boto_dep setup( name='cgcloud-agent', version=cgcloud_version, author='Hannes Schmidt', author_email='hannes@ucsc.edu', url='https://github.com/BD2KGenomics/cgcloud', description='Management of ~/.ssh/authorized_keys for a fleet of EC2 instances', package_dir={ '': 'src' }, packages=find_packages( 'src' ), namespace_packages=[ 'cgcloud' ], package_data={ 'cgcloud.agent': [ 'init-script.*' ] }, entry_points={ 'console_scripts': [ 'cgcloudagent = cgcloud.agent.cli:main' ], }, install_requires=filter( None, [ bd2k_python_lib_dep, 'cgcloud-lib==' + cgcloud_version, boto_dep, 'python-daemon==2.0.6', 'argparse==1.4.0' if sys.version_info < (2, 7) else None ] ) ) cgcloud-releases-1.6.0/agent/src/000077500000000000000000000000001301512357500166265ustar00rootroot00000000000000cgcloud-releases-1.6.0/agent/src/cgcloud/000077500000000000000000000000001301512357500202465ustar00rootroot00000000000000cgcloud-releases-1.6.0/agent/src/cgcloud/__init__.py000066400000000000000000000000731301512357500223570ustar00rootroot00000000000000__import__( 'pkg_resources' ).declare_namespace( __name__ )cgcloud-releases-1.6.0/agent/src/cgcloud/agent/000077500000000000000000000000001301512357500213445ustar00rootroot00000000000000cgcloud-releases-1.6.0/agent/src/cgcloud/agent/__init__.py000066400000000000000000000163021301512357500234570ustar00rootroot00000000000000from contextlib import contextmanager import logging import errno import os import tempfile import pwd import threading from boto.sqs.message import RawMessage from bd2k.util.throttle import LocalThrottle import time from cgcloud.lib.context import Context from cgcloud.lib.message import Message, UnknownVersion from cgcloud.lib.util import UserError log = logging.getLogger( __name__ ) class Agent( object ): """ The agent is a daemon process running on every EC2 instance of AgentBox. """ def __init__( self, ctx, options ): """ :type ctx: Context """ super( Agent, self ).__init__( ) self.ctx = ctx self.options = options self.fingerprints = None queue_name = self.ctx.to_aws_name( self.ctx.agent_queue_name ) self.queue = self.ctx.sqs.get_queue( queue_name ) if self.queue is None: # The create_queue API call handles races gracefully, # the conditional above is just an optimization. 
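            # Note that the queue is per-instance: each agent creates (or reuses) its own
            # SQS queue and, just below, subscribes that queue to the shared SNS agent
            # topic, so a single notification published to the topic (e.g. a
            # TYPE_UPDATE_SSH_KEYS message) fans out to every running agent.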
self.queue = self.ctx.sqs.create_queue( queue_name ) self.queue.set_message_class( RawMessage ) self.ctx.sns.subscribe_sqs_queue( ctx.agent_topic_arn, self.queue ) def run( self ): throttle = LocalThrottle( min_interval=self.options.interval ) # First call always returns immediately throttle.throttle( ) # Always update keys initially self.update_ssh_keys( ) self.start_metric_thread( ) while True: # Do 'long' (20s) polling for messages messages = self.queue.get_messages( num_messages=10, # the maximum permitted wait_time_seconds=20, # ditto visibility_timeout=10 ) if messages: # Process messages, combining multiple messages of the same type update_ssh_keys = False for sqs_message in messages: try: message = Message.from_sqs( sqs_message ) except UnknownVersion as e: log.warning( 'Ignoring message with unkown version' % e.version ) else: if message.type == Message.TYPE_UPDATE_SSH_KEYS: update_ssh_keys = True if update_ssh_keys: self.update_ssh_keys( ) # Greedily consume all accrued messages self.queue.delete_message_batch( messages ) else: # Without messages, update if throttle interval has passed if throttle.throttle( wait=False ): self.update_ssh_keys( ) def make_dir( self, path, mode, uid, gid ): try: os.mkdir( path, mode ) except OSError as e: if e.errno == errno.EEXIST: pass else: raise else: os.chown( path, uid, gid ) @contextmanager def make_file( self, path, mode, uid, gid ): """ Atomically create a file at the given path. To be used as a context manager that yields a file handle for writing to. """ dir_path, file_name = os.path.split( path ) with tempfile.NamedTemporaryFile( prefix=file_name + '.', dir=dir_path, delete=False ) as temp_file: yield temp_file os.chmod( temp_file.name, mode ) os.chown( temp_file.name, uid, gid ) os.rename( temp_file.name, path ) def update_ssh_keys( self ): keypairs = self.ctx.expand_keypair_globs( self.options.ec2_keypair_names ) fingerprints = set( keypair.fingerprint for keypair in keypairs ) if fingerprints != self.fingerprints: ssh_keys = set( self.download_ssh_key( keypair ) for keypair in keypairs ) if None in ssh_keys: ssh_keys.remove( None ) for account in self.options.accounts: pw = pwd.getpwnam( account ) dot_ssh_path = os.path.join( pw.pw_dir, '.ssh' ) self.make_dir( dot_ssh_path, 00755, pw.pw_uid, pw.pw_gid ) authorized_keys_path = os.path.join( dot_ssh_path, 'authorized_keys' ) try: with open( authorized_keys_path ) as f: local_ssh_keys = set( l.strip( ) for l in f.readlines( ) if not l.isspace( ) ) except IOError as e: if e.errno == errno.ENOENT: local_ssh_keys = None else: raise if local_ssh_keys != ssh_keys: with self.make_file( authorized_keys_path, 00644, pw.pw_uid, pw.pw_gid ) as authorized_keys: authorized_keys.writelines( ssh_key + '\n' for ssh_key in ssh_keys ) self.fingerprints = fingerprints def download_ssh_key( self, keypair ): try: return self.ctx.download_ssh_pubkey( keypair ).strip( ) except UserError: log.warn( 'Exception while downloading SSH public key from S3.', exc_info=True ) return None def start_metric_thread( self ): try: import psutil except ImportError: pass else: t = threading.Thread( target=self.metric_thread ) t.daemon = True t.start( ) def metric_thread( self ): """ Collects memory and disk usage as percentages via psutil and adds them as Cloudwatch metrics. Any "3" type instance assumes ephemeral (/mnt/ephemeral) is primary storage. Metrics are updated every 5 minutes under the 'AWS/EC2' Namespace. 
Resource Metric Name -------- ----------- Memory MemUsage Disk DiskUsage_root or DiskUsage_ """ import psutil from boto.ec2 import cloudwatch from boto.utils import get_instance_metadata metadata = get_instance_metadata( ) instance_id = metadata[ 'instance-id' ] region = metadata[ 'placement' ][ 'availability-zone' ][ 0:-1 ] while True: # Collect memory metrics memory_percent = psutil.virtual_memory( ).percent metrics = { 'MemUsage': memory_percent } # Collect disk metrics for partition in psutil.disk_partitions( ): mountpoint = partition.mountpoint if mountpoint == '/': metrics[ 'DiskUsage_root' ] = psutil.disk_usage( mountpoint ).percent else: metrics[ 'DiskUsage' + mountpoint.replace( '/', '_' ) ] = psutil.disk_usage( mountpoint ).percent # Send metrics cw = cloudwatch.connect_to_region( region ) try: cw.put_metric_data( 'CGCloud', metrics.keys( ), metrics.values( ), unit='Percent', dimensions={ "InstanceId": instance_id } ) finally: cw.close( ) cw = None time.sleep( 300 ) cgcloud-releases-1.6.0/agent/src/cgcloud/agent/cli.py000077500000000000000000000254011301512357500224720ustar00rootroot00000000000000import os import sys import argparse import platform import itertools import logging from logging.handlers import SysLogHandler, SYSLOG_UDP_PORT import daemon from bd2k.util.logging import Utf8SyslogFormatter from bd2k.util import uid_to_name, gid_to_name, name_to_uid, name_to_gid, shell from bd2k.util.lockfile import SmartPIDLockFile from bd2k.util.throttle import LocalThrottle from cgcloud.lib.context import Context from cgcloud.agent import Agent log = logging.getLogger( ) description = "The CGHub Cloud Agent daemon" exec_path = os.path.abspath( sys.argv[ 0 ] ) exec_name = os.path.basename( exec_path ) def main( ): parser = argparse.ArgumentParser( formatter_class=argparse.ArgumentDefaultsHelpFormatter, description=description ) group = parser.add_argument_group( title='functional options' ) group.add_argument( '--namespace', '-n', metavar='PREFIX', required=True, help='Optional prefix for naming EC2 resource like instances, images, ' 'volumes, etc. Use this option to create a separate namespace in ' 'order to avoid collisions, e.g. when running tests. The value of ' 'the environment variable CGCLOUD_NAMESPACE, if that variable is ' 'present, overrides the default. The string __me__ anywhere in the ' 'namespace will be replaced by the name of the IAM user whose ' 'credentials are used to issue requests to AWS.' ) default_zone = os.environ.get( 'CGCLOUD_ZONE', None ) group.add_argument( '--zone', '-z', metavar='AVAILABILITY_ZONE', default=default_zone, required=not default_zone, dest='availability_zone', help='The name of the EC2 availability zone to operate in, ' 'e.g. us-east-1a, us-west-1b or us-west-2c etc. This argument ' 'implies the AWS region to run in. The value of the environment ' 'variable CGCLOUD_ZONE, if that variable is present, overrides the ' 'default.' ) group.add_argument( '--interval', '-i', metavar='SECONDS', default=300, type=int, help='' ) group.add_argument( '--accounts', metavar='PATH', nargs='+', default=[ uid_to_name( os.getuid( ) ) ], help="The names of user accounts whose .ssh/authorized_keys file should " "be managed by this agent. Note that managing another user's " ".ssh/authorized_keys typically requires running the agent as root." 
) default_ec2_keypair_names = os.environ.get( 'CGCLOUD_KEYPAIRS', '' ).split( ) group.add_argument( '--keypairs', '-k', metavar='EC2_KEYPAIR_NAME', dest='ec2_keypair_names', nargs='+', required=not default_ec2_keypair_names, default=default_ec2_keypair_names, help='The names or name patterns of EC2 key pairs whose public key is to ' 'be to maintained in the ~/.ssh/authorized_keys files of each ' 'account listed in the --accounts option. Each argument may be a ' 'literal name of a keypairs or a shell-style glob in which case ' 'every key pair whose name matches that glob will be deployed ' 'to the box. The value of the environment variable CGCLOUD_KEYPAIRS, ' 'if that variable is present, overrides the default.' ) group = parser.add_argument_group( title='process options' ) group.add_argument( '--debug', '-X', default=False, action='store_true', help="Run in debug mode without daemonizing. All other process options " "will be ignored." ) group.add_argument( '--user', '-u', metavar='UID', default=uid_to_name( os.getuid( ) ), help='The name of the user to run the daemon as.' ) group.add_argument( '--group', '-g', metavar='GID', default=gid_to_name( os.getgid( ) ), help='The name of the group to run the daemon as.' ) group.add_argument( '--pid-file', '-p', metavar='PATH', default='./%s.pid' % exec_name, help="The path of the file to which the daemon's process ID will be " "written." ) log_levels = [ logging.getLevelName( level ) for level in ( logging.CRITICAL, logging.ERROR, logging.WARNING, logging.INFO, logging.DEBUG ) ] group.add_argument( '--log-level', default=logging.getLevelName( logging.INFO ), choices=log_levels, help="The default log level." ) group.add_argument( '--log-spill', metavar='PATH', default='./%s.log' % exec_name, help="The path of the file to which the daemon's stderr and stdout will " "be redirected. Most of the diagnostic output will go to syslog but " "some might spill over to stderr or stdout, especially on errors " "during daemonization." ) group = parser.add_argument_group( title='miscellaeneous options' ) group.add_argument( '--init-script', default=False, action='store_true', help='Instead of starting the daemon, generate an /etc/init.d script for ' '%s using the specified options and exit. One would typically ' 'redirect the output to a file, move that file into place, ' 'make it executable and run chkconfig to ' 'update the run levels.' % exec_name ) group.add_argument( '--init', metavar='NAME', default=None, required=False, choices=[ 'sysv', 'upstart', 'systemd' ], help="The init system invoking this program. This parameter is only " "needed when this program is run as a service under the auspices of " "a init daemon." ) options = parser.parse_args( ) # The lock file path will be evaluated by DaemonContext after the chdir to /, # so we need to convert a relative path to an absolute one. Also, the init script generation # should not use relative paths. options.pid_file = os.path.abspath( options.pid_file ) options.log_spill = os.path.abspath( options.log_spill ) if options.init_script: generate_init_script( options ) sys.exit( 0 ) def run( ): log.info( "Entering main loop." ) ctx = Context( availability_zone=options.availability_zone, namespace=options.namespace ) throttle = LocalThrottle( min_interval=options.interval ) for i in itertools.count( ): throttle.throttle( ) try: log.info( "Starting run %i.", i ) Agent( ctx, options ).run( ) log.info( "Completed run %i.", i ) except (SystemExit, KeyboardInterrupt): log.info( 'Terminating.' 
) break except: log.exception( 'Abandoning run due to exception' ) formatter = Utf8SyslogFormatter( '%s[%%(process)d]: [%%(levelname)s] %%(threadName)s %%(name)s: %%(message)s' % exec_name ) if options.debug: handler = logging.StreamHandler( sys.stderr ) handler.setFormatter( formatter ) log.addHandler( handler ) log.setLevel( logging.DEBUG ) run( ) else: system = platform.system( ) if system in ( 'Darwin', 'FreeBSD' ): address = '/var/run/syslog' elif system == 'Linux': address = '/dev/log' else: address = ( 'localhost', SYSLOG_UDP_PORT ) handler = SysLogHandler( address=address ) handler.setFormatter( formatter ) log.addHandler( handler ) # getLevelName works in the reverse, too: log.setLevel( logging.getLevelName( options.log_level ) ) log_spill = open( options.log_spill, 'w' ) if options.log_spill else None try: pid_lock_file = SmartPIDLockFile( options.pid_file ) with daemon.DaemonContext( uid=name_to_uid( options.user ), gid=name_to_gid( options.group ), stderr=log_spill, stdout=log_spill, files_preserve=[ handler.socket ], # True needed for systemd (see [1]) detach_process=True if options.init == 'systemd' else None, pidfile=pid_lock_file ): run( ) finally: if log_spill: log_spill.close( ) # [1]: http://echorand.me/2013/08/02/notes-on-writing-systemd-unit-files-for-beakers-daemon-processes/ def generate_init_script( options ): from pkg_resources import resource_string import cgcloud.agent import platform distro, version, codename = map( str.lower, platform.linux_distribution( ) ) console = None if distro == 'ubuntu': quote_level = 1 if codename < 'vivid': script = 'init-script.upstart' # Lucid's version of upstart doesn't support "console log", Precise's does, don't know # about the versions in between console = 'output' if codename < 'precise' else 'log' else: script = 'init-script.systemd' else: script = 'init-script.lsb' quote_level = 2 init_script = resource_string( cgcloud.agent.__name__, script ) args = [ '--namespace', options.namespace, '--zone', options.availability_zone, '--interval', str( options.interval ), '--accounts' ] + options.accounts + [ '--keypairs' ] + options.ec2_keypair_names + [ '--user', options.user, '--group', options.group, '--pid-file', options.pid_file, '--log-level', options.log_level, '--log-spill', options.log_spill ] variables = vars( options ).copy( ) variables.update( dict( args=' '.join( shell.quote( arg, level=quote_level ) for arg in args ), exec_path=exec_path, exec_name=exec_name, console=console, description=description ) ) print init_script % variables cgcloud-releases-1.6.0/agent/src/cgcloud/agent/init-script.lsb000066400000000000000000000034421301512357500243160ustar00rootroot00000000000000#!/bin/sh # # chkconfig: 35 99 1 # description: %(description)s # processname: %(exec_name)s # pid_file: %(pid_file)s # ### BEGIN INIT INFO # Provides: %(exec_name)s # Required-Start: $network # Required-Stop: 3 5 # Default-Start: 3 5 # Default-Stop: # Short-Description: %(exec_name)s # Description: %(description)s ### END INIT INFO exec_path=%(exec_path)s exec_name=%(exec_name)s pid_file=%(pid_file)s log_spill=%(log_spill)s user=%(user)s group=%(group)s if [ -f /etc/rc.d/init.d/functions ]; then . /etc/rc.d/init.d/functions fi RETVAL=0 start() { echo -n "Starting $exec_name: " mkdir -p ${pid_file%%/*} ${log_spill%%/*} chown $user:$group ${pid_file%%/*} chmod 755 ${pid_file%%/*} ${log_spill%%/*} daemon $exec_path --init sysv %(args)s RETVAL=$? 
echo [ $RETVAL -eq 0 ] && touch /var/lock/subsys/$exec_name return $RETVAL } stop() { echo -n "Stopping $exec_name: " killproc -p $pid_file $exec_path RETVAL=$? echo if [ $RETVAL -eq 0 ]; then rm -f /var/lock/subsys/$exec_name fi } restart() { stop start } # See how we were called. case "$1" in start) [ -f /var/lock/subsys/$exec_name ] && exit 0 $1 ;; stop) [ -f /var/lock/subsys/$exec_name ] || exit 0 $1 ;; restart) $1 ;; status) status -p $pid_file $exec_path RETVAL=$? ;; condrestart|try-restart) [ -f /var/lock/subsys/$exec_name ] && restart || : ;; reload) echo "can't reload configuration, you have to restart it" RETVAL=3 ;; force-reload) restart ;; *) echo "Usage: $0 {start|stop|status|restart|condrestart|try-restart|reload|force-reload}" exit 1 ;; esac exit $RETVAL cgcloud-releases-1.6.0/agent/src/cgcloud/agent/init-script.systemd000066400000000000000000000007761301512357500252350ustar00rootroot00000000000000[Unit] Description=%(exec_name)s Documentation=https://github.com/BD2KGenomics/cgcloud-agent After=network.target [Service] Type=forking PIDFile=%(pid_file)s ExecStart=%(exec_path)s --init systemd %(args)s User=%(user)s Group=%(group)s ExecStartPre=\ pid_file="%(pid_file)s" ; \ log_spill="%(log_spill)s" ; \ user=%(user)s ; \ group=%(group)s ; \ mkdir -p "${pid_file%%/*}" "${log_spill%%/*}" ; \ chown $user:$group "${pid_file%%/*}" ; \ chmod 755 ${pid_file%%/*} ${log_spill%%/*} cgcloud-releases-1.6.0/agent/src/cgcloud/agent/init-script.upstart000066400000000000000000000006711301512357500252410ustar00rootroot00000000000000description "%(exec_name)s" author "Hannes Schmidt " console %(console)s start on runlevel [2345] stop on runlevel [016] respawn exec %(exec_path)s --init upstart %(args)s pre-start script pid_file=%(pid_file)s log_spill=%(log_spill)s user=%(user)s group=%(group)s mkdir -p ${pid_file%%/*} ${log_spill%%/*} chown $user:$group ${pid_file%%/*} chmod 755 ${pid_file%%/*} ${log_spill%%/*} end script cgcloud-releases-1.6.0/core/000077500000000000000000000000001301512357500156715ustar00rootroot00000000000000cgcloud-releases-1.6.0/core/.gitignore000066400000000000000000000000671301512357500176640ustar00rootroot00000000000000/build /dist *.egg-info *.pyc /MANIFEST.in /version.py cgcloud-releases-1.6.0/core/README.rst000066400000000000000000000373251301512357500173720ustar00rootroot00000000000000Prerequisites ============= To install and use CGCloud, you need * Python ≧ 2.7.x * pip_ and virtualenv_ * Git_ * Mac OS X: Xcode_ and the `Xcode Command Line Tools`_ (needed during the installation of cgcloud-core for compiling the PyCrypto dependency) .. _pip: https://pip.readthedocs.org/en/latest/installing.html .. _virtualenv: https://virtualenv.pypa.io/en/latest/installation.html .. _Git: http://git-scm.com/ .. _Xcode: https://itunes.apple.com/us/app/xcode/id497799835?mt=12 .. _Xcode Command Line Tools: http://stackoverflow.com/questions/9329243/xcode-4-4-command-line-tools Installation ============ Read the entire section before pasting any commands and ensure that all prerequisites are installed. It is recommended to install CGCloud into a virtualenv. Create a virtualenv and use ``pip`` to install the ``cgcloud-core`` package:: virtualenv ~/cgcloud source ~/cgcloud/bin/activate pip install cgcloud-core * If you get ``DistributionNotFound: No distributions matching the version for cgcloud-core``, try running ``pip install --pre cgcloud-core``. 
* If you get an error about ``yaml.h`` being missing you may need to install libyaml (via HomeBrew on OS X) or libyaml-dev (via apt-get or yum on Linux). * If you get :: AttributeError: 'tuple' object has no attribute 'is_prerelease' you may need to upgrade setuptools :: sudo pip install --upgrade setuptools * If you get :: ImportError: cannot import name cgcloud_version you may need to upgrade virtualenv :: sudo pip install --upgrade virtualenv * If, on Mountain Lion, you get:: clang: error: unknown argument: '-mno-fused-madd' [-Wunused-command-line-argument-hard-error-in-future] clang: note: this will be a hard error (cannot be downgraded to a warning) in the future error: command 'clang' failed with exit status 1 try the following work-around:: export CFLAGS=-Qunused-arguments export CPPFLAGS=-Qunused-arguments The installer places the ``cgcloud`` executable into the ``bin`` directory of the virtualenv. Before you can invoke ``cgcloud``, you have to activate the virtualenv as shown above. Alternatively, create a per-user bin directory and symlink the ``cgcloud`` executable into it:: deactivate mkdir -p ~/bin ln -snf ~/cgcloud/bin/cgcloud ~/bin After adding ``export PATH="$HOME/bin:$PATH"`` to your to your `~/.profile`, `~/.bash_profile` or `~/.bashrc`, you won't need to explicitly activate the virtualenv before running cgcloud. You should be able to invoke ``cgcloud`` now:: cgcloud --help Auto-completion for Bash ======================== Install the awesome argcomplete_ module:: pip install argcomplete Then add the following command to your ``~/.profile``:: eval "$(/absolute/path/to/virtualenv/bin/register-python-argcomplete cgcloud)" .. _argcomplete: https://github.com/kislyuk/argcomplete Configuration ============= Access keys ----------- Ask your AWS admin to setup an IAM account in AWS for you. Log into Amazon's IAM console and generate an `access key`_ for yourself. While your IAM username and password are used to authenticate yourself for interactive use via the AWS console, the access key is used for programmatic access via ``cgcloud``. Once you have an access key, create ``~/.boto`` on you local computer with the following contents:: [Credentials] aws_access_key_id = PASTE_YOUR_ACCESS_KEY_ID_HERE aws_secret_access_key = PASTE_YOUR_SECRET_ACCESS_KEY_HERE The ``~/.boto`` file is being deprecated. Consider using ``~/.aws/credentials`` instead. It is supported by various AWS SDKs and allows for easily switching between different AWS accounts (profiles):: [foo] aws_access_key_id=PASTE_YOUR_FOO_ACCESS_KEY_ID_HERE aws_secret_access_key=PASTE_YOUR_FOO_SECRET_KEY_ID_HERE region=us-west-2 [bar] aws_access_key_id=PASTE_YOUR_BAR_ACCESS_KEY_ID_HERE aws_secret_access_key=PASTE_YOUR_BAR_SECRET_KEY_ID_HERE region=us-west-2 To choose an active profile, set the ``AWS_PROFILE`` environment variable:: export AWS_PROFILE=foo .. _access key: http://docs.aws.amazon.com/AWSSimpleQueueService/latest/SQSGettingStartedGuide/AWSCredentials.html EC2 region and availability zone -------------------------------- Edit your ``~/.profile`` or ``~/.bash_profile`` and add the following line:: export CGCLOUD_ZONE=us-west-2a This configures both the region ``us-west-2`` and the availability zone within that region: ``a``. Instead of ``us-west-2a`` you could use ``us-east-1a`` or any other zone in any other EC2 region. Public SSH key -------------- If you don't have an SSH key, you can create one using the ``ssh-keygen`` command. Do not use the EC2 console to generate a key. 
This would be insecure and produce a key that is incompatible with CGCloud. Register your SSH key
in EC2 by running::

    cgcloud register-key ~/.ssh/id_rsa.pub

The above command imports the given public key to EC2 as a key pair (I know, the terminology is
confusing) but also uploads it to S3, see the next paragraph for an explanation. The name of the
key pair in EC2 will be set to your IAM user account name. In S3 the public key will be stored
under its fingerprint.

If cgcloud complains that the ``Private key file is encrypted``, your private key is probably
encrypted with a passphrase (as it should). You need to add the key to the SSH agent via
``ssh-add`` which should prompt you for the passphrase. On Mac OS X this can be made more
convenient by running ``ssh-add -K`` or ``ssh-add -K /path/to/private/key`` once. This will
automatically add the key to the agent every time you log in. The passphrase will be stored in
OS X's key chain so you won't have to enter it again.

Note: Importing your key pair using the EC2 console is not equivalent to ``cgcloud register-key``.
In order to be able to manage key pairs within a team, CGCloud needs to know the contents of the
public key for every team member's key pair. But EC2 only exposes a fingerprint via its REST API,
not the actual public key. For this purpose, CGCloud maintains those public keys in a special S3
bucket. Using ``cgcloud register-key`` makes sure that the public key is imported to EC2 *and*
uploaded to that special S3 bucket.

Also note that while that S3 bucket is globally visible and the public keys stored therein apply
across regions, the corresponding key pair in EC2 is only visible within a region. So when you
switch to a different region, you will have to use ``cgcloud register-key`` again to import the
key pair into that EC2 region.

Multi-user SSH logins
---------------------

By default, CGCloud only injects your public key into the boxes that it creates. This means that
only you can SSH into those boxes. If you want other people to be able to SSH into boxes created
by you, you can specify a list of key pairs to be injected into boxes. You can do so using the
``-k`` command line option to ``cgcloud create`` or by setting the ``CGCLOUD_KEYPAIRS``
environment variable. The latter will inject those key pairs by default into every box that you
create.

The default for ``-k`` is the special string ``__me__`` which is substituted with the name of the
current IAM user, i.e. you. This only works if your IAM user account and your SSH key pair in EC2
have the same name, a practice that is highly recommended. The ``cgcloud register-key`` command
follows that convention by default.

The most useful shortcut for ``-k`` and ``CGCLOUD_KEYPAIRS``, however, is to list the name of an
IAM group by prefixing the group name with ``@@``. Assuming that there exists an IAM group called
``developers``, adding the following line to your ``.profile`` or ``.bash_profile``::

    export CGCLOUD_KEYPAIRS="__me__ @@developers"

will inject your own key pair and the key pair of every user in the ``developers`` IAM group into
every box that you create from that point onwards. Obviously, this only works if EC2 key pairs
and IAM usernames are identical but as mentioned above, if you used ``cgcloud register-key`` this
should be the case. In the above example, if a user is removed from the IAM group ``developers``
or if their key pair is deleted from EC2, his or her key pair will be automatically removed from
every box that was created with that value of ``CGCLOUD_KEYPAIRS``.
Note that a change to ``CGCLOUD_KEYPAIRS`` does not affect boxes created with ``cgcloud recreate ROLE``. You will need to create a new image using ``cgcloud create -IT ROLE`` for the change to take effect. First steps =========== You're now ready to create your first *box* aka EC2 instance or VM:: cgcloud create generic-ubuntu-trusty-box This creates a Ubuntu Trusty instance from a stock Ubuntu AMI and then further customizes it by running additional commands via SSH. It'll take a few minutes. The ``generic-ubuntu-trusty-box`` argument denotes a *role*, i.e. a blueprint for an instance. You can use ``cgcloud list-roles`` to see the available roles. Now login to the newly created box:: cgcloud ssh generic-ubuntu-trusty-box The astute reader will notice that it is not necessary to remember the public hostname assigned to the box. As long as there is only one box per role, you can refer to the box by using the role's name. Otherwise you will need to disambiguate by specifying an ordinal using the ``-o`` option. Use ``cgcloud list`` to view all running instances and their ordinals. Also note that it isn't necessary to specify the account name of the administrative user to log in as, e.g. ``ec2-user``, ``root`` or ``ubuntu`` . The stock images for the various Linux distributions use different account names but CGCloud conveniently hides these differences. In order to copy files to and from the box you can use ``cgcloud rsync``:: cgcloud rsync generic-ubuntu-trusty-box -av ~/mystuff : The ``cgcloud rsync`` command behaves like a prefix to the ``rsync`` command with one important difference: With rsync you would specify the remote hostname followed by a colon, with ``cgcloud rsync`` you simply leave the hostname blank and only specify a colon followed by the remote path. If you omit the remote path, the home directory of the administrative user will be used. You can now stop the box with ``cgcloud stop``, start it again using ``cgcloud start`` or terminate it using ``cgcloud terminate``. Note while a stopped instance is much cheaper than a running instance, it is not free. Only the ``terminate`` command will reduce the operating cost incurred by the instance to zero. If you want to preserve the modifications you made to the box such that you can spawn another box in the future just like it, stop the box and then create an image of it using the ``cgcloud image`` command. You may then use the ``cgcloud recreate`` command to bring up a box. Philosophical remarks ===================== While creating an image is a viable mechanism to preserve manual modifications to a box, it is not the best possible way. The problem with it is that you will be stuck with the base image release the box was created from. You will also be stuck with the customizations performed by the particular version of ``cgcloud`` you were using. If either the base image or the role definition in CGCloud is updated, you will not benefit from those updates. Therefore, the preferred way of customizing a box is by *scripting* the customizations. This is typically done by creating a CGCloud plugin, i.e. a Python package with VM definitions aka ``roles``. A role is a subclass of the Box class while a box (aka VM aka EC2 instance) is an instance of that class. The prominent design patterns formed by Box and its derived classes are *Template Method* and *Mix-in*. The mix-in pattern introduces a sensitivity to Python's method resolution order so you need to be aware of that. 
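To make that remark concrete, here is a minimal, purely illustrative sketch; the class name is
invented and it simply combines a mix-in with a stock distro role::

    from cgcloud.core.apache import ApacheSoftwareBox
    from cgcloud.core.generic_boxes import GenericUbuntuTrustyBox

    # Mix-ins are listed before the concrete distro box so that their methods take
    # precedence in Python's method resolution order; overridden hooks should call
    # super() so that every class in the MRO gets its turn.
    class MyHypotheticalBox( ApacheSoftwareBox, GenericUbuntuTrustyBox ):
        pass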
Creating an image makes sense even if you didn't make any modifications after ``cgcloud create``. It captures all role-specific customizations made by ``cgcloud create``, thereby protecting them from changes in the role definition, the underlying base image and package updates in the Linux distribution used by the box. This is key to CGCloud's philosophy: It gives you a way to *create* an up-to-date image with all the latest software according to your requirements **and** it allows you reliably reproduce the exact result of that step. The fact that ``recreate`` is much faster than ``create`` is icing on the cake. Building & Testing ================== First, clone this repository and ``cd`` into it. To run the tests use * ``python setup.py nosetests --with-doctest``, * ``python setup.py test``, * ``nosetest`` or * ``python -m unittest discover -s src``. We prefer the way listed first as it installs all requirements **and** runs the tests under Nose, a test runner superior to ``unittest`` that can run tests in parallel and produces Xunit-like test reports. For example, on continuous integration we use :: virtualenv env env/bin/python setup.py nosetests --processes=16 --process-timeout=900 To make an editable_ install, also known as *development mode*, use ``python setup.py develop``. To remove the editable install ``python setup.py develop -u``. .. _editable: http://pythonhosted.org//setuptools/setuptools.html#development-mode Troubleshooting =============== * If ``cgcloud create`` gets stuck repeatedly printing ``Private key file is encrypted``, your private key is probably encrypted with a passphrase (as it should). You need to add the key to the SSH agent via ``ssh-add`` which should prompt you for the passphrase. On Mac OS X this can be made more convenient by running ``ssh-add -K`` or ``ssh-add -K /path/to/private/key`` once. This will automatically add the key to the agent every time you log in. The passphrase will be stored in OS X's key chain so won't have to enter it again. * If you get the following error:: ERROR: Exception: Incompatible ssh peer (no acceptable kex algorithm) ERROR: Traceback (most recent call last): ERROR: File "/usr/local/lib/python2.7/site-packages/paramiko/transport.py", line 1585, in run ERROR: self._handler_table[ptype](self, m) ERROR: File "/usr/local/lib/python2.7/site-packages/paramiko/transport.py", line 1664, in _negotiate_keys ERROR: self._parse_kex_init(m) ERROR: File "/usr/local/lib/python2.7/site-packages/paramiko/transport.py", line 1779, in _parse_kex_init ERROR: raise SSHException('Incompatible ssh peer (no acceptable kex algorithm)') ERROR: SSHException: Incompatible ssh peer (no acceptable kex algorithm) try upgrading paramiko:: pip install --upgrade paramiko See also https://github.com/fabric/fabric/issues/1212 Customization ============= CGCloud can be customized via plugins. A plugin is a Python module or package containing two functions:: def roles(): """ Return a list of roles, each role being a concrete subclass of cgcloud.core.box.Box """ return [ FooBox ] def command_classes(): """ Return a list of command classes, each class being a concrete subclass of cgcloud.lib.util.Command. """ return [ FooCommand ] If the plugin is a Python package, these two functions need to be defined in its ``__init__.py``. The box and command classes returned by these two functions can be defined in submodules of that package. 
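For orientation, here is a hedged sketch of what a complete single-module plugin might look like.
The module and class names are made up, and the example assumes the ``_list_packages_to_install``
hook that CGCloud's package-manager based roles provide::

    # my_cgcloud_plugin.py -- a hypothetical plugin module, importable from sys.path
    from cgcloud.core.generic_boxes import GenericUbuntuTrustyBox


    class FooBox( GenericUbuntuTrustyBox ):
        """ The stock Trusty role with a few extra packages baked in. """

        def _list_packages_to_install( self ):
            return super( FooBox, self )._list_packages_to_install( ) + [ 'git', 'tree' ]


    def roles( ):
        return [ FooBox ]


    def command_classes( ):
        # This example adds no custom commands
        return [ ]

Once the module is importable and listed in ``CGCLOUD_PLUGINS`` as described below, the new role
should appear in ``cgcloud list-roles`` as ``foo-box``.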
In order to be loaded by CGCloud, a plugin needs to be loadable from ``sys.path`` and its module path (foo.bar.blah) needs to be mentioned in the ``CGCLOUD_PLUGINS`` environment variable which should contains a colon-separated list of plugin module paths. You can also run CGCloud with the ``--script`` option and a path to a Python script. The script will be handled like a plugin, except that it should not define a ``command_classes()`` function since that function will not be invoked for a script plugin. In other words, a script plugin should only define roles, not commands. cgcloud-releases-1.6.0/core/main.py000077500000000000000000000003021301512357500171650ustar00rootroot00000000000000#!/usr/bin/env python2.7 import os import sys sys.path.append( os.path.join( os.path.dirname( __file__ ), 'src' ) ) from cgcloud.core.cli import main if __name__ == "__main__": main( ) cgcloud-releases-1.6.0/core/setup.cfg000066400000000000000000000002251301512357500175110ustar00rootroot00000000000000[pytest] # Look for any python file, the default of test_*.py wouldn't work for us python_files=*.py # Also run doctests addopts = --doctest-modules cgcloud-releases-1.6.0/core/setup.py000066400000000000000000000023751301512357500174120ustar00rootroot00000000000000from __future__ import absolute_import from setuptools import setup, find_packages from version import cgcloud_version, bd2k_python_lib_dep, boto_dep, fabric_dep setup( name='cgcloud-core', version=cgcloud_version, author='Hannes Schmidt', author_email='hannes@ucsc.edu', url='https://github.com/BD2KGenomics/cgcloud', description='Efficient and reproducible software deployment for EC2 instances', package_dir={ '': 'src' }, packages=find_packages( 'src', exclude=[ '*.test' ] ), namespace_packages=[ 'cgcloud' ], entry_points={ 'console_scripts': [ 'cgcloud = cgcloud.core.cli:main' ], }, install_requires=[ bd2k_python_lib_dep, 'cgcloud-lib==' + cgcloud_version, 'futures==3.0.4', # such that cgcloud-lib can use the futures backport for its thread_pool boto_dep, fabric_dep, 'paramiko==1.16.0', 'futures==3.0.4', 'PyYAML==3.11', 'subprocess32==3.2.7', 'tabulate==0.7.5'], test_suite='cgcloud.core.test' ) cgcloud-releases-1.6.0/core/src/000077500000000000000000000000001301512357500164605ustar00rootroot00000000000000cgcloud-releases-1.6.0/core/src/cgcloud/000077500000000000000000000000001301512357500201005ustar00rootroot00000000000000cgcloud-releases-1.6.0/core/src/cgcloud/__init__.py000066400000000000000000000000731301512357500222110ustar00rootroot00000000000000__import__( 'pkg_resources' ).declare_namespace( __name__ )cgcloud-releases-1.6.0/core/src/cgcloud/core/000077500000000000000000000000001301512357500210305ustar00rootroot00000000000000cgcloud-releases-1.6.0/core/src/cgcloud/core/__init__.py000066400000000000000000000045201301512357500231420ustar00rootroot00000000000000from cgcloud.core.deprecated import is_deprecated def __fail_deprecated( artifacts ): for artifact in artifacts: if is_deprecated( artifact ): raise DeprecationWarning( artifact ) return artifacts def roles( ): from cgcloud.core.generic_boxes import (GenericCentos6Box, GenericUbuntuPreciseBox, GenericUbuntuTrustyBox, GenericUbuntuVividBox, GenericFedora21Box, GenericFedora22Box) return __fail_deprecated( sorted( locals( ).values( ), key=lambda cls: cls.__name__ ) ) def command_classes( ): from cgcloud.core.commands import (ListRolesCommand, CreateCommand, RecreateCommand, StartCommand, StopCommand, RebootCommand, TerminateCommand, ImageCommand, ShowCommand, SshCommand, RsyncCommand, 
ListCommand, ListImagesCommand, DeleteImageCommand, RegisterKeyCommand, CleanupCommand, UpdateInstanceProfile, ResetSecurityCommand, ListOptionsCommand) from cgcloud.core.cluster_commands import (CreateClusterCommand, StartClusterCommand, StopClusterCommand, TerminateClusterCommand, SshClusterCommand, RsyncClusterCommand, GrowClusterCommand) return __fail_deprecated( sorted( locals( ).values( ), key=lambda cls: cls.__name__ ) ) cgcloud-releases-1.6.0/core/src/cgcloud/core/agent_box.py000066400000000000000000000163301301512357500233530ustar00rootroot00000000000000import base64 import zlib from bd2k.util.iterables import concat from fabric.context_managers import settings from fabric.operations import run from bd2k.util import shell, less_strict_bool from bd2k.util.strings import interpolate as fmt from cgcloud.core.init_box import AbstractInitBox from cgcloud.core.common_iam_policies import * from cgcloud.fabric.operations import sudo, pip from cgcloud.core.package_manager_box import PackageManagerBox from cgcloud.lib.util import abreviated_snake_case_class_name from cgcloud.core.box import fabric_task class AgentBox( PackageManagerBox, AbstractInitBox ): """ A box on which to install the agent. """ def other_accounts( self ): """ Returns the names of accounts for which, in addition to the account returned by Box.username(), authorized SSH keys should be managed by this agent. """ return [ ] agent_depends_on_pycrypto = False def __init__( self, ctx ): super( AgentBox, self ).__init__( ctx ) self._enable_agent = None @property def enable_agent( self ): if self._enable_agent is None: raise RuntimeError( "Enable_agent property hasn't been set. Must call _set_instance_options() before " "using this instance." ) return self._enable_agent def _set_instance_options( self, options ): super( AgentBox, self )._set_instance_options( options ) self._enable_agent = less_strict_bool( options.get( 'enable_agent' ) ) def _get_instance_options( self ): return self.__get_options( super( AgentBox, self )._get_instance_options( ) ) def _get_image_options( self ): return self.__get_options( super( AgentBox, self )._get_image_options( ) ) def __get_options( self, options ): return dict( options, enable_agent=str( self.enable_agent ) ) def _manages_keys_internally( self ): return self.enable_agent def _list_packages_to_install( self ): packages = super( AgentBox, self )._list_packages_to_install( ) if self.enable_agent: packages += [ 'python', 'python-pip' ] if self.agent_depends_on_pycrypto: packages += [ 'python-dev', 'autoconf', 'automake', 'binutils', 'gcc', 'make' ] return packages @fabric_task def _post_install_packages( self ): super( AgentBox, self )._post_install_packages( ) if self.enable_agent: self.__setup_agent( ) def _enable_agent_metrics( self ): """ Overide this in a subclass to enable reporting of additional CloudWatch metrics like disk space and memory. The metric collection requires the psutil package which in turn requires a compiler and Python headers to be installed. 
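        When this returns True, __setup_agent() below installs psutil into the agent's
        virtualenv and the agent starts a background thread that reports the metrics.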
""" return False def __setup_agent( self ): availability_zone = self.ctx.availability_zone namespace = self.ctx.namespace ec2_keypair_globs = ' '.join( shell.quote( _ ) for _ in self.ec2_keypair_globs ) accounts = ' '.join( [ self.admin_account( ) ] + self.other_accounts( ) ) admin_account = self.admin_account( ) run_dir = '/var/run/cgcloudagent' log_dir = '/var/log' install_dir = '/opt/cgcloudagent' # Lucid & CentOS 5 have an ancient pip pip( 'install --upgrade pip==1.5.2', use_sudo=True ) pip( 'install --upgrade virtualenv', use_sudo=True ) sudo( fmt( 'mkdir -p {install_dir}' ) ) sudo( fmt( 'chown {admin_account}:{admin_account} {install_dir}' ) ) # By default, virtualenv installs the latest version of pip. We want a specific # version, so we tell virtualenv not to install pip and then install that version of # pip using easy_install. run( fmt( 'virtualenv --no-pip {install_dir}' ) ) run( fmt( '{install_dir}/bin/easy_install pip==1.5.2' ) ) with settings( forward_agent=True ): venv_pip = install_dir + '/bin/pip' if self._enable_agent_metrics( ): pip( path=venv_pip, args='install psutil==3.4.1' ) with self._project_artifacts( 'agent' ) as artifacts: pip( path=venv_pip, args=concat( 'install', '--allow-external', 'argparse', # needed on CentOS 5 and 6 artifacts ) ) sudo( fmt( 'mkdir {run_dir}' ) ) script = self.__gunzip_base64_decode( run( fmt( '{install_dir}/bin/cgcloudagent' ' --init-script' ' --zone {availability_zone}' ' --namespace {namespace}' ' --accounts {accounts}' ' --keypairs {ec2_keypair_globs}' ' --user root' ' --group root' ' --pid-file {run_dir}/cgcloudagent.pid' ' --log-spill {log_dir}/cgcloudagent.out' '| gzip -c | base64' ) ) ) self._register_init_script( 'cgcloudagent', script ) self._run_init_script( 'cgcloudagent' ) def _get_iam_ec2_role( self ): iam_role_name, policies = super( AgentBox, self )._get_iam_ec2_role( ) if self.enable_agent: iam_role_name += '--' + abreviated_snake_case_class_name( AgentBox ) policies.update( dict( ec2_read_only=ec2_read_only_policy, s3_read_only=s3_read_only_policy, iam_read_only=iam_read_only_policy, sqs_agent=dict( Version="2012-10-17", Statement=[ dict( Effect="Allow", Resource="*", Action=[ "sqs:Get*", "sqs:List*", "sqs:CreateQueue", "sqs:SetQueueAttributes", "sqs:ReceiveMessage", "sqs:DeleteMessage" ] ) ] ), sns_agent=dict( Version="2012-10-17", Statement=[ dict( Effect="Allow", Resource="*", Action=[ "sns:Get*", "sns:List*", "sns:CreateTopic", "sns:Subscribe" ] ) ] ), cloud_watch=dict( Version='2012-10-17', Statement=[ dict( Effect='Allow', Resource='*', Action=[ 'cloudwatch:Get*', 'cloudwatch:List*', 'cloudwatch:PutMetricData' ] ) ] ) ) ) return iam_role_name, policies @staticmethod def __gunzip_base64_decode( s ): """ Fabric doesn't have get( ..., use_sudo=True ) [1] so we need to use sudo( 'cat ...' ) to download protected files. However it also munges line endings [2] so to be safe we sudo( 'cat ... | gzip | base64' ) and this method unravels that. [1]: https://github.com/fabric/fabric/issues/700 [2]: https://github.com/trehn/blockwart/issues/39 """ # See http://stackoverflow.com/questions/2695152/in-python-how-do-i-decode-gzip-encoding#answer-2695466 # for the scoop on 16 + zlib.MAX_WBITS. 
return zlib.decompress( base64.b64decode( s ), 16 + zlib.MAX_WBITS ) cgcloud-releases-1.6.0/core/src/cgcloud/core/apache.py000066400000000000000000000051331301512357500226250ustar00rootroot00000000000000import json import logging import os from bd2k.util.strings import interpolate as fmt from fabric.operations import run from cgcloud.core.box import Box from cgcloud.fabric.operations import sudo log = logging.getLogger( __name__ ) class ApacheSoftwareBox( Box ): """ A box to be mixed in to ease the hassle of installing Apache Software Foundation released software distros. """ def _install_apache_package( self, remote_path, install_dir ): """ Download the given package from an Apache download mirror and extract it to a child directory of the directory at the given path. :param str remote_path: the URL path of the package on the Apache download server and its mirrors. :param str install_dir: The path to a local directory in which to create the directory containing the extracted package. """ # TODO: run Fabric tasks with a different manager, so we don't need to catch SystemExit components = remote_path.split( '/' ) package, tarball = components[ 0 ], components[ -1 ] # Some mirrors may be down or serve crap, so we may need to retry this a couple of times. tries = iter( xrange( 3 ) ) while True: try: mirror_url = self.__apache_s3_mirror_url( remote_path ) if run( "curl -Ofs '%s'" % mirror_url, warn_only=True ).failed: mirror_url = self.__apache_official_mirror_url( remote_path ) run( "curl -Ofs '%s'" % mirror_url ) try: sudo( fmt( 'mkdir -p {install_dir}/{package}' ) ) sudo( fmt( 'tar -C {install_dir}/{package} ' '--strip-components=1 -xzf {tarball}' ) ) return finally: run( fmt( 'rm {tarball}' ) ) except SystemExit: if next( tries, None ) is None: raise else: log.warn( "Could not download or extract the package, retrying ..." 
) def __apache_official_mirror_url( self, remote_path ): url = 'http://www.apache.org/dyn/closer.cgi?path=%s&asjson=1' % remote_path mirrors = run( "curl -fs '%s'" % url ) mirrors = json.loads( mirrors ) mirror = mirrors[ 'preferred' ] url = mirror + remote_path return url def __apache_s3_mirror_url( self, remote_path ): file_name = os.path.basename( remote_path ) return 'https://s3-us-west-2.amazonaws.com/bd2k-artifacts/cgcloud/' + file_name cgcloud-releases-1.6.0/core/src/cgcloud/core/box.py000066400000000000000000002222501301512357500221750ustar00rootroot00000000000000import datetime import hashlib import socket # cluster ssh and rsync commands need thread-safe subprocess import subprocess32 import threading import time from StringIO import StringIO from abc import ABCMeta, abstractmethod from collections import namedtuple, Iterator from contextlib import closing, contextmanager from copy import copy from functools import partial, wraps from itertools import count, izip from operator import attrgetter from pipes import quote from bd2k.util.collections import OrderedSet from bd2k.util.exceptions import panic from bd2k.util.expando import Expando from bd2k.util.iterables import concat from boto import logging from boto.ec2.blockdevicemapping import BlockDeviceType, BlockDeviceMapping from boto.ec2.instance import Instance from boto.ec2.spotpricehistory import SpotPriceHistory from boto.exception import BotoServerError, EC2ResponseError from fabric.api import execute from fabric.context_managers import settings from fabric.operations import sudo, run, get, put from paramiko import SSHClient from paramiko.client import MissingHostKeyPolicy from cgcloud.core.project import project_artifacts from cgcloud.lib import aws_d32 from cgcloud.lib.context import Context from cgcloud.lib.ec2 import (ec2_instance_types, wait_instances_running, inconsistencies_detected, create_spot_instances, create_ondemand_instances, tag_object_persistently) from cgcloud.lib.ec2 import retry_ec2, a_short_time, a_long_time, wait_transition from cgcloud.lib.util import (UserError, camel_to_snake, ec2_keypair_fingerprint, private_to_public_key, mean, std_dev) log = logging.getLogger( __name__ ) # noinspection PyPep8Naming class fabric_task( object ): # A stack to stash the current fabric user before a new one is set via this decorator user_stack = [ ] # A reentrant lock to prevent multiple concurrent uses of fabric, which is not thread-safe lock = threading.RLock( ) def __new__( cls, user=None ): if callable( user ): return cls( )( user ) else: return super( fabric_task, cls ).__new__( cls ) def __init__( self, user=None ): self.user = user def __call__( self, function ): @wraps( function ) def wrapper( box, *args, **kwargs ): with self.lock: user = box.admin_account( ) if self.user is None else self.user user_stack = self.user_stack if user_stack and user_stack[ -1 ] == user: return function( box, *args, **kwargs ) else: user_stack.append( user ) try: task = partial( function, box, *args, **kwargs ) task.name = function.__name__ # noinspection PyProtectedMember return box._execute_task( task, user ) finally: assert user_stack.pop( ) == user return wrapper class Box( object ): """ Manage EC2 instances. Each instance of this class represents a single virtual machine (aka instance) in EC2. """ __metaclass__ = ABCMeta @classmethod def role( cls ): """ The name of the role performed by instances of this class, or rather by the EC2 instances they represent. 
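        The name is derived from the class name, e.g. a class called
        GenericUbuntuTrustyBox performs the role 'generic-ubuntu-trusty-box'.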
""" return camel_to_snake( cls.__name__, '-' ) @abstractmethod def admin_account( self ): """ Returns the name of a user that has sudo privileges. All administrative commands on the box are invoked via SSH as this user. """ raise NotImplementedError( ) def default_account( self ): """ Returns the name of the user with which interactive SSH session are started on the box. The default implementation forwards to self.admin_account(). """ return self.admin_account( ) def _image_name_prefix( self ): """ Returns the prefix to be used for naming images created from this box """ return self.role( ) class NoSuchImageException( RuntimeError ): pass @abstractmethod def _base_image( self, virtualization_type ): """ Returns the default base image that boxes performing this role should be booted from before they are being setup :rtype: boto.ec2.image.Image :raises Box.NoSuchImageException: if no image exists for this role and the given virtualization type """ raise NotImplementedError( ) @abstractmethod def setup( self, **kwargs ): """ Create the EC2 instance represented by this box, install OS and additional packages on, optionally create an AMI image of it, and/or terminate it. """ raise NotImplementedError( ) @abstractmethod def _ephemeral_mount_point( self, i ): """ Returns the absolute path to the directory at which the i-th ephemeral volume is mounted or None if no such mount point exists. Note that there must always be a mountpoint for the first volume, so this method always returns a value other than None if i is 0. We have this method because the mount point typically depends on the distribution, and even on the author of the image. """ raise NotImplementedError( ) def _manages_keys_internally( self ): """ Returns True if this box manages its own keypair, e.g. via the agent. """ return False def _populate_ec2_keypair_globs( self, ec2_keypair_globs ): """ Populate the given list with keypair globs defining the set of keypairs whose public component will be deployed to this box. :param ec2_keypair_globs: the suggested list of globs, to be modified in place """ pass def __init__( self, ctx ): """ Before invoking any methods on this object, you must ensure that a corresponding EC2 instance exists by calling either * prepare() and create() * bind() :type ctx: Context """ # The context to be used by the instance self.ctx = ctx # The image the instance was or will be booted from self.image_id = None # The SSH key pairs to be injected into the instance. self.ec2_keypairs = None # The globs from which to derive the SSH key pairs to be inhected into the instance self.ec2_keypair_globs = None # The instance represented by this box self.instance = None # The number of previous generations of this box. When an instance is booted from a stock # AMI, generation is 0. After that instance is set up and imaged and another instance is # booted from the resulting AMI, generation will be 1. self.generation = None # The ordinal of this box within a cluster of boxes. For boxes that don't join a cluster, # this will be 0 self.cluster_ordinal = None # The name of the cluster this box is a node of, or None if this box is not in a cluster. 
self.cluster_name = None # Role-specifc options for this box self.role_options = { } @property def instance_id( self ): return self.instance and self.instance.id @property def ip_address( self ): return self.instance and self.instance.ip_address @property def private_ip_address( self ): """ Set by bind() and create(), the private IP address of this instance """ return self.instance and self.instance.private_ip_address @property def host_name( self ): return self.instance and self.instance.public_dns_name @property def launch_time( self ): return self.instance and self.instance.launch_time @property def state( self ): return self.instance and self.instance.state @property def zone( self ): return self.instance and self.instance.placement @property def role_name( self ): return self.role( ) @property def instance_type( self ): return self.instance and self.instance.instance_type possible_root_devices = ('/dev/sda1', '/dev/sda', '/dev/xvda') # FIXME: this can probably be rolled into prepare() def _spec_block_device_mapping( self, spec, image ): """ Add, remove or modify the keyword arguments that will be passed to the EC2 run_instances request. :type image: boto.ec2.image.Image :type spec: dict """ for root_device in self.possible_root_devices: root_bdt = image.block_device_mapping.get( root_device ) if root_bdt: root_bdt.size = 10 root_bdt.snapshot_id = None root_bdt.encrypted = None root_bdt.delete_on_termination = True bdm = spec.setdefault( 'block_device_map', BlockDeviceMapping( ) ) bdm[ '/dev/sda1' ] = root_bdt for i in range( ec2_instance_types[ spec[ 'instance_type' ] ].disks ): device = '/dev/sd' + chr( ord( 'b' ) + i ) bdm[ device ] = BlockDeviceType( ephemeral_name='ephemeral%i' % i ) return raise RuntimeError( "Can't determine root volume from image" ) def __select_image( self, image_ref ): if isinstance( image_ref, int ): images = self.list_images( ) try: return images[ image_ref ] except IndexError: raise UserError( "No image with ordinal %i for role %s" % (image_ref, self.role( )) ) else: return self.ctx.ec2.get_image( image_ref ) def _security_group_name( self ): """ Override the security group name to be used for this box """ return self.role( ) def __setup_security_groups( self, vpc_id=None ): log.info( 'Setting up security group ...' ) name = self.ctx.to_aws_name( self._security_group_name( ) ) try: sg = self.ctx.ec2.create_security_group( name=name, vpc_id=vpc_id, description="Security group for box of role %s in namespace %s" % ( self.role( ), self.ctx.namespace) ) except EC2ResponseError as e: if e.error_code == 'InvalidGroup.Duplicate': filters = { 'group-name': name } if vpc_id is not None: filters[ 'vpc-id' ] = vpc_id for attempt in retry_ec2( retry_while=inconsistencies_detected, retry_for=10 * 60 ): with attempt: sgs = self.ctx.ec2.get_all_security_groups( filters=filters ) assert len( sgs ) == 1 sg = sgs[ 0 ] else: raise # It's OK to have two security groups of the same name as long as their VPC is distinct. assert vpc_id is None or sg.vpc_id == vpc_id rules = self._populate_security_group( sg.id ) for rule in rules: try: for attempt in retry_ec2( retry_while=inconsistencies_detected, retry_for=10 * 60 ): with attempt: assert self.ctx.ec2.authorize_security_group( group_id=sg.id, **rule ) except EC2ResponseError as e: if e.error_code == 'InvalidPermission.Duplicate': pass else: raise # FIXME: What about stale rules? I tried writing code that removes them but gave up. The # API in both boto and EC2 is just too brain-dead. log.info( '... 
finished setting up %s.', sg.id ) return [ sg.id ] def _populate_security_group( self, group_id ): """ :return: A list of rules, each rule is a dict with keyword arguments to boto.ec2.connection.EC2Connection.authorize_security_group, namely ip_protocol from_port to_port cidr_ip src_security_group_name src_security_group_owner_id src_security_group_group_id """ return [ dict( ip_protocol='tcp', from_port=22, to_port=22, cidr_ip='0.0.0.0/0' ), # This is necessary to allow PMTUD. A common symptom for PMTUD not working is that # TCP connections hang after a certain constant amount of data has been transferred # if the connection is between the instance and a host with jumbo frames enabled. dict( ip_protocol='icmp', from_port=3, to_port=4, cidr_ip='0.0.0.0/0' ) ] def __get_virtualization_types( self, instance_type, requested_vtype=None ): instance_vtypes = OrderedSet( ec2_instance_types[ instance_type ].virtualization_types ) role_vtypes = OrderedSet( self.supported_virtualization_types( ) ) supported_vtypes = instance_vtypes & role_vtypes if supported_vtypes: if requested_vtype is None: virtualization_types = list( supported_vtypes ) else: if requested_vtype in supported_vtypes: virtualization_types = [ requested_vtype ] else: raise UserError( 'Virtualization type %s not supported by role %s and instance ' 'type %s' % (requested_vtype, self.role( ), instance_type) ) else: raise RuntimeError( 'Cannot find any virtualization types supported by both role ' '%s and instance type %s' % (self.role( ), instance_type) ) return virtualization_types def __get_image( self, virtualization_types, image_ref=None ): if image_ref is None: for virtualization_type in virtualization_types: log.info( "Looking up default image for role %s and virtualization type %s, ... ", self.role( ), virtualization_type ) try: image = self._base_image( virtualization_type ) except self.NoSuchImageException as e: log.info( "... %s", e.message ) else: log.info( "... found %s.", image.id ) assert (image.virtualization_type in virtualization_types) return image raise RuntimeError( "Could not find suitable image for role %s", self.role( ) ) else: image = self.__select_image( image_ref ) if image.virtualization_type not in virtualization_types: raise RuntimeError( "Role and type support virtualization types %s but image only supports %s" % ( virtualization_types, image.virtualization_type) ) return image # Note: The name of all spot-related keyword arguments should begin with 'spot_' def prepare( self, ec2_keypair_globs, instance_type=None, image_ref=None, virtualization_type=None, spot_bid=None, spot_launch_group=None, spot_auto_zone=False, vpc_id=None, subnet_id=None, **options ): """ Prepare to create an EC2 instance represented by this box. Return a dictionary with keyword arguments to boto.ec2.connection.EC2Connection.run_instances() that can be used to create the instance. :param list[str] ec2_keypair_globs: The names of EC2 keypairs whose public key is to be injected into the instance to facilitate SSH logins. For the first listed keypair a matching private key needs to be present locally. Note that after the agent is installed on the box it will :param str instance_type: The type of instance to create, e.g. m1.small or t1.micro. :param int|str image_ref: The ordinal or AMI ID of the image to boot from. If None, the return value of self._base_image() will be used. 
:param str virtualization_type: The desired virtualization type to use for the instance :param int num_instances: The number of instances to prepare for :param float spot_bid: Dollar amount to bid for spot instances. If None, an on-demand instance will be created :param str spot_launch_group: Specify a launch group in your Spot instance request to tell Amazon EC2 to launch a set of Spot instances only if it can launch them all. In addition, if the Spot service must terminate one of the instances in a launch group (for example, if the Spot price rises above your bid price), it must terminate them all. :param bool spot_auto_zone: Use heuristic to automatically choose the "best" availability zone to launch spot instances in. Can't be combined with subnet_id. Overrides the availability zone in the context. :param: str vpc_id: The ID of a VPC to create the instance and associated security group in. If this argument is None or absent and the AWS account has a default VPC, the default VPC will be used. This is the most common case. If this argument is None or absent and the AWS account has EC2 Classic enabled and the selected instance type supports EC2 classic mode, no VPC will be used. If this argument is None or absent and the AWS account has no default VPC and an instance type that only supports VPC is used, an exception will be raised. :param: str subnet_id: The ID of a subnet to allocate instance's private IP address from. Can't be combined with spot_auto_zone. The specified subnet must belong to the specified VPC (or the default VPC if none was specified) and reside in the context's availability zone. If this argument is None or absent, a subnet will be chosen automatically. :param dict options: Additional, role-specific options can be specified. These options augment the options associated with the givem image. """ if spot_launch_group is not None and spot_bid is None: raise UserError( 'Need a spot bid when specifying a launch group for spot instances' ) if spot_auto_zone and spot_bid is None: raise UserError( 'Need a spot bid for automatically chosing a zone for spot instances' ) if subnet_id is not None and spot_auto_zone: raise UserError( 'Cannot automatically choose an availability zone for spot instances ' 'while placing them in an explicitly defined subnet since the subnet ' 'implies a specific availability zone.' ) if self.instance_id is not None: raise AssertionError( 'Instance already bound or created' ) if instance_type is None: instance_type = self.recommended_instance_type( ) virtualization_types = self.__get_virtualization_types( instance_type, virtualization_type ) image = self.__get_image( virtualization_types, image_ref ) self.image_id = image.id zone = self.ctx.availability_zone security_group_ids = self.__setup_security_groups( vpc_id=vpc_id ) if vpc_id is not None and subnet_id is None: log.info( 'Looking up suitable subnet for VPC %s in zone %s.', vpc_id, zone ) subnets = self.ctx.vpc.get_all_subnets( filters={ 'vpc-id': vpc_id, 'availability-zone': zone } ) if subnets: subnet_id = subnets[ 0 ].id else: raise UserError( 'There is no subnet belonging to VPC %s in availability zone %s. ' 'Please create a subnet manually using the VPC console.' % (vpc_id, zone) ) options = dict( image.tags, **options ) self._set_instance_options( options ) self._populate_ec2_keypair_globs( ec2_keypair_globs ) ec2_keypairs = self.ctx.expand_keypair_globs( ec2_keypair_globs ) if not ec2_keypairs: raise UserError( "No key pairs matching '%s' found." 
% ' '.join( ec2_keypair_globs ) ) if ec2_keypairs[ 0 ].name != ec2_keypair_globs[ 0 ]: raise UserError( "The first key pair name can't be a glob." ) self.ec2_keypairs = ec2_keypairs self.ec2_keypair_globs = ec2_keypair_globs spec = Expando( instance_type=instance_type, key_name=ec2_keypairs[ 0 ].name, placement=zone, security_group_ids=security_group_ids, subnet_id=subnet_id, instance_profile_arn=self.get_instance_profile_arn( ) ) self._spec_block_device_mapping( spec, image ) self._spec_spot_market( spec, bid=spot_bid, launch_group=spot_launch_group, auto_zone=spot_auto_zone ) return spec def _spec_spot_market( self, spec, bid, launch_group, auto_zone ): if bid is not None: if not ec2_instance_types[ spec.instance_type ].spot_availability: raise UserError( 'The instance type %s is not available on the spot market.' % spec.instance_type ) if auto_zone: spec.placement = self._optimize_spot_bid( spec.instance_type, bid ) spec.price = bid if launch_group is not None: spec.launch_group = self.ctx.to_aws_name( launch_group ) ZoneTuple = namedtuple( 'ZoneTuple', [ 'name', 'price_deviation' ] ) @classmethod def _choose_spot_zone( cls, zones, bid, spot_history ): """ Returns the zone to put the spot request based on, in order of priority: 1) zones with prices currently under the bid 2) zones with the most stable price :param list[boto.ec2.zone.Zone] zones: :param float bid: :param list[boto.ec2.spotpricehistory.SpotPriceHistory] spot_history: :rtype: str :return: the name of the selected zone >>> from collections import namedtuple >>> FauxHistory = namedtuple( 'FauxHistory', [ 'price', 'availability_zone' ] ) >>> ZoneTuple = namedtuple( 'ZoneTuple', [ 'name' ] ) >>> zones = [ ZoneTuple( 'us-west-2a' ), ZoneTuple( 'us-west-2b' ) ] >>> spot_history = [ FauxHistory( 0.1, 'us-west-2a' ), \ FauxHistory( 0.2,'us-west-2a'), \ FauxHistory( 0.3,'us-west-2b'), \ FauxHistory( 0.6,'us-west-2b')] >>> # noinspection PyProtectedMember >>> Box._choose_spot_zone( zones, 0.15, spot_history ) 'us-west-2a' >>> spot_history=[ FauxHistory( 0.3, 'us-west-2a' ), \ FauxHistory( 0.2, 'us-west-2a' ), \ FauxHistory( 0.1, 'us-west-2b'), \ FauxHistory( 0.6, 'us-west-2b') ] >>> # noinspection PyProtectedMember >>> Box._choose_spot_zone(zones, 0.15, spot_history) 'us-west-2b' >>> spot_history={ FauxHistory( 0.1, 'us-west-2a' ), \ FauxHistory( 0.7, 'us-west-2a' ), \ FauxHistory( 0.1, "us-west-2b" ), \ FauxHistory( 0.6, 'us-west-2b' ) } >>> # noinspection PyProtectedMember >>> Box._choose_spot_zone(zones, 0.15, spot_history) 'us-west-2b' """ # Create two lists of tuples of form: [ (zone.name, std_deviation), ... ] one for zones # over the bid price and one for zones under bid price. Each are sorted by increasing # standard deviation values. # markets_under_bid, markets_over_bid = [ ], [ ] for zone in zones: zone_histories = filter( lambda zone_history: zone_history.availability_zone == zone.name, spot_history ) price_deviation = std_dev( [ history.price for history in zone_histories ] ) recent_price = zone_histories[ 0 ] zone_tuple = cls.ZoneTuple( name=zone.name, price_deviation=price_deviation ) (markets_over_bid, markets_under_bid)[ recent_price.price < bid ].append( zone_tuple ) return min( markets_under_bid or markets_over_bid, key=attrgetter( 'price_deviation' ) ).name def _optimize_spot_bid( self, instance_type, spot_bid ): """ Check whether the bid is sane and makes an effort to place the instance in a sensible zone. 
""" spot_history = self._get_spot_history( instance_type ) self._check_spot_bid( spot_bid, spot_history ) zones = self.ctx.ec2.get_all_zones( ) most_stable_zone = self._choose_spot_zone( zones, spot_bid, spot_history ) log.info( "Placing spot instances in zone %s.", most_stable_zone ) return most_stable_zone @staticmethod def _check_spot_bid( spot_bid, spot_history ): """ Prevents users from potentially over-paying for instances Note: this checks over the whole region, not a particular zone :param spot_bid: float :type spot_history: list[SpotPriceHistory] :raises UserError: if bid is > 2X the spot price's average >>> from collections import namedtuple >>> FauxHistory = namedtuple( "FauxHistory", [ "price", "availability_zone" ] ) >>> spot_data = [ FauxHistory( 0.1, "us-west-2a" ), \ FauxHistory( 0.2, "us-west-2a" ), \ FauxHistory( 0.3, "us-west-2b" ), \ FauxHistory( 0.6, "us-west-2b" ) ] >>> # noinspection PyProtectedMember >>> Box._check_spot_bid( 0.1, spot_data ) >>> # noinspection PyProtectedMember # >>> Box._check_spot_bid( 2, spot_data ) Traceback (most recent call last): ... UserError: Your bid $ 2.000000 is more than double this instance type's average spot price ($ 0.300000) over the last week """ average = mean( [ datum.price for datum in spot_history ] ) if spot_bid > average * 2: log.warn( "Your bid $ %f is more than double this instance type's average " "spot price ($ %f) over the last week", spot_bid, average ) def _get_spot_history( self, instance_type ): """ Returns list of 1,000 most recent spot market data points represented as SpotPriceHistory objects. Note: The most recent object/data point will be first in the list. :rtype: list[SpotPriceHistory] """ one_week_ago = datetime.datetime.now( ) - datetime.timedelta( days=7 ) spot_data = self.ctx.ec2.get_spot_price_history( start_time=one_week_ago.isoformat( ), instance_type=instance_type, product_description="Linux/UNIX" ) spot_data.sort( key=attrgetter( "timestamp" ), reverse=True ) return spot_data def create( self, spec, num_instances=1, wait_ready=True, terminate_on_error=True, spot_timeout=None, spot_tentative=False, cluster_ordinal=0, executor=None ): """ Create the EC2 instance represented by this box, and optionally, any number of clones of that instance. Optionally wait for the instances to be ready. If this box was prepared to launch clones, and multiple instances were indeed launched by EC2, clones of this Box instance will be created, one clone for each additional instance. This Box instance will represent the first EC2 instance while the clones will represent the additional EC2 instances. The given executor will be used to handle post-creation activity on each instance. :param spec: a dictionary with keyword arguments to request_spot_instances, if the 'price' key is present, or run_instances otherwise. :param bool wait_ready: whether to wait for all instances to be ready. The waiting for an instance will be handled as a task that is submitted to the given executor. :param bool terminate_on_error: If True, terminate instance on errors. If False, never terminate any instances. Unfulfilled spot requests will always be cancelled. :param cluster_ordinal: the cluster ordinal to be assigned to the first instance or an iterable yielding ordinals for the instances :param executor: a callable that accepts two arguments: a task function and a sequence of task arguments. The executor applies the task function to the given sequence of arguments. It may choose to do so immediately, i.e. 
synchronously or at a later time, i.e asynchronously. If None, a synchronous executor will be used by default. :rtype: list[Box] """ if isinstance( cluster_ordinal, int ): cluster_ordinal = count( start=cluster_ordinal ) if executor is None: def executor( f, args ): f( *args ) adopters = iter( concat( self, self.clones( ) ) ) boxes = [ ] pending_ids = set( ) pending_ids_lock = threading.RLock( ) def adopt( adoptees ): """ :type adoptees: Iterator[Instance] """ pending_ids.update( i.id for i in adoptees ) for box, instance in izip( adopters, adoptees ): box.adopt( instance, next( cluster_ordinal ) ) if not wait_ready: # Without wait_ready, an instance is done as soon as it has been adopted. pending_ids.remove( instance.id ) boxes.append( box ) try: if 'price' in spec: price = spec.price del spec.price tags = dict(cluster_name=self.cluster_name) if self.cluster_name else None # Spot requests are fulfilled in batches. A batch could consist of one instance, # all requested instances or a subset thereof. As soon as a batch comes back from # _create_spot_instances(), we will want to adopt every instance in it. Part of # adoption is tagging which is crucial for the boot code running on cluster nodes. for batch in create_spot_instances( self.ctx.ec2, price, self.image_id, spec, num_instances=num_instances, timeout=spot_timeout, tentative=spot_tentative, tags=tags): adopt( batch ) else: adopt( create_ondemand_instances( self.ctx.ec2, self.image_id, spec, num_instances=num_instances ) ) if spot_tentative: if not boxes: return boxes else: assert boxes assert boxes[ 0 ] is self if wait_ready: def wait_ready_callback( box ): try: # noinspection PyProtectedMember box._wait_ready( { 'pending' }, first_boot=True ) except: with panic( log ): if terminate_on_error: log.warn( 'Terminating instance ...' ) self.ctx.ec2.terminate_instances( [ box.instance_id ] ) finally: with pending_ids_lock: pending_ids.remove( box.instance_id ) self._batch_wait_ready( boxes, executor, wait_ready_callback ) except: if terminate_on_error: with panic( log ): with pending_ids_lock: unfinished_ids_list = list( pending_ids ) if unfinished_ids_list: log.warn( 'Terminating instances ...' ) self.ctx.ec2.terminate_instances( unfinished_ids_list ) raise else: return boxes def _batch_wait_ready( self, boxes, executor, callback ): if len( boxes ) == 1: # For a single instance, self._wait_ready will wait for the instance to change to # running ... executor( callback, (self,) ) else: # .. but for multiple instances it is more efficient to wait for all of the # instances together. boxes_by_id = { box.instance_id: box for box in boxes } # Wait for instances to enter the running state and as they do, pass them to # the executor where they are waited on concurrently. num_running, num_other = 0, 0 # TODO: timeout instances = (box.instance for box in boxes) for instance in wait_instances_running( self.ctx.ec2, instances ): box = boxes_by_id[ instance.id ] # equivalent to the instance.update() done in _wait_ready() box.instance = instance if instance.state == 'running': executor( callback, (box,) ) num_running += 1 else: log.info( 'Instance %s in unexpected state %s.', instance.id, instance.state ) num_other += 1 assert num_running + num_other == len( boxes ) if not num_running: raise RuntimeError( 'None of the instances entered the running state.' ) if num_other: log.warn( '%i instance(s) entered a state other than running.', num_other ) def clones( self ): """ Generates infinite numbers of clones of this box. 
:rtype: Iterator[Box] """ while True: clone = copy( self ) clone.unbind( ) yield clone def adopt( self, instance, cluster_ordinal ): """ Link the given newly created EC2 instance with this box. """ log.info( '... created %s.', instance.id ) self.instance = instance self.cluster_ordinal = cluster_ordinal if self.cluster_name is None: self.cluster_name = self.instance_id self._on_instance_created( ) def _set_instance_options( self, options ): """ Initialize optional instance attributes from the given dictionary mapping option names to option values. The keys in the dictionary must be strings, the values can be any type. This method handles the conversion of values from string transparently. If a key is missing this method will provide a default. """ # Relies on idempotence of int self.generation = int( options.get( 'generation' ) or 0 ) self.cluster_ordinal = int( options.get( 'cluster_ordinal' ) or 0 ) self.cluster_name = options.get( 'cluster_name' ) for option in self.get_role_options( ): value = options.get( option.name ) if value is not None: self.role_options[ option.name ] = option.type( value ) def _get_instance_options( self ): """ Return a dictionary specifying the tags an instance of this role should be tagged with. Keys and values should be strings. """ options = dict( Name=self.ctx.to_aws_name( self.role( ) ), generation=str( self.generation ), cluster_ordinal=str( self.cluster_ordinal ), cluster_name=self.cluster_name ) for option in self.get_role_options( ): value = self.role_options.get( option.name ) if value is not None: options[ option.name ] = option.repr( value ) return options def _get_image_options( self ): """ Return a dictionary specifying the tags an image of an instance of this role should be tagged with. Keys and values should be strings. """ options = dict( generation=str( self.generation + 1 ) ) for option in self.get_role_options( ): if option.inherited: value = self.role_options.get( option.name ) if value is not None: options[ option.name ] = option.repr( value ) return options # noinspection PyClassHasNoInit class RoleOption( namedtuple( "_RoleOption", 'name type repr help inherited' ) ): """ Describes a role option, i.e. an instance option that is specific to boxes of a particular role. Name is the name of the option, type is a function converting an option value from a string to the option's native type, repr is the inverse of type, help is a help text describing the option and inherited is a boolean controlling whether the option is inherited by images created from an instance. """ def to_dict( self ): return self._asdict( ) def type( self, value ): try: # noinspection PyUnresolvedReferences return super( Box.RoleOption, self ).type( value ) except ValueError: raise UserError( "'%s' is not a valid value for option %s" % (value, self.name) ) @classmethod def get_role_options( cls ): """ Return a list of RoleOption objects, one for each supported option supported by this role. :rtype: list[Box.RoleOption] """ return [ ] def _on_instance_created( self ): """ Invoked right after an instance was created. """ log.info( 'Tagging instance ... ' ) tags_dict = self._get_instance_options( ) tag_object_persistently( self.instance, tags_dict ) log.info( '... instance tagged %r.', tags_dict ) def _on_instance_running( self, first_boot ): """ Invoked while creating, binding or starting an instance, right after the instance entered the running state. 
:param first_boot: True if this is the first time the instance enters the running state since its creation """ pass def _on_instance_ready( self, first_boot ): """ Invoked while creating, binding or starting an instance, right after the instance was found to be ready. :param first_boot: True if the instance was booted for the first time, i.e. if this is the first time the instance becomes ready since its creation, False if the instance was booted but not for the first time, None if it is not clear whether the instance was booted, e.g. after binding. """ if first_boot and not self._manages_keys_internally( ): self.__inject_authorized_keys( self.ec2_keypairs[ 1: ] ) def bind( self, instance=None, instance_id=None, ordinal=None, cluster_name=None, wait_ready=True, verbose=True ): """ Verify that the EC instance represented by this box exists and, optionally, wait until it is ready, i.e. that it is is running, has a public host name and can be connected to via SSH. If the box doesn't exist and exception will be raised. :param wait_ready: if True, wait for the instance to be ready """ if wait_ready: verbose = True if self.instance is None: if verbose: log.info( 'Binding to instance ... ' ) if instance is not None: assert ordinal is None and cluster_name is None and instance_id is None name = instance.tags[ 'Name' ] assert self.ctx.contains_aws_name( name ) assert self.ctx.base_name( self.ctx.from_aws_name( name ) ) == self.role( ) elif instance_id is not None: assert ordinal is None try: instance = self.ctx.ec2.get_only_instances( instance_id )[ 0 ] except EC2ResponseError as e: if e.error_code.startswith( 'InvalidInstanceID' ): raise UserError( "No instance with ID '%s'." % instance_id ) try: name = instance.tags[ 'Name' ] except KeyError: raise UserError( "Instance %s does not have a Name tag." % instance.id ) if not self.ctx.try_contains_aws_name( name ): raise UserError( "Instance %s with Name tag '%s' is not in namespace %s." % (instance.id, name, self.ctx.namespace) ) if self.ctx.base_name( self.ctx.from_aws_name( name ) ) != self.role( ): raise UserError( "Instance %s with Name tag '%s' is not a %s." % (instance.id, name, self.role( )) ) if cluster_name is not None: actual_cluster_name = instance.tags.get( 'cluster_name' ) if actual_cluster_name is not None and actual_cluster_name != cluster_name: raise UserError( "Instance %s has cluster name '%s', not '%s'." % (instance.id, actual_cluster_name, cluster_name) ) else: instance = self.__get_instance_by_ordinal( ordinal=ordinal, cluster_name=cluster_name ) self.instance = instance self.image_id = self.instance.image_id options = dict( self.instance.tags ) self._set_instance_options( options ) if wait_ready: self._wait_ready( from_states={ 'pending' }, first_boot=None ) else: if verbose: log.info( '... bound to %s.', self.instance.id ) return self def unbind( self ): """ Unset all state in this box that would be specific to an individual EC2 instance. This method prepares this box for being bound to another EC2 instance. """ self.instance = None self.cluster_ordinal = None def list( self, wait_ready=False, **tags ): return [ box.bind( instance=instance, wait_ready=wait_ready, verbose=False ) for box, instance in izip( concat( self, self.clones( ) ), self.__list_instances( **tags ) ) ] def __list_instances( self, **tags ): """ Lookup and return a list of instance performing this box' role. 
:rtype: list[Instance] """ name = self.ctx.to_aws_name( self.role( ) ) filters = { 'tag:Name': name } for k, v in tags.iteritems( ): if v is not None: filters[ 'tag:' + k ] = v reservations = self.ctx.ec2.get_all_instances( filters=filters ) instances = [ i for r in reservations for i in r.instances if i.state != 'terminated' ] instances.sort( key=self.__ordinal_sort_key ) return instances def __ordinal_sort_key( self, instance ): return instance.launch_time, instance.private_ip_address, instance.id def __get_instance_by_ordinal( self, ordinal=None, cluster_name=None ): """ Get the n-th instance that performs this box' role :param ordinal: the index of the instance based on the ordering by launch_time :rtype: boto.ec2.instance.Instance """ instances = self.__list_instances( cluster_name=cluster_name ) if not instances: raise UserError( "No instance performing role %s in namespace %s" % ( self.role( ), self.ctx.namespace) if cluster_name is None else "No instance performing role %s in cluster %s and namespace %s" % ( self.role( ), cluster_name, self.ctx.namespace) ) if ordinal is None: if len( instances ) > 1: raise UserError( "More than one instance performing role '%s'. Please specify an " "ordinal, a cluster name or both to disambiguate." % self.role( ) ) ordinal = 0 try: return instances[ ordinal ] except IndexError: raise UserError( "No instance performing role %s in namespace %s has ordinal %i" % ( self.role( ), self.ctx.namespace, ordinal) if cluster_name is None else "No instance performing role %s in cluster %s and namespace %s has ordinal %i" % ( self.role( ), cluster_name, self.ctx.namespace, ordinal) ) def _image_block_device_mapping( self ): """ Returns the block device mapping to be used for the image. The base implementation returns None, indicating that all volumes attached to the instance should be included in the image. """ return None def image( self ): """ Create an image (AMI) of the EC2 instance represented by this box and return its ID. The EC2 instance needs to use an EBS-backed root volume. The box must be stopped or an exception will be raised. """ # We've observed instance state to flap from stopped back to stoppping. As a best effort # we wait for it to flap back to stopped. wait_transition( self.instance, { 'stopping' }, 'stopped' ) log.info( "Creating image ..." ) timestamp = time.strftime( '%Y-%m-%d_%H-%M-%S' ) image_name = self.ctx.to_aws_name( self._image_name_prefix( ) + "_" + timestamp ) image_id = self.ctx.ec2.create_image( instance_id=self.instance_id, name=image_name, block_device_mapping=self._image_block_device_mapping( ) ) while True: try: image = self.ctx.ec2.get_image( image_id ) tag_object_persistently( image, self._get_image_options( ) ) wait_transition( image, { 'pending' }, 'available' ) log.info( "... created %s (%s).", image.id, image.name ) break except self.ctx.ec2.ResponseError as e: # FIXME: I don't think get_image can throw this, it should be outside the try if e.error_code != 'InvalidAMIID.NotFound': raise # There seems to be another race condition in EC2 that causes a freshly created image to # not be included in queries other than by AMI ID. log.info( 'Checking if image %s is discoverable ...' % image_id ) while True: if image_id in (_.id for _ in self.list_images( )): log.info( '... image now discoverable.' ) break log.info( '... image %s not yet discoverable, trying again in %is ...', image_id, a_short_time ) time.sleep( a_short_time ) return image_id def stop( self ): """ Stop the EC2 instance represented by this box. 
Stopped instances can be started later using :py:func:`Box.start`. """ self.__assert_state( 'running' ) log.info( 'Stopping instance ...' ) self.ctx.ec2.stop_instances( [ self.instance_id ] ) wait_transition( self.instance, from_states={ 'running', 'stopping' }, to_state='stopped' ) log.info( '... instance stopped.' ) def start( self ): """ Start the EC2 instance represented by this box """ self.__assert_state( 'stopped' ) log.info( 'Starting instance, ... ' ) self.ctx.ec2.start_instances( [ self.instance_id ] ) # Not 100% sure why from_states includes 'stopped' but I think I noticed that there is a # short interval after start_instances returns during which the instance is still in # stopped before it goes into pending self._wait_ready( from_states={ 'stopped', 'pending' }, first_boot=False ) def reboot( self ): """ Reboot the EC2 instance represented by this box. When this method returns, the EC2 instance represented by this object will likely have different public IP and hostname. """ # There is reboot_instances in the API but reliably detecting the # state transitions is hard. So we stop and start instead. self.stop( ) self.start( ) def terminate( self, wait=True ): """ Terminate the EC2 instance represented by this box. """ if self.instance_id is not None: instance = self.instance if instance.state != 'terminated': log.info( 'Terminating instance ...' ) self.ctx.ec2.terminate_instances( [ self.instance_id ] ) if wait: wait_transition( instance, from_states={ 'running', 'shutting-down', 'stopped' }, to_state='terminated' ) log.info( '... instance terminated.' ) def _attach_volume( self, volume_helper, device ): volume_helper.attach( self.instance_id, device ) def _execute_task( self, task, user ): """ Execute the given Fabric task on the EC2 instance represented by this box """ if not callable( task ): task = task( self ) # using IP instead of host name yields more compact log lines # host = "%s@%s" % ( user, self.ip_address ) with settings( user=user ): host = self.ip_address return execute( task, hosts=[ host ] )[ host ] def __assert_state( self, expected_state ): """ Raises a UserError if the instance represented by this object is not in the given state. :param expected_state: the expected state :return: the instance :rtype: boto.ec2.instance.Instance """ actual_state = self.instance.state if actual_state != expected_state: raise UserError( "Expected instance state '%s' but got '%s'" % (expected_state, actual_state) ) def _wait_ready( self, from_states, first_boot ): """ Wait until the given instance transistions from stopped or pending state to being fully running and accessible via SSH. :param from_states: the set of states the instance may be in when this methods is invoked, any other state will raise an exception. :type from_states: set of str :param first_boot: True if the instance is currently booting for the first time, None if the instance isn't booting, False if the instance is booting but not for the first time. """ log.info( "... waiting for instance %s ... ", self.instance.id ) wait_transition( self.instance, from_states, 'running' ) self._on_instance_running( first_boot ) log.info( "... running, waiting for assignment of public IP ... " ) self.__wait_public_ip_assigned( self.instance ) log.info( "... assigned, waiting for SSH port ... " ) self.__wait_ssh_port_open( ) log.info( "... open ... " ) if first_boot is not None: log.info( "... testing SSH ... " ) self.__wait_ssh_working( ) log.info( "... SSH working ..., " ) log.info( "... instance ready." 
) self._on_instance_ready( first_boot ) def __wait_public_ip_assigned( self, instance ): """ Wait until the instances has a public IP address assigned to it. :type instance: boto.ec2.instance.Instance """ while not instance.ip_address or not instance.public_dns_name: time.sleep( a_short_time ) instance.update( ) def __wait_ssh_port_open( self ): """ Wait until the instance represented by this box is accessible via SSH. :return: the number of unsuccessful attempts to connect to the port before a the first success """ for i in count( ): s = socket.socket( socket.AF_INET, socket.SOCK_STREAM ) try: s.settimeout( a_short_time ) s.connect( (self.ip_address, 22) ) return i except socket.error: pass finally: s.close( ) class IgnorePolicy( MissingHostKeyPolicy ): def missing_host_key( self, client, hostname, key ): pass def __wait_ssh_working( self ): while True: client = None try: client = self._ssh_client( ) stdin, stdout, stderr = client.exec_command( 'echo hi' ) try: line = stdout.readline( ) if line == 'hi\n': return else: raise AssertionError( "Read unexpected line '%s'" % line ) finally: stdin.close( ) stdout.close( ) stderr.close( ) except AssertionError: raise except KeyboardInterrupt: raise except Exception as e: logging.info( e ) finally: if client is not None: client.close( ) time.sleep( a_short_time ) def _ssh_client( self ): client = SSHClient( ) client.set_missing_host_key_policy( self.IgnorePolicy( ) ) client.connect( hostname=self.ip_address, username=self.admin_account( ), timeout=a_short_time ) return client def ssh( self, user=None, command=None ): if command is None: command = [ ] status = subprocess32.call( self._ssh_args( user, command ) ) # According to ssh(1), SSH returns the status code of the remote process or 255 if # something else went wrong. Python exits with status 1 if an uncaught exception is # thrown. Since this is also the default status code that most other programs return on # failure, there is no easy way to distinguish between failures in programs run remotely # by cgcloud ssh and something being wrong in cgcloud. if status == 255: raise RuntimeError( 'ssh failed' ) return status def rsync( self, args, user=None, ssh_opts=None ): ssh_args = self._ssh_args( user, [ ] ) if ssh_opts: ssh_args.append( ssh_opts ) subprocess32.check_call( [ 'rsync', '-e', ' '.join( ssh_args ) ] + args ) def _ssh_args( self, user, command ): if user is None: user = self.default_account( ) # Using host name instead of IP allows for more descriptive known_hosts entries and # enables using wildcards like *.compute.amazonaws.com Host entries in ~/.ssh/config. 
return [ 'ssh', '%s@%s' % (user, self.host_name), '-A' ] + command @fabric_task def __inject_authorized_keys( self, ec2_keypairs ): with closing( StringIO( ) ) as authorized_keys: get( local_path=authorized_keys, remote_path='~/.ssh/authorized_keys' ) authorized_keys.seek( 0 ) ssh_pubkeys = set( l.strip( ) for l in authorized_keys.readlines( ) ) for ec2_keypair in ec2_keypairs: ssh_pubkey = self.__download_ssh_pubkey( ec2_keypair ) if ssh_pubkey: ssh_pubkeys.add( ssh_pubkey ) authorized_keys.seek( 0 ) authorized_keys.truncate( ) authorized_keys.write( '\n'.join( ssh_pubkeys ) ) authorized_keys.write( '\n' ) put( local_path=authorized_keys, remote_path='~/.ssh/authorized_keys' ) def __download_ssh_pubkey( self, keypair ): try: return self.ctx.download_ssh_pubkey( keypair ).strip( ) except UserError as e: log.warn( 'Exception while downloading SSH public key from S3', e ) return None @fabric_task def _propagate_authorized_keys( self, user, group=None ): """ Ensure that the given user account accepts SSH connections for the same keys as the current user. The current user must have sudo. :param user: the name of the user to propagate the current user's authorized keys to :param group: the name of the group that should own the files and directories that are created by this method, defaults to the default group of the given user """ if group is None: group = run( "getent group $(getent passwd %s | cut -d : -f 4) " "| cut -d : -f 1" % user ) args = dict( src_user=self.admin_account( ), dst_user=user, dst_group=group ) sudo( 'install -d ~{dst_user}/.ssh ' '-m 755 -o {dst_user} -g {dst_group}'.format( **args ) ) sudo( 'install -t ~{dst_user}/.ssh ~{src_user}/.ssh/authorized_keys ' '-m 644 -o {dst_user} -g {dst_group}'.format( **args ) ) @classmethod def recommended_instance_type( cls ): return 't2.micro' if 'hvm' in cls.supported_virtualization_types( ) else 't1.micro' @classmethod def supported_virtualization_types( cls ): """ Returns the virtualization types supported by this box in order of preference, preferred types first. """ return [ 'hvm', 'paravirtual' ] def list_images( self ): """ :rtype: list of boto.ec2.image.Image """ image_name_pattern = self.ctx.to_aws_name( self._image_name_prefix( ) + '_' ) + '*' images = self.ctx.ec2.get_all_images( filters={ 'name': image_name_pattern } ) images.sort( key=attrgetter( 'name' ) ) # that sorts by date, effectively return images @abstractmethod def _register_init_command( self, cmd ): """ Register a shell command to be executed towards the end of system initialization. The command should work when set -e is in effect. 
""" raise NotImplementedError( ) def get_instance_profile_arn( self ): """ Prepares the instance profile to be used for this box and returns its ARN """ iam_role_name, policies = self._get_iam_ec2_role( ) aws_role_name = self.ctx.setup_iam_ec2_role( self._hash_iam_role_name( iam_role_name ), policies ) log.info( 'Set up instance profile using hashed IAM role name %s, derived from %s.', aws_role_name, iam_role_name ) aws_instance_profile_name = self.ctx.to_aws_name( self.role( ) ) try: profile = self.ctx.iam.get_instance_profile( aws_instance_profile_name ) except BotoServerError as e: if e.status == 404: profile = self.ctx.iam.create_instance_profile( aws_instance_profile_name ) profile = profile.create_instance_profile_response.create_instance_profile_result else: raise else: profile = profile.get_instance_profile_response.get_instance_profile_result profile = profile.instance_profile profile_arn = profile.arn # Note that Boto does not correctly parse the result from get/create_instance_profile. # The 'roles' field should be an instance of ListElement, whereas it currently is a # simple, dict-like Element. We can check a dict-like element for size but since all # children have the same name -- 'member' in this case -- the dictionary will always have # just one entry. Luckily, IAM currently only supports one role per profile so this Boto # bug does not affect us much. if len( profile.roles ) > 1: raise RuntimeError( 'Did not expect profile to contain more than one role' ) elif len( profile.roles ) == 1: # this should be profile.roles[0].role_name if profile.roles.member.role_name == aws_role_name: return profile_arn else: self.ctx.iam.remove_role_from_instance_profile( aws_instance_profile_name, profile.roles.member.role_name ) self.ctx.iam.add_role_to_instance_profile( aws_instance_profile_name, aws_role_name ) return profile_arn def _hash_iam_role_name( self, iam_role_name ): # An IAM role name is limited to 64 characters so we hash it with D64 to get a short, # but still unique identifier. Note that Box subclasses should append their CGCloud role # name to the IAM role name. Prependi the prefix here and in _get_iam_ec2_role to be # backwards-compatible PassRole statements generated by older versions of CGCloud. return '-'.join( [ self.iam_role_name_prefix, aws_d32.encode( hashlib.sha1( iam_role_name ).digest( )[ 0:8 ] ) ] ) iam_role_name_prefix = 'cgcloud' def _role_arn( self, iam_role_name_prefix='' ): """ Returns the ARN for roles with the given prefix in the current AWS account """ aws_role_prefix = self.ctx.to_aws_name( iam_role_name_prefix + self.iam_role_name_prefix ) return 'arn:aws:iam::%s:role/%s*' % (self.ctx.account, aws_role_prefix) def _get_iam_ec2_role( self ): """ Returns the IAM role to be associated with this box. :rtype: (str, dict) :return A tuple of the form ( iam_role_name, policy_document ) where policy_document is an IAM policy in the form of a dictionary that can be turned into JSON. When overriding this method, subclasses should append to the tuple elements rather than modify them in place. 
""" return self.iam_role_name_prefix, { } # http://aws.amazon.com/amazon-linux-ami/instance-type-matrix/ # virtualization_types = [ 'paravirtual', 'hvm' ] paravirtual_families = [ 'm1', 'c1', 'm2', 't1' ] def __default_virtualization_type( self, instance_type ): family = instance_type.split( '.', 2 )[ 0 ].lower( ) return 'paravirtual' if family in self.paravirtual_families else 'hvm' def delete_image( self, image_ref, wait=True, delete_snapshot=True ): image = self.__select_image( image_ref ) image_id = image.id log.info( "Deregistering image %s", image_id ) image.deregister( ) if wait: log.info( "Waiting for deregistration to finalize ..." ) while True: if self.ctx.ec2.get_image( image_id ): log.info( '... image still registered, trying again in %is ...' % a_short_time ) time.sleep( a_short_time ) else: log.info( "... image deregistered." ) break if delete_snapshot: self.__delete_image_snapshot( image, wait=wait ) def __delete_image_snapshot( self, image, wait=True ): for root_device in self.possible_root_devices: root_bdt = image.block_device_mapping.get( root_device ) if root_bdt: snapshot_id = image.block_device_mapping[ root_device ].snapshot_id log.info( "Deleting snapshot %s.", snapshot_id ) # It is safe to retry this indefinitely because a snapshot can only be # referenced by one AMI. See also https://github.com/boto/boto/issues/3019. for attempt in retry_ec2( retry_for=a_long_time if wait else 0, retry_while=lambda e: e.error_code == 'InvalidSnapshot.InUse' ): with attempt: self.ctx.ec2.delete_snapshot( snapshot_id ) return raise RuntimeError( 'Could not determine root device in AMI' ) def _provide_generated_keypair( self, ec2_keypair_name, private_key_path, overwrite_local=True, overwrite_ec2=False ): """ Expects to be running in a Fabric task context! Ensures that 1) a key pair has been generated in EC2 under the given name, 2) a matching private key exists on this box at the given path and 3) the corresponding public key exists at the given path with .pub appended. A generated keypair is one for which EC2 generated the private key. This is different from imported keypairs where the private key is generated locally and the public key is then imported to EC2. Since EC2 exposes only the fingerprint for a particular key pair, but not the public key, the public key of the generated key pair is additionally stored in S3. The public key object in S3 will be identified using the key pair's fingerprint, which really is the the private key's fingerprint. Note that this is different to imported key pairs which are identified by their public key's fingerprint, both by EC2 natively and by cgcloud in S3. If there already is a key pair in EC2 and a private key at the given path in this box, they are checked to match each other. If they don't, an exception will be raised. If there already is a local private key but no key pair in EC2, either an exception will be raised (if overwrite_local is False) or a key pair will be created and the local private key will be overwritten (if overwrite_local is True). If there is a key pair in EC2 but no local private key, either an exception will be raised (if overwrite_ec2 is False) or the key pair will be deleted and a new one will be created in its stead (if overwrite_ec2 is True). To understand the logic behind all this keep in mind that the private component of a EC2-generated keypair can only be downloaded once, at creation time. 
:param ec2_keypair_name: the name of the keypair in EC2 :param private_key_path: the path to the private key on this box :param overwrite_local: whether to overwrite a local private key, see above :param overwrite_ec2: whether to overwrite a keypair in EC2, see above :return: the actual contents of the private and public keys as a tuple in that order """ ec2_keypair = self.ctx.ec2.get_key_pair( ec2_keypair_name ) key_file_exists = run( 'test -f %s' % private_key_path, quiet=True ).succeeded if ec2_keypair is None: if key_file_exists: if overwrite_local: # TODO: make this more prominent, e.g. by displaying all warnings at the end log.warn( 'Warning: Overwriting private key with new one from EC2.' ) else: raise UserError( "Private key already exists on box. Creating a new key pair " "in EC2 would require overwriting that file" ) ssh_privkey, ssh_pubkey = self.__generate_keypair( ec2_keypair_name, private_key_path ) else: # With an existing keypair there is no way to get the private key from AWS, # all we can do is check whether the locally stored private key is consistent. if key_file_exists: ssh_privkey, ssh_pubkey = self.__verify_generated_keypair( ec2_keypair, private_key_path ) else: if overwrite_ec2: self.ctx.ec2.delete_key_pair( ec2_keypair_name ) ssh_privkey, ssh_pubkey = self.__generate_keypair( ec2_keypair_name, private_key_path ) else: raise UserError( "The key pair {ec2_keypair.name} is registered in EC2 but the " "corresponding private key file {private_key_path} does not exist on the " "instance. In order to create the private key file, the key pair must be " "created at the same time. Please delete the key pair from EC2 before " "retrying.".format( **locals( ) ) ) # Store public key put( local_path=StringIO( ssh_pubkey ), remote_path=private_key_path + '.pub' ) return ssh_privkey, ssh_pubkey def __generate_keypair( self, ec2_keypair_name, private_key_path ): """ Generate a keypair in EC2 using the given name and write the private key to the file at the given path. Return the private and public key contents as a tuple. """ ec2_keypair = self.ctx.ec2.create_key_pair( ec2_keypair_name ) if not ec2_keypair.material: raise AssertionError( "Created key pair but didn't get back private key" ) ssh_privkey = ec2_keypair.material put( local_path=StringIO( ssh_privkey ), remote_path=private_key_path ) assert ec2_keypair.fingerprint == ec2_keypair_fingerprint( ssh_privkey ) run( 'chmod go= %s' % private_key_path ) ssh_pubkey = private_to_public_key( ssh_privkey ) self.ctx.upload_ssh_pubkey( ssh_pubkey, ec2_keypair.fingerprint ) return ssh_privkey, ssh_pubkey def __verify_generated_keypair( self, ec2_keypair, private_key_path ): """ Verify that the given EC2 keypair matches the private key at the given path. Return the private and public key contents as a tuple. """ ssh_privkey = StringIO( ) get( remote_path=private_key_path, local_path=ssh_privkey ) ssh_privkey = ssh_privkey.getvalue( ) fingerprint = ec2_keypair_fingerprint( ssh_privkey ) if ec2_keypair.fingerprint != fingerprint: raise UserError( "The fingerprint {ec2_keypair.fingerprint} of key pair {ec2_keypair.name} doesn't " "match the fingerprint {fingerprint} of the private key file currently present on " "the instance. Please delete the key pair from EC2 before retrying. " .format( **locals( ) ) ) ssh_pubkey = self.ctx.download_ssh_pubkey( ec2_keypair ) if ssh_pubkey != private_to_public_key( ssh_privkey ): raise RuntimeError( "The private key on the data volume doesn't match the " "public key in EC2." 
) return ssh_privkey, ssh_pubkey def _provide_imported_keypair( self, ec2_keypair_name, private_key_path, overwrite_ec2=False ): """ Expects to be running in a Fabric task context! Ensures that 1) a key pair has been imported to EC2 under the given name, 2) a matching private key exists on this box at the given path and 3) the corresponding public key exists at the given path with .pub appended. If there is no private key at the given path on this box, one will be created. If there already is a imported key pair in EC2, it is checked to match the local public key. If they don't match an exception will be raised (overwrite_ec2 is False) or the EC2 key pair will be replaced with a new one by importing the local public key. The public key itself will be tracked in S3. See _provide_generated_keypair for details. :param ec2_keypair_name: the name of the keypair in EC2 :param private_key_path: the path to the private key on this box (tilde will be expanded) :return: the actual contents of the private and public keys as a tuple in that order """ key_file_exists = run( 'test -f %s' % private_key_path, quiet=True ).succeeded if not key_file_exists: run( "ssh-keygen -N '' -C '%s' -f '%s'" % (ec2_keypair_name, private_key_path) ) ssh_privkey = StringIO( ) get( remote_path=private_key_path, local_path=ssh_privkey ) ssh_privkey = ssh_privkey.getvalue( ) ssh_pubkey = StringIO( ) get( remote_path=private_key_path + '.pub', local_path=ssh_pubkey ) ssh_pubkey = ssh_pubkey.getvalue( ) self.ctx.register_ssh_pubkey( ec2_keypair_name, ssh_pubkey, force=overwrite_ec2 ) return ssh_privkey, ssh_pubkey @contextmanager def _project_artifacts( self, project_name ): """ Like project.project_artifacts() but uploads any source distributions to the instance represented by this box such that a pip running on that instance box can install them. Must be called directly or indirectly from a function decorated with fabric_task. Returns a list of artifacts references, each reference being either a remote path to a source distribution or a versioned dependency reference, typically referring to a package on PyPI. """ artifacts = [ ] for artifact in project_artifacts( project_name ): if artifact.startswith( '/' ): artifact = put( local_path=artifact )[ 0 ] artifacts.append( artifact ) yield artifacts for artifact in artifacts: if artifact.startswith( '/' ): run( 'rm %s' % quote( artifact ) ) cgcloud-releases-1.6.0/core/src/cgcloud/core/centos_box.py000066400000000000000000000137521301512357500235550ustar00rootroot00000000000000from abc import abstractmethod import re from distutils.version import LooseVersion from fabric.operations import run, sudo from cgcloud.core.box import fabric_task from cgcloud.core.agent_box import AgentBox from cgcloud.core.yum_box import YumBox from cgcloud.core.rc_local_box import RcLocalBox admin_user = 'admin' class CentosBox( YumBox, AgentBox, RcLocalBox ): """ A box representing EC2 instances that boots of a RightScale CentOS AMI. Most of the complexity in this class stems from a workaround for RightScale's handling of the root account. RightScale does not offer a non-root admin account, so after the instance boots for the first time, we create an admin account and disable SSH and console logins to the root account, just like on Canonical's Ubuntu AMIs. The instance is tagged with the name of the admin account such that we can look it up later. """ @abstractmethod def release( self ): """ :return: the version number of the CentOS release, e.g. 
"6.4" """ raise NotImplementedError def __init__( self, ctx ): super( CentosBox, self ).__init__( ctx ) self._username = None def admin_account( self ): if self._username is None: default_username = 'root' if self.generation == 0 else 'admin' self._username = self.instance.tags.get( 'admin_user', default_username ) return self._username def _set_username( self, admin_user ): self._username = admin_user self.instance.add_tag( 'admin_user', admin_user ) def _base_image( self, virtualization_type ): release = self.release( ) images = self.ctx.ec2.get_all_images( owners=[ '411009282317' ], filters={ 'name': 'RightImage_CentOS_%s_x64*' % release, 'root-device-type': 'ebs', 'virtualization-type': virtualization_type } ) if not images: raise self.NoSuchImageException( "Can't find any candidate AMIs for CentOS release %s and virtualization type %s" % ( release, virtualization_type) ) max_version = None base_image = None for image in images: match = re.match( 'RightImage_CentOS_(\d+(?:\.\d+)*)_x64_v(\d+(?:\.\d+)*)(_HVM)?_EBS', image.name ) if match: assert match.group( 1 ) == release version = LooseVersion( match.group( 2 ) ) if max_version is None or max_version < version: max_version = version base_image = image if not base_image: raise self.NoSuchImageException( "Can't find AMI matching CentOS release %s and virtualization type %s" % ( release, virtualization_type) ) return base_image def _on_instance_ready( self, first_boot ): super( CentosBox, self )._on_instance_ready( first_boot ) if first_boot and self.admin_account( ) == 'root': self.__create_admin( ) self._set_username( admin_user ) self.__setup_admin( ) @fabric_task def __create_admin( self ): # Don't clear screen on logout, it's annoying run( r"sed -i -r 's!^(/usr/bin/)?clear!# \0!' /etc/skel/.bash_logout ~/.bash_logout" ) # Imitate the security model of Canonical's Ubuntu AMIs: Create an admin user that can sudo # without password and disable root logins via console and ssh. run( 'useradd -m -s /bin/bash {0}'.format( admin_user ) ) self._propagate_authorized_keys( admin_user ) run( 'rm ~/.ssh/authorized_keys' ) run( 'echo "{0} ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers'.format( admin_user ) ) # CentOS 6 has "Defaults: requiretty" in /etc/sudoers. This makes no sense for users with # NOPASSWD. Requiretty causes sudo(...,pty=False) to fail with "You need a pty for # sudo". We disable requiretty for the admin since we need pty=False for pip which would # spill the progress info all over the output. run( 'echo "Defaults:{0} !requiretty" >> /etc/sudoers'.format( admin_user ) ) run( 'passwd -l root' ) run( 'echo PermitRootLogin no >> /etc/ssh/sshd_config' ) @fabric_task def __setup_admin( self ): run( "echo 'export PATH=\"/usr/local/sbin:/usr/sbin:/sbin:$PATH\"' >> ~/.bash_profile" ) if False: # I recently discovered the undocumented AuthorizedKeysFile2 option which had been # supported by OpenSSH for a long time. Considering that Ubuntu, too, lacks multi-file # AuthorizedKeysFile in releases before Raring, we would have to update OpenSSH on those # releases as well. @fabric_task def _update_openssh( self ): """ Our cghub-cloud-agent needs a newer version of OpenSSH that support listing with multiple files for the sshd_conf option AuthorizedKeysFile. The stock CentOS 5 and 6 don't have one so we'll install a custom RPM. The multiple file support was added in version 5.9 of OpenSSH. This method should to be invoked early on during setup. 
""" # I wwasn't able to cusotm build openssh-askpass as it depends on X11 and whatnot, # but it's not crucial so we'll skip it, or rather remove the old version of it self._yum_remove( 'openssh-askpass' ) base_url = 'http://public-artifacts.cghub.ucsc.edu.s3.amazonaws.com/custom-centos-packages/' self._yum_local( is_update=True, rpm_urls=[ base_url + 'openssh-6.3p1-1.x86_64.rpm', base_url + 'openssh-clients-6.3p1-1.x86_64.rpm', base_url + 'openssh-server-6.3p1-1.x86_64.rpm' ] ) self._run_init_script( 'sshd', 'restart' ) @fabric_task def _run_init_script( self, name, command='start' ): script_path = self._init_script_path( name ) sudo( '%s %s' % (script_path, command) ) cgcloud-releases-1.6.0/core/src/cgcloud/core/cli.py000077500000000000000000000120341301512357500221540ustar00rootroot00000000000000# PYTHON_ARGCOMPLETE_OK from __future__ import absolute_import from collections import OrderedDict from importlib import import_module import logging import os import sys import imp from bd2k.util.iterables import concat from cgcloud.lib.util import Application, app_name, UserError import cgcloud.core log = logging.getLogger( __name__ ) def plugin_module( plugin ): """ >>> plugin_module('cgcloud.core') # doctest: +ELLIPSIS >>> plugin_module('cgcloud.foobar') Traceback (most recent call last): ... UserError: Cannot find plugin module 'cgcloud.foobar'. Running 'pip install cgcloud-foobar' may fix this. """ try: return import_module( plugin ) except ImportError: raise UserError( "Cannot find plugin module '%s'. Running 'pip install %s' may fix this." % ( plugin, plugin.replace( '.', '-' )) ) def main( args=None ): """ This is the cgcloud entry point. It should be installed via setuptools.setup(entry_points=...) """ root_logger = CGCloud.setup_logging( ) try: plugins = os.environ.get( 'CGCLOUD_PLUGINS', '' ).strip( ) plugins = concat( cgcloud.core, [ plugin_module( plugin ) for plugin in plugins.split( ":" ) if plugin ] ) app = CGCloud( plugins, root_logger ) for plugin in plugins: if hasattr( plugin, 'command_classes' ): for command_class in plugin.command_classes( ): app.add( command_class ) app.run( args ) except UserError as e: log.error( e.message ) sys.exit( 255 ) class LoggingFormatter( logging.Formatter ): """ A formatter that logs the thread name of secondary threads, but not the main thread. """ def __init__( self ): super( LoggingFormatter, self ).__init__( "%(threadName)s%(levelname)s: %(message)s" ) def format( self, record ): if record.threadName == 'MainThread': record.threadName = '' elif record.threadName is not None: record.threadName += ' ' return super( LoggingFormatter, self ).format( record ) class CGCloud( Application ): """ The main CLI application """ debug_log_file_name = '%s.{pid}.log' % app_name( ) def __init__( self, plugins, root_logger=None ): super( CGCloud, self ).__init__( ) self.root_logger = root_logger self.option( '--debug', default=False, action='store_true', help='Write debug log to %s in current directory.' % self.debug_log_file_name ) self.option( '--script', '-s', metavar='PATH', help='The path to a Python script with additional role definitions.' 
) self.roles = OrderedDict( ) self.cluster_types = OrderedDict( ) for plugin in plugins: self._import_plugin_roles( plugin ) def _import_plugin_roles( self, plugin ): if hasattr( plugin, 'roles' ): for role in plugin.roles( ): self.roles[ role.role( ) ] = role if hasattr( plugin, 'cluster_types' ): for cluster_type in plugin.cluster_types( ): self.cluster_types[ cluster_type.name( ) ] = cluster_type def prepare( self, options ): if self.root_logger: if options.debug: self.root_logger.setLevel( logging.DEBUG ) file_name = self.debug_log_file_name.format( pid=os.getpid( ) ) file_handler = logging.FileHandler( file_name ) file_handler.setLevel( logging.DEBUG ) file_handler.setFormatter( logging.Formatter( '%(asctime)s: %(levelname)s: %(name)s: %(message)s' ) ) self.root_logger.addHandler( file_handler ) else: self.silence_boto_and_paramiko( ) if options.script: plugin = imp.load_source( os.path.splitext( os.path.basename( options.script ) )[ 0 ], options.script ) self._import_plugin_roles( plugin ) @classmethod def setup_logging( cls ): root_logger = logging.getLogger( ) # Only setup logging if it hasn't been done already if len( root_logger.handlers ) == 0: root_logger.setLevel( logging.INFO ) stream_handler = logging.StreamHandler( sys.stderr ) stream_handler.setFormatter( LoggingFormatter( ) ) stream_handler.setLevel( logging.INFO ) root_logger.addHandler( stream_handler ) return root_logger else: return None @classmethod def silence_boto_and_paramiko( cls ): # There are quite a few cases where we expect AWS requests to fail, but it seems # that boto handles these by logging the error *and* raising an exception. We # don't want to confuse the user with those error messages. logging.getLogger( 'boto' ).setLevel( logging.CRITICAL ) logging.getLogger( 'paramiko' ).setLevel( logging.WARN ) cgcloud-releases-1.6.0/core/src/cgcloud/core/cloud_init_box.py000066400000000000000000000301271301512357500244060ustar00rootroot00000000000000import logging import time from StringIO import StringIO from abc import abstractmethod from functools import partial import paramiko import yaml from fabric.operations import put from paramiko import Channel from cgcloud.core.box import Box, fabric_task from cgcloud.core.package_manager_box import PackageManagerBox from cgcloud.lib.ec2 import ec2_instance_types from cgcloud.lib.util import heredoc log = logging.getLogger( __name__ ) class CloudInitBox( PackageManagerBox ): """ A box that uses Canonical's cloud-init to initialize the EC2 instance. """ def _ephemeral_mount_point( self, i ): return '/mnt/ephemeral' + ('' if i == 0 else str( i )) @abstractmethod def _get_package_installation_command( self, package ): """ Return the command that needs to be invoked to install the given package. The returned command is an array whose first element is a path or file name of an executable while the remaining elements are arguments to that executable. """ raise NotImplementedError( ) def _get_virtual_block_device_prefix( self ): """ Return the common prefix of paths representing virtual block devices on this box. 
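        With the default prefix of '/dev/xvd' returned below, the first ephemeral volume is
        addressed as /dev/xvdb, the second as /dev/xvdc and so on, as computed by the
        device_name() helper in _populate_cloud_config().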
""" return '/dev/xvd' def _populate_cloud_config( self, instance_type, user_data ): """ Populate cloud-init's configuration for injection into a newly created instance :param user_data: a dictionary that will be be serialized into YAML and used as the instance's user-data """ # see __wait_for_cloud_init_completion() runcmd = user_data.setdefault( 'runcmd', [ ] ) runcmd.append( [ 'touch', '/tmp/cloud-init.done' ] ) # # Lucid's and Oneiric's cloud-init mount ephemeral storage on /mnt instead of # /mnt/ephemeral, Fedora doesn't mount it at all. To keep it consistent across # releases and platforms we should be explicit. # # Also note that Lucid's mountall waits on the disk device. On t1.micro instances this # doesn't show up causing Lucid to hang on boot on this type. The cleanest way to handle # this is to remove the ephemeral entry on t1.micro instances by specififying [ # 'ephemeral0', None ]. Unfortunately, there is a bug [1] in cloud-init that causes the # removal of the entry to be ineffective. The "nobootwait" option might be a workaround # but Fedora stopped supporting it such that now only Ubuntu supports it. A better # workaround is to always have the ephemeral entry in fstab, even on micro instances, # but to exclude the 'auto' option such that when cloud-init runs 'mount -a', it will not # get mounted. We can then mount the filesystem explicitly, except on micro instances. # # The important thing to keep in mind is that when booting instance B from an image # created on a instance A, the fstab from A will be used by B before cloud-init can make # its changes to fstab. This behavior is a major cause of problems and the reason why # micro instances tend to freeze when booting from images created on non-micro instances # since their fstab initially refers to an ephemeral volume that doesn't exist. The # nobootwait and nofail flags are really just attempts at working around this issue. # # [1]: https://bugs.launchpad.net/cloud-init/+bug/1291820 # mounts = user_data.setdefault( 'mounts', [ ] ) mounts.append( [ 'ephemeral0', self._ephemeral_mount_point( 0 ), 'auto', 'defaults,noauto' ] ) commands = [ ] # On instances booted from a stock image, mdadm will likely be missing. So we should # install it. And we should install it early during boot, before the ephemeral drives are # RAIDed. Furthermore, we need to install mdadm on every instance type, not just the # ones with multiple ephemeral drives, since an image taken from an instance with one # ephemeral volume may be used to spawn an instance with multiple ephemeral volumes. # However, since we don't run `apt-get update`, there is a chance that the package index # is stale and that the installation fails. We therefore also install it during regular # setup. if self.generation == 0: commands.append( self._get_package_installation_command( 'mdadm' ) ) num_disks = instance_type.disks device_prefix = self._get_virtual_block_device_prefix( ) def device_name( i ): return device_prefix + (chr( ord( 'b' ) + i )) if num_disks == 0: pass elif instance_type.disk_type == 'HDD': # For HDDs we assume the disk is formatted and we mount each disk separately for i in range( num_disks ): mount_point = self._ephemeral_mount_point( i ) if mount_point is not None: commands.extend( [ [ 'mkdir', '-p', mount_point ], [ 'mount', device_name( i ), mount_point ] ] ) elif num_disks == 1: # The r3 family does not format the ephemeral SSD volume so will have to do it # manually. Other families may also exhibit that behavior so we will format every SSD # volume. 
It only takes a second *and* ensures that we have a particular type of # filesystem, i.e. ext4. We don't know what the device will be (cloud-init determines # this at runtime) named so we simply try all possible names. if instance_type.disk_type == 'SSD': commands.append( [ 'mkfs.ext4', '-E', 'nodiscard', device_name( 0 ) ] ) mount_point = self._ephemeral_mount_point( 0 ) commands.extend( [ [ 'mkdir', '-p', mount_point ], [ 'mount', device_name( 0 ), mount_point ] ] ) elif num_disks > 1: # RAID multiple SSDs into one, then format and mount it. devices = [ device_name( i ) for i in range( num_disks ) ] mount_point = self._ephemeral_mount_point( 0 ) commands.extend( [ [ 'mdadm', '--create', '/dev/md0', '--run', # do not prompt for confirmation '--level', '0', # RAID 0, i.e. striped '--raid-devices', str( num_disks ) ] + devices, # Disable auto scan at boot time, which would otherwise mount device on reboot # as md127 before these commands are run. 'echo "AUTO -all" > /etc/mdadm/mdadm.conf', # Copy mdadm.conf into init ramdisk [ 'update-initramfs', '-u' ], [ 'mkfs.ext4', '-E', 'nodiscard', '/dev/md0' ], [ 'mkdir', '-p', mount_point ], [ 'mount', '/dev/md0', mount_point ] ] ) else: assert False # Prepend commands as a best effort to getting volume preparation done as early as # possible in the boot sequence. Note that CloudInit's 'bootcmd' is run on every boot, # 'runcmd' only once after instance creation. bootcmd = user_data.setdefault( 'bootcmd', [ ] ) bootcmd[ 0:0 ] = commands def _spec_block_device_mapping( self, spec, image ): super( CloudInitBox, self )._spec_block_device_mapping( spec, image ) cloud_config = { } instance_type = ec2_instance_types[ spec[ 'instance_type' ] ] self._populate_cloud_config( instance_type, cloud_config ) if cloud_config: if 'user_data' in spec: raise ReferenceError( "Conflicting user-data" ) user_data = '#cloud-config\n' + yaml.dump( cloud_config ) spec[ 'user_data' ] = user_data def _on_instance_ready( self, first_boot ): super( CloudInitBox, self )._on_instance_ready( first_boot ) if first_boot: self.__wait_for_cloud_init_completion( ) if self.generation == 0: self.__add_per_boot_script( ) def _cloudinit_boot_script( self, name ): return '/var/lib/cloud/scripts/per-boot/cgcloud-' + name @fabric_task def __add_per_boot_script( self ): """ Ensure that the cloud-init.done file is always created, even on 2nd boot and thereafter. On the first boot of an instance, the .done file creation is preformed by the runcmd stanza in cloud-config. On subsequent boots this per-boot script takes over (runcmd is skipped on those boots). """ put( remote_path=self._cloudinit_boot_script( 'done' ), mode=0755, use_sudo=True, local_path=StringIO( heredoc( """ #!/bin/sh touch /tmp/cloud-init.done""" ) ) ) def __wait_for_cloud_init_completion( self ): """ Wait for cloud-init to finish its job such as to avoid getting in its way. Without this, I've seen weird errors with 'apt-get install' not being able to find any packages. Since this method belongs to a mixin, the author of a derived class is responsible for invoking this method before any other setup action. """ # /var/lib/cloud/instance/boot-finished is only being written by newer cloud-init releases. # For example, it isn't being written by the cloud-init for Lucid. We must use our own file # created by a runcmd, see _populate_cloud_config() # # This function is called on every node in a cluster during that cluster's creation. 
For # that reason we want to avoid contention on the lock in @fabric_task that's protecting # the thread-unsafe Fabric code. This contention is aggravated by the fact that, # for some unkown reason, the first SSH connection to a node takes unusually long. With a # lock serialising all calls to this method we have to wait for the delay for every node # in sequence, in O(N) time. Paramiko, OTOH, is thread-safe allowing us to do the wait # in concurrently, in O(1) time. command = ';'.join( [ 'echo -n "Waiting for cloud-init to finish ..."', 'while [ ! -e /tmp/cloud-init.done ]', 'do echo -n "."', 'sleep 1 ', 'done ', 'echo "... cloud-init done."' ] ) self._run( command ) def _run( self, cmd ): def stream( name, recv_ready, recv, logger ): i = 0 r = '' try: while recv_ready( ): s = recv( 1024 ) if not s: break i += 1 ls = s.splitlines( ) # Prepend partial line from previous iteration to first line from this # iteration. Note that the first line may be a partial line, too. ls[ 0 ] = r + ls[ 0 ] # Log all complete lines for l in ls[ :-1 ]: logger( "%s: %s", name, l ) r = ls[ -1 ] finally: # No chance to complete the partial line anytime soon, so log it. if r: logger( r ) return i client = self._ssh_client( ) try: with client.get_transport( ).open_session( ) as chan: assert isinstance( chan, Channel ) chan.exec_command( cmd ) streams = ( partial( stream, 'stderr', chan.recv_stderr_ready, chan.recv_stderr, log.warn ), partial( stream, 'stdout', chan.recv_ready, chan.recv, log.info )) while sum( stream( ) for stream in streams ) or not chan.exit_status_ready( ): time.sleep( paramiko.common.io_sleep ) assert 0 == chan.recv_exit_status( ) finally: client.close( ) def _list_packages_to_install( self ): # As a fallback from failed installations of mdadm at boot time, we should install mdadm # unconditionally: https://github.com/BD2KGenomics/cgcloud/issues/194 return super( CloudInitBox, self )._list_packages_to_install( ) + [ 'mdadm' ] cgcloud-releases-1.6.0/core/src/cgcloud/core/cluster.py000066400000000000000000000133161301512357500230670ustar00rootroot00000000000000import logging from abc import ABCMeta, abstractproperty from cgcloud.core.box import Box from cgcloud.lib.util import (abreviated_snake_case_class_name, papply, thread_pool) log = logging.getLogger( __name__ ) class Cluster( object ): """ A cluster consists of one leader box and N worker boxes. A box that is part of a cluster is referred to as "node". There is one role (subclass of Box) describing the leader node and another one describing the workers. Leader and worker roles are siblings and their common ancestor--the node role--describes the software deployed on them, which is identical for both leader and workers. The node role is used to create the single image from which the actual nodes will be booted from when the cluster is created. In other words, the specialization into leader and workers happens at cluster creation time, not earlier. 
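    A concrete cluster type therefore only needs to name the two roles. A minimal sketch
    (FooLeader and FooWorker are hypothetical Box subclasses, shown for illustration only):

        class FooCluster( Cluster ):
            @property
            def leader_role( self ):
                return FooLeader

            @property
            def worker_role( self ):
                return FooWorker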
""" __metaclass__ = ABCMeta def __init__( self, ctx ): super( Cluster, self ).__init__( ) self.ctx = ctx @abstractproperty def leader_role( self ): """ :return: The Box subclass to use for the leader """ raise NotImplementedError( ) @abstractproperty def worker_role( self ): """ :return: The Box subclass to use for the workers """ raise NotImplementedError( ) @classmethod def name( cls ): return abreviated_snake_case_class_name( cls, Cluster ) def apply( self, f, cluster_name=None, ordinal=None, leader_first=True, skip_leader=False, wait_ready=True, operation='operation', pool_size=None, callback=None ): """ Apply a callable to the leader and each worker. The callable may be applied to multiple workers concurrently. """ # Look up the leader first, even if leader_first is False or skip_leader is True. That # way we fail early if the cluster doesn't exist. leader = self.leader_role( self.ctx ) leader.bind( cluster_name=cluster_name, ordinal=ordinal, wait_ready=wait_ready ) first_worker = self.worker_role( self.ctx ) def apply_leader( ): if not skip_leader: log.info( '=== Performing %s on leader ===', operation ) result = f( leader ) if callback is not None: callback( result ) def apply_workers( ): log.info( '=== Performing %s on workers ===', operation ) workers = first_worker.list( leader_instance_id=leader.instance_id, wait_ready=wait_ready ) # zip() creates the singleton tuples that papply() expects papply( f, seq=zip( workers ), pool_size=pool_size, callback=callback ) if leader_first: apply_leader( ) apply_workers( ) else: apply_workers( ) apply_leader( ) class ClusterBox( Box ): """ A mixin for a box that is part of a cluster """ def _set_instance_options( self, options ): super( ClusterBox, self )._set_instance_options( options ) self.ebs_volume_size = int( options.get( 'ebs_volume_size' ) or 0 ) def _get_instance_options( self ): return dict( super( ClusterBox, self )._get_instance_options( ), ebs_volume_size=str( self.ebs_volume_size ), leader_instance_id=self.instance_id) @classmethod def _get_node_role( cls ): """ Return the role (box class) from which the node image should be created. """ # Traverses the inheritance DAG upwards until we find a class that has this class as a # base, i.e. that mixes in this class. The traversal itself only follows the first base # class. while cls not in (ClusterBox, ClusterLeader, ClusterWorker, Box): if ClusterBox in cls.__bases__: return cls else: # noinspection PyMethodFirstArgAssignment cls = cls.__bases__[ 0 ] assert False, "Class %s doesn't have an ancestor that mixes in %s" % (cls, ClusterBox) def _image_name_prefix( self ): # The default implementation of this method derives the image name prefix from the # concrete class name. The leader and workers are booted from the node image so we need # to pin the name using the node role. return self._get_node_role( ).role( ) def _security_group_name( self ): # The default implementation of this method derives the security group name from the # concrete class name. The leader and workers must use be assigned the same security # group (because the group allows traffic only within the group) so we need to pin # the name using the node role. 
return self._get_node_role( ).role( ) class ClusterLeader( ClusterBox ): """ A mixin for a box that serves as a leader in a cluster """ def _get_instance_options( self ): return dict( super( ClusterLeader, self )._get_instance_options( ) ) class ClusterWorker( ClusterBox ): """ A mixin for a box that serves as a leader in a cluster """ def __init__( self, ctx ): super( ClusterWorker, self ).__init__( ctx ) self.leader_instance_id = None def _set_instance_options( self, options ): super( ClusterWorker, self )._set_instance_options( options ) self.leader_instance_id = options.get( 'leader_instance_id' ) if self.cluster_name is None: self.cluster_name = self.leader_instance_id def _get_instance_options( self ): return dict( super( ClusterWorker, self )._get_instance_options( ), leader_instance_id=self.leader_instance_id ) cgcloud-releases-1.6.0/core/src/cgcloud/core/cluster_commands.py000066400000000000000000000453611301512357500247550ustar00rootroot00000000000000import logging import os import sys from abc import abstractmethod from functools import partial from bd2k.util.exceptions import panic from bd2k.util.expando import Expando from cgcloud.core.commands import (RecreateCommand, ContextCommand, SshCommandMixin, RsyncCommandMixin) from cgcloud.lib.util import (abreviated_snake_case_class_name, UserError, heredoc, thread_pool, allocate_cluster_ordinals) log = logging.getLogger( __name__ ) class ClusterTypeCommand( ContextCommand ): def __init__( self, application ): """ Set later, once we have a context. :type: Cluster """ super( ClusterTypeCommand, self ).__init__( application ) self.option( '--num-threads', metavar='NUM', type=int, default=100, help='The maximum number of tasks to be performed concurrently.' ) self.option( 'cluster_type', metavar='TYPE', completer=self.completer, help=heredoc( """The type of the cluster to be used. The cluster type is covariant with the role of the leader node. For example, a box performing the 'foo-leader' role will be part of a cluster of type 'foo'.""" ) ) # noinspection PyUnusedLocal def completer( self, prefix, **kwargs ): return [ cluster_type for cluster_type in self.application.cluster_types.iterkeys( ) if cluster_type.startswith( prefix ) ] def run_in_ctx( self, options, ctx ): try: cluster_type = self.application.cluster_types[ options.cluster_type ] except KeyError: raise UserError( "Unknown cluster type '%s'" % options.cluster_type ) self.run_on_cluster_type( ctx, options, cluster_type ) @abstractmethod def run_on_cluster_type( self, ctx, options, cluster_type ): raise NotImplementedError( ) class CreateClusterCommand( ClusterTypeCommand, RecreateCommand ): """ Creates a cluster with one leader and one or more workers. """ def __init__( self, application ): super( CreateClusterCommand, self ).__init__( application ) self.cluster = None self.option( '--cluster-name', '-c', metavar='NAME', help=heredoc( """A name for the new cluster. If absent, the instance ID of the master will be used. Cluster names do not need to be unique, but they should be in order to avoid user error.""" ) ) self.option( '--num-workers', '-s', metavar='NUM', type=int, default=1, help='The number of workers to launch.' ) self.option( '--ebs-volume-size', '-e', metavar='GB', help=heredoc( """The size in GB of an EBS volume to be attached to each node for persistent data. 
The volume will be mounted at /mnt/persistent.""" ) ) self.option( '--leader-on-demand', '-D', default=False, action='store_true', help=heredoc( """Use this option to insure that the leader will be an on-demand instance, even if --spot-bid is given.""" ) ) self.option( '--share', '-S', metavar='PATH', default=None, dest='share_path', help=heredoc( """The path to a local file or directory for distribution to the cluster. The given file or directory (or the contents of the given directory, if the path ends in a slash) will be placed in the default user's ~/shared directory on each node.""" ) ) self.option( '--ssh-opts', metavar='OPTS', default=None, help=heredoc( """Additional options to pass to ssh when uploading the files shared via rsync. For more detail refer to cgcloud rsync --help""" ) ) def preparation_kwargs( self, options, box ): return dict( super( CreateClusterCommand, self ).preparation_kwargs( options, box ), cluster_name=options.cluster_name, ebs_volume_size=options.ebs_volume_size ) def creation_kwargs( self, options, box ): return dict( super( CreateClusterCommand, self ).creation_kwargs( options, box ), num_instances=options.num_workers ) def option( self, option_name, *args, **kwargs ): _super = super( CreateClusterCommand, self ) if option_name in ('role', '--terminate'): # Suppress the role positional argument since the role is hard-wired and the # --terminate option since it doesn't make sense when creating clusters. return if option_name == '--instance-type': # We want --instance-type to apply to the workers and --leader-instance-type to the # leader. Furthermore, we want --leader-instance-type to default to the value of # --instance-type. assert 'dest' not in kwargs assert args[ 0 ] == '-t' kwargs[ 'help' ] = kwargs[ 'help' ].replace( 'for the box', 'for the leader' ) _super.option( '--leader-instance-type', '-T', *args[ 1: ], dest='instance_type', **kwargs ) kwargs[ 'help' ] = kwargs[ 'help' ].replace( 'leader', 'workers' ) kwargs[ 'dest' ] = 'worker_instance_type' _super.option( option_name, *args, **kwargs ) def run( self, options ): # Validate shared path if options.share_path is not None: if not os.path.exists( options.share_path ): raise UserError( "No such file or directory: '%s'" % options.share_path ) # --leader-instance-type should default to the value of --instance-type if options.instance_type is None: options.instance_type = options.worker_instance_type super( CreateClusterCommand, self ).run( options ) def run_on_cluster_type( self, ctx, options, cluster_type ): self.cluster = cluster_type( ctx ) leader_role = self.cluster.leader_role options.role = leader_role.role( ) self.run_on_role( options, ctx, leader_role ) def run_on_box( self, options, leader ): """ :type leader: cgcloud.core.box.Box """ log.info( '=== Creating leader ===' ) preparation_kwargs = self.preparation_kwargs( options, leader ) if options.leader_on_demand: preparation_kwargs = { k: v for k, v in preparation_kwargs.iteritems( ) if not k.startswith( 'spot_' ) } spec = leader.prepare( **preparation_kwargs ) creation_kwargs = dict( self.creation_kwargs( options, leader ), num_instances=1, # We must always wait for the leader since workers depend on it. wait_ready=True ) leader.create( spec, **creation_kwargs ) try: self.run_on_creation( leader, options ) except: if options.terminate is not False: with panic( log ): leader.terminate( wait=False ) raise # Leader is fully setup, even if the code below fails to add workers, # the GrowClusterCommand can be used to recover from that failure. 
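        # For example, 'cgcloud grow-cluster <cluster-type> -c <cluster-name> -s <num-workers>'
        # (see GrowClusterCommand below) can add the missing workers afterwards.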
if options.num_workers: log.info( '=== Creating workers ===' ) first_worker = self.cluster.worker_role( leader.ctx ) preparation_kwargs = dict( self.preparation_kwargs( options, first_worker ), leader_instance_id=leader.instance_id, instance_type=options.worker_instance_type ) spec = first_worker.prepare( **preparation_kwargs ) with thread_pool( min( options.num_threads, options.num_workers ) ) as pool: workers = first_worker.create( spec, cluster_ordinal=leader.cluster_ordinal + 1, executor=pool.apply_async, **self.creation_kwargs( options, first_worker ) ) else: workers = [ ] if options.list: self.list( [ leader ] ) self.list( workers, print_headers=False ) if not workers: log.warn("This cluster has no workers. You may ssh into the leader now but you should " "use 'cgcloud grow-cluster' to add worker instances before doing real work." ) self.log_ssh_hint( options ) def run_on_creation( self, leader, options ): local_path = options.share_path if local_path is not None: log.info( '=== Copying %s%s to ~/shared on leader ===', 'the contents of ' if local_path.endswith( '/' ) else '', local_path ) leader.rsync( args=[ '-r', local_path, ":shared/" ], ssh_opts=options.ssh_opts ) def ssh_hint( self, options ): hint = super( CreateClusterCommand, self ).ssh_hint( options ) hint.options.append( Expando( name='-c', value=options.cluster_name, default=None ) ) hint.object = 'cluster' return hint class ClusterCommand( ClusterTypeCommand ): def __init__( self, application ): super( ClusterCommand, self ).__init__( application ) self.option( '--cluster-name', '-c', metavar='NAME', help=heredoc( """The name of the cluster to operate on. The default is to consider all clusters of the given type regardless of their name, using --ordinal to disambiguate. Note that the cluster name is not necessarily unique, not even with a specific cluster type, there may be more than one cluster of a particular name and type.""" ) ) self.option( '--ordinal', '-o', default=-1, type=int, help=heredoc( """Selects an individual cluster from the list of currently running clusters of the given cluster type and name. Since there is one leader per cluster, this is equal to the ordinal of the leader among all leaders of clusters of the given type and name. The ordinal is a zero-based index into the list of all clusters of the specified type and name, sorted by creation time. This means that the ordinal of a cluster is not fixed, it may change if another cluster of the same type and name is terminated. If the ordinal is negative, it will be converted to a positive ordinal by adding the number of clusters of the specified type. Passing -1, for example, selects the most recently created box.""" ) ) def run_on_cluster_type( self, ctx, options, cluster_type ): cluster = cluster_type( ctx ) self.run_on_cluster( options, ctx, cluster ) @abstractmethod def run_on_cluster( self, options, ctx, cluster ): raise NotImplementedError( ) class GrowClusterCommand( ClusterCommand, RecreateCommand ): """ Increase the size of the cluster """ def __init__( self, application ): super( GrowClusterCommand, self ).__init__( application ) self.cluster = None self.option( '--num-workers', '-s', metavar='NUM', type=int, default=1, help='The number of workers to add.' ) def option( self, option_name, *args, **kwargs ): _super = super( GrowClusterCommand, self ) if option_name in ('role', '--terminate'): # Suppress the role positional argument since the role is hard-wired and the # --terminate option since it doesn't make sense here. 
return if option_name == '--instance-type': assert 'dest' not in kwargs assert args[ 0 ] == '-t' kwargs[ 'help' ] = kwargs[ 'help' ].replace( 'for the box', 'for the workers' ) _super.option( option_name, *args, **kwargs ) def run_on_cluster( self, options, ctx, cluster ): self.cluster = cluster options.role = self.cluster.worker_role.role( ) self.run_on_role( options, ctx, self.cluster.worker_role ) def creation_kwargs( self, options, box ): return dict( super( GrowClusterCommand, self ).creation_kwargs( options, box ), num_instances=options.num_workers ) def run_on_box( self, options, first_worker ): """ :param cgcloud.core.box.Box first_worker: """ log.info( '=== Binding to leader ===' ) leader = self.cluster.leader_role( self.cluster.ctx ) leader.bind( cluster_name=options.cluster_name, ordinal=options.ordinal, wait_ready=False ) log.info( '=== Creating workers ===' ) workers = first_worker.list( leader_instance_id=leader.instance_id ) used_cluster_ordinals = set( w.cluster_ordinal for w in workers ) assert len( used_cluster_ordinals ) == len( workers ) # check for collisions assert 0 not in used_cluster_ordinals # master has 0 used_cluster_ordinals.add( 0 ) # to make the math easier cluster_ordinal = allocate_cluster_ordinals( num=options.num_workers, used=used_cluster_ordinals ) first_worker.unbind( ) # list() bound it spec = first_worker.prepare( leader_instance_id=leader.instance_id, cluster_name=leader.cluster_name, **self.preparation_kwargs( options, first_worker ) ) with thread_pool( min( options.num_threads, options.num_workers ) ) as pool: workers = first_worker.create( spec, cluster_ordinal=cluster_ordinal, executor=pool.apply_async, **self.creation_kwargs( options, first_worker ) ) if options.list: self.list( workers ) if not workers: log.warn( 'No workers were added to the cluster.' ) class ApplyClusterCommand( ClusterCommand ): """ A command that applies an operation to a running cluster. 
""" def __init__( self, application ): super( ApplyClusterCommand, self ).__init__( application ) self.option( '--skip-leader', '-L', default=False, action='store_true', help=heredoc( """Don't perform the operation on the leader.""" ) ) class ClusterLifecycleCommand( ApplyClusterCommand ): """ A command that runs a simple method on each node in a cluster """ leader_first = True wait_ready = False def run_on_cluster( self, options, ctx, cluster ): cluster.apply( partial( self.run_on_node, options ), cluster_name=options.cluster_name, ordinal=options.ordinal, leader_first=self.leader_first, skip_leader=options.skip_leader, wait_ready=self.wait_ready, pool_size=options.num_threads, operation=self.operation( ) + '()' ) def run_on_node( self, options, node ): getattr( node, self.operation( ) )( ) def operation( self ): return abreviated_snake_case_class_name( self.__class__, ClusterCommand ) class StopClusterCommand( ClusterLifecycleCommand ): """ Stop all nodes of a cluster """ leader_first = False class StartClusterCommand( ClusterLifecycleCommand ): """ Start all nodes of a cluster """ leader_first = True class TerminateClusterCommand( ClusterLifecycleCommand ): """ Terminate all nodes of a cluster """ leader_first = False def __init__( self, application ): super( TerminateClusterCommand, self ).__init__( application ) self.option( '--quick', '-Q', default=False, action='store_true', help="""Exit immediately after termination request has been made, don't wait until the cluster is terminated.""" ) def run_on_node( self, options, node ): node.terminate( wait=not options.quick ) # NB: The ordering of bases affects ordering of positionals class SshClusterCommand( SshCommandMixin, ApplyClusterCommand ): """ Run a command via SSH on each node of a cluster. The command is run on the leader first, followed by the workers, serially by default or optionally in parallel. """ def __init__( self, application ): super( SshClusterCommand, self ).__init__( application ) self.option( '--parallel', '-P', default=False, action='store_true', help=heredoc( """Run command on the workers in parallel. Note that this doesn't work if SSH or the command itself prompts for input. This will likely be the case on the first connection attempt when SSH typically prompts for confirmation of the host key. An insecure work-around is to pass "-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no".""" ) ) def run_on_cluster( self, options, ctx, cluster ): exit_codes = [ ] cluster.apply( partial( self.ssh, options ), cluster_name=options.cluster_name, ordinal=options.ordinal, leader_first=True, skip_leader=options.skip_leader, pool_size=options.num_threads if options.parallel else 0, wait_ready=False, callback=exit_codes.append ) if any( exit_code for exit_code in exit_codes ): sys.exit( 2 ) class RsyncClusterCommand( RsyncCommandMixin, ApplyClusterCommand ): """ Run rsync against each node in a cluster. The rsync program will be run against master first, followed by all workers in parallel. To avoid being prompted for confirmation of the host key, use --ssh-opts="-o UserKnownHostsFile=/dev/null -o StrictHostKeyChecking=no". 
""" def run_on_cluster( self, options, ctx, cluster ): cluster.apply( partial( self.rsync, options ), cluster_name=options.cluster_name, ordinal=options.ordinal, leader_first=True, skip_leader=options.skip_leader, pool_size=options.num_threads, wait_ready=False ) cgcloud-releases-1.6.0/core/src/cgcloud/core/commands.py000066400000000000000000001154621301512357500232140ustar00rootroot00000000000000from __future__ import print_function import argparse import functools import logging import os import re import sys from abc import abstractmethod from operator import itemgetter from bd2k.util.exceptions import panic from bd2k.util.expando import Expando from bd2k.util.iterables import concat from boto.ec2.blockdevicemapping import BlockDeviceType from boto.ec2.connection import EC2Connection from boto.ec2.group import Group from fabric.operations import prompt from tabulate import tabulate from cgcloud.core.box import Box from cgcloud.lib.context import Context from cgcloud.lib.ec2 import ec2_instance_types from cgcloud.lib.util import Application, heredoc from cgcloud.lib.util import UserError, Command log = logging.getLogger( __name__ ) class ContextCommand( Command ): """ A command that runs in a context. Contexts encapsulate the necessary environment for boxes to run in. The most important aspect of a context is its namespace. Namespaces group boxes and other resources into isolated groups. """ @abstractmethod def run_in_ctx( self, options, ctx ): """ Run this command in the given context. :type ctx: Context """ raise NotImplementedError( ) def __init__( self, application, **kwargs ): self.default_namespace = os.environ.get( 'CGCLOUD_NAMESPACE', '/__me__/' ) self.default_zone = os.environ.get( 'CGCLOUD_ZONE', None ) super( ContextCommand, self ).__init__( application, **kwargs ) self.option( '--zone', '-z', metavar='ZONE', default=self.default_zone, dest='availability_zone', required=not bool( self.default_zone ), help=heredoc( """The name of the EC2 availability zone to operate in, e.g. us-east-1b, us-west-1b or us-west-2c etc. This argument implies the AWS region to run in. The value of the environment variable CGCLOUD_ZONE, if that variable is present, determines the default.""" ) ) self.option( '--namespace', '-n', metavar='PREFIX', default=self.default_namespace, help=heredoc( """Optional prefix for naming EC2 resource like instances, images, volumes, etc. Use this option to create a separate namespace in order to avoid collisions, e.g. when running tests. A namespace begins with a slash, followed by zero or more names, each name followed by a slash. Note that this implies that the namespace begins and ends with a slash. Each name must begin with a a digit or lowercase letter followed by zero or more digits, lowercase letters, periods, underscores or dashes. The value of the environment variable CGCLOUD_NAMESPACE, if that variable is present, overrides the default. The string __me__ anywhere in the namespace will be replaced by the name of the IAM user whose credentials are used to issue requests to AWS. If the name of that IAM user contains the @ character, anything after the first occurrance of that character will be discarded before the substitution is done.""" ) ) def run( self, options ): zone = options.availability_zone namespace = options.namespace ctx = None try: ctx = Context( availability_zone=zone, namespace=namespace ) except ValueError as e: raise UserError( cause=e ) except: # print the namespace without __me__ substituted log.error( "An error occurred. 
Using zone '%s' and namespace '%s'", zone, namespace ) raise else: # print the namespace with __me__ substituted log.info( "Using zone '%s' and namespace '%s'", ctx.availability_zone, ctx.namespace ) return self.run_in_ctx( options, ctx ) finally: if ctx is not None: ctx.close( ) class RoleCommand( ContextCommand ): """ An abstract command that targets boxes of a particular role. Note that there may be more than one box per role. To target a specific box, InstanceCommand might be a better choice. """ def __init__( self, application, **kwargs ): super( RoleCommand, self ).__init__( application, **kwargs ) self.option( 'role', metavar='ROLE', completer=self.completer, help=heredoc( """The name of the role. Use the list-roles command to show all available roles.""" ) ) # noinspection PyUnusedLocal def completer( self, prefix, **kwargs ): return [ role for role in self.application.roles.iterkeys( ) if role.startswith( prefix ) ] def run_in_ctx( self, options, ctx ): role = self.application.roles.get( options.role ) if role is None: raise UserError( "No such role: '%s'" % options.role ) return self.run_on_role( options, ctx, role ) @abstractmethod def run_on_role( self, options, ctx, role ): """ :type options: dict :type ctx: Context :type role: type[Box] """ raise NotImplementedError( ) class BoxCommand( RoleCommand ): """ An abstract command that runs on a box, i.e. an instance of a role class. """ def run_on_role( self, options, ctx, role ): box = role( ctx ) return self.run_on_box( options, box ) @abstractmethod def run_on_box( self, options, box ): """ Execute this command using the specified parsed command line options on the specified box. :type options: dict :type box: Box """ raise NotImplementedError( ) def list( self, boxes, print_header=True ): columns = """ cluster_name role_name cluster_ordinal private_ip_address ip_address instance_id instance_type launch_time state zone""".split( ) if print_header: header = list( columns ) header.insert( 2, 'ordinal' ) print( '\t'.join( header ) ) for ordinal, box in enumerate( boxes ): row = [ getattr( box, column ) for column in columns ] row.insert( 2, ordinal ) print( '\t'.join( str( column ) for column in row ) ) class InstanceCommand( BoxCommand ): """ A command that runs on a box bound to a specific EC2 instance. """ def __init__( self, application, **kwargs ): super( InstanceCommand, self ).__init__( application, **kwargs ) self.option( '--cluster-name', '-c', metavar='NAME', help=heredoc( """This option can be used to restrict the selection to boxes that are part of a cluster of the given name. Boxes that are not part of a cluster use their own instance id as the cluster name.""" ) ) self.begin_mutex() self.option( '--ordinal', '-o', default=-1, type=int, help=heredoc( """Selects an individual box from the list of boxes performing the specified role in a cluster of the given name. The ordinal is a zero-based index into the list of all boxes performing the specified role, sorted by creation time. This means that the ordinal of a box is not fixed, it may change if another box performing the specified role is terminated. If the ordinal is negative, it will be converted to a positive ordinal by adding the number of boxes performing the specified role. Passing -1, for example, selects the most recently created box.""" ) ) self.option( '--instance-id', '-I', default=None, type=str, help=heredoc( """Selects an individual instance. 
When combined with --cluster-name, the specified instance needs to belong to a cluster of the specified name or an error will be raised.""" ) ) self.end_mutex() wait_ready = True def run_on_box( self, options, box ): if options.instance_id: # Mutual exclusivity is enforced by argparse but we need to unset the default value # for the mutual exclusive options. options.ordinal = None box.bind( ordinal=options.ordinal, cluster_name=options.cluster_name, wait_ready=self.wait_ready, instance_id=options.instance_id ) self.run_on_instance( options, box ) @abstractmethod def run_on_instance( self, options, box ): raise NotImplementedError( ) class ListCommand( BoxCommand ): """ List the boxes performing a particular role. """ def __init__( self, application ): super( ListCommand, self ).__init__( application ) self.option( '--cluster-name', '-c', metavar='NAME', help='Only list boxes belonging to a cluster of the given name.' ) def run_on_box( self, options, box ): boxes = box.list( cluster_name=options.cluster_name ) self.list( boxes ) class UserCommandMixin( Command ): """ A command that runs as a given user """ def __init__( self, application, **kwargs ): super( UserCommandMixin, self ).__init__( application, **kwargs ) self.begin_mutex( ) self.option( '--login', '-l', default=None, metavar='USER', dest='user', help=heredoc( """Name of user to login as. The default depends on the role, for most roles the default is the administrative user. Roles that define a second less privileged application user will default to that user. Can't be used together with -a, --admin.""" ) ) self.option( '--admin', '-a', default=False, action='store_true', help=heredoc( """Force logging in as the administrative user. Can't be used together with -l, --login.""" ) ) self.end_mutex( ) @staticmethod def _user( box, options ): return box.admin_account( ) if options.admin else options.user or box.default_account( ) class SshCommandMixin( UserCommandMixin ): def __init__( self, application ): super( SshCommandMixin, self ).__init__( application ) self.option( 'command', metavar='...', nargs=argparse.REMAINDER, default=[ ], help=heredoc( """Additional arguments to pass to ssh. This can be anything that one would normally pass to the ssh program excluding user name and host but including, for example, the remote command to execute.""" ) ) def ssh( self, options, box ): return box.ssh( user=self._user( box, options ), command=options.command ) # NB: The ordering of bases affects ordering of positionals class SshCommand( SshCommandMixin, InstanceCommand ): """ Start an interactive SSH session on a box. """ def run_on_instance( self, options, box ): status = self.ssh( options, box ) if status != 0: sys.exit( status ) class RsyncCommandMixin( UserCommandMixin ): """ Rsync to or from the box """ def __init__( self, application ): super( RsyncCommandMixin, self ).__init__( application ) self.option( '--ssh-opts', '-e', metavar='OPTS', default=None, help=heredoc( """Additional options to pass to ssh. Note that if OPTS starts with a dash you must use the long option followed by an equal sign. For example, to run ssh in verbose mode, use --ssh-opt=-v. If OPTS is to include spaces, it must be quoted to prevent the shell from breaking it up. So to run ssh in verbose mode and log to syslog, you would use --ssh-opt='-v -y'.""" ) ) self.option( 'args', metavar='...', nargs=argparse.REMAINDER, default=[ ], help=heredoc( """Command line options for rsync(1). The remote path argument must be prefixed with a colon. 
For example, 'cgcloud.py rsync foo -av :bar .' would copy the file 'bar' from the home directory of the admin user on the box 'foo' to the current directory on the local machine.""" ) ) def rsync( self, options, box ): box.rsync( options.args, user=self._user( box, options ), ssh_opts=options.ssh_opts ) # NB: The ordering of bases affects ordering of positionals class RsyncCommand( RsyncCommandMixin, InstanceCommand ): def run_on_instance( self, options, box ): self.rsync( options, box ) class ImageCommand( InstanceCommand ): """ Create an AMI image of a box performing a given role. The box must be stopped. """ wait_ready = False def run_on_instance( self, options, box ): box.image( ) class ShowCommand( InstanceCommand ): """ Display the EC2 attributes of the box. """ def print_object( self, o, visited=set( ), depth=1 ): _id = id( o ) if not _id in visited: visited.add( _id ) self.print_dict( o.__dict__, visited, depth ) visited.remove( _id ) if depth == 1: sys.stdout.write( '\n' ) def print_dict( self, d, visited, depth ): for k, v in sorted( d.iteritems( ), key=itemgetter( 0 ) ): k = str( k ) if k[ 0:1 ] != '_' \ and k != 'connection' \ and not isinstance( v, EC2Connection ): sys.stdout.write( '\n%s%s: ' % ('\t' * depth, k) ) if isinstance( v, str ): sys.stdout.write( v.strip( ) ) if isinstance( v, unicode ): sys.stdout.write( v.encode( 'utf8' ).strip( ) ) elif hasattr( v, 'iteritems' ): self.print_dict( v, visited, depth + 1 ) elif hasattr( v, '__iter__' ): self.print_dict( dict( enumerate( v ) ), visited, depth + 1 ) elif isinstance( v, BlockDeviceType ) \ or isinstance( v, Group ): self.print_object( v, visited, depth + 1 ) else: sys.stdout.write( repr( v ) ) wait_ready = False def run_on_instance( self, options, box ): self.print_object( box.instance ) class LifecycleCommand( InstanceCommand ): """ Transition an instance box into a particular state. """ wait_ready = False def run_on_instance( self, options, box ): getattr( box, self.name( ) )( ) class StartCommand( LifecycleCommand ): """ Start the box, ie. bring it from the stopped state to the running state. """ pass class StopCommand( LifecycleCommand ): """ Stop the box, ie. bring it from the running state to the stopped state. """ pass class RebootCommand( LifecycleCommand ): """ Stop the box, then start it again. """ pass class TerminateCommand( LifecycleCommand ): """ Terminate the box, ie. delete it permanently. """ def __init__( self, application, **kwargs ): super( TerminateCommand, self ).__init__( application, **kwargs ) self.option( '--quick', '-Q', default=False, action='store_true', help=heredoc( """Exit immediately after termination request has been made, don't wait until the box is terminated.""" ) ) def run_on_instance( self, options, box ): box.terminate( wait=not options.quick ) class ListImagesCommand( BoxCommand ): """ List the AMI images that were created from boxes performing a particular role. """ def run_on_box( self, options, box ): for ordinal, image in enumerate( box.list_images( ) ): print( '{name}\t{ordinal}\t{id}\t{state}'.format( ordinal=ordinal, **image.__dict__ ) ) class CreationCommand( BoxCommand ): def __init__( self, application ): super( CreationCommand, self ).__init__( application ) default_ec2_keypairs = os.environ.get( 'CGCLOUD_KEYPAIRS', '__me__' ).split( ) self.option( '--keypairs', '-k', metavar='NAME', dest='ec2_keypair_names', nargs='+', default=default_ec2_keypairs, help=heredoc( """The names of EC2 key pairs whose public key is to be injected into the box to facilitate SSH logins. 
For the first listed argument, the so called primary key pair, a matching private key needs to be present locally. All other arguments may use shell-style globs in which case every key pair whose name matches one of the globs will be deployed to the box. The cgcloudagent program that will typically be installed on a box keeps the deployed list of authorized keys up to date in case matching keys are added or removed from EC2. The value of the environment variable CGCLOUD_KEYPAIRS, if that variable is present, overrides the default for this option. The string __me__ anywhere in an argument will be substituted with the name of the IAM user whose credentials are used to issue requests to AWS. An argument beginning with a single @ will be looked up as the name of an IAM user. If that user exists, the name will be used as the name of a key pair. Otherwise an exception is raised. An argument beginning with @@ will be looked up as an IAM group and the name of each user in that group will be used as the name of a keypair. Note that the @ and @@ substitutions depend on the convention that the user and the corresponding key pair have the same name. They only require the respective user or group to exist, while the key pair may be missing. If such a missing key pair is later added, cgcloudagent will automatically add that key pair's public to the list of SSH keys authorized to login to the box. Shell-style globs can not be combined with @ or @@ substitutions within one argument.""" ) ) self.option( '--instance-type', '-t', metavar='TYPE', choices=ec2_instance_types.keys( ), default=os.environ.get( 'CGCLOUD_INSTANCE_TYPE', None ), help=heredoc( """The type of EC2 instance to launch for the box, e.g. t2.micro, m3.small, m3.medium, or m3.large etc. The value of the environment variable CGCLOUD_INSTANCE_TYPE, if that variable is present, overrides the default, an instance type appropriate for the role.""" ) ) self.option( '--virtualization-type', metavar='TYPE', choices=Box.virtualization_types, help=heredoc( """The virtualization type to be used for the instance. This affects the choice of image (AMI) the instance is created from. The default depends on the instance type, but generally speaking, 'hvm' will be used for newer instance types.""" ) ) self.option( '--spot-bid', metavar='AMOUNT', type=float, help=heredoc( """The maximum price to pay for the specified instance type, in dollars per hour as a floating point value, 1.23 for example. Only bids under double the instance type's average price for the past week will be accepted. By default on-demand instances are used. Note that some instance types are not available on the spot market!""" ) ) self.option( '--vpc', metavar='VPC_ID', type=str, dest='vpc_id', help=heredoc( """The ID of a VPC to create the instance and associated security group in. If this option is absent and the AWS account has a default VPC, the default VPC will be used. This is the most common case. If this option is absent and the AWS account has EC2 Classic enabled and the selected instance type supports EC2 classic mode, no VPC will be used. If this option is absent and the AWS account has no default VPC and an instance type that only supports VPC is used, an exception will be raised.""" ) ) self.option( '--subnet', metavar='SUBNET_ID', type=str, dest='subnet_id', help=heredoc( """The ID of a subnet to allocate the instance's private IP address from. Can't be combined with --spot-auto-zone. 
The specified subnet must belong to the specified VPC (or the default VPC if none was given) and reside in the availability zone given via CGCLOUD_ZONE or --zone. If this option is absent, cgcloud will attempt to choose a subnet automatically.""" ) ) self.option( '--spot-launch-group', metavar='NAME', help=heredoc( """The name of an EC2 spot instance launch group. If specified, the spot request will only be fullfilled once all instances in the group can be launched. Furthermore, if any instance in the group needs to be terminated by Amazon, so will the remaining ones, even if their bid is higher than the market price.""" ) ) self.option( '--spot-auto-zone', default=False, action='store_true', help=heredoc( """Ignore --zone/CGCLOUD_ZONE and instead choose the best EC2 availability zone for spot instances based on a heuristic.""" ) ) self.option( '--spot-timeout', metavar='SECONDS', type=float, help=heredoc( """The maximum time to wait for spot instance requests to enter the active state. Requests that are not active when the timeout fires will be cancelled.""" ) ) self.option( '--spot-tentative', default=False, action='store_true', help=heredoc( """Give up on a spot request at the earliest indication of it not being fulfilled immediately.""" ) ) self.option( '--list', default=False, action='store_true', help=heredoc( """List all instances created by this command on success.""" ) ) option_name_re = re.compile( r'^[A-Za-z][0-9A-Za-z_]*$' ) def option( o ): l = o.split( '=', 1 ) if len( l ) != 2: raise ValueError( "An option must be of the form NAME=VALUE. '%s' is not." % o ) k, v = l if not option_name_re.match( k ): raise ValueError( "An option name must start with a letter and contain only " "letters, digits and underscore. '%s' does not." % o ) return k, v self.option( '--option', '-O', metavar='NAME=VALUE', type=option, action='append', default=[ ], dest='role_options', help=heredoc( """Set a role-specific option for the instance. To see a list of options for a role, use the list-options command.""" ) ) self.begin_mutex( ) self.option( '--terminate', '-T', default=None, action='store_true', help=heredoc( """Terminate the box when setup is complete. The default is to leave the box running except when errors occur.""" ) ) self.option( '--never-terminate', '-N', default=None, dest='terminate', action='store_false', help=heredoc( """Never terminate the box, even after errors. This may be useful for a post-mortem diagnosis.""" ) ) self.end_mutex( ) @abstractmethod def run_on_creation( self, box, options ): """ Run on the given box after it was created. """ raise NotImplementedError( ) def preparation_kwargs( self, options, box ): """ Return dict with keyword arguments to be passed box.prepare() """ role_options = box.get_role_options( ) supported_options = set( option.name for option in role_options ) actual_options = set( name for name, value in options.role_options ) for name in actual_options - supported_options: raise UserError( "Options %s not supported by role '%s'." 
% (name, box.role( )) ) resolve_me = functools.partial( box.ctx.resolve_me, drop_hostname=False ) return dict( options.role_options, ec2_keypair_globs=map( resolve_me, options.ec2_keypair_names ), instance_type=options.instance_type, virtualization_type=options.virtualization_type, vpc_id=options.vpc_id, subnet_id=options.subnet_id, spot_bid=options.spot_bid, spot_launch_group=options.spot_launch_group, spot_auto_zone=options.spot_auto_zone ) def creation_kwargs( self, options, box ): return dict( terminate_on_error=options.terminate is not False, spot_timeout=options.spot_timeout, spot_tentative=options.spot_tentative ) def run_on_box( self, options, box ): """ :type box: Box """ spec = box.prepare( **self.preparation_kwargs( options, box ) ) box.create( spec, **self.creation_kwargs( options, box ) ) try: self.run_on_creation( box, options ) except: if options.terminate is not False: with panic( log ): box.terminate( wait=False ) raise else: if options.list: self.list( [ box ] ) if options.terminate is True: box.terminate( ) else: self.log_ssh_hint( options ) # noinspection PyUnresolvedReferences def log_ssh_hint( self, options ): hint = self.ssh_hint( options ) def opt( name, value, default ): return name + ' ' + value if value != default else None cmd = concat( hint.executable, hint.command, (opt( **option ) for option in hint.options), hint.args ) cmd = ' '.join( filter( None, cmd ) ) log.info( "Run '%s' to start using this %s.", cmd, hint.object ) def ssh_hint( self, options ): x = Expando return x( executable=os.path.basename( sys.argv[ 0 ] ), command='ssh', options=[ x( name='-n', value=options.namespace, default=self.default_namespace ), x( name='-z', value=options.availability_zone, default=self.default_zone ) ], args=[ options.role ], object='box' ) class RegisterKeyCommand( ContextCommand ): """ Upload an OpenSSH public key for future injection into boxes. The public key will be imported into EC2 as a keypair and stored verbatim in S3. """ def __init__( self, application, **kwargs ): super( RegisterKeyCommand, self ).__init__( application, **kwargs ) self.option( 'ssh_public_key', metavar='KEY_FILE', help=heredoc( """Path of file containing the SSH public key to upload to the EC2 keypair.""" ) ) self.option( '--force', '-F', default=False, action='store_true', help='Overwrite potentially existing EC2 key pair' ) self.option( '--keypair', '-k', metavar='NAME', dest='ec2_keypair_name', default='__me__', help=heredoc( """The desired name of the EC2 key pair. The name should associate the key with you in a way that it is obvious to other users in your organization. The string __me__ anywhere in the key pair name will be replaced with the name of the IAM user whose credentials are used to issue requests to AWS.""" ) ) def run_in_ctx( self, options, ctx ): with open( options.ssh_public_key ) as f: ssh_public_key = f.read( ) try: ctx.register_ssh_pubkey( ec2_keypair_name=ctx.resolve_me( options.ec2_keypair_name, drop_hostname=False ), ssh_pubkey=ssh_public_key, force=options.force ) except ValueError as e: raise UserError( cause=e ) class ListRolesCommand( Command ): """ List available roles. A role is a template for a box. A box is a virtual machines in EC2, also known as an instance. """ def run( self, options ): print( tabulate( (name , (role.__doc__ or '').strip().split('\n')[0].strip()) for name, role in self.application.roles.iteritems( ) ) ) log.info( "If you are expecting to see more roles listed above, you may need to set/change " "the CGCLOUD_PLUGINS environment variable." 
) # noinspection PyAbstractClass class ImageReferenceCommand( Command ): """ Any command that accepts an image ordinal or AMI ID. >>> app = Application() >>> class FooCmd( ImageReferenceCommand ): ... long_image_option = '--foo' ... short_image_option = '-f' ... def run(self, options): ... pass >>> cmd = FooCmd( app ) >>> cmd.ordinal_or_ami_id( 'bar' ) Traceback (most recent call last): ... ValueError >>> cmd.ordinal_or_ami_id( '' ) Traceback (most recent call last): ... ValueError >>> cmd.ordinal_or_ami_id( '-1') -1 >>> cmd.ordinal_or_ami_id( 'ami-4dcced7d') 'ami-4dcced7d' >>> cmd.ordinal_or_ami_id( 'ami-4dCCED7D') 'ami-4dcced7d' >>> cmd.ordinal_or_ami_id( 'amI-4dCCED7D') Traceback (most recent call last): ... ValueError >>> cmd.ordinal_or_ami_id( 'ami-4dcced7') Traceback (most recent call last): ... ValueError >>> cmd.ordinal_or_ami_id( 'ami-4dCCED7DD') Traceback (most recent call last): ... ValueError """ ami_id_re = re.compile( r'^ami-([0-9a-fA-F]{8})$' ) def ordinal_or_ami_id( self, s ): try: return int( s ) except ValueError: if self.ami_id_re.match( s ): return s.lower( ) else: raise ValueError( ) long_image_option = None short_image_option = None def __init__( self, application ): super( ImageReferenceCommand, self ).__init__( application ) self.option( self.long_image_option, self.short_image_option, metavar='IMAGE', type=self.ordinal_or_ami_id, default=-1, # default to the last one help=heredoc( """An image ordinal, i.e. the index of an image in the list of images for the given role, sorted by creation time. Use the list-images command to print a list of images for a given role. If the ordinal is negative, it will be converted to a positive ordinal by adding the total number of images for this role. Passing -1, for example, selects the most recently created image. Alternatively, an AMI ID, e.g. 'ami-4dcced7d' can be passed in as well.""" ) ) class DeleteImageCommand( ImageReferenceCommand, BoxCommand ): long_image_option = '--image' short_image_option = '-i' def __init__( self, application ): super( DeleteImageCommand, self ).__init__( application ) self.begin_mutex( ) self.option( '--keep-snapshot', '-K', default=False, action='store_true', help=heredoc( """Do not delete the EBS volume snapshot associated with the given image. This will leave an orphaned snapshot which should be removed at a later time using the 'cgcloud cleanup' command.""" ) ) self.option( '--quick', '-Q', default=False, action='store_true', help=heredoc( """Exit immediately after deregistration request has been made, don't wait until the image is deregistered. Implies --keep-snapshot.""" ) ) self.end_mutex( ) def run_on_box( self, options, box ): box.delete_image( options.image, wait=not options.quick, delete_snapshot=not options.keep_snapshot ) class RecreateCommand( ImageReferenceCommand, CreationCommand ): """ Recreate a box from an image that was taken from an earlier incarnation of the box """ long_image_option = '--boot-image' short_image_option = '-i' def __init__( self, application ): super( RecreateCommand, self ).__init__( application ) self.option( '--quick', '-Q', default=False, action='store_true', help=heredoc( """Don't wait for the box to become running or reachable via SSH. 
If the agent is disabled in the boot image (this is uncommon, see the --no-agent option to the 'create' command), no additional SSH keypairs will be deployed.""" ) ) def preparation_kwargs( self, options, box ): return dict( super( RecreateCommand, self ).preparation_kwargs( options, box ), image_ref=options.boot_image ) def creation_kwargs( self, options, box ): return dict( super( RecreateCommand, self ).creation_kwargs( options, box ), wait_ready=not options.quick ) def run_on_creation( self, box, options ): pass class CreateCommand( CreationCommand ): """ Create a box performing the specified role, install an OS and additional packages on it and optionally create an AMI image of it. """ def __init__( self, application ): super( CreateCommand, self ).__init__( application ) self.option( '--boot-image', '-i', metavar='AMI_ID', help=heredoc( """The AMI ID of the image from which to create the box. This argument is optional and the default is determined automatically based on the role. Typically, this option does not need to be used.""" ) ) self.option( '--no-agent', default=False, action='store_true', help=heredoc( """Don't install the cghub-cloud-agent package on the box. One note-worthy effect of using this option this is that the SSH keys will be installed initially, but not maintained over time.""" ) ) self.option( '--create-image', '-I', default=False, action='store_true', help='Create an image of the box as soon as setup completes.' ) # FIXME: Take a second look at this: Does it work. Is it necessary? self.option( '--upgrade', '-U', default=False, action='store_true', help=heredoc( """Bring the package repository as well as any installed packages up to date, i.e. do what on Ubuntu is achieved by doing 'sudo apt-get update ; sudo apt-get upgrade'.""" ) ) def preparation_kwargs( self, options, box ): return dict( super( CreateCommand, self ).preparation_kwargs( options, box ), image_ref=options.boot_image, enable_agent=not options.no_agent ) def run_on_creation( self, box, options ): box.setup( upgrade_installed_packages=options.upgrade ) if options.create_image: box.stop( ) box.image( ) if options.terminate is not True: box.start( ) class ListOptionsCommand( RoleCommand ): def run_on_role( self, options, ctx, role ): role_options = role.get_role_options( ) if role_options: for option in role_options: print( "{name}: {help}".format( **option.to_dict( ) ) ) else: print( 'The role %s does not define any options' % role.role( ) ) class CleanupCommand( ContextCommand ): """ Lists and optionally deletes unused AWS resources after prompting for confirmation. """ def run_in_ctx( self, options, ctx ): self.cleanup_image_snapshots( ctx ) self.cleanup_ssh_pubkeys( ctx ) @staticmethod def cleanup_ssh_pubkeys( ctx ): unused_fingerprints = ctx.unused_fingerprints( ) if unused_fingerprints: print( 'The following public keys in S3 are not referenced by any EC2 keypairs:' ) for fingerprint in unused_fingerprints: print( fingerprint ) if 'yes' == prompt( 'Delete these public keys from S3? (yes/no)', default='no' ): ctx.delete_fingerprints( unused_fingerprints ) else: print( 'No orphaned public keys in S3.' ) @staticmethod def cleanup_image_snapshots( ctx ): unused_snapshots = ctx.unused_snapshots( ) if unused_snapshots: print( 'The following snapshots are not referenced by any images:' ) for snapshot_id in unused_snapshots: print( snapshot_id ) if 'yes' == prompt( 'Delete these snapshots? 
(yes/no)', default='no' ): ctx.delete_snapshots( unused_snapshots ) else: print( 'No unused EBS volume snapshots in EC2.' ) class ResetSecurityCommand( ContextCommand ): """ Delete security-related objects like IAM instance profiles or EC2 security groups in a namespace and its children. """ def run_in_ctx( self, options, ctx ): message = ("Do you really want to delete all IAM instance profiles, IAM roles and EC2 " "security groups in namespace %s and its children? Although these resources " "will be created on-the-fly for newly created boxes, existing boxes will " "likely be impacted negatively." % ctx.namespace) if 'yes' == prompt( message + ' (yes/no)', default='no' ): ctx.reset_namespace_security( ) class UpdateInstanceProfile( InstanceCommand ): """ Update the instance profile and associated IAM roles for a given role. This command ensures that a box of this role has accurate and up-to-date privileges to interact with AWS resources. The instance profile is updated whenever a box is created. Use this command to update the instance profile for existing boxes. """ def run_on_instance( self, options, box ): box.get_instance_profile_arn( ) cgcloud-releases-1.6.0/core/src/cgcloud/core/common_iam_policies.py000066400000000000000000000021071301512357500254070ustar00rootroot00000000000000ec2_read_only_policy = dict( Version="2012-10-17", Statement=[ dict( Effect="Allow", Resource="*", Action="ec2:Describe*" ), dict( Effect="Allow", Resource="*", Action="autoscaling:Describe*" ), dict( Effect="Allow", Resource="*", Action="elasticloadbalancing:Describe*" ), dict( Effect="Allow", Resource="*", Action=[ "cloudwatch:ListMetrics", "cloudwatch:GetMetricStatistics", "cloudwatch:Describe*" ] ) ] ) s3_read_only_policy = dict( Version="2012-10-17", Statement=[ dict( Effect="Allow", Resource="*", Action=[ "s3:Get*", "s3:List*" ] ) ] ) iam_read_only_policy = dict( Version="2012-10-17", Statement=[ dict( Effect="Allow", Resource="*", Action=[ "iam:List*", "iam:Get*" ] ) ] ) ec2_full_policy = dict( Version="2012-10-17", Statement=[ dict( Effect="Allow", Resource="*", Action="ec2:*" ) ] ) s3_full_policy = dict( Version="2012-10-17", Statement=[ dict( Effect="Allow", Resource="*", Action="s3:*" ) ] ) sdb_full_policy = dict( Version="2012-10-17", Statement=[ dict( Effect="Allow", Resource="*", Action="sdb:*" ) ] ) cgcloud-releases-1.6.0/core/src/cgcloud/core/deprecated.py000066400000000000000000000004151301512357500235020ustar00rootroot00000000000000def deprecated( artifact ): # TODO: print a warning when deprecated class or function is used artifact.__cgcloud_core_deprecated__ = True return artifact def is_deprecated( artifact ): return getattr( artifact, '__cgcloud_core_deprecated__', False ) cgcloud-releases-1.6.0/core/src/cgcloud/core/docker_box.py000066400000000000000000000133061301512357500235240ustar00rootroot00000000000000import logging from pipes import quote from fabric.operations import run from bd2k.util.strings import interpolate as fmt from cgcloud.core.box import fabric_task from cgcloud.core.ubuntu_box import UbuntuBox from cgcloud.fabric.operations import sudo from cgcloud.lib.util import heredoc log = logging.getLogger( __name__ ) class DockerBox( UbuntuBox ): """ A mixin for Docker.
Based on the official shell script from https://docs.docker.com/installation/ubuntulinux/#installation """ @fabric_task def _setup_package_repos( self ): assert run( 'test -e /usr/lib/apt/methods/https', warn_only=True ).succeeded, \ "Need HTTPS support in apt-get in order to install from the Docker repository" super( DockerBox, self )._setup_package_repos( ) sudo( ' '.join( [ 'apt-key', 'adv', '--keyserver', 'hkp://p80.pool.sks-keyservers.net:80', '--recv-keys', '58118E89F3A912897C070ADBF76221572C52609D' ] ) ) codename = self.release( ).codename sudo( fmt( 'echo deb https://apt.dockerproject.org/repo ubuntu-{codename} main ' '> /etc/apt/sources.list.d/docker.list' ) ) @fabric_task def _list_packages_to_install( self ): kernel = run( 'uname -r' ) kernel_version = tuple( map( int, kernel.split( '.' )[ :2 ] ) ) assert kernel_version >= (3, 10), \ "Need at least kernel version 3.10, found '%s'." % kernel kernel = run( 'uname -r' ) assert kernel.endswith( '-generic' ), \ 'Current kernel is not supported by the linux-image-extra-virtual package.' packages = super( DockerBox, self )._list_packages_to_install( ) packages += [ 'docker-engine=1.9.1-0~trusty', 'linux-image-extra-' + kernel, 'linux-image-extra-virtual' ] if run( 'cat /sys/module/apparmor/parameters/enabled' ).lower( ).startswith( 'y' ): packages += [ 'apparmor' ] return packages def _post_install_packages( self ): super( DockerBox, self )._post_install_packages( ) self._setup_docker( ) def _docker_users( self ): return [ self.admin_account( ) ] def _docker_data_prefixes( self ): return [ self._ephemeral_mount_point( 0 ) ] @fabric_task def _setup_docker( self ): for docker_user in set( self._docker_users( ) ): sudo( "usermod -aG docker " + docker_user ) prefixes = self._docker_data_prefixes( ) if prefixes: prefixes = ' '.join( map( quote, prefixes ) ) self._run_init_script( 'docker', 'stop' ) # Make sure Docker's aufs backend isn't mounted anymore sudo( 'umount /var/lib/docker/aufs', warn_only=True ) # Backup initial state of data directory so we can initialize an empty ephemeral volume sudo( 'tar -czC /var/lib docker > /var/lib/docker.tar.gz' ) # Then delete it and recreate it as an empty directory to serve as the bind mount point sudo( 'rm -rf /var/lib/docker && mkdir /var/lib/docker' ) self._register_init_script( 'dockerbox', heredoc( """ description "Placement of /var/lib/docker" console log start on starting docker stop on stopped docker pre-start script echo echo "This is the dockerbox pre-start script" set -ex if mountpoint -q /var/lib/docker; then echo "The directory '/var/lib/docker' is already mounted, exiting." else for prefix in {prefixes}; do # Prefix must refer to a separate volume, e.g. ephemeral or EBS if mountpoint -q "$prefix"; then # Make sure Docker's aufs backend isn't mounted anymore umount /var/lib/docker/aufs || true if test -d "$prefix/var/lib/docker"; then echo "The directory '$prefix/var/lib/docker' already exists, using it." else mkdir -p "$prefix/var/lib" # If /var/lib/docker contains files ... if python -c 'import os, sys; sys.exit( 0 if os.listdir( sys.argv[1] ) else 1 )' /var/lib/docker; then # ... move it to prefix ... mv /var/lib/docker "$prefix/var/lib" # ... and recreate it as an empty mount point, ... mkdir -p /var/lib/docker else # ... otherwise untar the initial backup. tar -xzC "$prefix/var/lib" < /var/lib/docker.tar.gz fi fi # Now bind-mount into /var/lib/docker mount --bind "$prefix/var/lib/docker" /var/lib/docker break else echo "The prefix directory '$prefix' is not a mount point, skipping." 
fi done fi end script""" ) ) self._run_init_script( 'docker', 'start' ) cgcloud-releases-1.6.0/core/src/cgcloud/core/fedora_box.py000066400000000000000000000055761301512357500235270ustar00rootroot00000000000000from abc import abstractmethod import re from operator import attrgetter from fabric.operations import sudo from cgcloud.core.box import fabric_task from cgcloud.core.agent_box import AgentBox from cgcloud.core.cloud_init_box import CloudInitBox from cgcloud.core.rc_local_box import RcLocalBox from cgcloud.core.yum_box import YumBox class FedoraBox( YumBox, AgentBox, CloudInitBox, RcLocalBox ): """ A box that boots of an official Fedora cloud AMI """ @abstractmethod def release( self ): """ :return: the version number of the Fedora release, e.g. 17 :rtype: int """ raise NotImplementedError def admin_account( self ): return "fedora" if self.release( ) >= 19 else "ec2-user" def _base_image( self, virtualization_type ): release = self.release( ) name = None if release < 21: name = 'Fedora-x86_64-%i-*' % release elif release == 21: name = 'Fedora-Cloud-Base-*-21.x86_64-*' else: name = 'Fedora-Cloud-Base-%s-*.x86_64-*' % release images = self.ctx.ec2.get_all_images( owners=[ '125523088429' ], filters={ 'name': name, 'root-device-type': 'ebs', 'virtualization-type': virtualization_type } ) images = [ i for i in images if not re.search( 'Alpha|Beta', i.name ) ] if not images: raise self.NoSuchImageException( "Can't find any AMIs for Fedora %i and virtualization type %s" % ( release, virtualization_type ) ) images.sort( key=attrgetter( 'name' ), reverse=True ) if False: if len( images ) > 1: raise RuntimeError( "Found more than one AMI for Fedora %i and virtualization type %s" % ( release, virtualization_type ) ) return images[0] def _list_packages_to_install( self ): return super( FedoraBox, self )._list_packages_to_install( ) + [ 'redhat-lsb' # gets us lsb_release ] def _get_package_substitutions( self ): return super( FedoraBox, self )._get_package_substitutions( ) + [ # Without openssl-devel, the httplib module disables HTTPS support. The underlying # 'import _ssl' fails with ImportError: /usr/lib64/python2.7/lib-dynload/_ssl.so: # symbol SSLeay_version, version OPENSSL_1.0.1 not defined in file libcrypto.so.10 # with link time reference. This packet substitution ensures that if Python is to be installed, openssl-devel is too. ( 'python', ( 'python', 'openssl-devel' ) ) ] @fabric_task def _get_rc_local_path( self ): rc_local_path = '/etc/rc.d/rc.local' sudo( 'test -f {f} || echo "#!/bin/sh" > {f} && chmod +x {f}'.format( f=rc_local_path ) ) return rc_local_path cgcloud-releases-1.6.0/core/src/cgcloud/core/generic_boxes.py000066400000000000000000000216741301512357500242300ustar00rootroot00000000000000from urlparse import urlparse from fabric.operations import run, sudo, os from cgcloud.core.deprecated import deprecated from cgcloud.core.box import fabric_task from cgcloud.core.centos_box import CentosBox from cgcloud.core.fedora_box import FedoraBox from cgcloud.core.ubuntu_box import UpstartUbuntuBox, SystemdUbuntuBox @deprecated class GenericCentos5Box( CentosBox ): """ Good ole CentOS 5 from 1995, more or less """ def release( self ): return '5.8' @classmethod def recommended_instance_type( cls ): # On t1.micro, the agent installation runs out of memory return "m1.small" @classmethod def supported_virtualization_types( cls ): return [ 'paravirtual' ] def __update_sudo( self ): """ 5.8 has sudo 1.7.2p1 whose -i switch is horribly broken. 
For example, sudo -u jenkins -i bash -c 'echo bla >> ~/foo' doesn't work as expected. In sudo 1.8.7, it does. We do need sudo -i in some of the subclasses (see cghub.fabric.operations for how we hack -i into Fabric 1.7.x) and so we install a newer version of the sudo rpm from the sudo maintainer. This method should to be invoked early on during setup. """ self._yum_local( is_update=True, rpm_urls=[ 'ftp://ftp.sudo.ws/pub/sudo/packages/Centos/5/sudo-1.8.14-4.el5.x86_64.rpm' ] ) def _on_instance_ready( self, first_boot ): super( GenericCentos5Box, self )._on_instance_ready( first_boot ) if self.generation == 0 and first_boot: self.__update_sudo( ) if False: self._update_openssh( ) def _ephemeral_mount_point( self, i ): return "/mnt" if i == 0 else None # FIXME: These two methods assume that this class is derived from AgentBox. def _get_package_substitutions( self ): return super( GenericCentos5Box, self )._get_package_substitutions( ) + [ ('python', 'python26'), ('python-devel', 'python26-devel') ] def _post_install_packages( self ): if 'python' in self._list_packages_to_install( ): self.__update_python( ) super( GenericCentos5Box, self )._post_install_packages( ) @fabric_task def __update_python( self ): # The pip from the python-pip package is hard-wired to the python 2.4 from the python # package. Also it's ancient, fossilized crap. To get an up-to-date pip that is # wired to python 2.6 from the python26 package we have to jump though some hoops. # First, we need to ignore certs since the CA package on CentOS 5 is, you guessed it, # out of date. We do this globally because the downloaded .py scripts execute wget # internally. Nevertheless, we got cert errors with github.com and so we are using # curl instead to download the scripts from there. sudo( 'echo "check_certificate=off" > /root/.wgetrc' ) # Then install setuptools ... run( 'curl -O https://bitbucket.org/pypa/setuptools/raw/bootstrap/ez_setup.py' ) sudo( 'python2.6 ez_setup.py' ) # .. and pip. 
run( 'curl -O https://raw.githubusercontent.com/pypa/pip/master/contrib/get-pip.py' ) sudo( 'python2.6 get-pip.py' ) sudo( 'rm /root/.wgetrc' ) class GenericCentos6Box( CentosBox ): """ Generic box with Centos 6.4 """ def release( self ): return '6.4' def _ephemeral_mount_point( self, i ): return "/mnt/ephemeral" if i == 0 else None def _on_instance_ready( self, first_boot ): super( GenericCentos6Box, self )._on_instance_ready( first_boot ) if self.generation == 0 and first_boot: if False: self._update_openssh( ) @deprecated class GenericUbuntuLucidBox( UpstartUbuntuBox ): def release( self ): return self.Release( codename='lucid', version='10.04' ) @classmethod def supported_virtualization_types( cls ): return [ 'paravirtual' ] def _get_virtual_block_device_prefix( self ): return "/dev/sd" @fabric_task def __update_sudo( self ): """ See GenericCentos5Box """ url = 'ftp://ftp.sudo.ws/pub/sudo/packages/Ubuntu/10.04/sudo_1.8.14-4_amd64.deb' package = os.path.basename( urlparse( url ).path ) run( 'wget ' + url ) sudo( 'sudo dpkg --force-confold -i ' + package ) run( 'rm ' + package ) def _on_instance_ready( self, first_boot ): super( GenericUbuntuLucidBox, self )._on_instance_ready( first_boot ) if self.generation == 0 and first_boot: self.__update_sudo( ) def _get_package_substitutions( self ): return super( GenericUbuntuLucidBox, self )._get_package_substitutions( ) + [ ('git', 'git-core') ] @deprecated class GenericUbuntuMaverickBox( UpstartUbuntuBox ): def release( self ): return self.Release( codename='maverick', version='10.10' ) @classmethod def supported_virtualization_types( cls ): return [ 'paravirtual' ] @deprecated class GenericUbuntuNattyBox( UpstartUbuntuBox ): def release( self ): return self.Release( codename='natty', version='11.04' ) @classmethod def supported_virtualization_types( cls ): return [ 'paravirtual' ] @deprecated class GenericUbuntuOneiricBox( UpstartUbuntuBox ): def release( self ): return self.Release( codename='oneiric', version='11.10' ) @classmethod def supported_virtualization_types( cls ): return [ 'paravirtual' ] class GenericUbuntuPreciseBox( UpstartUbuntuBox ): """ Generic box with Ubuntu 12.04 LTS (EOL April 2017) """ def release( self ): return self.Release( codename='precise', version='12.04' ) @deprecated class GenericUbuntuQuantalBox( UpstartUbuntuBox ): def release( self ): return self.Release( codename='quantal', version='12.10' ) @deprecated class GenericUbuntuRaringBox( UpstartUbuntuBox ): def release( self ): return self.Release( codename='raring', version='13.04' ) @deprecated class GenericUbuntuSaucyBox( UpstartUbuntuBox ): def release( self ): return self.Release( codename='saucy', version='13.10' ) class GenericUbuntuTrustyBox( UpstartUbuntuBox ): """ Generic box with Ubuntu 14.04 LTS (EOL April 2019) """ def release( self ): return self.Release( codename='trusty', version='14.04' ) @deprecated class GenericUbuntuUtopicBox( UpstartUbuntuBox ): def release( self ): return self.Release( codename='utopic', version='14.10' ) class GenericUbuntuVividBox( SystemdUbuntuBox ): """ Generic box with Ubuntu 15.04 (EOL February 4, 2016) """ def release( self ): return self.Release( codename='vivid', version='15.04' ) @deprecated class GenericFedora17Box( FedoraBox ): """ This one doesn't work since the AMI was deleted by the Fedora guys """ def release( self ): return 17 @deprecated class GenericFedora18Box( FedoraBox ): """ This one doesn't work since the AMI was deleted by the Fedora guys """ def release( self ): return 18 @deprecated class 
GenericFedora19Box( FedoraBox ): def release( self ): return 19 @classmethod def recommended_instance_type( cls ): # On t1.micro, the agent installation runs out of memory return "m1.small" @classmethod def supported_virtualization_types( cls ): return [ 'paravirtual' ] @deprecated class GenericFedora20Box( FedoraBox ): def release( self ): return 20 @classmethod def recommended_instance_type( cls ): # On t1.micro, the agent installation runs out of memory return "m1.small" @classmethod def supported_virtualization_types( cls ): return [ 'paravirtual' ] # FIXME: Consider pulling this up def _populate_cloud_config( self, instance_type, user_data ): super( GenericFedora20Box, self )._populate_cloud_config( instance_type, user_data ) user_data[ 'bootcmd' ][ 0:0 ] = [ self._get_package_installation_command( 'yum-plugin-fastestmirror' ), [ 'yum', 'clean', 'all' ] ] class GenericFedora21Box( FedoraBox ): """ Generic box with Fedora 21 """ def release( self ): return 21 class GenericFedora22Box( FedoraBox ): """ Generic box with Fedora 22 """ def release( self ): return 22 def _on_instance_ready( self, first_boot ): if first_boot: self.__fix_stupid_locale_problem( ) super( GenericFedora22Box, self )._on_instance_ready( first_boot ) @fabric_task def __fix_stupid_locale_problem( self ): """ The bug: https://bugzilla.redhat.com/show_bug.cgi?id=1261249 The workaround: https://www.banym.de/linux/fedora/problems-with-missing-locale-files-on-fedora-20-made-libvirtd-service-not-starting """ sudo( 'localedef -c -i en_US -f UTF-8 en_US.UTF-8' ) cgcloud-releases-1.6.0/core/src/cgcloud/core/init_box.py000066400000000000000000000044341301512357500232220ustar00rootroot00000000000000from StringIO import StringIO from abc import abstractmethod from fabric.operations import sudo, put from cgcloud.core.box import Box, fabric_task class AbstractInitBox( Box ): @abstractmethod def _register_init_script( self, name, script ): raise NotImplementedError( ) @abstractmethod def _run_init_script( self, name, command='start' ): raise NotImplementedError( ) class UpstartBox( AbstractInitBox ): """ A box that uses Ubuntu's upstart """ @fabric_task def _register_init_script( self, name, script ): path = '/etc/init/%s.conf' % name put( local_path=StringIO( script ), remote_path=path, use_sudo=True ) sudo( "chown root:root '%s'" % path ) @fabric_task def _run_init_script( self, name, command='start' ): sudo( "service %s %s" % ( name, command ) ) class SysvInitdBox( AbstractInitBox ): """ A box that supports SysV-style init scripts. This is more or less a kitchen sink of functionality that seems to work on CentOS and Fedora. """ @staticmethod def _init_script_path( name ): return '/etc/init.d/%s' % name @fabric_task def _register_init_script( self, name, script ): script_path = self._init_script_path( name ) put( local_path=StringIO( script ), remote_path=script_path, mode=0755, use_sudo=True ) sudo( "chown root:root '%s'" % script_path ) sudo( 'sudo chkconfig --add %s' % name ) @fabric_task def _run_init_script( self, name, command='start' ): sudo( "service %s %s" % ( name, command ) ) class SystemdBox( AbstractInitBox ): """ A box that supports systemd which hopefully will supercede all other init systems for Linux. I don't care which *expletive* init system they settle on as long as they stop reinventing the wheel with a different number of corners. 
""" @fabric_task def _register_init_script( self, name, script ): path = '/lib/systemd/system/%s.service' % name put( local_path=StringIO( script ), remote_path=path, use_sudo=True ) sudo( "chown root:root '%s'" % path ) @fabric_task def _run_init_script( self, name, command='start' ): sudo( 'systemctl %s %s' % ( command, name ) ) cgcloud-releases-1.6.0/core/src/cgcloud/core/mesos_box.py000066400000000000000000000031621301512357500234020ustar00rootroot00000000000000from fabric.operations import run from bd2k.util.strings import interpolate as fmt from cgcloud.core.box import fabric_task from cgcloud.core.ubuntu_box import UbuntuBox from cgcloud.fabric.operations import sudo, pip class MesosBox( UbuntuBox ): """ A mixin for getting Mesos installed from Mesosphere's Debian repository """ def _mesos_version( self ): return '0.25.1' def _mesos_egg_version( self ): return '0.25.0' @fabric_task def _setup_package_repos( self ): super( MesosBox, self )._setup_package_repos( ) sudo( 'apt-key adv --keyserver keyserver.ubuntu.com --recv E56151BF' ) codename = self.release( ).codename sudo( fmt( 'echo "deb http://repos.mesosphere.io/ubuntu {codename} main" ' '> /etc/apt/sources.list.d/mesosphere.list' ) ) def _list_packages_to_install( self ): return super( MesosBox, self )._list_packages_to_install( ) + [ 'python2.7', 'mesos=' + self._mesos_version( ) + '-*' ] def _post_install_packages( self ): super( MesosBox, self )._post_install_packages( ) self.__install_mesos_egg( ) @fabric_task def __install_mesos_egg( self ): egg = 'mesos-' + self._mesos_egg_version( ) + '-py2.7-linux-x86_64.egg' version = self.release( ).version run( fmt( 'wget http://downloads.mesosphere.io/master/ubuntu/{version}/{egg}' ) ) # We need a newer version of protobuf than what comes default on Ubuntu pip( 'install --upgrade protobuf', use_sudo=True ) sudo( 'easy_install -a ' + egg ) run( 'rm ' + egg ) cgcloud-releases-1.6.0/core/src/cgcloud/core/package_manager_box.py000066400000000000000000000137451301512357500253510ustar00rootroot00000000000000from abc import abstractmethod from itertools import chain from cgcloud.core.box import Box class PackageManagerBox( Box ): """ A box that uses a package manager like apt-get or yum. """ @abstractmethod def _sync_package_repos( self ): """ Update the cached package descriptions from remote package repositories, e.g. apt-get update on Ubuntu """ raise NotImplementedError( ) @abstractmethod def _upgrade_installed_packages( self ): """ Update all installed package to their lates version, e.g. apt-get update on Ubuntu. """ raise NotImplementedError( ) @abstractmethod def _install_packages( self, packages ): """ Install the given packages :param packages: A list of package names """ raise NotImplementedError( ) def _setup_package_repos( self ): """ Set up additional remote package repositories. """ pass def _list_packages_to_install( self ): """ Return the list of packages to be installed. """ return [ 'htop' ] def _pre_install_packages( self ): """ Invoked immediately before package installation. """ pass def _post_install_packages( self ): """ Invoked immediately after package installation. """ pass def _get_package_substitutions( self ): """ Return a list of package substitutions. Each substitution is a tuple of two elements. The first element, aka the original, is the name of a package to be installed, the second element, aka the substitutes, is an iterable of names of the packages that should be used instead. An empty iterable will prevent the original from being installed. 
If the second element is an instance of basestring, it will be treated like a singleton of that string. If the second ekement is None, it will be treated like an empty iterable. Substitutes are subjected to substitution, too. The dictionary may contain cycles. The returned list will be passed to the dict() constructor. If it contains more than one tuple with the same first element, only the last entry will be significant. For example, [ ('a','b'), ('a','c') ] is equivalent to [ ('a','c') ]. """ return [ ] def setup( self, upgrade_installed_packages=False ): """ :param upgrade_installed_packages: Bring the package repository as well as any installed packages up to date, i.e. do what on Ubuntu is achieved by doing 'sudo apt-get update ; sudo apt-get upgrade'. """ self._setup_package_repos( ) self._sync_package_repos( ) self._pre_install_packages( ) substitutions = dict( self._get_package_substitutions( ) ) packages = self._list_packages_to_install( ) packages = list( self.__substitute_packages( substitutions, packages ) ) self._install_packages( packages ) self._post_install_packages( ) if upgrade_installed_packages: self._upgrade_installed_packages( ) # The upgrade might involve a kernel update, so we'll reboot to be safe self.reboot( ) @abstractmethod def _ssh_service_name( self ): raise NotImplementedError( ) def _substitute_package( self, package ): """ Return the set of packages that substitute the given package on this box. """ substitutions = dict( self._get_package_substitutions( ) ) return self.__substitute_packages( substitutions, [ package ] ) @classmethod def __substitute_package( cls, substitutions, package, history=None ): """ Apply the given substitutions map on the package argument. Handles cycles as well as None keys and values. >>> substitute_package = PackageManagerBox._PackageManagerBox__substitute_package >>> substitute_package( {}, 'a' ) set(['a']) >>> substitute_package( { 'a': 'a' }, 'a' ) set(['a']) >>> substitute_package( { 'a': None }, 'a' ) set([]) >>> substitute_package( { 'a': [] }, 'a' ) set([]) >>> substitute_package( { 'a': 'b' }, 'a' ) set(['b']) >>> substitute_package( { 'a': ['b'] }, 'a' ) set(['b']) >>> substitute_package( { 'a': 'b' }, 'b' ) set(['b']) >>> substitute_package( { 'a': ['b'] }, 'b' ) set(['b']) >>> substitute_package( { 'a': 'b' }, 'a' ) set(['b']) >>> substitute_package( { 'a': 'b', 'b':'c', 'c':'a' }, 'a' ) set(['a']) >>> substitute_package( { 'a':['a','b'], 'b':['b','c'], 'c':['c','a'] }, 'a' ) == {'a','b','c'} True >>> substitute_package( { 'a':['a','b'], 'b':None }, 'a' ) set(['a']) >>> substitute_package( { 'a':['a','b'], 'b':[] }, 'a' ) set(['a']) >>> substitute_package( { 'a':['a','b'], 'b':'c' }, 'a' ) == {'a', 'c'} True """ if not isinstance( package, basestring ): raise ValueError( "Package must be a string" ) if history is None: history = { package } else: if package in history: return { package } history.add( package ) try: substitutes = substitutions[ package ] except KeyError: return { package } if substitutes is None: return set( ) elif isinstance( substitutes, basestring ): substitute = substitutes return cls.__substitute_package( substitutions, substitute, history ) else: return cls.__substitute_packages( substitutions, substitutes, history ) @classmethod def __substitute_packages( cls, substitutions, substitutes, history=None ): return set( chain.from_iterable( cls.__substitute_package( substitutions, substitute, history ) for substitute in substitutes ) ) 
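# --- Illustrative sketch, not part of the original code base ---------------
# The hypothetical subclass below shows how a concrete box would typically
# plug into the machinery above: it extends _list_packages_to_install() and
# declares substitutions via _get_package_substitutions(), which setup()
# resolves transitively before calling _install_packages(). The class name
# and the package names are assumptions chosen purely for illustration.
class _ExampleSubstitutingBox( PackageManagerBox ):
    def _list_packages_to_install( self ):
        return super( _ExampleSubstitutingBox, self )._list_packages_to_install( ) + [
            'python-dev' ]

    def _get_package_substitutions( self ):
        return super( _ExampleSubstitutingBox, self )._get_package_substitutions( ) + [
            # install python-devel whenever python-dev is requested
            ( 'python-dev', 'python-devel' ),
            # suppress htop, which the base class requests by default
            ( 'htop', None ) ]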
cgcloud-releases-1.6.0/core/src/cgcloud/core/project.py000066400000000000000000000047771301512357500230670ustar00rootroot00000000000000import glob import os import pkg_resources from bd2k.util.collections import rindex def project_artifacts( project_name ): """ Similar to project_artifact but including dependent project artifacts """ # FIXME: This is a bit simplistic if project_name == 'lib': return [ project_artifact( project_name ) ] else: return [ project_artifact( 'lib' ), project_artifact( project_name ) ] def project_artifact( project_name ): """ Resolve the name of a sibling project to something that can be passed to pip in order to get that project installed. The version of the sibling project is assumed to be identical to the currently installed version of this project (cgcloud-core). If the version can't be determined, a source distribution is looked up in the 'dist' subdirectory of the sibling project. This is likely to be the case in development mode, i.e. if this project was installed via 'setup.py develop'. If neither version nor source distribution can be determined, an exception will be raised. :param project_name: the name of a sibling project such as 'agent' or 'spark-tools' :return: Either an absolute path to a source distribution or a requirement specifier to be looked up in the Python package index (PyPI). """ dir_path = os.path.abspath( __file__ ).split( os.path.sep ) try: # If the 'src' directory is in the module's file path, we must be in development mode. i = rindex( dir_path, 'src' ) except ValueError: # Otherwise, we must be installed and need to determine our current version. version = pkg_resources.get_distribution( 'cgcloud-core' ).version return 'cgcloud-%s==%s' % (project_name, version) else: dir_path = os.path.sep.join( dir_path[ :i ] ) project_path = os.path.join( os.path.dirname( dir_path ), project_name ) sdist_glob = os.path.join( project_path, 'dist', 'cgcloud-%s*.tar.gz' % project_name ) sdist = glob.glob( sdist_glob ) if len( sdist ) == 1: sdist = sdist[ 0 ] elif sdist: raise RuntimeError( "Can't decide which of these is the '%s' source distribution: %s" % ( project_name, sdist) ) else: raise RuntimeError( "Can't find '%s' source distribution. Looking for '%s'. You may " "just need to run 'make sdist' to fix this" % ( project_name, sdist_glob) ) return sdist cgcloud-releases-1.6.0/core/src/cgcloud/core/rc_local_box.py000066400000000000000000000137301301512357500240340ustar00rootroot00000000000000from collections import namedtuple from contextlib import closing from StringIO import StringIO import re from fabric.operations import get, put, sudo from cgcloud.lib.util import prepend_shell_script from cgcloud.core.box import fabric_task, Box InitCommand = namedtuple( "InitCommand", [ "command", "provides", "depends" ] ) class RcLocalBox( Box ): """ A mixin for implementing Box._register_init_command(), i.e. the ability to run an arbitrary command everytime a box is booted, using the rc.local mechanism that most distributions provide. 
""" def __init__( self, ctx ): super( RcLocalBox, self ).__init__( ctx ) self._init_commands = [ ] @fabric_task def _register_init_command( self, cmd ): rc_local_path = self._get_rc_local_path( ) self._prepend_remote_shell_script( script=cmd, remote_path=rc_local_path, use_sudo=True, mirror_local_mode=True ) sudo( 'chown root:root {0} && chmod +x {0}'.format( rc_local_path ) ) @fabric_task def _get_rc_local_path( self ): """ Return the canonical path to /etc/rc.local or an equivalent shell script that gets executed during boot up. The last component in the path must not be be a symlink, other components may be. """ # might be a symlink but prepend_remote_shell_script doesn't work with symlinks return sudo( 'readlink -f /etc/rc.local' ) @fabric_task def _prepend_remote_shell_script( self, script, remote_path, **put_kwargs ): """ Insert the given script into the remote file at the given path before the first script line. See prepend_shell_script() for a definition of script line. :param script: the script to be inserted :param remote_path: the path to the file on the remote host :param put_kwargs: arguments passed to Fabric's put operation """ with closing( StringIO( ) ) as out_file: with closing( StringIO( ) ) as in_file: get( remote_path=remote_path, local_path=in_file ) in_file.seek( 0 ) prepend_shell_script( '\n' + script, in_file, out_file ) out_file.seek( 0 ) put( remote_path=remote_path, local_path=out_file, **put_kwargs ) env_entry_re = re.compile( r'^\s*([^=\s]+)\s*=\s*"?(.*?)"?\s*$' ) @classmethod def _patch_etc_environment( cls, env_file, dirs=None, dirs_var='PATH', env_pairs=None ): r""" Patch /etc/environment by A) adding a list of directories to a PATH o PATH-like variable and/or B) adding other environment variables to it. :param env_file: A seekable file handle to /etc/environment or a file of that format :param list dirs: A list of directory paths to be added to the /etc/environment entry for PATH, or the entry referenced by dirs_var :param str dirs_var: The name of the variable to append `dirs` to :param dict env_pairs: A dictionary with other environment variable to append >>> f=StringIO( 'FOO = " BAR " \n PATH =foo:bar\nBLA="FASEL"' ) >>> f.seek( 0, 2 ) # seek to end as if file was opened with mode 'a' >>> RcLocalBox._patch_etc_environment( f, dirs=[ "new1" ] ) >>> f.getvalue() 'BLA="FASEL"\nFOO=" BAR "\nPATH="foo:bar:new1"\n' >>> RcLocalBox._patch_etc_environment( f, dirs=[ "new2" ], dirs_var='PATH2' ) >>> f.getvalue() 'BLA="FASEL"\nFOO=" BAR "\nPATH="foo:bar:new1"\nPATH2="new2"\n' """ def parse_entry( s ): m = cls.env_entry_re.match( s ) return m.group( 1 ), m.group( 2 ) env_file.seek( 0 ) env = dict( parse_entry( _ ) for _ in env_file.read( ).splitlines( ) ) # Do we have directories to add to a path? if dirs is not None: path = filter( None, env.get( dirs_var, '' ).split( ':' ) ) path.extend( dirs ) env[ dirs_var ] = ':'.join( path ) # Do we have other environment variables to write? if env_pairs is not None: for (k, v) in env_pairs.iteritems(): env[k] = v env_file.seek( 0 ) env_file.truncate( 0 ) for var in sorted( env.items( ) ): env_file.write( '%s="%s"\n' % var ) # FIXME: This is here for an experimental feature (ordering commands that depend on each other) if False: def toposort2( data ): """ Dependencies are expressed as a dictionary whose keys are items and whose values are a set of dependent items. Output is a list of sets in topological order. 
The first set consists of items with no dependences, each subsequent set consists of items that depend upon items in the preceeding sets. >>> toposort2({ ... 2: {11}, ... 9: {11, 8}, ... 10: {11, 3}, ... 11: {7, 5}, ... 8: {7, 3}, ... }) ) [3, 5, 7] [8, 11] [2, 9, 10] """ from functools import reduce # Ignore self dependencies. for k, v in data.items( ): v.discard( k ) # Find all items that don't depend on anything. extra_items_in_deps = reduce( set.union, data.itervalues( ) ) - set( data.iterkeys( ) ) # Add empty dependences where needed data.update( { item: set( ) for item in extra_items_in_deps } ) while True: ordered = set( item for item, dep in data.iteritems( ) if not dep ) if not ordered: break yield ordered data = { item: (dep - ordered) for item, dep in data.iteritems( ) if item not in ordered } assert not data, "Cyclic dependencies exist among these items:\n%s" % '\n'.join( repr( x ) for x in data.iteritems( ) ) cgcloud-releases-1.6.0/core/src/cgcloud/core/source_control_client.py000066400000000000000000000020121301512357500257730ustar00rootroot00000000000000from fabric.operations import run from cgcloud.fabric.operations import sudo from cgcloud.core.box import fabric_task from cgcloud.core.package_manager_box import PackageManagerBox class SourceControlClient( PackageManagerBox ): """ A box that uses source control software """ @fabric_task def setup_repo_host_keys(self, user=None): # # Pre-seed the host keys from bitbucket and github, such that ssh doesn't prompt during # the initial checkouts. # for host in [ 'bitbucket.org', 'github.com' ]: command = 'ssh-keyscan -t rsa %s >> ~/.ssh/known_hosts' % host if user is None: run( command ) elif user == 'root': sudo( command ) else: sudo( command, user=user, sudo_args='-i' ) def _list_packages_to_install(self): return super( SourceControlClient, self )._list_packages_to_install( ) + [ 'git', 'subversion', 'mercurial' ] cgcloud-releases-1.6.0/core/src/cgcloud/core/task.py000066400000000000000000000015711301512357500223500ustar00rootroot00000000000000import fabric.tasks class Task( fabric.tasks.Task ): """ A Fabric task for EC2 boxes. Use this as the base class for custom Fabric tasks to be run on an EC2 box, as represented by an instance of Ec2Box. Pass instances of this class to Ec2Box .execute(). Use this only if your intend to create a hierarchy of task classes. Otherwise, it is much easier to write tasks as plain methods in a concrete subclass of Ec2Box and pass those method to Ec2Box.execute() This class extends Fabric's Task by using the class name as the name of the task and maintaining a link to the box instance this task is executed on. """ def __init__(self, box): """ Initializes this task for the given box. 
:param box: the box :type box: Box""" super( Task, self ).__init__( name=self.__class__.__name__ ) self.box = box cgcloud-releases-1.6.0/core/src/cgcloud/core/test/000077500000000000000000000000001301512357500220075ustar00rootroot00000000000000cgcloud-releases-1.6.0/core/src/cgcloud/core/test/__init__.py000066400000000000000000000056161301512357500241300ustar00rootroot00000000000000import os import sys from contextlib import contextmanager from itertools import ifilter from tempfile import mkstemp import subprocess32 from bd2k.util.iterables import concat from boto.utils import logging from cgcloud.core.cli import main, CGCloud from cgcloud.lib.test import CgcloudTestCase log = logging.getLogger( __name__ ) class CoreTestCase( CgcloudTestCase ): @classmethod def setUpClass( cls ): CGCloud.setup_logging( ) CGCloud.silence_boto_and_paramiko( ) super( CoreTestCase, cls ).setUpClass( ) ssh_opts = ('-o', 'UserKnownHostsFile=/dev/null', '-o', 'StrictHostKeyChecking=no') @classmethod def ssh_opts_str( cls ): return ' '.join( cls.ssh_opts ) def _assert_remote_failure( self, role ): """ Proof that failed remote commands lead to test failures """ self._ssh( role, 'true' ) try: self._ssh( role, 'false' ) self.fail( ) except SystemExit as e: self.assertEqual( e.code, 1 ) @classmethod def _ssh( cls, role, *args, **kwargs ): cls._cgcloud( *concat( 'ssh', dict_to_opts( kwargs ), role, cls.ssh_opts, args ) ) @classmethod def _rsync( cls, role, *args, **kwargs ): cls._cgcloud( *concat( 'rsync', dict_to_opts( kwargs, ssh_opts=cls.ssh_opts_str( ) ), role, args ) ) def _send_file( self, role, content, name ): script, script_path = mkstemp( ) try: os.write( script, content ) except: os.close( script ) raise else: os.close( script ) self._rsync( role, script_path, ':' + name ) finally: os.unlink( script_path ) @classmethod def _cgcloud( cls, *args ): log.info( 'Running %r', args ) if os.environ.get( 'CGCLOUD_TEST_EXEC', "" ): subprocess32.check_call( concat( 'cgcloud', args ) ) else: main( args ) @contextmanager def out_stderr( ): with open( os.devnull, 'a' ) as f: f, sys.stderr = sys.stderr, f try: yield finally: f, sys.stderr = sys.stderr, f def dict_to_opts( d=None, **kwargs ): """ >>> list( dict_to_opts( dict( foo=True ) ) ) ['--foo'] >>> list( dict_to_opts( dict( foo=False) ) ) [] >>> list( dict_to_opts( foo=True ) ) ['--foo'] >>> list( dict_to_opts( dict( foo_bar=1 ), x=3 ) ) ['--foo-bar=1', '-x=3'] """ if d is None: d = kwargs elif kwargs: d = dict( d, **kwargs ) def to_opt( k, v ): s = '--' + k.replace( '_', '-' ) if len( k ) > 1 else '-' + k if v is True: return s elif v is False: return None else: return s + '=' + str( v ) return ifilter( None, (to_opt( k, v ) for k, v in d.iteritems( )) ) cgcloud-releases-1.6.0/core/src/cgcloud/core/test/conftest.py000066400000000000000000000015321301512357500242070ustar00rootroot00000000000000def pytest_configure( config ): # One of PyTest's nanny features is to redirect stdin to a thing that refuses to be read # from. It is supposed to prevent tests from accidentally getting blocked waiting for user # input. I have never in my life had a test that blocked on stdin without it being completely # obvious, even without this nanny redirect. However, I've repeatedly run into issues where # this redirection gets in the way, mainly with Fabric: # # http://jenkins.cgcloud.info/job/cgcloud/304/testReport/junit/src.cgcloud.core.test.test_core/CoreTests/test_generic_fedora_22_box/ # # This workaround disables that nanny feature. 
capman = config.pluginmanager.get_plugin( 'capturemanager' ) if capman._capturing.in_ is not None: capman._capturing.in_.done( ) capman._capturing.in_ = None cgcloud-releases-1.6.0/core/src/cgcloud/core/test/test_core.py000066400000000000000000000046561301512357500243630ustar00rootroot00000000000000import logging import os from bd2k.util.exceptions import panic from cgcloud.core import roles from cgcloud.core.test import CoreTestCase, out_stderr log = logging.getLogger( __name__ ) class CoreTests( CoreTestCase ): """ Tests the typical life-cycle of instances and images """ _multiprocess_shared_ = True roles = roles( ) def _test( self, box_cls ): role = box_cls.role( ) self._cgcloud( 'create', role ) try: self._cgcloud( 'stop', role ) self._cgcloud( 'image', role ) try: self._cgcloud( 'terminate', role ) self._cgcloud( 'recreate', role ) file_name = 'foo-' + role self._ssh( role, 'touch', file_name ) self._rsync( role, ':' + file_name, '.' ) self.assertTrue( os.path.exists( file_name ) ) os.unlink( file_name ) self._cgcloud( 'terminate', role ) finally: self._cgcloud( 'delete-image', role ) except: with panic( log ): self._cgcloud( 'terminate', '--quick', role ) @classmethod def make_tests( cls ): for box_cls in cls.roles: test_method = (lambda _box_cls: lambda self: cls._test( self, _box_cls ))( box_cls ) test_method.__name__ = 'test_%s' % box_cls.role( ).replace( '-', '_' ) setattr( cls, test_method.__name__, test_method ) def test_illegal_argument( self ): # Capture sys.stderr so we don't pollute the log of a successful run with an error message with out_stderr( ): self.assertRaises( SystemExit, self._cgcloud, 'delete-image', self.roles[ 0 ].role( ), '-1' ) def test_pytest_capture_workaround( self ): # To see this test fail, comment out the workaround in conftest.py and run this test from # the command line. Note that when running the test from PyCharm you will not be able to # see it fail because PyCharm's runner is hard-wired to disable PyTest's capture. from fabric.operations import run from fabric.context_managers import settings with settings( host_string='localhost' ): # We need a command that doesn't exit immediately such that the loop body in Fabric's # input_loop() is actually run at least once. 
run( 'sleep 1' ) CoreTests.make_tests( ) cgcloud-releases-1.6.0/core/src/cgcloud/core/ubuntu_box.py000066400000000000000000000136621301512357500236020ustar00rootroot00000000000000from abc import abstractmethod from ast import literal_eval from collections import namedtuple import contextlib import csv import logging import urllib2 from StringIO import StringIO from fabric.operations import sudo, put, run from cgcloud.core.box import fabric_task from cgcloud.core.init_box import UpstartBox, SystemdBox from cgcloud.core.agent_box import AgentBox from cgcloud.core.cloud_init_box import CloudInitBox from cgcloud.core.package_manager_box import PackageManagerBox from cgcloud.core.rc_local_box import RcLocalBox from cgcloud.fabric.operations import remote_sudo_popen from cgcloud.lib.util import heredoc BASE_URL = 'http://cloud-images.ubuntu.com' log = logging.getLogger( __name__ ) class UbuntuBox( AgentBox, CloudInitBox, RcLocalBox ): """ A box representing EC2 instances that boot from one of Ubuntu's cloud-image AMIs """ Release = namedtuple( 'Release', ('codename', 'version') ) @abstractmethod def release( self ): """ :return: the code name of the Ubuntu release :rtype: UbuntuBox.Release """ raise NotImplementedError( ) def _get_debconf_selections( self ): """ Override in a concrete subclass to add custom debconf selections. :return: A list of lines to be piped to debconf-set-selections (no newline at the end) :rtype: list[str] """ return [ ] def admin_account( self ): return 'ubuntu' class TemplateDict( dict ): def matches( self, other ): return all( v == other.get( k ) for k, v in self.iteritems( ) ) def _base_image( self, virtualization_type ): release = self.release( ).codename template = self.TemplateDict( release=release, purpose='server', release_type='release', storage_type='ebs', arch='amd64', region=self.ctx.region, hypervisor=virtualization_type ) url = '%s/query/%s/server/released.current.txt' % (BASE_URL, release) matches = [ ] with contextlib.closing( urllib2.urlopen( url ) ) as stream: images = csv.DictReader( stream, fieldnames=[ 'release', 'purpose', 'release_type', 'release_date', 'storage_type', 'arch', 'region', 'ami_id', 'aki_id', 'dont_know', 'hypervisor' ], delimiter='\t' ) for image in images: if template.matches( image ): matches.append( image ) if len( matches ) < 1: raise self.NoSuchImageException( "Can't find Ubuntu AMI for release %s and virtualization type %s" % ( release, virtualization_type) ) if len( matches ) > 1: raise RuntimeError( 'More than one matching image: %s' % matches ) image_info = matches[ 0 ] image_id = image_info[ 'ami_id' ] return self.ctx.ec2.get_image( image_id ) apt_get = 'DEBIAN_FRONTEND=readline apt-get -q -y' @fabric_task def _sync_package_repos( self ): put( remote_path='/etc/apt/apt.conf.d/99timeout', use_sudo=True, local_path=StringIO( heredoc( """ Acquire::http::Timeout "10"; Acquire::ftp::Timeout "10"; """ ) ), ) for i in range( 5 ): cmd = self.apt_get + ' update' result = sudo( cmd, warn_only=True ) if result.succeeded: return # https://bugs.launchpad.net/ubuntu/+source/apt/+bug/972077 # https://lists.debian.org/debian-dak/2012/05/threads.html#00006 if 'Hash Sum mismatch' in result: log.warn( "Detected race condition during '%s'", cmd ) else: raise RuntimeError( "Command '%s' failed" % cmd ) raise RuntimeError( "Command '%s' repeatedly failed with race condition. Giving up." % cmd
) @fabric_task def _upgrade_installed_packages( self ): sudo( '%s upgrade' % self.apt_get ) @fabric_task def _install_packages( self, packages ): packages = " ".join( packages ) sudo( '%s --no-install-recommends install %s' % (self.apt_get, packages) ) def _get_package_installation_command( self, package ): return [ 'apt-get', 'install', '-y', '--no-install-recommends', '--force-yes' ] + list( self._substitute_package( package ) ) def _pre_install_packages( self ): super( UbuntuBox, self )._pre_install_packages( ) debconf_selections = self._get_debconf_selections( ) if debconf_selections: self.__debconf_set_selections( debconf_selections ) @fabric_task def __debconf_set_selections( self, debconf_selections ): with remote_sudo_popen( 'debconf-set-selections' ) as f: f.write( '\n'.join( debconf_selections ) ) def _ssh_service_name( self ): return 'ssh' class UpstartUbuntuBox( UbuntuBox, UpstartBox ): pass class SystemdUbuntuBox( UbuntuBox, SystemdBox ): pass class Python27UpdateUbuntuBox( UbuntuBox ): """ A mixin for retrieving 2.7.x updates of Python from Felix Krull's deadsnakes PPA (not the one with old and new Pythons, but the one dedicated to 2.7 point updates. https://launchpad.net/~fkrull/+archive/ubuntu/deadsnakes-python2.7 """ @fabric_task def _setup_package_repos( self ): super( Python27UpdateUbuntuBox, self )._setup_package_repos( ) sudo( 'add-apt-repository -y ppa:fkrull/deadsnakes-python2.7' ) # FIXME: This should go some place else @fabric_task def _remote_python_version( self, python='python' ): """ Returns a version tuple for the Python installed on the instance represented by this box :rtype: tuple """ return literal_eval( run( python + " -c 'import sys; print tuple(sys.version_info)'" ) ) cgcloud-releases-1.6.0/core/src/cgcloud/core/version.py000077700000000000000000000000001301512357500257622../../../version.pyustar00rootroot00000000000000cgcloud-releases-1.6.0/core/src/cgcloud/core/yum_box.py000066400000000000000000000052071301512357500230700ustar00rootroot00000000000000import os.path from urlparse import urlparse from fabric.operations import sudo, run from cgcloud.core.init_box import SysvInitdBox from cgcloud.core.box import fabric_task from cgcloud.core.package_manager_box import PackageManagerBox class YumBox( PackageManagerBox, SysvInitdBox ): """ A box that uses redhat's yum package manager """ def _sync_package_repos( self ): return False @fabric_task def _install_packages( self, packages ): """ yum's error handling is a bit odd: If you pass two packages to install and one fails while the other succeeds, yum exits with 0. To work around this, we need to invoke rpm to check for successful installation separately of every package. Also, beware that some older yums exit with 0 even if the package doesn't exist: $ sudo yum install jasdgjhsadgajshd && echo yes $ yes :param packages: a list of package names """ sudo( 'yum install -d 1 -y %s' % ' '.join( "'%s'" % package for package in packages ) ) # make sure it is really installed for package in packages: run( 'rpm -q %s' % package ) def _get_package_installation_command( self, package ): return [ 'yum', 'install', '-d', '1', '-y' ] + list( self._substitute_package( package ) ) @fabric_task def _upgrade_installed_packages( self ): sudo( 'yum update -y -d 1' ) @fabric_task def _yum_remove( self, package ): sudo( "yum -d 1 -y remove '%s'" % package ) @fabric_task def _yum_local( self, is_update, rpm_urls ): """ Download the RPM at the given URL and run 'yum localupdate' on it. 
:param rpm_urls: A list of HTTP or FTP URLs ending in a valid RPM file name. """ rpms = [ ] for rpm_url in rpm_urls: run( "wget '%s'" % rpm_url ) rpm = os.path.basename( urlparse( rpm_url ).path ) rpms.append( rpm ) sudo( "yum -d 1 -y local{command} {rpms} --nogpgcheck".format( command='update' if is_update else 'install', rpms=' '.join( "'%s'" % rpm for rpm in rpms ) ) ) for rpm in rpms: # extract package name from RPM, then check if package is actually installed # since we can't rely on yum to report errors run( "rpm -q $(rpm -qp --queryformat '%%{N}' '%s')" % rpm ) run( "rm '%s'" % rpm ) def _get_package_substitutions( self ): return super( YumBox, self )._get_package_substitutions( ) + [ ( 'python-dev', 'python-devel' ), ] def _ssh_service_name( self ): return 'sshd' cgcloud-releases-1.6.0/core/src/cgcloud/fabric/000077500000000000000000000000001301512357500213265ustar00rootroot00000000000000cgcloud-releases-1.6.0/core/src/cgcloud/fabric/__init__.py000066400000000000000000000000001301512357500234250ustar00rootroot00000000000000cgcloud-releases-1.6.0/core/src/cgcloud/fabric/operations.py000066400000000000000000000171641301512357500240740ustar00rootroot00000000000000import os import sys import time from StringIO import StringIO from contextlib import contextmanager from fcntl import fcntl, F_GETFL, F_SETFL from pipes import quote from threading import Thread from bd2k.util.expando import Expando from bd2k.util.iterables import concat from bd2k.util.strings import interpolate as fmt from fabric.operations import sudo as real_sudo, get, put, run from fabric.state import env import fabric.io import fabric.operations def sudo( command, sudo_args=None, **kwargs ): """ Work around https://github.com/fabric/fabric/issues/503 """ if sudo_args is not None: old_prefix = env.sudo_prefix env.sudo_prefix = '%s %s' % (old_prefix, sudo_args) try: return real_sudo( command, **kwargs ) finally: if sudo_args is not None: env.sudo_prefix = old_prefix def runv( *args, **kwargs ): run( command=join_argv( args ), **kwargs ) def sudov( *args, **kwargs ): sudo( command=join_argv( args ), **kwargs ) def pip( args, path='pip', use_sudo=False ): """ Run pip. :param args: a string or sequence of strings to be passed to pip as command line arguments. If given a sequence of strings, its elements will be quoted if necessary and joined with a single space in between. :param path: the path to pip :param use_sudo: whther to run pip as sudo """ if isinstance( args, (str, unicode) ): command = path + ' ' + args else: command = join_argv( concat( path, args ) ) # Disable pseudo terminal creation to prevent pip from spamming output with progress bar. kwargs = Expando( pty=False ) if use_sudo: f = sudo # Set HOME so pip's cache doesn't go into real user's home, potentially creating files # not owned by that user (older versions of pip) or printing a warning about caching # being disabled. kwargs.sudo_args = '-H' else: f = run f( command, **kwargs ) def join_argv( command ): return ' '.join( map( quote, command ) ) def virtualenv( name, distributions=None, pip_distribution='pip', executable=None ): """ Installs a set of distributions (aka PyPI packages) into a virtualenv under /opt and optionally links an executable from that virtualenv into /usr/loca/bin. :param name: the name of the directory under /opt that will hold the virtualenv :param distributions: a list of distributions to be installed into the virtualenv. Defaults to [ name ]. You can also list other "pip install" options, like --pre. 
:param pip_distribution: if non-empty, the distribution and optional version spec to upgrade pip to. Defaults to the latest version of pip. Set to empty string to prevent pip from being upgraded. Downgrades from the system-wide pip version currently don't work. :param executable: The name of an executable in the virtualenv's bin directory that should be symlinked into /usr/local/bin. The executable must be provided by the distributions that are installed in the virtualenv. """ # FIXME: consider --no-pip and easy_installing pip to support downgrades if distributions is None: distributions = [ name ] venv = '/opt/' + name admin = run( 'whoami' ) sudo( fmt( 'mkdir -p {venv}' ) ) sudo( fmt( 'chown {admin}:{admin} {venv}' ) ) try: run( fmt( 'virtualenv {venv}' ) ) if pip_distribution: pip( path=venv + '/bin/pip', args=[ 'install', '--upgrade', pip_distribution ] ) pip( path=venv + '/bin/pip', args=concat( 'install', distributions ) ) finally: sudo( fmt( 'chown -R root:root {venv}' ) ) if executable: sudo( fmt( 'ln -snf {venv}/bin/{executable} /usr/local/bin/' ) ) @contextmanager def remote_open( remote_path, use_sudo=False ): """ Equivalent of open( remote_path, "a+" ) as if run on the remote system """ buf = StringIO( ) get( remote_path=remote_path, local_path=buf ) yield buf buf.seek( 0 ) put( local_path=buf, remote_path=remote_path, use_sudo=use_sudo ) # noinspection PyPep8Naming class remote_popen( object ): """ A context manager that yields a file handle and a >>> from fabric.context_managers import hide, settings >>> with settings(host_string='localhost'): ... with hide( 'output' ): ... # Disable shell since it may print additional stuff to console ... with remote_popen( 'sort -n', shell=False ) as f: ... f.write( '\\n'.join( map( str, [ 3, 2, 1] ) ) ) [localhost] run: sort -n 3 2 1 Above is the echoed input, below the sorted output. >>> print f.result 1 2 3 """ def __init__( self, *args, **kwargs ): try: if kwargs[ 'pty' ]: raise RuntimeError( "The 'pty' keyword argument must be omitted or set to False" ) except KeyError: kwargs[ 'pty' ] = False self.args = args self.kwargs = kwargs # FIXME: Eliminate this buffer and have caller write directly into the pipe self.stdin = StringIO( ) self.stdin.result = None def __enter__( self ): return self.stdin def __exit__( self, exc_type, exc_val, exc_tb ): if exc_type is None: _r, _w = os.pipe( ) def copy( ): with os.fdopen( _w, 'w' ) as w: w.write( self.stdin.getvalue( ) ) t = Thread( target=copy ) t.start( ) try: _stdin = sys.stdin.fileno( ) _old_stdin = os.dup( _stdin ) os.close( _stdin ) assert _stdin == os.dup( _r ) # monkey-patch Fabric _input_loop = fabric.operations.input_loop fabric.operations.input_loop = input_loop try: self.stdin.result = self._run( ) finally: fabric.operations.input_loop = _input_loop os.close( _stdin ) os.dup( _old_stdin ) finally: t.join( ) return False def _run( self ): return run( *self.args, **self.kwargs ) # noinspection PyPep8Naming class remote_sudo_popen( remote_popen ): def _run( self ): sudo( *self.args, **self.kwargs ) # Version of Fabric's input_loop that handles EOF on stdin and reads more greedily with # non-blocking mode. # TODO: We should open a ticket for this. 
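# Usage sketch for the context managers above: elsewhere in this repo, e.g.
# UbuntuBox.__debconf_set_selections, remote_sudo_popen is used to feed a string to a
# remote command's stdin under sudo ('selections' here stands for any list of strings):
#
#   with remote_sudo_popen( 'debconf-set-selections' ) as f:
#       f.write( '\n'.join( selections ) )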
from select import select from fabric.network import ssh def input_loop( chan, using_pty ): opts = fcntl( sys.stdin.fileno( ), F_GETFL ) fcntl( sys.stdin.fileno( ), F_SETFL, opts | os.O_NONBLOCK ) try: while not chan.exit_status_ready( ): r, w, x = select( [ sys.stdin ], [ ], [ ], 0.0 ) have_char = (r and r[ 0 ] == sys.stdin) if have_char and chan.input_enabled: # Send all local stdin to remote end's stdin bytes = sys.stdin.read( ) if bytes is None: pass elif not bytes: chan.shutdown_write( ) break else: chan.sendall( bytes ) # Optionally echo locally, if needed. if not using_pty and env.echo_stdin: # Not using fastprint() here -- it prints as 'user' # output level, don't want it to be accidentally hidden sys.stdout.write( bytes ) sys.stdout.flush( ) time.sleep( ssh.io_sleep ) finally: fcntl( sys.stdin.fileno( ), F_SETFL, opts ) cgcloud-releases-1.6.0/core/tests.py000066400000000000000000000003641301512357500174100ustar00rootroot00000000000000parallelizable_keywords = [ 'test_generic_centos_6_box', 'test_generic_fedora_21_box', 'test_generic_fedora_22_box', 'test_generic_ubuntu_precise_box', 'test_generic_ubuntu_trusty_box', 'test_generic_ubuntu_vivid_box' ] cgcloud-releases-1.6.0/jenkins.sh000066400000000000000000000011341301512357500167350ustar00rootroot00000000000000virtualenv venv . venv/bin/activate pip install pytest==2.9.2 export CGCLOUD_ME=jenkins@jenkins-master make develop sdist # We want to use -k/--keep-going such that make doesn't fail the build on the first subproject for # which the tests fail and keeps testing the other projects. Unfortunately, that takes away the # convenience of specifiying multiple targets in one make invocation since make would not stop on a # failing target. ( for target in $make_targets; do ( if [ "$target" = test ]; then make --jobs --keep-going $target else make $target fi ) || exit done ) cgcloud-releases-1.6.0/jenkins/000077500000000000000000000000001301512357500164025ustar00rootroot00000000000000cgcloud-releases-1.6.0/jenkins/.gitignore000066400000000000000000000000671301512357500203750ustar00rootroot00000000000000/build /dist *.egg-info *.pyc /MANIFEST.in /version.py cgcloud-releases-1.6.0/jenkins/README.rst000066400000000000000000000164271301512357500201030ustar00rootroot00000000000000The CGCloud Jenkins project contains the roles for running a distributed continuous integration environment in EC2 with one Jenkins master VM and multiple slave VMs. A Jenkins slave is a machine that the master delegates builds to. Slaves are launched on demand and are shutdown after a certain amount of idle time. The different slave roles are blueprints for setting up a slave VM that has the necessary prerequisites for running a particular Jenkins build. Quickstart ========== Activate the virtualenv cgcloud was installed in and install ``cgcloud-jenkins``:: :: cd virtualenv cgcloud source cgcloud/bin/activate pip install cgcloud-jenkins export CGCLOUD_PLUGINS="cgcloud.jenkins:$CGCLOUD_PLUGINS" If you get ``DistributionNotFound: No distributions matching the version for cgcloud-jenkins``, try running ``pip install --pre cgcloud-jenkins``. Running ``cgcloud list-roles`` should now list the additional roles defined in the plugin:: ... 
    jenkins-master
    ubuntu-lucid-genetorrent-jenkins-slave
    ubuntu-precise-genetorrent-jenkins-slave
    ubuntu-saucy-genetorrent-jenkins-slave
    ubuntu-trusty-genetorrent-jenkins-slave
    centos5-genetorrent-jenkins-slave
    centos6-genetorrent-jenkins-slave
    fedora19-genetorrent-jenkins-slave
    fedora20-genetorrent-jenkins-slave
    ubuntu-lucid-generic-jenkins-slave
    ubuntu-precise-generic-jenkins-slave
    ubuntu-saucy-generic-jenkins-slave
    ubuntu-trusty-generic-jenkins-slave
    centos5-generic-jenkins-slave
    centos6-generic-jenkins-slave
    fedora19-generic-jenkins-slave
    fedora20-generic-jenkins-slave
    centos5-rpmbuild-jenkins-slave
    centos6-rpmbuild-jenkins-slave
    load-test-box
    data-browser-jenkins-slave

Master And Slave Roles
======================

The plugin defines a role for the master (``jenkins-master``) and various slave roles for running
builds of certain CGL projects. There are also a bunch of generic slaves that are not customized
for a particular project.

The master (``jenkins-master``) is a long-running box that hosts the Jenkins web application. The
Jenkins installation (code and data) is cordoned off in the home directory of a separate
``jenkins`` user. That home directory actually resides on a secondary EBS volume whose life cycle
is independent of that of the master box, i.e. the VM instance. This allows us to update the OS of
the master without having to set up Jenkins from scratch every time we do so.

The remaining roles define the Jenkins slaves. A Jenkins slave is a short-running box with which
the master establishes an SSH connection for the purpose of triggering a *remote build*. The
CGCloud Jenkins plugin (this project) is used to create the VM images and register them with the
master such that the master can launch a slave instance when needed to run a remote build on the
platform provided by the slave.

Jenkins
=======

Jenkins is a continuous integration server/web application running on the ``jenkins-master``.
Jenkins uses so-called *projects* that define where to get the source, how to build and test the
source, and which build artifacts to archive. Builds can be run automatically whenever a push is
made, on a fixed schedule, or manually. Builds are executed by an agent. Agents can run locally on
the Jenkins master or remotely on one or more slaves. Jenkins uses its own plugin system to extend
and modify its default behavior. We use the EC2 plugin, which allows us to create slaves on demand
in EC2 from images created by cgcloud in conjunction with this project. Mind the distinction
between CGCloud Jenkins, which plugs into CGCloud, and the hundreds of plugins that extend Jenkins.

The Jenkins web UI can always be accessed by tunneling port 8080 through SSH. Running ``cgcloud
ssh jenkins-master`` sets up the necessary port forwarding. Authorization and authentication in
Jenkins itself is disabled on a fresh instance but can be enabled and further customized using
Jenkins plugins. Note: Anyone with SSH access to the master can access Jenkins and do anything
with it.

Tutorial: Creating a Continuous Integration Environment
========================================================

In this tutorial we'll create a continuous integration environment consisting of a Jenkins master
and several slaves. The tutorial assumes that you completed the Quickstart section of the CGCloud
README.
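Throughout the tutorial you can peek at the Jenkins web UI whenever the master is running. As
noted above, ``cgcloud ssh jenkins-master`` forwards port 8080 for you, so the UI becomes
reachable locally (a quick sketch, assuming the default port)::

    cgcloud ssh jenkins-master
    # now point a local browser at http://localhost:8080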
Creating The Master
-------------------

Create the Jenkins master instance::

    cgcloud create jenkins-master

As a test, SSH into the master as the administrative user::

    cgcloud ssh -a jenkins-master
    exit

The administrative user has ``sudo`` privileges. Its name varies from platform to platform but
``cgcloud`` keeps track of that for you. For yet another test, SSH into the master as the
*jenkins* user::

    cgcloud ssh jenkins-master

This is the user that the Jenkins server runs as. Next, create an image of the master such that
you can always recreate a 100% identical clone::

    cgcloud stop jenkins-master
    cgcloud image jenkins-master
    cgcloud terminate jenkins-master
    cgcloud recreate jenkins-master

The first command stops the master, which is necessary because only a stopped instance can be
imaged. The ``image`` command creates the actual AMI image. The ``terminate`` command disposes of
the instance. This will delete the ``/`` partition while leaving the ``/var/lib/jenkins``
partition around. The latter is stored on a separate EBS volume called ``jenkins-data``. In other
words, the ``terminate`` command leaves us with two things: 1) the AMI for a master box and 2) the
Jenkins data volume. The ``recreate`` command then creates a new instance from the most recently
created image *and* attaches the ``jenkins-data`` volume to that instance.

Creating The Slaves
-------------------

Open a new shell window and create the first slave::

    cgcloud list-roles
    cgcloud create docker-jenkins-slave

SSH into it::

    cgcloud ssh -a docker-jenkins-slave

Notice that:

* The admin user has sudo rights::

      sudo whoami

* The builds directory in the Jenkins user's home is symbolically linked to ephemeral storage::

      sudo ls -l ~jenkins

* git and docker are installed::

      git --version
      docker --version
      exit

Now stop, image and terminate the box::

    cgcloud stop docker-jenkins-slave
    cgcloud image docker-jenkins-slave
    cgcloud terminate docker-jenkins-slave

Finally, register all slaves with the master::

    cgcloud register-slaves jenkins-master docker-jenkins-slave

The ``register-slaves`` command adds a section to Jenkins' config.xml that defines how to spawn an
EC2 instance of ``docker-jenkins-slave`` from the AMI we just created. The slave description also
associates the slave with the label ``docker``. If a project definition requests to be run on
slaves labelled ``docker``, an instance will be created from the AMI. Once the instance is up, the
Jenkins master will launch the agent on it via SSH. Finally, the master will ask the agent to run
a build for that project. If a slave labelled ``docker`` already exists, it will be used instead
of creating a new one. You can customize how many concurrent builds run on each slave by
increasing the number of agents running on a slave. By default only one slave per role will be
launched, but you can configure Jenkins to launch more than one if the queue contains multiple
builds for a given label.
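For reference, ``register-slaves`` also accepts a few options; the following is an illustrative
sketch based on the flags declared in ``cgcloud.jenkins.commands.RegisterSlaves`` (``--slaves``,
``--clean`` and ``--instance-type``), so treat the exact invocations as examples rather than
canonical syntax::

    # replace previously registered slave templates, registering every role that
    # matches the default glob '*-jenkins-slave'
    cgcloud register-slaves --clean jenkins-master

    # pin the EC2 instance type used for a particular slave template
    cgcloud register-slaves --instance-type m3.large jenkins-master docker-jenkins-slave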
cgcloud-releases-1.6.0/jenkins/setup.cfg000066400000000000000000000002601301512357500202210ustar00rootroot00000000000000[pytest] # Look for any python file, the default of test_*.py wouldn't work for us python_files=*.py # Also run doctests addopts = --doctest-modules -k 'not create_all_slaves' cgcloud-releases-1.6.0/jenkins/setup.py000066400000000000000000000012621301512357500201150ustar00rootroot00000000000000from __future__ import absolute_import from setuptools import setup, find_packages from version import cgcloud_version, fabric_dep setup( name='cgcloud-jenkins', version=cgcloud_version, author="Hannes Schmidt", author_email="hannes@ucsc.edu", url='https://github.com/BD2KGenomics/cgcloud', description='Setup and manage a Jenkins continuous integration cluster in EC2', package_dir={ '': 'src' }, packages=find_packages( 'src' ), namespace_packages=[ 'cgcloud' ], install_requires=[ 'cgcloud-lib==' + cgcloud_version, 'cgcloud-core==' + cgcloud_version, fabric_dep ] ) cgcloud-releases-1.6.0/jenkins/src/000077500000000000000000000000001301512357500171715ustar00rootroot00000000000000cgcloud-releases-1.6.0/jenkins/src/cgcloud/000077500000000000000000000000001301512357500206115ustar00rootroot00000000000000cgcloud-releases-1.6.0/jenkins/src/cgcloud/__init__.py000066400000000000000000000000731301512357500227220ustar00rootroot00000000000000__import__( 'pkg_resources' ).declare_namespace( __name__ )cgcloud-releases-1.6.0/jenkins/src/cgcloud/jenkins/000077500000000000000000000000001301512357500222525ustar00rootroot00000000000000cgcloud-releases-1.6.0/jenkins/src/cgcloud/jenkins/__init__.py000066400000000000000000000025161301512357500243670ustar00rootroot00000000000000def roles( ): from cgcloud.jenkins.jenkins_master import JenkinsMaster from cgcloud.jenkins.generic_jenkins_slaves import (UbuntuLucidGenericJenkinsSlave, Centos5GenericJenkinsSlave, Centos6GenericJenkinsSlave, Fedora19GenericJenkinsSlave, Fedora20GenericJenkinsSlave, UbuntuPreciseGenericJenkinsSlave, UbuntuTrustyGenericJenkinsSlave) from cgcloud.jenkins.cgcloud_jenkins_slave import CgcloudJenkinsSlave from cgcloud.jenkins.rpmbuild_jenkins_slaves import (Centos5RpmbuildJenkinsSlave, Centos6RpmbuildJenkinsSlave) from cgcloud.jenkins.s3am_jenkins_slave import S3amJenkinsSlave from cgcloud.jenkins.toil_jenkins_slave import ToilJenkinsSlave from cgcloud.jenkins.docker_jenkins_slave import DockerJenkinsSlave return sorted( locals( ).values( ), key=lambda cls: cls.__name__ ) def command_classes( ): from cgcloud.jenkins.commands import RegisterSlaves return sorted( locals( ).values( ), key=lambda cls: cls.__name__ ) cgcloud-releases-1.6.0/jenkins/src/cgcloud/jenkins/cgcloud_jenkins_slave.py000066400000000000000000000100021301512357500271500ustar00rootroot00000000000000from cgcloud.core.common_iam_policies import ec2_full_policy from cgcloud.core.ubuntu_box import Python27UpdateUbuntuBox from cgcloud.lib import test_namespace_suffix_length from cgcloud.lib.util import abreviated_snake_case_class_name from cgcloud.jenkins.generic_jenkins_slaves import UbuntuTrustyGenericJenkinsSlave class CgcloudJenkinsSlave( UbuntuTrustyGenericJenkinsSlave, Python27UpdateUbuntuBox ): """ Jenkins slave for runing CGCloud's unit tests """ @classmethod def recommended_instance_type( cls ): return "m3.xlarge" def _list_packages_to_install( self ): return super( CgcloudJenkinsSlave, self )._list_packages_to_install( ) + [ # for PyCrypto: 'python-dev', 'autoconf', 'automake', 'binutils', 'gcc', 'make', 'libyaml-dev' ] def _get_iam_ec2_role( self ): iam_role_name, 
policies = super( CgcloudJenkinsSlave, self )._get_iam_ec2_role( ) iam_role_name += '--' + abreviated_snake_case_class_name( CgcloudJenkinsSlave ) cgcloud_bucket_arn = "arn:aws:s3:::%s" % self.ctx.s3_bucket_name policies.update( dict( ec2_full=ec2_full_policy, # FIXME: Be more specific iam_cgcloud_jenkins_slave_pass_role=dict( Version="2012-10-17", Statement=[ # This assumes that if instance lives in /, then tests running on the instance # will run in /test-5571439d. If the instance lives in /foo, then tests running # on the instance will run in /foo/test-5571439d. dict( Effect="Allow", Resource=self._pass_role_arn(), Action="iam:PassRole" ) ] ), register_keypair=dict( Version="2012-10-17", Statement=[ dict( Effect="Allow", Resource="arn:aws:s3:::*", Action="s3:ListAllMyBuckets" ), dict( Effect="Allow", Action="s3:*", Resource=[ cgcloud_bucket_arn, cgcloud_bucket_arn + "/*" ] ), dict( Effect="Allow", Action=[ "sns:Publish", "sns:CreateTopic" ], Resource='arn:aws:sns:*:%s:cgcloud-agent-notifications' % self.ctx.account ) ] ), iam_cgcloud_jenkins_slave=dict( Version="2012-10-17", Statement=[ dict( Effect="Allow", Resource="*", Action=[ "iam:ListRoles", "iam:CreateRole", "iam:DeleteRole", "iam:ListRolePolicies", "iam:DeleteRolePolicy", "iam:GetRolePolicy", "iam:PutRolePolicy", "iam:ListInstanceProfiles", "iam:GetInstanceProfile", "iam:CreateInstanceProfile", "iam:DeleteInstanceProfile", "iam:RemoveRoleFromInstanceProfile", "iam:AddRoleToInstanceProfile", "iam:DeleteInstanceProfile" ] ) ] ) ) ) return iam_role_name, policies def _pass_role_arn( self ): """ Return a pattern that a role name must match if it is to be passed to an instance created by code running on this Jenkins slave. """ # This is a bit convoluted, but it is still better than optionally allowing wildcards in # the name validation in Context.absolute_name(). The ? wildcard is not very well # documented but I found evidence for it here: # http://docs.aws.amazon.com/IAM/latest/UserGuide/PolicyVariables.html#policy-vars-specialchars pass_role_arn = self._role_arn( iam_role_name_prefix='test/testnamespacesuffixpattern/' ) pass_role_arn = pass_role_arn.replace( 'testnamespacesuffixpattern', "?" * test_namespace_suffix_length ) return pass_role_arn cgcloud-releases-1.6.0/jenkins/src/cgcloud/jenkins/commands.py000066400000000000000000000043031301512357500244250ustar00rootroot00000000000000from fnmatch import fnmatch import os from cgcloud.core.commands import InstanceCommand class RegisterSlaves( InstanceCommand ): """ Adds the specified slave images to Jenkins' EC2 configuration on the given master to the extend that the specified master can spawn later these slaves to run builds as needed. """ def __init__( self, application, **kwargs ): super( RegisterSlaves, self ).__init__( application, **kwargs ) self.option( '--slaves', '-s', metavar='ROLE_GLOB', nargs='*', default=[ '*-jenkins-slave' ], help='A list of roles names or role name patterns (shell globs) of the ' 'slaves that should be added to the Jenkins config. For each matching ' 'slave, the most recently created image will be registered using the ' 'recommended instance type for that slave.' ) self.option( '--clean', '-C', default=False, action='store_true', help='Clear the list of slaves in the master before registering new slaves. ' 'Beware that this option removes slaves that were registered through ' 'other means, e.g. via the web UI.' 
) self.option( '--instance-type', '-t', metavar='TYPE', default=os.environ.get( 'CGCLOUD_INSTANCE_TYPE', None ), help='The type of EC2 instance to register the slave with, e.g. t1.micro, ' 'm1.small, m1.medium, or m1.large etc. The value of the environment ' 'variable CGCLOUD_INSTANCE_TYPE, if that variable is present, overrides ' 'the default, an instance type appropriate for the role.' ) def run_on_instance( self, options, master ): master.register_slaves( [ slave_cls for role, slave_cls in self.application.roles.iteritems( ) for role_glob in options.slaves if fnmatch( role, role_glob ) ], clean=options.clean, instance_type=options.instance_type ) cgcloud-releases-1.6.0/jenkins/src/cgcloud/jenkins/docker_jenkins_slave.py000066400000000000000000000014351301512357500270110ustar00rootroot00000000000000from cgcloud.core.ubuntu_box import Python27UpdateUbuntuBox from cgcloud.jenkins.generic_jenkins_slaves import UbuntuTrustyGenericJenkinsSlave from cgcloud.core.docker_box import DockerBox class DockerJenkinsSlave( UbuntuTrustyGenericJenkinsSlave, DockerBox, Python27UpdateUbuntuBox ): """ A box for running the cgl-docker-lib builds on. Probably a bit of a misnomer but so far the only cgl-docker-lib particular is the dependency on make. """ def _list_packages_to_install( self ): return super( DockerJenkinsSlave, self )._list_packages_to_install( ) + [ 'make' ] def _docker_users( self ): return super( DockerJenkinsSlave, self )._docker_users( ) + [ 'jenkins' ] @classmethod def recommended_instance_type( cls ): return 'm3.large' cgcloud-releases-1.6.0/jenkins/src/cgcloud/jenkins/generic_jenkins_slaves.py000066400000000000000000000124471301512357500273460ustar00rootroot00000000000000from cgcloud.core.generic_boxes import * from cgcloud.fabric.operations import sudo from cgcloud.jenkins.jenkins_slave import JenkinsSlave from cgcloud.core.ubuntu_box import UbuntuBox class GenericJenkinsSlave( JenkinsSlave ): """ Generic Jenkins slave """ pass class CentosGenericJenkinsSlave( CentosBox, GenericJenkinsSlave ): """ Generic Jenkins slave for CentOS """ def _list_packages_to_install( self ): # TODO: List JRE explicitly (it is already installed on RightScale CentOS images) return super( CentosGenericJenkinsSlave, self )._list_packages_to_install( ) + [ ] @fabric_task def _setup_build_user( self ): super( CentosGenericJenkinsSlave, self )._setup_build_user( ) sudo( "echo 'Defaults:jenkins !requiretty' >> /etc/sudoers" ) sudo( "echo 'jenkins ALL=(ALL) NOPASSWD: /bin/rpm' >> /etc/sudoers" ) sudo( "echo 'jenkins ALL=(ALL) NOPASSWD: /usr/bin/yum' >> /etc/sudoers" ) @fabric_task def _post_install_packages( self ): super( CentosGenericJenkinsSlave, self )._post_install_packages( ) # FIXME: These are public but we should rebuild them and host them within our control self._yum_local( is_update=False, rpm_urls=[ 'http://public-artifacts.cghub.ucsc.edu.s3.amazonaws.com/custom-centos-packages/python27-2.7.2-cghub.x86_64.rpm', 'http://public-artifacts.cghub.ucsc.edu.s3.amazonaws.com/custom-centos-packages/python27-devel-2.7.2-cghub.x86_64.rpm', 'http://public-artifacts.cghub.ucsc.edu.s3.amazonaws.com/custom-centos-packages/python27-setuptools-0.6c11-cghub.noarch.rpm' ] ) class Centos5GenericJenkinsSlave( CentosGenericJenkinsSlave, GenericCentos5Box ): """ Generic Jenkins slave for CentOS 5 """ pass class Centos6GenericJenkinsSlave( CentosGenericJenkinsSlave, GenericCentos6Box ): """ Generic Jenkins slave for CentOS 6 """ pass class UbuntuGenericJenkinsSlave( UbuntuBox, GenericJenkinsSlave ): """ Generic Jenkins slave 
for Ubuntu """ def _list_packages_to_install( self ): return super( UbuntuGenericJenkinsSlave, self )._list_packages_to_install( ) + [ 'openjdk-7-jre-headless', 'gdebi-core' ] # comes in handy when installing .deb's with dependencies @fabric_task def _setup_build_user( self ): super( UbuntuGenericJenkinsSlave, self )._setup_build_user( ) sudo( "echo 'Defaults:jenkins !requiretty' >> /etc/sudoers" ) for prog in ('apt-get', 'dpkg', 'gdebi'): sudo( "echo 'jenkins ALL=(ALL) NOPASSWD: /usr/bin/%s' >> /etc/sudoers" % prog ) def _get_debconf_selections( self ): # On Lucid, somehow postfix gets pulled in as a dependency kicking the frontend into # interactive mode. The same happens when installing GridEngine. return super( UbuntuGenericJenkinsSlave, self )._get_debconf_selections( ) + [ "postfix postfix/main_mailer_type string 'No configuration'", "postfix postfix/mailname string %s" % self.host_name ] class UbuntuLucidGenericJenkinsSlave( UbuntuGenericJenkinsSlave, GenericUbuntuLucidBox ): """ Generic Jenkins slave for Ubuntu 10.04 LTS (EOL April 2015) """ def _setup_package_repos( self ): super( UbuntuLucidGenericJenkinsSlave, self )._setup_package_repos( ) self.__add_git_ppa( ) self.__add_python_ppa( ) @fabric_task def __add_git_ppa( self ): sudo( 'add-apt-repository -y ppa:git-core/ppa' ) @fabric_task def __add_python_ppa( self ): sudo( 'apt-add-repository -y ppa:fkrull/deadsnakes/ubuntu' ) def _list_packages_to_install( self ): return super( UbuntuLucidGenericJenkinsSlave, self )._list_packages_to_install( ) + [ 'python2.7', 'python2.7-dev' ] def _get_package_substitutions( self ): return super( UbuntuLucidGenericJenkinsSlave, self )._get_package_substitutions( ) + [ ('openjdk-7-jre-headless', 'openjdk-6-jre') ] class UbuntuPreciseGenericJenkinsSlave( UbuntuGenericJenkinsSlave, GenericUbuntuPreciseBox ): """ Generic Jenkins slave for Ubuntu 12.04 LTS (EOL April 2017) """ pass class UbuntuTrustyGenericJenkinsSlave( UbuntuGenericJenkinsSlave, GenericUbuntuTrustyBox ): """ Generic Jenkins slave for Ubuntu 14.04 LTS (EOL April 2019) """ pass class FedoraGenericJenkinsSlave( FedoraBox, GenericJenkinsSlave ): """ Generic Jenkins slave for Fedora """ def _list_packages_to_install( self ): return super( FedoraGenericJenkinsSlave, self )._list_packages_to_install( ) + [ 'java-1.7.0-openjdk' ] @fabric_task def _setup_build_user( self ): super( FedoraGenericJenkinsSlave, self )._setup_build_user( ) sudo( "echo 'Defaults:jenkins !requiretty' >> /etc/sudoers" ) sudo( "echo 'jenkins ALL=(ALL) NOPASSWD: /bin/rpm' >> /etc/sudoers" ) sudo( "echo 'jenkins ALL=(ALL) NOPASSWD: /usr/bin/yum' >> /etc/sudoers" ) class Fedora19GenericJenkinsSlave( FedoraGenericJenkinsSlave, GenericFedora19Box ): """ Generic Jenkins slave for Fedora 19 """ pass class Fedora20GenericJenkinsSlave( FedoraGenericJenkinsSlave, GenericFedora20Box ): """ Generic Jenkins slave for Fedora 20 """ pass cgcloud-releases-1.6.0/jenkins/src/cgcloud/jenkins/jenkins_master.py000066400000000000000000000326101301512357500256420ustar00rootroot00000000000000from StringIO import StringIO from contextlib import contextmanager import logging from textwrap import dedent from xml.etree import ElementTree from fabric.context_managers import hide from fabric.operations import run, sudo, put, get from cgcloud.lib.ec2 import EC2VolumeHelper from cgcloud.lib.util import UserError, abreviated_snake_case_class_name from cgcloud.core.box import fabric_task from cgcloud.core.generic_boxes import GenericUbuntuTrustyBox from cgcloud.core.source_control_client import 
SourceControlClient log = logging.getLogger( __name__ ) # FIXME: __create_jenkins_keypair and __inject_aws_credentials fail when the Jenkins volume is fresh # since certain files like config.xml don't exist (because Jenkins hasn't written them out yet or # because the plugin isn't installed yet. The workaround is to install all stop the instance ( # FIXME: __create_jenkins_keypair still uses the old configuration section to inject the private # key into Jenkins. Since then Jenkins switched to a new credentials system rendering the old # method ineffective. We should switch to the new system or remove the code. After all it is easy # enought to configure the credentials by hand. class Jenkins: user = 'jenkins' """ The name of the user account that Jenkins runs as. Note that we are not free to chose this as it is determined by the jenkins package for Ubuntu """ group = 'nogroup' """ The name of the group that Jenkins runs as. """ data_device_ext = '/dev/sdf' """ EC2's name of the block device to which to attach the Jenkins data volume """ data_device_int = '/dev/xvdf' """ The kernel's name of the block device to which to attach the Jenkins data volume """ data_volume_name = 'jenkins-data' """ The value of the Name tag of the Jenkins data volume """ data_volume_fs_label = data_volume_name """ The label of the file system on the Jenkins data volume """ data_volume_size_gb = 100 """ The size of the Jenkins data volume """ home = '/var/lib/jenkins' """ The jenkins user's home directory on the build master """ jenkins = vars( Jenkins ) class JenkinsMaster( GenericUbuntuTrustyBox, SourceControlClient ): """ An instance of this class represents the build master in EC2 """ def __init__( self, ctx ): super( JenkinsMaster, self ).__init__( ctx ) self.volume = None @classmethod def recommended_instance_type( cls ): return "m3.large" def other_accounts( self ): return super( JenkinsMaster, self ).other_accounts( ) + [ Jenkins.user ] def default_account( self ): return Jenkins.user def prepare( self, *args, **kwargs ): self.volume = EC2VolumeHelper( ec2=self.ctx.ec2, name=self.ctx.to_aws_name( Jenkins.data_volume_name ), size=Jenkins.data_volume_size_gb, availability_zone=self.ctx.availability_zone ) return super( JenkinsMaster, self ).prepare( *args, **kwargs ) def _on_instance_running( self, first_boot ): if first_boot: self.volume.attach( self.instance_id, device=Jenkins.data_device_ext ) super( JenkinsMaster, self )._on_instance_running( first_boot ) @fabric_task def _setup_package_repos( self ): # # Jenkins # super( JenkinsMaster, self )._setup_package_repos( ) run( "wget -q -O - 'http://pkg.jenkins-ci.org/debian/jenkins-ci.org.key' " "| sudo apt-key add -" ) sudo( "echo deb http://pkg.jenkins-ci.org/debian binary/ " "> /etc/apt/sources.list.d/jenkins.list" ) # # Enable multiverse sources # sudo( 'apt-add-repository multiverse' ) def _list_packages_to_install( self ): packages = super( JenkinsMaster, self )._list_packages_to_install( ) return packages + [ 'ec2-api-tools' ] @fabric_task def _install_packages( self, packages ): super( JenkinsMaster, self )._install_packages( packages ) # work around https://issues.jenkins-ci.org/browse/JENKINS-20407 sudo( 'mkdir /var/run/jenkins' ) # Use confold so it doesn't get hung up on our pre-staged /etc/default/jenkins sudo( 'apt-get -q -y -o Dpkg::Options::=--force-confold install jenkins' ) @fabric_task def _pre_install_packages( self ): # # Pre-stage the defaults file for Jenkins. 
It differs from the maintainer's version in the # following ways: (please document all changes in this comment) # # 1) cruft was removed # 2) --httpListenAddress=127.0.0.1 was added to make Jenkins listen locally only # instance_type = self.instance.instance_type etc_default_jenkins = StringIO( dedent( '''\ NAME=jenkins JAVA=/usr/bin/java JAVA_ARGS="-Xmx{jvm_heap_size}" #JAVA_ARGS="-Djava.net.preferIPv4Stack=true" # make jenkins listen on IPv4 address PIDFILE=/var/run/jenkins/jenkins.pid JENKINS_USER={user} JENKINS_WAR=/usr/share/jenkins/jenkins.war JENKINS_HOME="{home}" RUN_STANDALONE=true # log location. this may be a syslog facility.priority JENKINS_LOG=/var/log/jenkins/$NAME.log #JENKINS_LOG=daemon.info # See http://github.com/jenkinsci/jenkins/commit/2fb288474e980d0e7ff9c4a3b768874835a3e92e MAXOPENFILES=8192 HTTP_PORT=8080 AJP_PORT=-1 JENKINS_ARGS="\\ --webroot=/var/cache/jenkins/war \\ --httpPort=$HTTP_PORT \\ --ajp13Port=$AJP_PORT \\ --httpListenAddress=127.0.0.1 \\ " '''.format( jvm_heap_size='256m' if instance_type == 't1.micro' else '1G', **jenkins ) ) ) put( etc_default_jenkins, '/etc/default/jenkins', use_sudo=True, mode=0644 ) sudo( 'chown root:root /etc/default/jenkins' ) # # Prepare data volume if necessary # sudo( 'mkdir -p %s' % Jenkins.home ) # Only format empty volumes if sudo( 'file -sL %s' % Jenkins.data_device_int ) == '%s: data' % Jenkins.data_device_int: sudo( 'mkfs -t ext4 %s' % Jenkins.data_device_int ) sudo( 'e2label {data_device_int} {data_volume_fs_label}'.format( **jenkins ) ) else: # if the volume is not empty, verify the file system label label = sudo( 'e2label %s' % Jenkins.data_device_int ) if label != Jenkins.data_volume_fs_label: raise AssertionError( "Unexpected volume label: '%s'" % label ) # # Mount data volume permanently # sudo( "echo 'LABEL={data_volume_fs_label} {home} ext4 defaults 0 2' " ">> /etc/fstab".format( **jenkins ) ) sudo( 'mount -a' ) # in case the UID is different on the volume sudo( 'useradd -d {home} -g {group} -s /bin/bash {user}'.format( **jenkins ) ) sudo( 'chown -R {user} {home}'.format( **jenkins ) ) @classmethod def ec2_keypair_name( cls, ctx ): return Jenkins.user + '@' + ctx.to_aws_name( cls.role( ) ) @fabric_task( user=Jenkins.user ) def __create_jenkins_keypair( self ): key_path = '%s/.ssh/id_rsa' % Jenkins.home ec2_keypair_name = self.ec2_keypair_name( self.ctx ) ssh_privkey, ssh_pubkey = self._provide_generated_keypair( ec2_keypair_name, key_path ) with self.__patch_jenkins_config( ) as config: text_by_xpath = { './/hudson.plugins.ec2.EC2Cloud/privateKey/privateKey': ssh_privkey } for xpath, text in text_by_xpath.iteritems( ): for element in config.iterfind( xpath ): if element.text != text: element.text = text @fabric_task def _post_install_packages( self ): super( JenkinsMaster, self )._post_install_packages( ) self._propagate_authorized_keys( Jenkins.user, Jenkins.group ) self.setup_repo_host_keys( user=Jenkins.user ) self.__create_jenkins_keypair( ) def _ssh_args( self, user, command ): # Add port forwarding to Jenkins' web UI command = [ '-L', 'localhost:8080:localhost:8080' ] + command return super( JenkinsMaster, self )._ssh_args( user, command ) @fabric_task( user=Jenkins.user ) def register_slaves( self, slave_clss, clean=False, instance_type=None ): with self.__patch_jenkins_config( ) as config: templates = config.find( './/hudson.plugins.ec2.EC2Cloud/templates' ) if templates is None: raise UserError( "Can't find any configuration for the Jenkins Amazon EC2 plugin. 
Make sure it is " "installed and configured on the %s in %s." % ( self.role( ), self.ctx.namespace) ) template_element_name = 'hudson.plugins.ec2.SlaveTemplate' if clean: for old_template in templates.findall( template_element_name ): templates.getchildren( ).remove( old_template ) for slave_cls in slave_clss: slave = slave_cls( self.ctx ) images = slave.list_images( ) try: image = images[ -1 ] except IndexError: raise UserError( "No images for '%s'" % slave_cls.role( ) ) new_template = slave.slave_config_template( image, instance_type ) description = new_template.find( 'description' ).text found = False for old_template in templates.findall( template_element_name ): if old_template.find( 'description' ).text == description: if found: raise RuntimeError( 'More than one existing slave definition for %s. ' 'Fix and try again' % description ) i = templates.getchildren( ).index( old_template ) templates[ i ] = new_template found = True if not found: templates.append( new_template ) # newer versions of Jenkins add class="empty-list" attribute if there are no templates if templates.attrib.get( 'class' ) == 'empty-list': templates.attrib.pop( 'class' ) def _image_block_device_mapping( self ): # Do not include the data volume in the snapshot bdm = self.instance.block_device_mapping bdm[ Jenkins.data_device_ext ].no_device = True return bdm def _get_iam_ec2_role( self ): iam_role_name, policies = super( JenkinsMaster, self )._get_iam_ec2_role( ) iam_role_name += '--' + abreviated_snake_case_class_name( JenkinsMaster ) policies.update( dict( ec2_full=dict( Version="2012-10-17", Statement=[ # FIXME: Be more specific dict( Effect="Allow", Resource="*", Action="ec2:*" ) ] ), jenkins_master_iam_pass_role=dict( Version="2012-10-17", Statement=[ dict( Effect="Allow", Resource=self._role_arn( ), Action="iam:PassRole" ) ] ), jenkins_master_s3=dict( Version="2012-10-17", Statement=[ dict( Effect="Allow", Resource="arn:aws:s3:::*", Action="s3:ListAllMyBuckets" ), dict( Effect="Allow", Action="s3:*", Resource=[ "arn:aws:s3:::public-artifacts.cghub.ucsc.edu", "arn:aws:s3:::public-artifacts.cghub.ucsc.edu/*" ] ) ] ) ) ) return iam_role_name, policies @contextmanager def __patch_jenkins_config( self ): """ A context manager that retrieves the Jenkins configuration XML, deserializes it into an XML ElementTree, yields the XML tree, then serializes the tree and saves it back to Jenkins. """ config_file = StringIO( ) if run( 'test -f ~/config.xml', quiet=True ).succeeded: fresh_instance = False get( remote_path='~/config.xml', local_path=config_file ) else: # Get the in-memory config as the on-disk one may be absent on a fresh instance. # Luckily, a fresh instance won't have any configured security. 
fresh_instance = True config_url = 'http://localhost:8080/computer/(master)/config.xml' with hide( 'output' ): config_file.write( run( 'curl "%s"' % config_url ) ) config_file.seek( 0 ) config = ElementTree.parse( config_file ) yield config config_file.truncate( 0 ) config.write( config_file, encoding='utf-8', xml_declaration=True ) if fresh_instance: self.__service_jenkins( 'stop' ) try: put( local_path=config_file, remote_path='~/config.xml' ) finally: if fresh_instance: self.__service_jenkins( 'start' ) else: log.warn( 'Visit the Jenkins web UI and click Manage Jenkins - Reload ' 'Configuration from Disk' ) @fabric_task def __service_jenkins( self, command ): sudo( 'service jenkins %s' % command ) cgcloud-releases-1.6.0/jenkins/src/cgcloud/jenkins/jenkins_slave.py000066400000000000000000000141741301512357500254660ustar00rootroot00000000000000from bd2k.util.xml.builder import E from cgcloud.core.agent_box import AgentBox from cgcloud.lib.util import snake_to_camel, UserError from cgcloud.fabric.operations import sudo from cgcloud.core.box import fabric_task from cgcloud.core.source_control_client import SourceControlClient from cgcloud.jenkins.jenkins_master import Jenkins, JenkinsMaster build_dir = '/home/jenkins/builds' class JenkinsSlave( SourceControlClient, AgentBox ): """ A box that represents EC2 instances which can serve as a Jenkins build agent. This class is typically used as a mix-in. """ def other_accounts( self ): return super( JenkinsSlave, self ).other_accounts( ) + [ Jenkins.user ] def default_account( self ): return Jenkins.user def _post_install_packages( self ): super( JenkinsSlave, self )._post_install_packages( ) self._setup_build_user( ) # TODO: We should probably remove this and let the agent take care of it def __get_master_pubkey( self ): ec2_keypair_name = JenkinsMaster.ec2_keypair_name( self.ctx ) ec2_keypair = self.ctx.ec2.get_key_pair( ec2_keypair_name ) if ec2_keypair is None: raise UserError( "Missing EC2 keypair named '%s'. You must create the master before " "creating slaves." % ec2_keypair_name ) return self.ctx.download_ssh_pubkey( ec2_keypair ) def _populate_ec2_keypair_globs( self, ec2_keypair_globs ): super( JenkinsSlave, self )._populate_ec2_keypair_globs( ec2_keypair_globs ) ec2_keypair_globs.append( JenkinsMaster.ec2_keypair_name( self.ctx ) ) @fabric_task def _setup_build_user( self ): """ Setup a user account that accepts SSH connections from Jenkins such that it can act as a Jenkins slave. """ kwargs = dict( user=Jenkins.user, dir=build_dir, ephemeral=self._ephemeral_mount_point( 0 ), pubkey=self.__get_master_pubkey( ).strip( ) ) # Create the build user # sudo( 'useradd -m -s /bin/bash {0}'.format( Jenkins.user ) ) self._propagate_authorized_keys( Jenkins.user ) # Ensure that jenkins@jenkins-master can log into this box as the build user # sudo( "echo '{pubkey}' >> ~/.ssh/authorized_keys".format( **kwargs ), user=Jenkins.user, sudo_args='-i' ) self.setup_repo_host_keys( user=Jenkins.user ) # Setup working directory for all builds in either the build user's home or as a symlink to # the ephemeral volume if available. Remember, the ephemeral volume comes back empty every # time the box starts. # if sudo( 'test -d {ephemeral}'.format( **kwargs ), quiet=True ).failed: sudo( 'mkdir {ephemeral}'.format( **kwargs ) ) chown_cmd = "mount {ephemeral} || true ; chown -R {user}:{user} {ephemeral}".format( **kwargs ) # chown ephemeral storage now ... sudo( chown_cmd ) # ... and every time instance boots. Note that command must work when set -e is in effect. 
self._register_init_command( chown_cmd ) # link build directory as symlink to ephemeral volume sudo( 'ln -snf {ephemeral} {dir}'.format( **kwargs ), user=Jenkins.user, sudo_args='-i' ) def __jenkins_labels( self ): labels = self.role( ).split( '-' ) return [ l for l in labels if l not in [ 'jenkins', 'slave' ] ] def slave_config_template( self, image, instance_type=None ): """ Returns the slave template, i.e. a fragment of Jenkins configuration that, if added to the master's main config file, controls how EC2 instances of this slave box are created and managed by the master. :param image: the image to boot slave instances from :type image: boto.ec2.image.Image :return: an XML element containing the slave template :rtype: xml.etree.ElementTree.Element """ if instance_type is None: instance_type = self.recommended_instance_type( ) self._set_instance_options( image.tags ) spec = dict( instance_type=instance_type ) self._spec_block_device_mapping( spec, image ) return E( 'hudson.plugins.ec2.SlaveTemplate', E.ami( image.id ), # By convention we use the description element as the primary identifier. We # don't need to use the absolute role name since we are not going to mix slaves # from different namespaces: E.description( self.role( ) ), E.zone( self.ctx.availability_zone ), E.securityGroups( self.ctx.to_aws_name( self._security_group_name( ) ) ), E.remoteFS( build_dir ), E.sshPort( '22' ), E.type( snake_to_camel( instance_type, separator='.' ) ), E.labels( ' '.join( self.__jenkins_labels( ) ) ), E.mode( 'EXCLUSIVE' ), E.initScript( 'while ! touch %s/.writable; do sleep 1; done' % build_dir ), E.userData( spec.get( 'user_data', '' ) ), E.numExecutors( '1' ), E.remoteAdmin( Jenkins.user ), # Using E.foo('') instead of just E.foo() yields instead of , # consistent with how Jenkins serializes its config: E.rootCommandPrefix( '' ), E.jvmopts( '' ), E.subnetId( '' ), E.idleTerminationMinutes( '30' ), E.iamInstanceProfile( self.get_instance_profile_arn( ) ), E.useEphemeralDevices( 'true' ), E.instanceCap( '1' ), E.stopOnTerminate( 'false' ), E.tags( *[ E( 'hudson.plugins.ec2.EC2Tag', E.name( k ), E.value( v ) ) for k, v in self._get_instance_options( ).iteritems( ) if v is not None ] ), E.usePrivateDnsName( 'false' ) ) cgcloud-releases-1.6.0/jenkins/src/cgcloud/jenkins/rpmbuild_jenkins_slaves.py000066400000000000000000000034571301512357500275510ustar00rootroot00000000000000from cgcloud.core.box import fabric_task from cgcloud.core.centos_box import CentosBox from cgcloud.core.generic_boxes import GenericCentos5Box, GenericCentos6Box from cgcloud.fabric.operations import sudo from cgcloud.jenkins.jenkins_slave import JenkinsSlave class CentosRpmbuildJenkinsSlave( CentosBox, JenkinsSlave ): """ Jenkins slave for building RPMs on CentOS """ def _list_packages_to_install(self): return super( CentosRpmbuildJenkinsSlave, self )._list_packages_to_install( ) + [ 'rpmdevtools', 'tk-devel', 'tcl-devel', 'expat-devel', 'db4-devel', 'gdbm-devel', 'sqlite-devel', 'bzip2-devel', 'openssl-devel', 'ncurses-devel', 'readline-devel', # for building the Apache RPM: 'mock', 'apr-devel', 'apr-util-devel', 'pcre-devel', # for OpenSSH RPM: 'pam-devel' ] @fabric_task def _setup_build_user(self): super( CentosRpmbuildJenkinsSlave, self )._setup_build_user( ) # Some RPM builds depend on the product of other RPM builds to be installed so we need to # be able to run rpm in between RPM builds sudo( "echo 'Defaults:jenkins !requiretty' >> /etc/sudoers" ) sudo( "echo 'jenkins ALL=(ALL) NOPASSWD: /bin/rpm' >> /etc/sudoers" ) sudo( 
"useradd -s /sbin/nologin mockbuild" ) # goes with the mock package class Centos5RpmbuildJenkinsSlave(CentosRpmbuildJenkinsSlave, GenericCentos5Box): """ Jenkins slave for building RPMs on CentOS 5 """ pass class Centos6RpmbuildJenkinsSlave(CentosRpmbuildJenkinsSlave, GenericCentos6Box): """ Jenkins slave for building RPMs on CentOS 6 """ pass cgcloud-releases-1.6.0/jenkins/src/cgcloud/jenkins/s3am_jenkins_slave.py000066400000000000000000000066011301512357500264050ustar00rootroot00000000000000from cgcloud.core.ubuntu_box import Python27UpdateUbuntuBox from cgcloud.jenkins.generic_jenkins_slaves import UbuntuTrustyGenericJenkinsSlave from cgcloud.core.box import fabric_task from cgcloud.core.common_iam_policies import s3_full_policy from cgcloud.fabric.operations import remote_sudo_popen from cgcloud.lib.util import abreviated_snake_case_class_name, heredoc class S3amJenkinsSlave( UbuntuTrustyGenericJenkinsSlave, Python27UpdateUbuntuBox ): """ Jenkins slave for running the S3AM build """ @classmethod def recommended_instance_type( cls ): return "m4.xlarge" def _list_packages_to_install( self ): return super( S3amJenkinsSlave, self )._list_packages_to_install( ) + [ 'python-dev', 'gcc', 'make', 'libcurl4-openssl-dev' # pycurl ] def _post_install_packages( self ): super( S3amJenkinsSlave, self )._post_install_packages( ) self.__patch_asynchat( ) def _get_iam_ec2_role( self ): iam_role_name, policies = super( S3amJenkinsSlave, self )._get_iam_ec2_role( ) iam_role_name += '--' + abreviated_snake_case_class_name( S3amJenkinsSlave ) policies.update( dict( s3_full=s3_full_policy ) ) return iam_role_name, policies @fabric_task def __patch_asynchat( self ): """ This bites us in pyftpdlib during S3AM unit tests: http://jenkins.cgcloud.info/job/s3am/13/testReport/junit/src.s3am.test.s3am_tests/CoreTests/test_copy/ The patch is from https://hg.python.org/cpython/rev/d422062d7d36 http://bugs.python.org/issue16133 Fixed in 2.7.9: https://hg.python.org/cpython/raw-file/v2.7.9/Misc/NEWS """ if self._remote_python_version() < (2,7,9): with remote_sudo_popen( 'patch -d /usr/lib/python2.7 -p2' ) as patch: patch.write( heredoc( ''' diff --git a/Lib/asynchat.py b/Lib/asynchat.py --- a/Lib/asynchat.py +++ b/Lib/asynchat.py @@ -46,12 +46,17 @@ method) up to the terminator, and then c you - by calling your self.found_terminator() method. """ +import asyncore +import errno import socket -import asyncore from collections import deque from sys import py3kwarning from warnings import filterwarnings, catch_warnings +_BLOCKING_IO_ERRORS = (errno.EAGAIN, errno.EALREADY, errno.EINPROGRESS, + errno.EWOULDBLOCK) + + class async_chat (asyncore.dispatcher): """This is an abstract class. 
You must derive from this class, and add the two methods collect_incoming_data() and found_terminator()""" @@ -109,6 +114,8 @@ class async_chat (asyncore.dispatcher): try: data = self.recv (self.ac_in_buffer_size) except socket.error, why: + if why.args[0] in _BLOCKING_IO_ERRORS: + return self.handle_error() return''' ) ) cgcloud-releases-1.6.0/jenkins/src/cgcloud/jenkins/test/000077500000000000000000000000001301512357500232315ustar00rootroot00000000000000cgcloud-releases-1.6.0/jenkins/src/cgcloud/jenkins/test/__init__.py000066400000000000000000000000261301512357500253400ustar00rootroot00000000000000__author__ = 'hannes' cgcloud-releases-1.6.0/jenkins/src/cgcloud/jenkins/test/conftest.py000066400000000000000000000000701301512357500254250ustar00rootroot00000000000000from cgcloud.core.test.conftest import pytest_configure cgcloud-releases-1.6.0/jenkins/src/cgcloud/jenkins/test/create_all_slaves.py000066400000000000000000000317531301512357500272640ustar00rootroot00000000000000from Queue import Queue from abc import ABCMeta, abstractmethod from functools import partial from threading import Thread import unittest import os import uuid import sys from bd2k.util.fnmatch import fnmatch try: # Note that subprocess isn't thread-safe so subprocess is actually required. I'm just putting # this in a try-except to make the test loader happy. from subprocess32 import check_call, check_output except ImportError: from subprocess import check_call, check_output # This is more of an experiment rather than a full-fledged test. It works on multiple EC2 # instances in parallel, therefore making it well suited for semi-interactive use since you # don't have to wait as long for errors to show up. It runs all cgcloud invocations in tmux panes # inside a detached session. The tests print the tmux session ID so you can attach to it while # the test is running or afterwards for a post-mortem. # # Caveats: A successfull test will leave the tmux session running. Each test creates a new # session so you should clean up once in a while. The easisest way to do so is to run 'tmux # kill-server'. # Must have tmux, a fork of GNU Screen, installed for this. # Subprocess32 a backport of Python 3.2 must also be installed (via pip). 2.7's stock subprocess # keeps dead-locking on me. project_root = os.path.dirname( os.path.dirname( __file__ ) ) cgcloud = 'cgcloud' production = True if production: namespace = '/' include_master = False else: namespace = '/hannes/' include_master = True class Pane( object ): """ An abstraction of a tmux pane. A pane represents a terminal that you can run commands in. Commands run asynchronously but you can synchronized on them using the result() method. You should pre-allocate all panes you need before running commands in any of them. Commands are run using the run() method. The join() method blocks until the command finishes. The tmux pane remains open after the command finishes so you can do post-portem analysis on it, the main reason I wrote this. All panes in the interpreter share a single tmux session. The session has only one window but panes can be broken out manually after attaching to the session. """ session = 'cgcloud-%s' % uuid.uuid4( ) panes = [ ] def log( self, s ): sys.stderr.write( s + '\n' ) sys.stderr.flush( ) def __init__( self ): super( Pane, self ).__init__( ) # One tmux channel for success, one for failures. See tmux(1). self.channel_ids = tuple( uuid.uuid4( ) for _ in range( 2 ) ) # A queue between the daemon threads that service the channels and the client code. 
The # queue items are the channel index, 0 for failure, 1 or success. self.queue = Queue( maxsize=1 ) # The pane index. self.index = len( self.panes ) window = '%s:0' % self.session if self.index == 0: self.log( "Run 'tmux attach -t %s' to monitor output" % self.session ) check_call( [ 'tmux', 'new-session', '-d', '-s', self.session, '-x', '100', '-y', '80' ] ) self.tmux_id = check_output( [ 'tmux', 'list-panes', '-t', window, '-F', '#{pane_id}' ] ).strip( ) else: self.tmux_id = check_output( [ 'tmux', 'split-window', '-v', '-t', window, '-PF', '#{pane_id}' ] ).strip( ) check_call( [ 'tmux', 'select-layout', '-t', window, 'even-vertical' ] ) self.panes.append( self ) self.threads = tuple( self._start_thread( i ) for i in range( 2 ) ) def _start_thread( self, channel_index ): thread = Thread( target=partial( self._wait, channel_index ) ) thread.daemon = True thread.start( ) return thread def _wait( self, channel_index ): while True: check_call( [ 'tmux', 'wait', str( self.channel_ids[ channel_index ] ) ] ) self.queue.put( channel_index ) def run( self, cmd, ignore_failure=False ): fail_ch, success_ch = self.channel_ids if ignore_failure: cmd = '( %s ) ; tmux wait -S %s' % (cmd, success_ch) else: cmd = '( %s ) && tmux wait -S %s || tmux wait -S %s' % (cmd, success_ch, fail_ch) check_call( [ 'tmux', 'send-keys', '-t', self.tmux_id, cmd, 'C-m' ] ) def result( self ): return (False, True)[ self.queue.get( ) ] class Command( object ): """ A glorified string template for cgcloud command lines. The default values for the template arguments specified at construction can be overriden when the command is actually run, i.e. when the template is instantiated. The value for a template parameter can be either a static value or a callable taking two arguments, role and ordinal. The callable will be evaluated at instantiation time with the role and ordinal of the concrete box cgcloud should be run against. A command can be set to ignore failures, in which case a non-zero exit code from cgcloud does not fail the test. A command can be 'reverse' which means that it should be run against the list of boxes in the reverse order. How exactly "reverse" is implemented depends on the client. """ def __init__( self, command, template, ignore_failure=False, reverse=False, **template_args ): super( Command, self ).__init__( ) self.template = "{cgcloud} {command} -n {namespace} " + template self.template_args = template_args.copy( ) self.template_args.update( cgcloud=cgcloud, command=command, namespace=namespace ) self.ignore_failure = ignore_failure self.reverse = reverse def run( self, pane, role, ordinal, **template_args ): """ Instantiate this command line template and run it in the specified pane against the box of the specified role and ordinal, substituting additional template parameters with the given keyword arguments. 
""" # start with defaults _template_args = self.template_args.copy( ) # update with overrides _template_args.update( template_args ) # expand callables _template_args = dict( (k, v( role, ordinal ) if callable( v ) else v) for k, v in _template_args.iteritems( ) ) # set role and ordinal _template_args.update( role=role, ordinal=ordinal ) # finally, run the command in the pane pane.run( self.template.format( **_template_args ), ignore_failure=self.ignore_failure ) # Factory methods for cgcloud commands: def create( options="" ): return Command( "create", "--never-terminate {options} {role}", options=options ) def recreate( options="" ): return Command( "recreate", "--never-terminate {options} {role}", options=options ) def start( options="" ): return Command( "start", "-o {ordinal} {options} {role}", options=options ) def stop( options="" ): return Command( "stop", "-o {ordinal} {options} {role}", reverse=True, options=options ) def ssh( ssh_command="", options="" ): return Command( "ssh", "-o {ordinal} {options} {role} {ssh_command}", ssh_command=ssh_command, options=options ) def rsync( rsync_args, options="" ): return Command( "rsync", "-o {ordinal} {options} {role} {rsync_args}", rsync_args=rsync_args, options=options ) def image( options="" ): return Command( "image", "-o {ordinal} {options} {role}", options=options ) def terminate( options="" ): return Command( "terminate", "-o {ordinal} {options} {role}", ignore_failure=True, reverse=True, options=options ) class BaseTest( unittest.TestCase ): __metaclass__ = ABCMeta @abstractmethod def _execute_command( self, command ): pass def _list_roles( self, slave_glob ): slaves = [ slave for slave in check_output( [ cgcloud, 'list-roles' ] ).split( '\n' ) if fnmatch( slave, slave_glob ) ] return slaves def _test( self, *commands ): for command in commands: self._execute_command( command ) class DevEnvTest( BaseTest ): """ Tests the creation of the Jenkins master and its slaves for continuous integration. 
""" # slave_glob = '*-genetorrent-jenkins-slave' # slave_glob = '*-generic-jenkins-slave' # slave_glob = '*-rpmbuild-jenkins-slave' slave_glob = 'centos5-*-jenkins-slave' def _init_panes( self ): slave_roles = self._list_roles( self.slave_glob ) self.master_pane = Pane( ) if include_master else None self.slave_panes = dict( (slave_role, Pane( )) for slave_role in slave_roles ) def test_everything( self ): self._init_panes( ) self._test( create( ), stop( ), image( ), start( ), terminate( ), recreate( ), ssh( ), terminate( ) ) def _execute_command( self, command ): def test_master( ): if self.master_pane is not None: command.run( self.master_pane, 'jenkins-master', ordinal=-1 ) self.assertTrue( self.master_pane.result( ) ) def test_slaves( ): for slave_role, pane in self.slave_panes.iteritems( ): command.run( pane, slave_role, ordinal=-1 ) for pane in self.slave_panes.itervalues( ): self.assertTrue( pane.result( ) ) tests = [ test_master, test_slaves ] for test in reversed( tests ) if command.reverse else tests: test( ) class LoadTest( BaseTest ): key_file = '~/MORDOR1.pem' # local path, this will copied to each box role = 'load-test-box' # name of the cgcloud role base_url = 'https://stage.cghub.ucsc.edu/cghub/data/analysis/download/' instance_type = "m3.2xlarge" if False: uuids = [ "b08210ce-b0c1-4d6a-8762-0f981c27d692", "ffb4cff4-06ea-4332-8002-9aff51d5d388", "5c07378f-cafe-42db-a66e-d608f2f0e982", "7fffef66-627f-43f7-96b3-6672e1cb6b59", "7ec3fa29-bbec-4d08-839b-c1cd60909ed0", "4714ee84-26cd-48e7-860d-a115af0fca48", "9266e7ca-c6f9-4187-ab8b-f11f6c65bc71", "9cd637b0-9b68-4fd7-bd9e-fa41e5329242", "71ec0937-7812-4b35-87de-77174fdb28bc", "d49add54-27d2-4d77-b719-19f4d77c10c3" ] else: uuids = [ "7c619bf2-6470-4e01-9391-1c5db775537e", # 166GBs "27a1b0dc-3f1a-4606-9bd7-8b7a0a89e066", # 166GBs "027d9b42-cf22-429a-9741-da6049a5f192", # 166GBs "0600bae1-2d63-41fd-9dee-b5d3cd21b3ee", # 166GBs "c3cf7d48-e0c1-4605-a951-34ad83916361", # 166GBs # "4c87ef17-3d1b-478f-842f-4bb855abdda1", # 166GBs, unauthorized for MORDOR1.pem "44806b1a-2d77-4b67-9774-67e8a5555f88", # 166GBs "727e2955-67a3-431c-9c7c-547e6b8b7c95", # 166GBs "99728596-1409-4d5e-b2dc-744b5ba2aeab", # 166GBs # "c727c612-1be1-8c27-e040-ad451e414a7f" # >500GBs, causes 409 during download, maybe fixed now ] num_instances = len( uuids ) num_children = 8 def test_load( self ): self._init_panes( ) self._test( # recreate( "-t %s" % self.instance_type ), # rsync( '-v %s :' % self.key_file ), # ssh( self._gtdownload ), terminate( '--quick' ), ) def _gtdownload( self, role, ordinal ): return "gtdownload -d {base_url}{uuid} -c {key_file} -vv --null-storage --max-children {num_children}".format( base_url=self.base_url, uuid=self.uuids[ ordinal ], key_file=os.path.basename( self.key_file ), num_children=self.num_children ) def _init_panes( self ): self.panes = [ Pane( ) for _ in range( 0, self.num_instances ) ] def _execute_command( self, command ): for i, pane in enumerate( self.panes ): command.run( pane, self.role, ordinal=(i - self.num_instances) ) for pane in self.panes: self.assertTrue( pane.result( ) ) class TrackerStressTest( BaseTest ): role = 'load-test-box' # name of the cgcloud role stress_tracker_script = '/Users/hannes/workspace/cghub/tests/stress_tracker' instance_type = 'm3.2xlarge' num_instances = 8 def test_tracker_stress( self ): self._init_panes( ) self._test( # recreate( '-t %s' % self.instance_type ), # rsync( '-v %s :' % self.stress_tracker_script ), # ssh( 'python %s' % os.path.basename( self.stress_tracker_script ) ), terminate( 
'--quick' ), ) def _init_panes( self ): self.panes = [ Pane( ) for _ in range( 0, self.num_instances ) ] def _execute_command( self, command ): for i, pane in enumerate( self.panes ): command.run( pane, self.role, ordinal=(i - self.num_instances) ) for pane in self.panes: self.assertTrue( pane.result( ) ) if __name__ == '__main__': unittest.main( ) cgcloud-releases-1.6.0/jenkins/src/cgcloud/jenkins/toil_jenkins_slave.py000066400000000000000000000436261301512357500265210ustar00rootroot00000000000000from StringIO import StringIO import time import re from fabric.operations import run, put from bd2k.util.strings import interpolate as fmt from cgcloud.core.apache import ApacheSoftwareBox from cgcloud.core.mesos_box import MesosBox from cgcloud.jenkins.cgcloud_jenkins_slave import CgcloudJenkinsSlave from cgcloud.jenkins.jenkins_master import Jenkins from cgcloud.core.box import fabric_task from cgcloud.core.common_iam_policies import s3_full_policy, sdb_full_policy from cgcloud.core.docker_box import DockerBox from cgcloud.fabric.operations import sudo, remote_sudo_popen, remote_open from cgcloud.lib.util import abreviated_snake_case_class_name, heredoc hadoop_version = '2.6.2' # The major version of Hadoop that the Spark binaries were built against spark_hadoop_version = '2.6' spark_version = '1.5.2' install_dir = '/opt' # Inherits CgcloudJenkinsSlave because the Toil integration tests invoke cgcloud to launch more # instances, similar to what the CGCloud tests do. class ToilJenkinsSlave( CgcloudJenkinsSlave, DockerBox, MesosBox, ApacheSoftwareBox ): """ Jenkins slave for running the Toil build and tests on """ @classmethod def recommended_instance_type( cls ): return "m3.large" def _list_packages_to_install( self ): return super( ToilJenkinsSlave, self )._list_packages_to_install( ) + [ 'python-dev', 'gcc', 'make', 'libffi-dev', # pynacl -> toil, Azure client-side encryption 'libcurl4-openssl-dev', # pycurl -> SPARQLWrapper -> rdflib>=4.2.0 -> cwltool -> toil 'slurm-llnl', 'bc', # SLURM ] + [ 'gridengine-' + p for p in ('common', 'master', 'client', 'exec') ] def _get_debconf_selections( self ): return super( ToilJenkinsSlave, self )._get_debconf_selections( ) + [ 'gridengine-master shared/gridenginemaster string localhost', 'gridengine-master shared/gridenginecell string default', 'gridengine-master shared/gridengineconfig boolean true' ] def _post_install_packages( self ): super( ToilJenkinsSlave, self )._post_install_packages( ) self.setup_repo_host_keys( ) self.__disable_mesos_daemons( ) self.__install_parasol( ) self.__patch_distutils( ) self.__configure_gridengine( ) self.__configure_slurm( ) self.__install_yarn( ) self.__install_spark( ) @fabric_task def _setup_build_user( self ): super( ToilJenkinsSlave, self )._setup_build_user( ) # Allow mount and umount such that Toil tests can use an isolated loopback filesystem for # TMPDIR (and therefore Toil's work directory), thereby preventing the tracking of # left-over files from being skewed by other activities on the ephemeral file system, # like build logs, creation of .pyc files, etc. 
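        # For reference, the loop below appends the following entries to /etc/sudoers:
        #
        #   jenkins ALL=(ALL) NOPASSWD: /bin/mount
        #   jenkins ALL=(ALL) NOPASSWD: /bin/umount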
for prog in ('mount', 'umount'): sudo( "echo 'jenkins ALL=(ALL) NOPASSWD: /bin/%s' >> /etc/sudoers" % prog ) @fabric_task def __disable_mesos_daemons( self ): for daemon in ('master', 'slave'): sudo( 'echo manual > /etc/init/mesos-%s.override' % daemon ) @fabric_task def __install_parasol( self ): run( "git clone https://github.com/BD2KGenomics/parasol-binaries.git" ) sudo( "cp parasol-binaries/* /usr/local/bin" ) run( "rm -rf parasol-binaries" ) @fabric_task def __install_yarn ( self ): # Download and extract Hadoop path = fmt( 'hadoop/common/hadoop-{hadoop_version}/hadoop-{hadoop_version}.tar.gz' ) self._install_apache_package( path, install_dir ) # patch path with remote_open( '/etc/environment', use_sudo=True ) as f: yarn_path = fmt( '{install_dir}/hadoop' ) self._patch_etc_environment( f, env_pairs=dict( HADOOP_HOME=yarn_path ) ) @fabric_task def __install_spark ( self ): # Download and extract Spark path = fmt( 'spark/spark-{spark_version}/spark-{spark_version}-bin-hadoop{spark_hadoop_version}.tgz' ) self._install_apache_package( path, install_dir ) # Patch paths with remote_open( '/etc/environment', use_sudo=True ) as f: spark_home = fmt( '{install_dir}/spark' ) # These two PYTHONPATH entries are also added by the 'pyspark' wrapper script. # We need to replicate them globally because we want to be able to just do # 'import pyspark' in Toil's Spark service code and associated tests. python_path = [ fmt( '{spark_home}/python' ), run( fmt( 'ls {spark_home}/python/lib/py4j-*-src.zip' ).strip() ) ] self._patch_etc_environment( f, env_pairs=dict( SPARK_HOME=spark_home ), dirs=python_path, dirs_var='PYTHONPATH' ) def _pass_role_arn( self ): # Very permissive. But it is needed to accommodate the appliance based provisioning tests # in Toil as they don't use CGCloud's concept of namespaces. 
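        # The returned pattern looks like the following (the account ID below is made up):
        #
        #   arn:aws:iam::123456789012:role/*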
return 'arn:aws:iam::%s:role/*' % self.ctx.account def _get_iam_ec2_role( self ): iam_role_name, policies = super( ToilJenkinsSlave, self )._get_iam_ec2_role( ) iam_role_name += '--' + abreviated_snake_case_class_name( ToilJenkinsSlave ) policies.update( dict( s3_full=s3_full_policy, sdb_full=sdb_full_policy ) ) return iam_role_name, policies @fabric_task def __patch_distutils( self ): """ https://hg.python.org/cpython/rev/cf70f030a744/ https://bitbucket.org/pypa/setuptools/issues/248/exit-code-is-zero-when-upload-fails Fixed in 2.7.8: https://hg.python.org/cpython/raw-file/v2.7.8/Misc/NEWS """ if self._remote_python_version( ) < (2, 7, 8): with remote_sudo_popen( 'patch -d /usr/lib/python2.7 -p2' ) as patch: patch.write( heredoc( """ --- a/Lib/distutils/command/upload.py +++ b/Lib/distutils/command/upload.py @@ -10,7 +10,7 @@ import urlparse import cStringIO as StringIO from hashlib import md5 -from distutils.errors import DistutilsOptionError +from distutils.errors import DistutilsError, DistutilsOptionError from distutils.core import PyPIRCCommand from distutils.spawn import spawn from distutils import log @@ -181,7 +181,7 @@ class upload(PyPIRCCommand): self.announce(msg, log.INFO) except socket.error, e: self.announce(str(e), log.ERROR) - return + raise except HTTPError, e: status = e.code reason = e.msg @@ -190,5 +190,6 @@ class upload(PyPIRCCommand): self.announce('Server response (%s): %s' % (status, reason), log.INFO) else: - self.announce('Upload failed (%s): %s' % (status, reason), - log.ERROR) + msg = 'Upload failed (%s): %s' % (status, reason) + self.announce(msg, log.ERROR) + raise DistutilsError(msg)""" ) ) @fabric_task def __configure_gridengine( self ): """ Configure the GridEngine daemons (master and exec) and creata a default queue. Ensure that the queue is updated to reflect the number of cores actually available. """ ws = re.compile( r'\s+' ) nl = re.compile( r'[\r\n]+' ) def qconf( opt, **kwargs ): return qconf_dict( opt, kwargs ) def qconf_dict( opt, d=None, file_name='qconf.tmp' ): if d: # qconf can't read from stdin for some reason, neither -, /dev/stdin or /dev/fd/0 works s = '\n'.join( ' '.join( i ) for i in d.iteritems( ) ) + '\n' put( remote_path=file_name, local_path=StringIO( s ) ) sudo( ' '.join( [ 'qconf', opt, file_name ] ) ) run( ' '.join( [ 'rm', file_name ] ) ) else: return dict( tuple( ws.split( l, 1 ) ) for l in nl.split( run( 'SGE_SINGLE_LINE=1 qconf ' + opt ) ) if l and not l.startswith( '#' ) ) # Add the user defined in fname to the Sun Grid Engine cluster. qconf( '-Auser', name=Jenkins.user, oticket='0', fshare='0', delete_time='0', default_project='NONE' ) # Adds users to Sun Grid Engine user access lists (ACLs). sudo( 'qconf -au %s arusers' % Jenkins.user ) # Add hosts hostname to the list of hosts allowed to submit Sun Grid Engine jobs and # control their behavior only. 
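        # (For reference, qconf() above serializes its keyword arguments into a temporary
        # file of 'key value' lines -- e.g. 'name jenkins', assuming Jenkins.user is
        # 'jenkins' -- because qconf can't read from stdin; the file is then passed to
        # 'qconf <opt> <file>' and removed afterwards.)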
sudo( 'qconf -as localhost' ) # Remove all currently defined execution hosts run( 'for i in `qconf -sel`; do sudo qconf -de $i ; done' ) # Add an execution host qconf( '-Ae', hostname='localhost', load_scaling='NONE', complex_values='NONE', user_lists='arusers', xuser_lists='NONE', projects='NONE', xprojects='NONE', usage_scaling='NONE', report_variables='NONE' ) # Add a parallel environment qconf( '-Ap', pe_name='smp', slots='999', user_lists='NONE', xuser_lists='NONE', start_proc_args='/bin/true', stop_proc_args='/bin/true', allocation_rule='$pe_slots', control_slaves='FALSE', job_is_first_task='TRUE', urgency_slots='min', accounting_summary='FALSE' ) # Add a queue, the slots and processors will be adjusted dynamically, by an init script qconf( '-Aq', qname='all.q', processors='1', slots='1', hostlist='localhost', seq_no='0', load_thresholds='np_load_avg=1.75', suspend_thresholds='NONE', nsuspend='1', suspend_interval='00:05:00', priority='0', min_cpu_interval='00:05:00', qtype='BATCH INTERACTIVE', ckpt_list='NONE', pe_list='make smp', rerun='FALSE', tmpdir='/tmp', shell='/bin/bash', prolog='NONE', epilog='NONE', shell_start_mode='posix_compliant', starter_method='NONE', suspend_method='NONE', resume_method='NONE', terminate_method='NONE', notify='00:00:60', owner_list='NONE', user_lists='arusers', xuser_lists='NONE', subordinate_list='NONE', complex_values='NONE', projects='NONE', xprojects='NONE', calendar='NONE', initial_state='default', s_rt='INFINITY', h_rt='INFINITY', s_cpu='INFINITY', h_cpu='INFINITY', s_fsize='INFINITY', h_fsize='INFINITY', s_data='INFINITY', h_data='INFINITY', s_stack='INFINITY', h_stack='INFINITY', s_core='INFINITY', h_core='INFINITY', s_rss='INFINITY', h_rss='INFINITY', s_vmem='INFINITY', h_vmem='INFINITY' ) # Enable on-demand scheduling. This will eliminate the long time that jobs spend waiting # in the qw state. There is no -Asconf so we have to fake it using -ssconf and -Msconf. sconf = qconf( '-ssconf' ) sconf.update( dict( flush_submit_sec='1', flush_finish_sec='1', schedule_interval='0:0:1' ) ) qconf_dict( '-Msconf', sconf ) # Enable immediate flushing of the accounting file. The SGE batch system in Toil uses the # qacct program to determine the exit code of a finished job. The qacct program reads # the accounting file. By default, this file is written to every 15 seconds which means # that it may take up to 15 seconds before a finished job is seen by Toil. An # accounting_flush_time value of 00:00:00 causes the accounting file to be flushed # immediately, allowing qacct to report the status of finished jobs immediately. Again, # there is no -Aconf, so we fake it with -sconf and -Mconf. Also, the file name has to be # 'global'. 
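        # Sketch of the edit performed below: the 'reporting_params' entry reported by
        # 'qconf -sconf', which looks roughly like
        #
        #   reporting_params   accounting=true reporting=false flush_time=00:00:15 joblog=false sharelog=00:00:00
        #
        # gets accounting_flush_time=00:00:00 added (or overwritten) before the modified
        # configuration is written back via 'qconf -Mconf global'.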
conf = qconf( '-sconf' ) params = dict( tuple( e.split( '=' ) ) for e in conf[ 'reporting_params' ].split( ' ' ) ) params[ 'accounting_flush_time' ] = '00:00:00' conf[ 'reporting_params' ] = ' '.join( '='.join( e ) for e in params.iteritems( ) ) qconf_dict( '-Mconf', conf, file_name='global' ) # Register an init-script that ensures GridEngine uses localhost instead of hostname path = '/var/lib/gridengine/default/common/' self._register_init_script( 'gridengine-pre', heredoc( """ description "GridEngine pre-start configuration" console log start on filesystem pre-start script echo localhost > {path}/act_qmaster ; chown sgeadmin:sgeadmin {path}/act_qmaster echo localhost `hostname -f` > {path}/host_aliases end script""" ) ) # Register an init-script that adjust the queue config to reflect the number of cores self._register_init_script( 'gridengine-post', heredoc( """ description "GridEngine post-start configuration" console log # I would rather depend on the gridengine daemons but don't know how as they are # started by SysV init scripts. Supposedly the 'rc' job is run last. start on started rc pre-start script cores=$(grep -c '^processor' /proc/cpuinfo) qconf -mattr queue processors $cores `qselect` qconf -mattr queue slots $cores `qselect` end script""" ) ) # Run pre-start script for daemon in ('exec', 'master'): sudo( '/etc/init.d/gridengine-%s stop' % daemon ) sudo( "killall -9 -r 'sge_.*'", warn_only=True ) # the exec daemon likes to hang self._run_init_script( 'gridengine-pre' ) for daemon in ('master', 'exec'): sudo( '/etc/init.d/gridengine-%s start' % daemon ) # Run post-start script self._run_init_script( 'gridengine-post' ) while 'execd is in unknown state' in run( 'qstat -f -q all.q -explain a', warn_only=True ): time.sleep( 1 ) @fabric_task def __configure_slurm( self ): """ Configures SLURM in a single-node configuration with text-file accounting :return: """ # Create munge key and start sudo('/usr/sbin/create-munge-key') sudo('/usr/sbin/service munge start') slurm_acct_file = '/var/log/slurm-llnl/slurm-acct.txt' # Default values placed into compute node config, will be replaced by pre script slurm_conf = heredoc(""" ClusterName=jenkins-testing ControlMachine=localhost SlurmUser=slurm SlurmctldPort=6817 SlurmdPort=6818 StateSaveLocation=/tmp SlurmdSpoolDir=/tmp/slurmd SwitchType=switch/none MpiDefault=none SlurmctldPidFile=/var/run/slurmctld.pid SlurmdPidFile=/var/run/slurmd.pid ProctrackType=proctrack/pgid CacheGroups=0 ReturnToService=0 SlurmctldTimeout=300 SlurmdTimeout=300 InactiveLimit=0 MinJobAge=300 KillWait=30 Waittime=0 SchedulerType=sched/backfill SelectType=select/cons_res FastSchedule=1 # LOGGING SlurmctldDebug=3 SlurmdDebug=3 JobCompType=jobcomp/none # ACCOUNTING AccountingStorageLoc={slurm_acct_file} AccountingStorageType=accounting_storage/filetxt AccountingStoreJobComment=YES JobAcctGatherFrequency=30 JobAcctGatherType=jobacct_gather/linux # COMPUTE NODES NodeName=localhost CPUs=1 State=UNKNOWN RealMemory=256 PartitionName=debug Nodes=localhost Default=YES MaxTime=INFINITE State=UP """) slurm_conf_tmp = '/tmp/slurm.conf' slurm_conf_file = '/etc/slurm-llnl/slurm.conf' # Put config file in: /etc/slurm-llnl/slurm.conf put( remote_path=slurm_conf_tmp, local_path=StringIO( slurm_conf ) ) sudo( 'mkdir -p /etc/slurm-llnl') sudo( 'mv %s %s' % (slurm_conf_tmp, slurm_conf_file ) ) sudo('chown root:root %s' % slurm_conf_file ) # Touch the accounting job file and make sure it's owned by slurm user sudo('mkdir -p /var/log/slurm-llnl') sudo('touch %s' % slurm_acct_file) 
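        # The accounting text file backs sacct; Toil's SLURM batch system is expected to
        # rely on it to learn the exit status of finished jobs, analogous to the
        # qacct/accounting-file mechanism configured for GridEngine above.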
sudo('chown slurm:slurm %s' % slurm_acct_file) sudo('chmod 644 %s' % slurm_acct_file) # Register an init-script that sets the CPUs and RealMemory in slurm.conf # slurm.conf needs cpus and memory in order to handle jobs with these resource requests self._register_init_script( 'slurm-llnl-pre', heredoc( """ description "Slurm pre-start configuration" console log start on filesystem pre-start script CPUS=$(/usr/bin/nproc) MEMORY=$(cat /proc/meminfo | grep MemTotal | awk '{{print $2, "/ 1024"}}' | bc) sed -i "s/CPUs=[0-9]\+/CPUs=${{CPUS}}/" {slurm_conf_file} sed -i "s/RealMemory=[0-9]\+/RealMemory=${{MEMORY}}/" {slurm_conf_file} end script""" ) ) # Start slurm services self._run_init_script('slurm-llnl-pre') self._run_init_script('slurm-llnl') # Ensure partition is up sudo('scontrol update NodeName=localhost State=Down') sudo('scontrol update NodeName=localhost State=Resume') def _docker_users( self ): return super( ToilJenkinsSlave, self )._docker_users( ) + [ self.default_account( ) ] cgcloud-releases-1.6.0/lib/000077500000000000000000000000001301512357500155075ustar00rootroot00000000000000cgcloud-releases-1.6.0/lib/.gitignore000066400000000000000000000000671301512357500175020ustar00rootroot00000000000000/build /dist *.egg-info *.pyc /MANIFEST.in /version.py cgcloud-releases-1.6.0/lib/setup.cfg000066400000000000000000000002641301512357500173320ustar00rootroot00000000000000[pytest] # Look for any python file, the default of test_*.py wouldn't work for us python_files=*.py # Also run doctests addopts = --doctest-modules norecursedirs = cgcloud_Crypto cgcloud-releases-1.6.0/lib/setup.py000066400000000000000000000011121301512357500172140ustar00rootroot00000000000000from __future__ import absolute_import from setuptools import setup, find_packages from version import cgcloud_version, bd2k_python_lib_dep, boto_dep setup( name='cgcloud-lib', version=cgcloud_version, author='Hannes Schmidt', author_email='hannes@ucsc.edu', url='https://github.com/BD2KGenomics/cgcloud', description='Components shared between cgcloud-core and cgcloud-agent', package_dir={ '': 'src' }, packages=find_packages( 'src' ), namespace_packages=[ 'cgcloud' ], install_requires=[ bd2k_python_lib_dep, boto_dep ] ) cgcloud-releases-1.6.0/lib/src/000077500000000000000000000000001301512357500162765ustar00rootroot00000000000000cgcloud-releases-1.6.0/lib/src/cgcloud/000077500000000000000000000000001301512357500177165ustar00rootroot00000000000000cgcloud-releases-1.6.0/lib/src/cgcloud/__init__.py000066400000000000000000000000731301512357500220270ustar00rootroot00000000000000__import__( 'pkg_resources' ).declare_namespace( __name__ )cgcloud-releases-1.6.0/lib/src/cgcloud/lib/000077500000000000000000000000001301512357500204645ustar00rootroot00000000000000cgcloud-releases-1.6.0/lib/src/cgcloud/lib/__init__.py000066400000000000000000000001341301512357500225730ustar00rootroot00000000000000from bd2k.util.d32 import standard as d32 aws_d32 = d32 test_namespace_suffix_length = 13 cgcloud-releases-1.6.0/lib/src/cgcloud/lib/context.py000066400000000000000000000770311301512357500225320ustar00rootroot00000000000000# coding=utf-8 import hashlib from contextlib import contextmanager import json import os import urllib import re import socket import itertools import logging from boto import ec2, iam, sns, sqs, vpc from boto.s3.key import Key as S3Key from boto.exception import S3ResponseError, BotoServerError from boto.s3.connection import S3Connection from boto.sqs.connection import SQSConnection from boto.sns.connection import SNSConnection from 
boto.vpc import VPCConnection from boto.iam.connection import IAMConnection from boto.ec2.keypair import KeyPair from bd2k.util import fnmatch from bd2k.util import memoize from boto.utils import get_instance_metadata from cgcloud.lib.message import Message from cgcloud.lib.util import ec2_keypair_fingerprint, UserError log = logging.getLogger( __name__ ) class Context( object ): """ Encapsulates all EC2-specific settings used by components in this project """ availability_zone_re = re.compile( r'^([a-z]{2}-[a-z]+-[1-9][0-9]*)([a-z])$' ) name_prefix_re = re.compile( r'^(/([0-9a-z][0-9a-z._-]*))*' ) name_re = re.compile( name_prefix_re.pattern + '/?$' ) namespace_re = re.compile( name_prefix_re.pattern + '/$' ) def __init__( self, availability_zone, namespace ): """ Create an Context object. :param availability_zone: The availability zone to place EC2 resources like volumes and instances into. The AWS region to operate in is implied by this parameter since the region is a prefix of the availability zone string :param namespace: The prefix for names of EC2 resources. The namespace is string starting in '/' followed by zero or more components, separated by '/'. Components are non-empty strings consisting only of alphanumeric characters, '.', '-' or '_' and that don't start with '_'. The namespace argument will be encoded as ASCII. Unicode strings that can't be encoded as ASCII will be rejected. A note about our namespaces vs IAM's resource paths. IAM paths don't provide namespace isolation. In other words, it is not possible to have two users of the same name in two different paths. The by itself name has to be unique. For that reason, IAM resource paths are pretty much useless. >>> ctx = Context( 'us-west-1b', None ) Traceback (most recent call last): .... ValueError: Need namespace >>> Context('us-west-1b', namespace='/').namespace '/' >>> Context('us-west-1b', namespace='/foo/').namespace '/foo/' >>> Context('us-west-1b', namespace='/foo/bar/').namespace '/foo/bar/' >>> Context('us-west-1b', namespace='') Traceback (most recent call last): .... ValueError: Invalid namespace '' >>> Context('us-west-1b', namespace='foo') Traceback (most recent call last): .... ValueError: Invalid namespace 'foo' >>> Context('us-west-1b', namespace='/foo') Traceback (most recent call last): .... ValueError: Invalid namespace '/foo' >>> Context('us-west-1b', namespace='//foo/') Traceback (most recent call last): .... ValueError: Invalid namespace '//foo/' >>> Context('us-west-1b', namespace='/foo//') Traceback (most recent call last): .... ValueError: Invalid namespace '/foo//' >>> Context('us-west-1b', namespace='han//nes') Traceback (most recent call last): .... ValueError: Invalid namespace 'han//nes' >>> Context('us-west-1b', namespace='/_foo/') Traceback (most recent call last): .... ValueError: Invalid namespace '/_foo/' >>> Context('us-west-1b', namespace=u'/foo/').namespace '/foo/' >>> Context('us-west-1b', namespace=u'/föo/').namespace Traceback (most recent call last): .... ValueError: 'ascii' codec can't encode characters in position 2-3: ordinal not in range(128) >>> import string >>> component = string.ascii_lowercase + string.digits + '-_.' 
>>> namespace = '/' + component + '/' >>> Context('us-west-1b', namespace=namespace).namespace == namespace True """ super( Context, self ).__init__( ) self.__iam = None self.__vpc = None self.__s3 = None self.__sns = None self.__sqs = None self.availability_zone = availability_zone m = self.availability_zone_re.match( availability_zone ) if not m: raise ValueError( "Can't extract region from availability zone '%s'" % availability_zone ) self.region = m.group( 1 ) if namespace is None: raise ValueError( 'Need namespace' ) try: namespace = namespace.encode( 'ascii' ) except UnicodeEncodeError as e: raise ValueError( e ) namespace = self.resolve_me( namespace ) if not re.match( self.namespace_re, namespace ): raise ValueError( "Invalid namespace '%s'" % namespace ) self.namespace = namespace @property def iam( self ): """ :rtype: IAMConnection """ if self.__iam is None: self.__iam = self.__aws_connect( iam, 'universal' ) return self.__iam # VPCConnection extends EC2Connection so we can use one instance of the former for both @property def vpc( self ): """ :rtype: VPCConnection """ if self.__vpc is None: self.__vpc = self.__aws_connect( vpc ) return self.__vpc # ec2 = vpc works, too, but confuses the type hinter in PyCharm @property def ec2( self ): """ :rtype: VPCConnection """ return self.vpc @property def s3( self ): """ :rtype: S3Connection """ if self.__s3 is None: # We let S3 route buckets to regions for us. If we connected to a specific region, # bucket lookups (HEAD request against bucket URL) would fail with 301 status but # without a Location header. self.__s3 = S3Connection( ) return self.__s3 @property def sns( self ): """ :rtype: SNSConnection """ if self.__sns is None: self.__sns = self.__aws_connect( sns ) return self.__sns @property def sqs( self ): """ :rtype: SQSConnection """ if self.__sqs is None: self.__sqs = self.__aws_connect( sqs ) return self.__sqs def __aws_connect( self, aws_module, region=None, **kwargs ): if region is None: region = self.region conn = aws_module.connect_to_region( region, **kwargs ) if conn is None: raise RuntimeError( "%s couldn't connect to region %s" % ( aws_module.__name__, region) ) return conn def __enter__( self ): return self def __exit__( self, exc_type, exc_val, exc_tb ): self.close( ) def close( self ): if self.__vpc is not None: self.__vpc.close( ) if self.__s3 is not None: self.__s3.close( ) if self.__iam is not None: self.__iam.close( ) if self.__sns is not None: self.__sns.close( ) if self.__sqs is not None: self.__sqs.close( ) @staticmethod def is_absolute_name( name ): """ Returns True if the given name starts with a namespace. """ return name[ 0:1 ] == '/' class InvalidPathError( ValueError ): def __init__( self, invalid_path ): super( Context.InvalidPathError, self ).__init__( "Invalid path '%s'" % invalid_path ) def absolute_name( self, name ): """ Returns the absolute form of the specified resource name. If the specified name is already absolute, that name will be returned unchanged, otherwise the given name will be prefixed with the namespace this object was configured with. Relative names starting with underscores are disallowed. >>> ctx = Context( 'us-west-1b', namespace='/' ) >>> ctx.absolute_name('bar') '/bar' >>> ctx.absolute_name('/bar') '/bar' >>> ctx.absolute_name('') '/' >>> ctx.absolute_name('/') '/' >>> ctx.absolute_name('_bar') Traceback (most recent call last): .... InvalidPathError: Invalid path '/_bar' >>> ctx.absolute_name('/_bar') Traceback (most recent call last): .... 
InvalidPathError: Invalid path '/_bar' >>> ctx = Context( 'us-west-1b', namespace='/foo/' ) >>> ctx.absolute_name('bar') '/foo/bar' >>> ctx.absolute_name('bar/') '/foo/bar/' >>> ctx.absolute_name('bar1/bar2') '/foo/bar1/bar2' >>> ctx.absolute_name('/bar') '/bar' >>> ctx.absolute_name('') '/foo/' >>> ctx.absolute_name('/') '/' >>> ctx.absolute_name('_bar') Traceback (most recent call last): .... InvalidPathError: Invalid path '/foo/_bar' >>> ctx.absolute_name('/_bar') Traceback (most recent call last): .... InvalidPathError: Invalid path '/_bar' """ if self.is_absolute_name( name ): result = name else: result = self.namespace + name if not self.name_re.match( result ): raise self.InvalidPathError( result ) return result def to_aws_name( self, name ): """ Returns a transliteration of the name that safe to use for resource names on AWS. If the given name is relative, it converted to its absolute form before the transliteration. The transliteration uses two consequitive '_' to encode a single '_' and a single '_' to separate the name components. AWS-safe names are by definition absolute such that the leading separator can be removed. This leads to fairly readable AWS-safe names, especially for names in the root namespace, where the transliteration is the identity function if the input does not contain any '_'. This scheme only works if name components don't start with '_'. Without that condition, '/_' would become '___' the inverse of which is '_/'. >>> ctx = Context( 'us-west-1b', namespace='/' ) >>> ctx.to_aws_name( 'foo' ) 'foo' >>> ctx.from_aws_name( 'foo' ) 'foo' Illegal paths that would introduce ambiguity need to raise an exception >>> ctx.to_aws_name('/_') Traceback (most recent call last): .... InvalidPathError: Invalid path '/_' >>> ctx.to_aws_name('/_/') Traceback (most recent call last): .... InvalidPathError: Invalid path '/_/' >>> ctx.from_aws_name('___') Traceback (most recent call last): .... InvalidPathError: Invalid path '/_/' >>> ctx.to_aws_name( 'foo_bar') 'foo__bar' >>> ctx.from_aws_name( 'foo__bar') 'foo_bar' >>> ctx.to_aws_name( '/sub_ns/foo_bar') 'sub__ns_foo__bar' >>> ctx.to_aws_name( 'sub_ns/foo_bar') 'sub__ns_foo__bar' >>> ctx.from_aws_name( 'sub__ns_foo__bar' ) 'sub_ns/foo_bar' >>> ctx.to_aws_name( 'g_/' ) 'g___' >>> ctx.from_aws_name( 'g___' ) 'g_/' >>> ctx = Context( 'us-west-1b', namespace='/this_ns/' ) >>> ctx.to_aws_name( 'foo' ) 'this__ns_foo' >>> ctx.from_aws_name( 'this__ns_foo' ) 'foo' >>> ctx.to_aws_name( 'foo_bar') 'this__ns_foo__bar' >>> ctx.from_aws_name( 'this__ns_foo__bar') 'foo_bar' >>> ctx.to_aws_name( '/other_ns/foo_bar' ) 'other__ns_foo__bar' >>> ctx.from_aws_name( 'other__ns_foo__bar' ) '/other_ns/foo_bar' >>> ctx.to_aws_name( 'other_ns/foo_bar' ) 'this__ns_other__ns_foo__bar' >>> ctx.from_aws_name( 'this__ns_other__ns_foo__bar' ) 'other_ns/foo_bar' >>> ctx.to_aws_name( '/this_ns/foo_bar' ) 'this__ns_foo__bar' >>> ctx.from_aws_name( 'this__ns_foo__bar' ) 'foo_bar' """ name = self.absolute_name( name ) assert name.startswith( '/' ) return name[ 1: ].replace( '_', '__' ).replace( '/', '_' ) def from_aws_name( self, name ): """ The inverse of to_aws_name(), except that the namespace is stripped from the input if it is relative to this context's name space. 
>>> zone = 'us-west-1b' >>> Context( zone, namespace='/foo/' ).from_aws_name('bar__x') '/bar_x' >>> Context( zone, namespace='/foo_x/' ).from_aws_name('foo__x_bar') 'bar' >>> Context( zone, namespace='/' ).from_aws_name('foo__x_bar__x') 'foo_x/bar_x' >>> Context( zone, namespace='/bla/' ).from_aws_name('foo__x_bar__x') '/foo_x/bar_x' """ name = '_'.join( s.replace( '_', '/' ) for s in name.split( '__' ) ) name = '/' + name if not self.name_re.match( name ): raise self.InvalidPathError( name ) if name.startswith( self.namespace ): name = name[ len( self.namespace ): ] return name def base_name( self, name ): """ Return the last component of a name, absolute or relative. >>> ctx = Context( 'us-west-1b', namespace='/foo/bar/') >>> ctx.base_name('') '' >>> ctx.base_name('/') '' >>> ctx.base_name('/a') 'a' >>> ctx.base_name('/a/') '' >>> ctx.base_name('/a/b') 'b' >>> ctx.base_name('/a/b/') '' """ return name.split( '/' )[ -1 ] def contains_name( self, name ): return not self.is_absolute_name( name ) or name.startswith( self.namespace ) def contains_aws_name( self, aws_name ): """ >>> def c(n): return Context( 'us-west-1b', namespace=n) >>> c('/foo/' ).contains_aws_name('bar_x') False >>> c('/foo/' ).contains_aws_name('foo_x') True >>> c('/foo/' ).contains_aws_name('foo_bar_x') True >>> c('/foo/' ).contains_aws_name('bar_foo_x') False >>> c('/' ).contains_aws_name('bar_x') True >>> c('/' ).contains_aws_name('foo_x') True >>> c('/' ).contains_aws_name('foo_bar_x') True >>> c('/' ).contains_aws_name('bar_foo_x') True """ return self.contains_name( self.from_aws_name( aws_name ) ) def try_contains_aws_name( self, aws_name ): try: return self.contains_aws_name( aws_name ) except self.InvalidPathError: return False @property @memoize def account( self ): try: arn = self.iam.get_user( ).arn except: # Agent boxes run with IAM role credentials instead of user credentials. arn = get_instance_metadata( )[ 'iam' ][ 'info' ][ 'InstanceProfileArn' ] _, partition, service, region, account, resource = arn.split( ':', 6 ) return account @property @memoize def s3_bucket_name( self ): return self.account + '-cgcloud' ssh_pubkey_s3_key_prefix = 'ssh_pubkey:' @property def s3_location( self ): if self.region == 'us-east-1': return '' else: return self.region def upload_ssh_pubkey( self, ssh_pubkey, fingerprint ): bucket = self.s3.lookup( self.s3_bucket_name ) if bucket is None: bucket = self.s3.create_bucket( self.s3_bucket_name, location=self.s3_location ) s3_entry = S3Key( bucket ) s3_entry.key = self.ssh_pubkey_s3_key_prefix + fingerprint s3_entry.set_contents_from_string( ssh_pubkey ) def register_ssh_pubkey( self, ec2_keypair_name, ssh_pubkey, force=False ): """ Import the given OpenSSH public key as a 'key pair' into EC2. There is no way to get to the actual public key once it has been imported to EC2. Openstack lets you do that and I don't see why Amazon decided to omit this functionality. To work around this, we store the public key in S3, identified by the public key's fingerprint. As long as we always check the fingerprint of the downloaded public SSH key against that of the EC2 keypair key, this method is resilient against malicious modifications of the keys stored in S3. :param ec2_keypair_name: the desired name of the EC2 key pair :param ssh_pubkey: the SSH public key in OpenSSH's native format, i.e. 
format that is used in ~/ .ssh/authorized_keys :param force: overwrite existing EC2 keypair of the given name """ fingerprint = ec2_keypair_fingerprint( ssh_pubkey, reject_private_keys=True ) ec2_keypair = self.ec2.get_key_pair( ec2_keypair_name ) if ec2_keypair is not None: if ec2_keypair.name != ec2_keypair_name: raise AssertionError( "Key pair names don't match." ) if ec2_keypair.fingerprint != fingerprint: if force: self.ec2.delete_key_pair( ec2_keypair_name ) ec2_keypair = None else: raise UserError( "Key pair %s already exists in EC2, but its fingerprint %s is " "different from the fingerprint %s of the key to be imported. Use " "the force option to overwrite the existing key pair." % (ec2_keypair.name, ec2_keypair.fingerprint, fingerprint) ) if ec2_keypair is None: ec2_keypair = self.ec2.import_key_pair( ec2_keypair_name, ssh_pubkey ) assert ec2_keypair.fingerprint == fingerprint self.upload_ssh_pubkey( ssh_pubkey, fingerprint ) self.__publish_key_update_agent_message( ) return ec2_keypair def expand_keypair_globs( self, globs ): """ Returns a list of EC2 key pair objects matching the specified globs. The order of the objects in the returned list will be consistent with the order of the globs and it will not contain any elements more than once. In other words, the returned list will start with all key pairs matching the first glob, followed by key pairs matching the second glob but not the first glob and so on. :rtype: list of KeyPair """ def iam_lookup( glob ): if glob.startswith( '@@' ): return (_.user_name for _ in self.iam.get_group( 'developers' ).users) elif glob.startswith( '@' ): return (self.iam.get_user( glob[ 1: ] ).user_name,) else: return (glob,) globs = itertools.chain.from_iterable( map( iam_lookup, globs ) ) result = [ ] keypairs = dict( (keypair.name, keypair) for keypair in self.ec2.get_all_key_pairs( ) ) for glob in globs: i = len( result ) for name, keypair in keypairs.iteritems( ): if fnmatch.fnmatch( name, glob ): result.append( keypair ) # since we can't modify the set during iteration for keypair in result[ i: ]: keypairs.pop( keypair.name ) return result def download_ssh_pubkey( self, ec2_keypair ): try: bucket = self.s3.get_bucket( self.s3_bucket_name ) s3_entry = S3Key( bucket ) s3_entry.key = self.ssh_pubkey_s3_key_prefix + ec2_keypair.fingerprint ssh_pubkey = s3_entry.get_contents_as_string( ) except S3ResponseError as e: if e.status == 404: raise UserError( "There is no matching SSH pub key stored in S3 for EC2 key pair %s. Has " "it been registered, e.g using the cgcloud's register-key command?" % ec2_keypair.name ) else: raise fingerprint_len = len( ec2_keypair.fingerprint.split( ':' ) ) if fingerprint_len == 20: # 160 bit SHA-1 # The fingerprint is that of a private key. We can't get at the private key so we # can't verify the public key either. So this is inherently insecure. However, # remember that the only reason why we are dealing with n EC2-generated private # key is that the Jenkins' EC2 plugin expects a 20 byte fingerprint. See # https://issues.jenkins-ci.org/browse/JENKINS-20142 for details. Once that issue # is fixed, we can switch back to just using imported keys and 16-byte fingerprints. pass elif fingerprint_len == 16: # 128 bit MD5 fingerprint = ec2_keypair_fingerprint( ssh_pubkey ) if ec2_keypair.fingerprint != fingerprint: raise UserError( "Fingerprint mismatch for key %s! Expected %s but got %s. The EC2 keypair " "doesn't match the public key stored in S3." 
% (ec2_keypair.name, ec2_keypair.fingerprint, fingerprint) ) return ssh_pubkey @property @memoize def iam_user_name( self ): try: return self.iam.get_user( ).user_name except: log.warn( "IAMConnection.get_user() failed.", exc_info=True ) return None current_user_placeholder = '__me__' @staticmethod def drop_hostname( email ): """ >>> Context.drop_hostname("foo") 'foo' >>> Context.drop_hostname("foo@bar.com") 'foo' >>> Context.drop_hostname("") '' >>> Context.drop_hostname("@") '' """ try: n = email.index( "@" ) except ValueError: return email else: return email[ 0:n ] def resolve_me( self, s, drop_hostname=True ): placeholder = self.current_user_placeholder if placeholder in s: try: me = os.environ[ 'CGCLOUD_ME' ] except KeyError: me = self.iam_user_name if not me: raise UserError( "Can't determine current IAM user name. Be sure to put valid AWS credentials " "in ~/.boto or ~/.aws/credentials. For details, refer to %s. On an EC2 " "instance that is authorized via IAM roles, you can set the CGCLOUD_ME " "environment variable (uncommon)." % 'http://boto.readthedocs.org/en/latest/boto_config_tut.html' ) if drop_hostname: me = self.drop_hostname( me ) me = me.lower() # namespaces must be lower case return s.replace( placeholder, me ) else: return s def setup_iam_ec2_role( self, role_name, policies ): aws_role_name = self.to_aws_name( role_name ) try: self.iam.create_role( aws_role_name, assume_role_policy_document=json.dumps( { "Version": "2012-10-17", "Statement": [ { "Effect": "Allow", "Principal": { "Service": [ "ec2.amazonaws.com" ] }, "Action": [ "sts:AssumeRole" ] } ] } ) ) except BotoServerError as e: if e.status == 409 and e.error_code == 'EntityAlreadyExists': pass else: raise self.__setup_entity_policies( aws_role_name, policies, list_policies=self.iam.list_role_policies, delete_policy=self.iam.delete_role_policy, get_policy=self.iam.get_role_policy, put_policy=self.iam.put_role_policy ) return aws_role_name def setup_iam_user_policies( self, user_name, policies ): try: self.iam.create_user( user_name ) except BotoServerError as e: if e.status == 409 and e.error_code == 'EntityAlreadyExists': pass else: raise self.__setup_entity_policies( user_name, policies, list_policies=self.iam.get_all_user_policies, delete_policy=self.iam.delete_user_policy, get_policy=self.iam.get_user_policy, put_policy=self.iam.put_user_policy ) def __setup_entity_policies( self, entity_name, policies, list_policies, delete_policy, get_policy, put_policy ): # Delete superfluous policies policy_names = set( list_policies( entity_name ).policy_names ) for policy_name in policy_names.difference( set( policies.keys( ) ) ): delete_policy( entity_name, policy_name ) # Create expected policies for policy_name, policy in policies.iteritems( ): current_policy = None try: current_policy = json.loads( urllib.unquote( get_policy( entity_name, policy_name ).policy_document ) ) except BotoServerError as e: if e.status == 404 and e.error_code == 'NoSuchEntity': pass else: raise if current_policy != policy: put_policy( entity_name, policy_name, json.dumps( policy ) ) _agent_topic_name = "cgcloud-agent-notifications" @property def agent_queue_name( self ): host_qualifier = socket.gethostname( ).replace( '.', '-' ) return self._agent_topic_name + '/' + host_qualifier @property @memoize def agent_topic_arn( self ): """ The ARN of the SNS topic on which the agents listen for messages and returns its ARN. 
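        The returned ARN has the usual SNS form, e.g. (region and account ID made up)
        arn:aws:sns:us-west-1:123456789012:cgcloud-agent-notifications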
""" # Note that CreateTopic is idempotent return self.sns.create_topic( self._agent_topic_name )[ 'CreateTopicResponse' ][ 'CreateTopicResult' ][ 'TopicArn' ] def publish_agent_message( self, message ): """ :type message: Message """ self.sns.publish( self.agent_topic_arn, message.to_sns( ) ) def __publish_key_update_agent_message( self ): self.publish_agent_message( Message( type=Message.TYPE_UPDATE_SSH_KEYS ) ) def reset_namespace_security( self ): """ Delete all - IAM instance profiles, - IAM roles, - IAM policies and - EC2 security groups associated with this context, or rather the namespace this context represents. """ self.delete_instance_profiles( self.local_instance_profiles( ) ) self.delete_roles( self.local_roles( ) ) self.delete_security_groups( self.local_security_groups( ) ) def local_instance_profiles( self ): return [ p for p in self._get_all_instance_profiles( ) if self.try_contains_aws_name( p.instance_profile_name ) ] def _get_all_instance_profiles( self ): return self._pager( self.iam.list_instance_profiles, 'instance_profiles' ) def _pager( self, requestor_callable, result_attribute_name ): marker = None while True: result = requestor_callable( marker=marker ) for p in getattr( result, result_attribute_name ): yield p if result.is_truncated == 'true': marker = result.marker else: break def delete_instance_profiles( self, instance_profiles ): log.debug( 'Deleting profiles %r', instance_profiles ) for p in instance_profiles: profile_name = p.instance_profile_name with out_exception( 'instance profile', profile_name ): # currently EC2 allows only one role per profile if p.roles: role_name = p.roles.member.role_name log.debug( 'Removing role %s from profile %s', role_name, profile_name ) self.iam.remove_role_from_instance_profile( profile_name, role_name ) log.debug( 'Deleting profile %s', profile_name ) self.iam.delete_instance_profile( profile_name ) def local_roles( self ): return [ r for r in self._get_all_roles( ) if self.try_contains_aws_name( r.role_name ) ] def _get_all_roles( self ): return self._pager( self.iam.list_roles, 'roles' ) def delete_roles( self, roles ): log.debug( 'Deleting roles %r', roles ) for r in roles: with out_exception( 'role', r.role_name ): for policy_name in self.iam.list_role_policies( r.role_name ).policy_names: self.iam.delete_role_policy( r.role_name, policy_name ) self.iam.delete_role( r.role_name ) def local_security_groups( self ): return [ sg for sg in self.ec2.get_all_security_groups( ) if self.try_contains_aws_name( sg.name ) ] def delete_security_groups( self, security_groups ): log.debug( 'Deleting security groups %r', security_groups ) for sg in security_groups: with out_exception( 'security group', sg.name ): sg.delete( ) def unused_fingerprints( self ): """ Find all unused fingerprints. This method works globally and does not consider the namespace represented by this context. :rtype: set[str] """ keypairs = self.expand_keypair_globs( '*' ) ec2_fingerprints = set( keypair.fingerprint for keypair in keypairs ) bucket = self.s3.get_bucket( self.s3_bucket_name, validate=False ) prefix = self.ssh_pubkey_s3_key_prefix s3_fingerprints = set( key.name[ len( prefix ): ] for key in bucket.list( prefix=prefix ) ) unused_fingerprints = s3_fingerprints - ec2_fingerprints return unused_fingerprints def delete_fingerprints( self, fingerprints ): """ Delete the given fingerprints. 
:type fingerprints: Iterable(str) """ bucket = self.s3.get_bucket( self.s3_bucket_name, validate=False ) key_names = [ self.ssh_pubkey_s3_key_prefix + fingerprint for fingerprint in fingerprints ] bucket.delete_keys( key_names ) def unused_snapshots( self ): """ Find all snapshots created for AMIs owned by the current AWS account for which the AMI has since been unregistered. This method works globally and does not consider the namespace represented by this context. :rtype: set[str] """ all_snapshots = self.ec2.get_all_snapshots( owner='self', filters=dict( description='Created by CreateImage*' ) ) all_snapshots = set( snapshot.id for snapshot in all_snapshots ) used_snapshots = set( bdt.snapshot_id for image in self.ec2.get_all_images( owners=[ 'self' ] ) for bdt in image.block_device_mapping.itervalues( ) if bdt.snapshot_id is not None ) return all_snapshots - used_snapshots def delete_snapshots( self, unused_snapshots ): """ Delete the snapshots with the given IDs. :type unused_snapshots: collections.Iterable[str] """ for snapshot_id in unused_snapshots: log.info( 'Deleting snapshot %s', snapshot_id ) self.ec2.delete_snapshot( snapshot_id ) @contextmanager def out_exception( object_type, object_name ): try: yield except: log.warn( "Failed to remove %s '%s'", object_type, object_name, exc_info=True ) cgcloud-releases-1.6.0/lib/src/cgcloud/lib/ec2.py000066400000000000000000000430011301512357500215050ustar00rootroot00000000000000import errno import logging import time from collections import Iterator from operator import attrgetter from bd2k.util.exceptions import panic from bd2k.util.retry import retry from boto.ec2.ec2object import TaggedEC2Object from boto.ec2.instance import Instance from boto.ec2.spotinstancerequest import SpotInstanceRequest from boto.exception import EC2ResponseError, BotoServerError from cgcloud.lib.util import UserError a_short_time = 5 a_long_time = 60 * 60 log = logging.getLogger( __name__ ) def not_found( e ): return e.error_code.endswith( '.NotFound' ) def retry_ec2( retry_after=a_short_time, retry_for=10 * a_short_time, retry_while=not_found ): t = retry_after return retry( delays=(t, t, t * 2, t * 4), timeout=retry_for, predicate=retry_while ) class EC2VolumeHelper( object ): """ A helper for creating, looking up and attaching an EBS volume in EC2 """ def __init__( self, ec2, name, size, availability_zone, volume_type="standard" ): """ :param ec2: the Boto EC2 connection object :type ec2: boto.ec2.connection.EC2Connection """ super( EC2VolumeHelper, self ).__init__( ) self.availability_zone = availability_zone self.ec2 = ec2 self.name = name self.volume_type = volume_type volume = self.__lookup( ) if volume is None: log.info( "Creating volume %s, ...", self.name ) volume = self.ec2.create_volume( size, availability_zone, volume_type=self.volume_type ) self.__wait_transition( volume, { 'creating' }, 'available' ) volume.add_tag( 'Name', self.name ) log.info( '... created %s.', volume.id ) volume = self.__lookup( ) self.volume = volume def attach( self, instance_id, device ): if self.volume.attach_data.instance_id == instance_id: log.info( "Volume '%s' already attached to instance '%s'." % (self.volume.id, instance_id) ) else: self.__assert_attachable( ) self.ec2.attach_volume( volume_id=self.volume.id, instance_id=instance_id, device=device ) self.__wait_transition( self.volume, { 'available' }, 'in-use' ) if self.volume.attach_data.instance_id != instance_id: raise UserError( "Volume %s is not attached to this instance." 
) def __lookup( self ): """ Ensure that an EBS volume of the given name is available in the current availability zone. If the EBS volume exists but has been placed into a different zone, or if it is not available, an exception will be thrown. :rtype: boto.ec2.volume.Volume """ volumes = self.ec2.get_all_volumes( filters={ 'tag:Name': self.name } ) if len( volumes ) < 1: return None if len( volumes ) > 1: raise UserError( "More than one EBS volume named %s" % self.name ) return volumes[ 0 ] @staticmethod def __wait_transition( volume, from_states, to_state ): wait_transition( volume, from_states, to_state, attrgetter( 'status' ) ) def __assert_attachable( self ): if self.volume.status != 'available': raise UserError( "EBS volume %s is not available." % self.name ) expected_zone = self.availability_zone if self.volume.zone != expected_zone: raise UserError( "Availability zone of EBS volume %s is %s but should be %s." % (self.name, self.volume.zone, expected_zone) ) class UnexpectedResourceState( Exception ): def __init__( self, resource, to_state, state ): super( UnexpectedResourceState, self ).__init__( "Expected state of %s to be '%s' but got '%s'" % (resource, to_state, state) ) def wait_transition( resource, from_states, to_state, state_getter=attrgetter( 'state' ) ): """ Wait until the specified EC2 resource (instance, image, volume, ...) transitions from any of the given 'from' states to the specified 'to' state. If the instance is found in a state other that the to state or any of the from states, an exception will be thrown. :param resource: the resource to monitor :param from_states: a set of states that the resource is expected to be in before the transition occurs :param to_state: the state of the resource when this method returns """ state = state_getter( resource ) while state in from_states: time.sleep( a_short_time ) for attempt in retry_ec2( ): with attempt: resource.update( validate=True ) state = state_getter( resource ) if state != to_state: raise UnexpectedResourceState( resource, to_state, state ) def running_on_ec2( ): try: with open( '/sys/hypervisor/uuid' ) as f: return f.read( 3 ) == 'ec2' except IOError as e: if e.errno == errno.ENOENT: return False else: raise from collections import namedtuple InstanceType = namedtuple( 'InstanceType', [ 'name', # the API name of the instance type 'cores', # the number of cores 'ecu', # the computational power of the core times the number of cores 'memory', # RAM in GB 'virtualization_types', # the supported virtualization types, in order of preference 'disks', # the number of ephemeral (aka 'instance store') volumes 'disk_type', # the type of ephemeral volume 'disk_capacity', # the capacity of each ephemeral volume in GB 'spot_availability' # can this instance type be used on the spot market? 
] ) hvm = 'hvm' # hardware virtualization pv = 'paravirtual' # para-virtualization ssd = 'SSD' # solid-state disk hdd = 'HDD' # spinning disk variable_ecu = -1 # variable ecu _ec2_instance_types = [ # current generation instance types InstanceType( 't2.micro', 1, variable_ecu, 1, [ hvm ], 0, None, 0, False ), InstanceType( 't2.small', 1, variable_ecu, 2, [ hvm ], 0, None, 0, False ), InstanceType( 't2.medium', 2, variable_ecu, 4, [ hvm ], 0, None, 0, False ), InstanceType( 't2.large', 2, variable_ecu, 8, [ hvm ], 0, None, 0, False ), InstanceType( 'm3.medium', 1, 3, 3.75, [ hvm, pv ], 1, ssd, 4, True ), InstanceType( 'm3.large', 2, 6.5, 7.5, [ hvm, pv ], 1, ssd, 32, True ), InstanceType( 'm3.xlarge', 4, 13, 15, [ hvm, pv ], 2, ssd, 40, True ), InstanceType( 'm3.2xlarge', 8, 26, 30, [ hvm, pv ], 2, ssd, 80, True ), InstanceType( 'm4.large', 2, 6.5, 8, [ hvm ], 0, None, 0, True ), InstanceType( 'm4.xlarge', 4, 13, 16, [ hvm ], 0, None, 0, True ), InstanceType( 'm4.2xlarge', 8, 26, 32, [ hvm ], 0, None, 0, True ), InstanceType( 'm4.4xlarge', 16, 53.5, 64, [ hvm ], 0, None, 0, True ), InstanceType( 'm4.10xlarge', 40, 124.5, 160, [ hvm ], 0, None, 0, True ), InstanceType( 'c4.large', 2, 8, 3.75, [ hvm ], 0, None, 0, True ), InstanceType( 'c4.xlarge', 4, 16, 7.5, [ hvm ], 0, None, 0, True ), InstanceType( 'c4.2xlarge', 8, 31, 15, [ hvm ], 0, None, 0, True ), InstanceType( 'c4.4xlarge', 16, 62, 30, [ hvm ], 0, None, 0, True ), InstanceType( 'c4.8xlarge', 36, 132, 60, [ hvm ], 0, None, 0, True ), InstanceType( 'c3.large', 2, 7, 3.75, [ hvm, pv ], 2, ssd, 16, True ), InstanceType( 'c3.xlarge', 4, 14, 7.5, [ hvm, pv ], 2, ssd, 40, True ), InstanceType( 'c3.2xlarge', 8, 28, 15, [ hvm, pv ], 2, ssd, 80, True ), InstanceType( 'c3.4xlarge', 16, 55, 30, [ hvm, pv ], 2, ssd, 160, True ), InstanceType( 'c3.8xlarge', 32, 108, 60, [ hvm, pv ], 2, ssd, 320, True ), InstanceType( 'g2.2xlarge', 8, 26, 15, [ hvm ], 1, ssd, 60, True ), InstanceType( 'r3.large', 2, 6.5, 15, [ hvm ], 1, ssd, 32, True ), InstanceType( 'r3.xlarge', 4, 13, 30.5, [ hvm ], 1, ssd, 80, True ), InstanceType( 'r3.2xlarge', 8, 26, 61, [ hvm ], 1, ssd, 160, True ), InstanceType( 'r3.4xlarge', 16, 52, 122, [ hvm ], 1, ssd, 320, True ), InstanceType( 'r3.8xlarge', 32, 104, 244, [ hvm ], 2, ssd, 320, True ), InstanceType( 'i2.xlarge', 4, 14, 30.5, [ hvm ], 1, ssd, 800, False ), InstanceType( 'i2.2xlarge', 8, 27, 61, [ hvm ], 2, ssd, 800, False ), InstanceType( 'i2.4xlarge', 16, 53, 122, [ hvm ], 4, ssd, 800, False ), InstanceType( 'i2.8xlarge', 32, 104, 244, [ hvm ], 8, ssd, 800, False ), InstanceType( 'd2.xlarge', 4, 14, 30.5, [ hvm ], 3, hdd, 2000, True ), InstanceType( 'd2.2xlarge', 8, 28, 61, [ hvm ], 6, hdd, 2000, True ), InstanceType( 'd2.4xlarge', 16, 56, 122, [ hvm ], 12, hdd, 2000, True ), InstanceType( 'd2.8xlarge', 36, 116, 244, [ hvm ], 24, hdd, 2000, True ), # previous generation instance types InstanceType( 'm1.small', 1, 1, 1.7, [ pv ], 1, hdd, 160, True ), InstanceType( 'm1.medium', 1, 2, 3.75, [ pv ], 1, hdd, 410, True ), InstanceType( 'm1.large', 2, 4, 7.5, [ pv ], 2, hdd, 420, True ), InstanceType( 'm1.xlarge', 4, 8, 15, [ pv ], 4, hdd, 420, True ), InstanceType( 'c1.medium', 2, 5, 1.7, [ pv ], 1, hdd, 350, True ), InstanceType( 'c1.xlarge', 8, 20, 7, [ pv ], 4, hdd, 420, True ), InstanceType( 'cc2.8xlarge', 32, 88, 60.5, [ hvm ], 4, hdd, 840, True ), InstanceType( 'm2.xlarge', 2, 6.5, 17.1, [ pv ], 1, hdd, 420, True ), InstanceType( 'm2.2xlarge', 4, 13, 34.2, [ pv ], 1, hdd, 850, True ), InstanceType( 'm2.4xlarge', 8, 26, 
68.4, [ pv ], 2, hdd, 840, True ), InstanceType( 'cr1.8xlarge', 32, 88, 244, [ hvm ], 2, ssd, 120, True ), InstanceType( 'hi1.4xlarge', 16, 35, 60.5, [ hvm, pv ], 2, ssd, 1024, True ), InstanceType( 'hs1.8xlarge', 16, 35, 117, [ hvm, pv ], 24, hdd, 2048, False ), InstanceType( 't1.micro', 1, variable_ecu, 0.615, [ pv ], 0, None, 0, True ) ] ec2_instance_types = dict( (_.name, _) for _ in _ec2_instance_types ) def wait_instances_running( ec2, instances ): """ Wait until no instance in the given iterable is 'pending'. Yield every instance that entered the running state as soon as it does. :param boto.ec2.connection.EC2Connection ec2: the EC2 connection to use for making requests :param Iterator[Instance] instances: the instances to wait on :rtype: Iterator[Instance] """ running_ids = set( ) other_ids = set( ) while True: pending_ids = set( ) for i in instances: if i.state == 'pending': pending_ids.add( i.id ) elif i.state == 'running': assert i.id not in running_ids running_ids.add( i.id ) yield i else: assert i.id not in other_ids other_ids.add( i.id ) yield i log.info( '%i instance(s) pending, %i running, %i other.', *map( len, (pending_ids, running_ids, other_ids) ) ) if not pending_ids: break seconds = max( a_short_time, min( len( pending_ids ), 10 * a_short_time ) ) log.info( 'Sleeping for %is', seconds ) time.sleep( seconds ) for attempt in retry_ec2( ): with attempt: instances = ec2.get_only_instances( list( pending_ids ) ) def wait_spot_requests_active( ec2, requests, timeout=None, tentative=False ): """ Wait until no spot request in the given iterator is in the 'open' state or, optionally, a timeout occurs. Yield spot requests as soon as they leave the 'open' state. :param Iterator[SpotInstanceRequest] requests: :param float timeout: Maximum time in seconds to spend waiting or None to wait forever. If a timeout occurs, the remaining open requests will be cancelled. :param bool tentative: if True, give up on a spot request at the earliest indication of it not being fulfilled immediately :rtype: Iterator[list[SpotInstanceRequest]] """ if timeout is not None: timeout = time.time( ) + timeout active_ids = set( ) other_ids = set( ) open_ids = None def cancel( ): log.warn( 'Cancelling remaining %i spot requests.', len( open_ids ) ) ec2.cancel_spot_instance_requests( list( open_ids ) ) def spot_request_not_found( e ): error_code = 'InvalidSpotInstanceRequestID.NotFound' return isinstance( e, EC2ResponseError ) and e.error_code == error_code try: while True: open_ids, eval_ids, fulfill_ids = set( ), set( ), set( ) batch = [ ] for r in requests: if r.state == 'open': open_ids.add( r.id ) if r.status.code == 'pending-evaluation': eval_ids.add( r.id ) elif r.status.code == 'pending-fulfillment': fulfill_ids.add( r.id ) else: log.info( 'Request %s entered status %s indicating that it will not be ' 'fulfilled anytime soon.', r.id, r.status.code ) elif r.state == 'active': assert r.id not in active_ids active_ids.add( r.id ) batch.append( r ) else: assert r.id not in other_ids other_ids.add( r.id ) batch.append( r ) if batch: yield batch log.info( '%i spot requests(s) are open (%i of which are pending evaluation and %i ' 'are pending fulfillment), %i are active and %i are in another state.', *map( len, (open_ids, eval_ids, fulfill_ids, active_ids, other_ids) ) ) if not open_ids or tentative and not eval_ids and not fulfill_ids: break sleep_time = 2 * a_short_time if timeout is not None and time.time( ) + sleep_time >= timeout: log.warn( 'Timed out waiting for spot requests.' 
) break log.info( 'Sleeping for %is', sleep_time ) time.sleep( sleep_time ) for attempt in retry_ec2( retry_while=spot_request_not_found ): with attempt: requests = ec2.get_all_spot_instance_requests( list( open_ids ) ) except: if open_ids: with panic( log ): cancel( ) raise else: if open_ids: cancel( ) def create_spot_instances( ec2, price, image_id, spec, num_instances=1, timeout=None, tentative=False, tags=None ): """ :rtype: Iterator[list[Instance]] """ def spotRequestNotFound( e ): return e.error_code == "InvalidSpotInstanceRequestID.NotFound" for attempt in retry_ec2( retry_for=a_long_time, retry_while=inconsistencies_detected ): with attempt: requests = ec2.request_spot_instances( price, image_id, count=num_instances, **spec ) if tags is not None: for requestID in (request.id for request in requests): for attempt in retry_ec2( retry_while=spotRequestNotFound ): with attempt: ec2.create_tags( [ requestID ], tags ) num_active, num_other = 0, 0 # noinspection PyUnboundLocalVariable,PyTypeChecker # request_spot_instances's type annotation is wrong for batch in wait_spot_requests_active( ec2, requests, timeout=timeout, tentative=tentative ): instance_ids = [ ] for request in batch: if request.state == 'active': instance_ids.append( request.instance_id ) num_active += 1 else: log.info( 'Request %s in unexpected state %s.', request.id, request.state ) num_other += 1 if instance_ids: # This next line is the reason we batch. It's so we can get multiple instances in # a single request. yield ec2.get_only_instances( instance_ids ) if not num_active: message = 'None of the spot requests entered the active state' if tentative: log.warn( message + '.' ) else: raise RuntimeError( message ) if num_other: log.warn( '%i request(s) entered a state other than active.', num_other ) def inconsistencies_detected( e ): if not isinstance( e, BotoServerError ): return False if e.code == 'InvalidGroup.NotFound': return True m = e.error_message.lower( ) return 'invalid iam instance profile' in m or 'no associated iam roles' in m def create_ondemand_instances( ec2, image_id, spec, num_instances=1 ): """ Requests the RunInstances EC2 API call but accounts for the race between recently created instance profiles, IAM roles and an instance creation that refers to them. :rtype: list[Instance] """ instance_type = spec[ 'instance_type' ] log.info( 'Creating %s instance(s) ... ', instance_type ) for attempt in retry_ec2( retry_for=a_long_time, retry_while=inconsistencies_detected ): with attempt: return ec2.run_instances( image_id, min_count=num_instances, max_count=num_instances, **spec ).instances def tag_object_persistently( tagged_ec2_object, tags_dict ): """ Object tagging occasionally fails with "NotFound" types of errors so we need to retry a few times. Sigh ... :type tagged_ec2_object: TaggedEC2Object """ for attempt in retry_ec2( ): with attempt: tagged_ec2_object.add_tags( tags_dict ) cgcloud-releases-1.6.0/lib/src/cgcloud/lib/message.py000066400000000000000000000034371301512357500224710ustar00rootroot00000000000000import base64 import json class UnknownVersion( Exception ): def __init__( self, version ): super( UnknownVersion, self ).__init__( "Unknown message version %d" % version ) self.version = version class Message( object ): """ A message, mostly for passing information about events to agents. The message version is used to differentiate between incompatible message formats. 
For example, adding a field is a compatible change if there is a default value for that field, and does not require incrementing the version. Message consumers should ignore versions they don't understand. """ TYPE_UPDATE_SSH_KEYS = 1 @classmethod def from_sqs( cls, sqs_message ): """ :param sqs_message: the SQS message to initializes this instance from, assuiming that the SQS message originates from a SQS queue that is subscribed to an SNS topic :type sqs_message: SQSMessage :return: the parsed message or None if the message is of an unkwown version :rtype: Message """ sns_message = json.loads( sqs_message.get_body( ) ) return Message.from_sns( sns_message[ 'Message' ] ) @classmethod def from_sns( cls, message ): return cls.from_dict( json.loads( base64.standard_b64decode( message ) ) ) @classmethod def from_dict( cls, message ): version = message[ 'version' ] if version == 1: return cls( type=message[ 'type' ] ) else: raise UnknownVersion( version ) def __init__( self, type ): super( Message, self ).__init__( ) self.type = type def to_dict( self ): return dict( version=1, type=self.type ) def to_sns( self ): return base64.standard_b64encode( json.dumps( self.to_dict( ) ) )cgcloud-releases-1.6.0/lib/src/cgcloud/lib/test/000077500000000000000000000000001301512357500214435ustar00rootroot00000000000000cgcloud-releases-1.6.0/lib/src/cgcloud/lib/test/__init__.py000066400000000000000000000040301301512357500235510ustar00rootroot00000000000000import os import time from struct import pack from unittest import TestCase from boto.utils import get_instance_metadata from cgcloud.lib import aws_d32, test_namespace_suffix_length from cgcloud.lib.context import Context from cgcloud.lib.ec2 import running_on_ec2 class CgcloudTestCase( TestCase ): """ A base class for CGCloud test cases. When run with CGCLOUD_NAMESPACE unset, a new test namespace will be prepared during setup and cleaned up during teardown. Otherwise, the configured namespace will be used but not cleaned up. """ __namespace = None cleanup = True ctx = None @classmethod def setUpClass( cls ): super( CgcloudTestCase, cls ).setUpClass( ) if running_on_ec2( ): os.environ.setdefault( 'CGCLOUD_ZONE', get_instance_metadata( )[ 'placement' ][ 'availability-zone' ] ) # Using the d32 of a binary string that starts with a 4-byte, big-endian time stamp # yields compact names whose lexicographical sorting is consistent with the historical # order. We add the process ID so we can run tests concurrently in child processes using # the pytest-xdist plugin. suffix = aws_d32.encode( pack( '>II', int( time.time( ) ), os.getpid( ) ) ) assert len( suffix ) == test_namespace_suffix_length cls.__namespace = '/test/%s/' % suffix os.environ.setdefault( 'CGCLOUD_NAMESPACE', cls.__namespace ) cls.ctx = Context( availability_zone=os.environ[ 'CGCLOUD_ZONE' ], namespace=os.environ[ 'CGCLOUD_NAMESPACE' ] ) @classmethod def tearDownClass( cls ): # Only cleanup if the context is using the default test namespace. If another namespace # is configured, we can't assume that all resources were created by the test and that # they can therefore be removed. 
if cls.cleanup and cls.ctx.namespace == cls.__namespace: cls.ctx.reset_namespace_security( ) super( CgcloudTestCase, cls ).tearDownClass( ) cgcloud-releases-1.6.0/lib/src/cgcloud/lib/util.py000066400000000000000000000753731301512357500220320ustar00rootroot00000000000000import argparse import base64 import hashlib import logging import multiprocessing import multiprocessing.pool import os import re import struct import subprocess import sys from StringIO import StringIO from abc import ABCMeta, abstractmethod from collections import Sequence from itertools import islice, count from math import sqrt from textwrap import dedent from bd2k.util.iterables import concat from bd2k.util.strings import interpolate log = logging.getLogger( __name__ ) try: from cgcloud.crypto.PublicKey import RSA except ImportError: from cgcloud_Crypto.PublicKey import RSA cores = multiprocessing.cpu_count( ) def unpack_singleton( singleton ): """ Expects a iterable with exactly one element and returns that element. If the iterable is empty or yields more than one element an exception will be thrown. >>> unpack_singleton([0]) 0 >>> unpack_singleton([]) Traceback (most recent call last): .... RuntimeError: Expected singleton, got empty iterable >>> unpack_singleton([0,1]) Traceback (most recent call last): .... RuntimeError: Expected singleton, got iterable with more than one element """ it = iter( singleton ) try: result = it.next( ) except StopIteration: raise RuntimeError( "Expected singleton, got empty iterable" ) try: it.next( ) raise RuntimeError( "Expected singleton, got iterable with more than one element" ) except StopIteration: return result def mean( xs ): """ Return the mean value of a sequence of values. >>> mean([2,4,4,4,5,5,7,9]) 5.0 >>> mean([9,10,11,7,13]) 10.0 >>> mean([1,1,10,19,19]) 10.0 >>> mean([10,10,10,10,10]) 10.0 >>> mean([1,"b"]) Traceback (most recent call last): ... ValueError: Input can't have non-numeric elements >>> mean([]) Traceback (most recent call last): ... ValueError: Input can't be empty """ try: return sum( xs ) / float( len( xs ) ) except TypeError: raise ValueError( "Input can't have non-numeric elements" ) except ZeroDivisionError: raise ValueError( "Input can't be empty" ) def std_dev( xs ): """ Returns the standard deviation of the given iterable of numbers. From http://rosettacode.org/wiki/Standard_deviation#Python An empty list, or a list with non-numeric elements will raise a TypeError. >>> std_dev([2,4,4,4,5,5,7,9]) 2.0 >>> std_dev([9,10,11,7,13]) 2.0 >>> std_dev([1,1,10,19,19]) 8.049844718999243 >>> std_dev({1,1,10,19,19}) == std_dev({19,10,1}) True >>> std_dev([10,10,10,10,10]) 0.0 >>> std_dev([1,"b"]) Traceback (most recent call last): ... ValueError: Input can't have non-numeric elements >>> std_dev([]) Traceback (most recent call last): ... 
ValueError: Input can't be empty """ m = mean( xs ) # this checks our pre-conditions, too return sqrt( sum( (x - m) ** 2 for x in xs ) / float( len( xs ) ) ) def camel_to_snake( s, separator='_' ): """ Converts camel to snake case >>> camel_to_snake('CamelCase') 'camel_case' >>> camel_to_snake('Camel_Case') 'camel_case' >>> camel_to_snake('camelCase') 'camel_case' >>> camel_to_snake('USA') 'usa' >>> camel_to_snake('TeamUSA') 'team_usa' >>> camel_to_snake('Team_USA') 'team_usa' >>> camel_to_snake('R2D2') 'r2_d2' >>> camel_to_snake('ToilPre310Box',separator='-') 'toil-pre-310-box' >>> camel_to_snake('Toil310Box',separator='-') 'toil-310-box' """ s = re.sub( '([a-z0-9])([A-Z])', r'\1%s\2' % separator, s ) s = re.sub( '([a-z])([A-Z0-9])', r'\1%s\2' % separator, s ) return s.lower( ) def snake_to_camel( s, separator='_' ): """ Converts snake to camel case >>> snake_to_camel('') '' >>> snake_to_camel('_x____yz') 'XYz' >>> snake_to_camel('camel_case') 'CamelCase' >>> snake_to_camel('r2_d2') 'R2D2' >>> snake_to_camel('m1.small', '.') 'M1Small' """ return ''.join( [ w.capitalize( ) for w in s.split( separator ) ] ) def abreviated_snake_case_class_name( cls, root_cls=object ): """ Returns the snake-case (with '-' instead of '_') version of the name of a given class with the name of another class removed from the end. :param cls: the class whose name to abreviate :param root_cls: an ancestor of cls, whose name will be removed from the end of the name of cls :return: cls.__name__ with root_cls.__name__ removed, converted to snake case with - as the separator >>> class Dog: pass >>> abreviated_snake_case_class_name(Dog) 'dog' >>> class Dog: pass >>> abreviated_snake_case_class_name(Dog,Dog) '' >>> class BarkingDog(Dog): pass >>> abreviated_snake_case_class_name(BarkingDog,Dog) 'barking' >>> class SleepingGrowlingDog(Dog): pass >>> abreviated_snake_case_class_name(SleepingGrowlingDog,Dog) 'sleeping-growling' >>> class Lumpi(SleepingGrowlingDog): pass >>> abreviated_snake_case_class_name(Lumpi,Dog) 'lumpi' """ name = cls.__name__ suffix = root_cls.__name__ if name.endswith( suffix ): name = name[ :-len( suffix ) ] return camel_to_snake( name, separator='-' ) class UserError( RuntimeError ): def __init__( self, message=None, cause=None ): if message is None == cause is None: raise RuntimeError( "Must pass either message or cause." ) super( UserError, self ).__init__( message if cause is None else cause.message ) def app_name( ): return os.path.splitext( os.path.basename( sys.argv[ 0 ] ) )[ 0 ] class Application( object ): """ An attempt at modularizing command line parsing (argparse). This is an experiment. The general idea is to expose an application's functionality on the command line as separate subcommands, each subcommmand is represented by a separate class each of which gets its own subparser (an argparse concept). This collects both, the subcommand's functionality and the code that sets up the command line interface to that functionality under the umbrella of a single class. >>> class FooCommand( Command ): ... def __init__(self, app): ... super( FooCommand, self ).__init__( app, help='Do some voodoo' ) ... self.option( '--verbose', action='store_true' ) ... ... def run(self, options): ... 
print 'Voodoo Magic' if options.verbose else 'Juju' >>> app = Application() >>> app.add( FooCommand ) >>> app.run( [ "foo", "--verbose" ] ) # foo is the command name Voodoo Magic >>> app.run( [ "foo" ] ) Juju """ def __init__( self ): """ Initializes the argument parser """ super( Application, self ).__init__( ) self.args = None self.parser = argparse.ArgumentParser( formatter_class=ArgParseHelpFormatter ) # noinspection PyProtectedMember self.parser._positionals.title = 'Commands' # noinspection PyProtectedMember self.parser._optionals.title = 'Global options' self.subparsers = self.parser.add_subparsers( help='Application commands', dest='command_name' ) self.commands = { } def option( self, *args, **kwargs ): self._option( self.parser, args, kwargs ) @classmethod def _option( cls, target, args, kwargs ): try: completer = kwargs.pop( 'completer' ) except KeyError: completer = None argument = target.add_argument( *args, **kwargs ) if completer is not None: argument.completer = completer def add( self, command_class ): """ Instantiates a command of the specified class and adds it to this application. """ command = command_class( self ) self.commands[ command.name( ) ] = command def run( self, args=None ): """ Parses the command line into an options object using arparse and invokes the requested command's run() method with that options object. """ # Pull in bash auto completion if available try: # noinspection PyUnresolvedReferences import argcomplete except ImportError: pass else: argcomplete.autocomplete( self.parser ) self.args = args options = self.parser.parse_args( args ) self.prepare( options ) command = self.commands[ options.command_name ] command.run( options ) def prepare( self, options ): pass class Command( object ): """ An abstract base class for an applications commands. """ __metaclass__ = ABCMeta @abstractmethod def run( self, options ): """ Execute this command. :param options: the parsed command line arguments """ raise NotImplementedError( ) def __init__( self, application, **kwargs ): """ Initializes this command. :param application: The application this command belongs to. :type application: Application :param kwargs: optional arguments to the argparse's add_parser() method """ super( Command, self ).__init__( ) self.application = application doc = self.__class__.__doc__ help_ = doc.split( '\n\n', 1 )[ 0 ] if doc else None if not 'help' in kwargs: kwargs[ 'help' ] = help_ if not 'description' in kwargs: kwargs[ 'description' ] = doc self.parser = application.subparsers.add_parser( self.name( ), formatter_class=ArgParseHelpFormatter, **kwargs ) # noinspection PyProtectedMember self.parser._positionals.title = 'Command arguments' # noinspection PyProtectedMember self.parser._optionals.title = 'Command options' self.group = None def option( self, *args, **kwargs ): target = self.parser if self.group is None else self.group # noinspection PyProtectedMember self.application._option( target, args, kwargs ) def name( self ): """ Returns the name of this command as referred to by the user when invoking it via the command line. The command name is the snake-case version (with dashes instead of underscores) of this command's class name, minus its 'Command' suffix. >>> class FooBarCommand(Command): ... def run( self, options ): ... 
pass >>> app=Application() >>> FooBarCommand(app).name() 'foo-bar' """ # noinspection PyTypeChecker return abreviated_snake_case_class_name( type( self ), Command ) def begin_mutex( self, **kwargs ): self.group = self.parser.add_mutually_exclusive_group( **kwargs ) def end_mutex( self ): self.group = None class ArgParseHelpFormatter( argparse.ArgumentDefaultsHelpFormatter ): # noinspection PyBroadException try: with open( os.devnull, 'a' ) as devnull: rows, columns = map( int, subprocess.check_output( [ 'stty', 'size' ], stderr=devnull ).split( ) ) except: rows, columns = None, None def __init__( self, *args, **kwargs ): super( ArgParseHelpFormatter, self ).__init__( *args, width=min( 100, self.columns ), max_help_position=30, **kwargs ) empty_line_re = re.compile( r'^\s*(#.*)$' ) def prepend_shell_script( script, in_file, out_file ): """ Writes all lines from the specified input to the specified output. Input and output are both assumed to be file-like objects. Reading from the input as well as writing to the output starts at the current position in the respective file-like object. Unless the given script is empty or None, and before writing the first script line from the input, the given script will be written to the output, followed by a new line. A script line is a line that is not empty. An empty line is a line that contains only whitespace, a comment or both. >>> i,o = StringIO(''), StringIO() >>> prepend_shell_script('hello',i,o) >>> o.getvalue() 'hello\\n' >>> i,o = StringIO(''), StringIO() >>> prepend_shell_script('',i,o) >>> o.getvalue() '' >>> i,o = StringIO('\\n'), StringIO() >>> prepend_shell_script('hello',i,o) >>> o.getvalue() 'hello\\n\\n' >>> i,o = StringIO('#foo\\n'), StringIO() >>> prepend_shell_script('hello',i,o) >>> o.getvalue() '#foo\\nhello\\n' >>> i,o = StringIO(' # foo \\nbar\\n'), StringIO() >>> prepend_shell_script('hello',i,o) >>> o.getvalue() ' # foo \\nhello\\nbar\\n' >>> i,o = StringIO('bar\\n'), StringIO() >>> prepend_shell_script('hello',i,o) >>> o.getvalue() 'hello\\nbar\\n' >>> i,o = StringIO('#foo'), StringIO() >>> prepend_shell_script('hello',i,o) >>> o.getvalue() '#foo\\nhello\\n' >>> i,o = StringIO('#foo\\nbar # bla'), StringIO() >>> prepend_shell_script('hello',i,o) >>> o.getvalue() '#foo\\nhello\\nbar # bla\\n' >>> i,o = StringIO(' bar # foo'), StringIO() >>> prepend_shell_script('hello',i,o) >>> o.getvalue() 'hello\\n bar # foo\\n' """ def write_line( line ): out_file.write( line ) if not line.endswith( '\n' ): out_file.write( '\n' ) line = None for line in in_file: if not empty_line_re.match( line ): break write_line( line ) line = None if script: write_line( script ) if line: write_line( line ) for line in in_file: write_line( line ) def partition_seq( seq, size ): """ Splits a sequence into an iterable of subsequences. All subsequences are of the given size, except the last one, which may be smaller. If the input list is modified while the returned list is processed, the behavior of the program is undefined. :param seq: the list to split :param size: the desired size of the sublists, must be > 0 :type size: int :return: an iterable of sublists >>> list(partition_seq("",1)) [] >>> list(partition_seq("abcde",2)) ['ab', 'cd', 'e'] >>> list(partition_seq("abcd",2)) ['ab', 'cd'] >>> list(partition_seq("abcde",1)) ['a', 'b', 'c', 'd', 'e'] >>> list(partition_seq("abcde",0)) Traceback (most recent call last): ... 
ValueError: Size must be greater than 0 >>> l=[1,2,3,4] >>> i = iter( partition_seq(l,2) ) >>> l.pop(0) 1 >>> i.next() [2, 3] """ if size < 1: raise ValueError( 'Size must be greater than 0' ) return (seq[ pos:pos + size ] for pos in xrange( 0, len( seq ), size )) def ec2_keypair_fingerprint( ssh_key, reject_private_keys=False ): """ Computes the fingerrint of a public or private OpenSSH key in the way Amazon does it for keypairs resulting from either importing a SSH public key or generating a new keypair. :param ssh_key: a RSA public key in OpenSSH format, or an RSA private key in PEM format :return: The fingerprint of the key, in pairs of two hex digits with a colon between pairs. >>> ssh_pubkey = 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCvdDMvcwC1/5ByUhO1wh1sG6ficwgGHRab/p'\\ ... 'm6LN60rgxv+u2eJRao2esGB9Oyt863+HnjKj/NBdaiHTHcAHNq/TapbvEjgHaKgrVdfeMdQbJhWjJ97rql9Yn8k'\\ ... 'TNsXOeSyTW7rIKE0zeQkrwhsztmATumbQmJUMR7uuI31BxhQUfD/CoGZQrxFalWLDZcrcYY13ynplaNA/Hd/vP6'\\ ... 'qWO5WC0dTvzROEp7VwzJ7qeN2kP1JTh+kgVRoYd9mSm6x9UVjY6jQtZHa01Eg05sFraWgvNAvKhk9LS9Kiwhq8D'\\ ... 'xHdWdTamnGLtwXYQbn7RjG3UADAiTOWk+QSmU2igZvQ2F hannes@soe.ucsc.edu\\n' >>> ec2_keypair_fingerprint(ssh_pubkey) 'a5:5a:64:8a:1e:3f:4e:46:cd:1f:e9:b3:fc:cf:c5:19' >>> # This is not a private key that is in use, in case you were wondering >>> ssh_private_key = \\ ... '-----BEGIN RSA PRIVATE KEY-----\\n'+\\ ... 'MIIEpQIBAAKCAQEAi3shPK00+/6dwW8u+iDkUYiwIKl/lv0Ay5IstLszwb3CA4mVRlyq769HzE8f\\n'\\ ... 'cnzQUX/NI8y9MTO0UNt2JDMJWW5L49jmvxV0TjxQjKg8KcNzYuHsEny3k8LxezWMsmwlrrC89O6e\\n'\\ ... 'oo6boc8ForSdjVdIlJbvWu/82dThyFgTjWd5B+1O93xw8/ejqY9PfZExBeqpKjm58OUByTpVhvWe\\n'\\ ... 'jmbZ9BL60XJhwz9bDTrlKpjcGsMZ74G6XfQAhyyqXYeD/XOercCSJgQ/QjYKcPE9yMRyucHyuYZ8\\n'\\ ... 'HKzmG+u4p5ffnFb43tKzWCI330JQcklhGTldyqQHDWA41mT1QMoWfwIDAQABAoIBAF50gryRWykv\\n'\\ ... 'cuuUfI6ciaGBXCyyPBomuUwicC3v/Au+kk1M9Y7RoFxyKb/88QHZ7kTStDwDITfZmMmM5QN8oF80\\n'\\ ... 'pyXkM9bBE6MLi0zFfQCXQGN9NR4L4VGqGVfjmqUVQat8Omnv0fOpeVFpXZqij3Mw4ZDmaa7+iA+H\\n'\\ ... '72J56ru9i9wcBNqt//Kh5BXARekp7tHzklYrlqJd03ftDRp9GTBIFAsaPClTBpnPVhwD/rAoJEhb\\n'\\ ... 'KM9g/EMjQ28cUMQSHSwOyi9Rg/LtwFnER4u7pnBz2tbJFvLlXE96IQbksQL6/PTJ9H6Zpp+1fDcI\\n'\\ ... 'k/MKSQZtQOgfV8V1wlvHX+Q0bxECgYEA4LHj6o4usINnSy4cf6BRLrCA9//ePa8UjEK2YDC5rQRV\\n'\\ ... 'huFWqWJJSjWI9Ofjh8mZj8NvTJa9RW4d4Rn6F7upOuAer9obwfrmi4BEQSbvUwxQIuHOZ6itH/0L\\n'\\ ... 'klqQBuhJeyr3W+2IhudJUQz9MEoddOfYIybXqkF7XzDl2x6FcjcCgYEAnunySmjt+983gUKK9DgK\\n'\\ ... '/k1ki41jCAcFlGd8MbLEWkJpwt3FJFiyq6vVptoVH8MBnVAOjDneP6YyNBv5+zm3vyMuVJtKNcAP\\n'\\ ... 'MAxrl5/gyIBHRxD+avoqpQX/17EmrFsbMaG8IM0ZWB2lSDt45sDvpmSlcTjzrHIEGoBbOzkOefkC\\n'\\ ... 'gYEAgmS5bxSz45teBjLsNuRCOGYVcdX6krFXq03LqGaeWdl6CJwcPo/bGEWZBQbM86/6fYNcw4V2\\n'\\ ... 'sSQGEuuQRtWQj6ogJMzd7uQ7hhkZgvWlTPyIRLXloiIw1a9zV6tWiaujeOamRaLC6AawdWikRbG9\\n'\\ ... 'BmrE8yFHZnY5sjQeL9q2dmECgYEAgp5w1NCirGCxUsHLTSmzf4tFlZ9FQxficjUNVBxIYJguLkny\\n'\\ ... '/Qka8xhuqJKgwlabQR7IlmIKV+7XXRWRx/mNGsJkFo791GhlE21iEmMLdEJcVAGX3X57BuGDhVrL\\n'\\ ... 'GuhX1dfGtn9e0ZqsfE7F9YWodfBMPGA/igK9dLsEQg2H5KECgYEAvlv0cPHP8wcOL3g9eWIVCXtg\\n'\\ ... 'aQ+KiDfk7pihLnHTJVZqXuy0lFD+O/TqxGOOQS/G4vBerrjzjCXXXxi2FN0kDJhiWlRHIQALl6rl\\n'\\ ... 'i2LdKfL1sk1IA5PYrj+LmBuOLpsMHnkoH+XRJWUJkLvowaJ0aSengQ2AD+icrc/EIrpcdjU=\\n'+\\ ... 
'-----END RSA PRIVATE KEY-----\\n' >>> ec2_keypair_fingerprint(ssh_private_key) 'ac:23:ae:c3:9a:a3:78:b1:0f:8a:31:dd:13:cc:b1:8e:fb:51:42:f8' """ rsa_key = RSA.importKey( ssh_key ) is_private_key = rsa_key.has_private( ) if is_private_key and reject_private_keys: raise ValueError( 'Private keys are disallowed' ) der_rsa_key = rsa_key.exportKey( format='DER', pkcs=(8 if is_private_key else 1) ) key_hash = (hashlib.sha1 if is_private_key else hashlib.md5)( der_rsa_key ) return ':'.join( partition_seq( key_hash.hexdigest( ), 2 ) ) def private_to_public_key( private_ssh_key ): """ Returns the public key in OpenSSH format (as used in the authorized_keys file) for a given private RSA key in PEM format. >>> ssh_private_key = \\ ... '-----BEGIN RSA PRIVATE KEY-----\\n'+\\ ... 'MIIEpQIBAAKCAQEAi3shPK00+/6dwW8u+iDkUYiwIKl/lv0Ay5IstLszwb3CA4mVRlyq769HzE8f\\n'+\\ ... 'cnzQUX/NI8y9MTO0UNt2JDMJWW5L49jmvxV0TjxQjKg8KcNzYuHsEny3k8LxezWMsmwlrrC89O6e\\n'+\\ ... 'oo6boc8ForSdjVdIlJbvWu/82dThyFgTjWd5B+1O93xw8/ejqY9PfZExBeqpKjm58OUByTpVhvWe\\n'+\\ ... 'jmbZ9BL60XJhwz9bDTrlKpjcGsMZ74G6XfQAhyyqXYeD/XOercCSJgQ/QjYKcPE9yMRyucHyuYZ8\\n'+\\ ... 'HKzmG+u4p5ffnFb43tKzWCI330JQcklhGTldyqQHDWA41mT1QMoWfwIDAQABAoIBAF50gryRWykv\\n'+\\ ... 'cuuUfI6ciaGBXCyyPBomuUwicC3v/Au+kk1M9Y7RoFxyKb/88QHZ7kTStDwDITfZmMmM5QN8oF80\\n'+\\ ... 'pyXkM9bBE6MLi0zFfQCXQGN9NR4L4VGqGVfjmqUVQat8Omnv0fOpeVFpXZqij3Mw4ZDmaa7+iA+H\\n'+\\ ... '72J56ru9i9wcBNqt//Kh5BXARekp7tHzklYrlqJd03ftDRp9GTBIFAsaPClTBpnPVhwD/rAoJEhb\\n'+\\ ... 'KM9g/EMjQ28cUMQSHSwOyi9Rg/LtwFnER4u7pnBz2tbJFvLlXE96IQbksQL6/PTJ9H6Zpp+1fDcI\\n'+\\ ... 'k/MKSQZtQOgfV8V1wlvHX+Q0bxECgYEA4LHj6o4usINnSy4cf6BRLrCA9//ePa8UjEK2YDC5rQRV\\n'+\\ ... 'huFWqWJJSjWI9Ofjh8mZj8NvTJa9RW4d4Rn6F7upOuAer9obwfrmi4BEQSbvUwxQIuHOZ6itH/0L\\n'+\\ ... 'klqQBuhJeyr3W+2IhudJUQz9MEoddOfYIybXqkF7XzDl2x6FcjcCgYEAnunySmjt+983gUKK9DgK\\n'+\\ ... '/k1ki41jCAcFlGd8MbLEWkJpwt3FJFiyq6vVptoVH8MBnVAOjDneP6YyNBv5+zm3vyMuVJtKNcAP\\n'+\\ ... 'MAxrl5/gyIBHRxD+avoqpQX/17EmrFsbMaG8IM0ZWB2lSDt45sDvpmSlcTjzrHIEGoBbOzkOefkC\\n'+\\ ... 'gYEAgmS5bxSz45teBjLsNuRCOGYVcdX6krFXq03LqGaeWdl6CJwcPo/bGEWZBQbM86/6fYNcw4V2\\n'+\\ ... 'sSQGEuuQRtWQj6ogJMzd7uQ7hhkZgvWlTPyIRLXloiIw1a9zV6tWiaujeOamRaLC6AawdWikRbG9\\n'+\\ ... 'BmrE8yFHZnY5sjQeL9q2dmECgYEAgp5w1NCirGCxUsHLTSmzf4tFlZ9FQxficjUNVBxIYJguLkny\\n'+\\ ... '/Qka8xhuqJKgwlabQR7IlmIKV+7XXRWRx/mNGsJkFo791GhlE21iEmMLdEJcVAGX3X57BuGDhVrL\\n'+\\ ... 'GuhX1dfGtn9e0ZqsfE7F9YWodfBMPGA/igK9dLsEQg2H5KECgYEAvlv0cPHP8wcOL3g9eWIVCXtg\\n'+\\ ... 'aQ+KiDfk7pihLnHTJVZqXuy0lFD+O/TqxGOOQS/G4vBerrjzjCXXXxi2FN0kDJhiWlRHIQALl6rl\\n'+\\ ... 'i2LdKfL1sk1IA5PYrj+LmBuOLpsMHnkoH+XRJWUJkLvowaJ0aSengQ2AD+icrc/EIrpcdjU=\\n'+\\ ... '-----END RSA PRIVATE KEY-----\\n' >>> ssh_pubkey = 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCLeyE8rTT7/p3Bby76IORRiLA'\\ ... 'gqX+W/QDLkiy0uzPBvcIDiZVGXKrvr0fMTx9yfNBRf80jzL0xM7RQ23YkMwlZbkvj2Oa/FXROPFC'\\ ... 'MqDwpw3Ni4ewSfLeTwvF7NYyybCWusLz07p6ijpuhzwWitJ2NV0iUlu9a7/zZ1OHIWBONZ3kH7U7'\\ ... '3fHDz96Opj099kTEF6qkqObnw5QHJOlWG9Z6OZtn0EvrRcmHDP1sNOuUqmNwawxnvgbpd9ACHLKp'\\ ... 'dh4P9c56twJImBD9CNgpw8T3IxHK5wfK5hnwcrOYb67inl9+cVvje0rNYIjffQlBySWEZOV3KpAc'\\ ... 
'NYDjWZPVAyhZ/' >>> private_to_public_key(ssh_private_key) == ssh_pubkey True """ rsa_key = RSA.importKey( private_ssh_key ) if rsa_key.has_private( ): return rsa_key.publickey( ).exportKey( format='OpenSSH' ) else: raise ValueError( 'Expected private key' ) def volume_label_hash( s ): """ Linux volume labels are typically limited to 12 or 16 characters while the strings we want to use for them are longer, usually a namespaced role name with additional data at the end. This hash function returns a 12-character string that is reasonably representative of the input string. >>> volume_label_hash( 'hannes_spark-master__0' ) 'i0u77fnocoo' >>> volume_label_hash( '' ) 'PZ2FQWP48Ho' >>> volume_label_hash( ' ' ) 'oIf03JUELnY' >>> volume_label_hash( '1' ) 'yQYSos_Mpxk' """ h = hashlib.md5( s ) h = h.digest( ) assert len( h ) == 16 hi, lo = struct.unpack( '!QQ', h ) h = hi ^ lo h = struct.pack( '!Q', h ) assert len( h ) == 8 h = base64.urlsafe_b64encode( h ) assert h[ -1 ] == '=' return h[ :-1 ] def prefix_lines( text, prefix ): """ Prefix each non-empty line in the given text with the given prefix. >>> prefix_lines('',' ') '' >>> prefix_lines(' ',' ') ' ' >>> prefix_lines('\\n',' ') '\\n' >>> prefix_lines('x',' ') ' x' >>> prefix_lines('x\\n',' ') ' x\\n' >>> prefix_lines('x\\ny\\n', ' ' ) ' x\\n y\\n' >>> prefix_lines('x\\ny', ' ' ) ' x\\n y' """ return '\n'.join( prefix + l if l else l for l in text.split( '\n' ) ) def heredoc( s, indent=None ): """ Here-documents [1] for Python. Unindents the given string and interpolates format()-like placeholders with local variables from the calling method's stack frame. The interpolation part is a bit like black magic but it is tremendously useful. [1]: https://en.wikipedia.org/wiki/Here_document >>> x, y = 42, 7 >>> heredoc( ''' ... x is {x} ... y is {y} ... ''' ) 'x is 42\\ny is 7\\n' """ if s[ 0 ] == '\n': s = s[ 1: ] if s[ -1 ] != '\n': s += '\n' s = dedent( s ) if indent is not None: s = prefix_lines( s, indent ) return interpolate( s, skip_frames=1 ) try: # noinspection PyUnresolvedReferences from concurrent.futures import ThreadPoolExecutor except ImportError: # Fall back to the old implementation that uses the undocument thread pool in # multiprocessing. It does not allow interruption via Ctrl-C. from contextlib import contextmanager @contextmanager def thread_pool( size ): """ A context manager that yields a thread pool of the given size. On normal closing, this context manager closes the pool and joins all threads in it. On exceptions, the pool will be terminated but threads won't be joined. """ pool = multiprocessing.pool.ThreadPool( processes=size ) try: yield pool except: pool.terminate( ) raise else: pool.close( ) pool.join( ) else: # If the futures backport is installed, use that as it is documented and handles Ctrl-C more # gracefully. # noinspection PyPep8Naming class thread_pool( object ): """ A context manager that yields a thread pool of the given size. On normal closing, this context manager closes the pool and joins all threads in it. On exceptions, the pool will be terminated but threads won't be joined. 
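    A minimal usage sketch (illustrative only; both the futures-based class and the
    multiprocessing fallback expose the same map() and apply_async() methods):

    >>> with thread_pool( 2 ) as pool:
    ...     pool.map( lambda x: x * x, [ 1, 2, 3 ] )
    [1, 4, 9]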
""" def __init__( self, size ): self.executor = ThreadPoolExecutor( size ) def __enter__( self ): return self # noinspection PyUnusedLocal def __exit__( self, exc_type, exc_val, exc_tb ): self.executor.shutdown( wait=exc_type is None ) def apply_async( self, fn, args, callback=None ): future = self.executor.submit( fn, *args ) if callback is not None: future.add_done_callback( lambda f: callback( f.result( ) ) ) def map( self, fn, iterable ): return list( self.executor.map( fn, iterable ) ) def pmap( f, seq, pool_size=cores ): """ Apply the given function to each element of the given sequence and return a sequence of the result of each function application. Do so in parallel, using a thread pool no larger than the given size. :param callable f: the function to be applied :param Sequence seq: the input sequence :param int pool_size: the desired pool size, if absent the number of CPU cores will be used. The actual pool size may be smaller if the input sequence is small. A pool size of 0 will make this function behave exactly like the map() builtin, i.e. the function will be applied serially in the current thread. >>> pmap( lambda (a, b): a + b, [], pool_size=0 ) [] >>> pmap( lambda (a, b): a + b, [ (1, 2) ], pool_size=0 ) [3] >>> pmap( lambda (a, b): a + b, [ (1, 2), (3, 4) ], pool_size=0 ) [3, 7] >>> pmap( lambda a, b: a + b, [ (1, 2), (3, 4) ], pool_size=0 ) Traceback (most recent call last): ... TypeError: () takes exactly 2 arguments (1 given) >>> pmap( lambda (a, b): a + b, [], pool_size=1 ) [] >>> pmap( lambda (a, b): a + b, [ (1, 2) ], pool_size=1 ) [3] >>> pmap( lambda (a, b): a + b, [ (1, 2), (3, 4) ], pool_size=1 ) [3, 7] >>> pmap( lambda a, b: a + b, [ (1, 2), (3, 4) ], pool_size=1 ) Traceback (most recent call last): ... TypeError: () takes exactly 2 arguments (1 given) >>> pmap( lambda (a, b): a + b, [], pool_size=2 ) [] >>> pmap( lambda (a, b): a + b, [ (1, 2) ], pool_size=2 ) [3] >>> pmap( lambda (a, b): a + b, [ (1, 2), (3, 4) ], pool_size=2 ) [3, 7] >>> pmap( lambda a, b: a + b, [ (1, 2), (3, 4) ], pool_size=2 ) Traceback (most recent call last): ... TypeError: () takes exactly 2 arguments (1 given) """ __check_pool_size( pool_size ) n = len( seq ) if n: if pool_size == 0: return map( f, seq ) else: with thread_pool( min( pool_size, n ) ) as pool: return pool.map( f, seq ) else: return [ ] def papply( f, seq, pool_size=cores, callback=None ): """ Apply the given function to each element of the given sequence, optionally invoking the given callback with the result of each application. Do so in parallel, using a thread pool no larger than the given size. :param callable f: the function to be applied :param Sequence seq: the input sequence :param int pool_size: the desired pool size, if absent the number of CPU cores will be used. The actual pool size may be smaller if the input sequence is small.A pool size of 0 will make this function emulate the apply() builtin, i.e. f (and the callback, if provided) will be invoked serially in the current thread. 
:param callable callback: an optional function to be invoked with the return value of f >>> l=[]; papply( lambda a, b: a + b, [], pool_size=0, callback=l.append ); l [] >>> l=[]; papply( lambda a, b: a + b, [ (1, 2) ], pool_size=0, callback=l.append); l [3] >>> l=[]; papply( lambda a, b: a + b, [ (1, 2), (3, 4) ], pool_size=0, callback=l.append ); l [3, 7] >>> l=[]; papply( lambda a, b: a + b, [], pool_size=1, callback=l.append ); l [] >>> l=[]; papply( lambda a, b: a + b, [ (1, 2) ], pool_size=1, callback=l.append); l [3] >>> l=[]; papply( lambda a, b: a + b, [ (1, 2), (3, 4) ], pool_size=1, callback=l.append ); l [3, 7] >>> l=[]; papply( lambda a, b: a + b, [], pool_size=2, callback=l.append ); l [] >>> l=[]; papply( lambda a, b: a + b, [ (1, 2) ], pool_size=2, callback=l.append); l [3] >>> l=[]; papply( lambda a, b: a + b, [ (1, 2), (3, 4) ], pool_size=2, callback=l.append ); l [3, 7] """ __check_pool_size( pool_size ) n = len( seq ) if n: if pool_size == 0: for args in seq: result = apply( f, args ) if callback is not None: callback( result ) else: with thread_pool( min( pool_size, n ) ) as pool: for args in seq: pool.apply_async( f, args, callback=callback ) def __check_pool_size( pool_size ): if pool_size < 0: raise ValueError( 'Pool size must be >= 0' ) def allocate_cluster_ordinals( num, used ): """ Return an iterator containing a given number of unused cluster ordinals. The result is guaranteed to yield each ordinal exactly once, i.e. the result is set-like. The argument set and the result iterator will be disjoint. The sum of all ordinals in the argument and the result is guaranteed to be minimal, i.e. the function will first fill the gaps in the argument before allocating higher values. The result will yield ordinal in ascending order. :param int num: the number of ordinal to allocate :param set[int] used: a set of currently used ordinal :rtype: iterator >>> f = allocate_cluster_ordinals >>> list(f(0,set())) [] >>> list(f(1,set())) [0] >>> list(f(0,{0})) [] >>> list(f(1,{0})) [1] >>> list(f(0,{0,1})) [] >>> list(f(1,{0,1})) [2] >>> list(f(0,{0,2})) [] >>> list(f(1,{0,2})) [1] >>> list(f(2,{0,2})) [1, 3] >>> list(f(3,{0,2})) [1, 3, 4] """ assert isinstance( used, set ) first_free = max( used ) + 1 if used else 0 complete = set( range( 0, len( used ) ) ) gaps = sorted( complete - used ) return islice( concat( gaps, count( first_free ) ), num ) cgcloud-releases-1.6.0/lib/src/cgcloud_Crypto/000077500000000000000000000000001301512357500212565ustar00rootroot00000000000000cgcloud-releases-1.6.0/lib/src/cgcloud_Crypto/Hash/000077500000000000000000000000001301512357500221415ustar00rootroot00000000000000cgcloud-releases-1.6.0/lib/src/cgcloud_Crypto/Hash/MD5.py000066400000000000000000000062661301512357500231120ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # =================================================================== # The contents of this file are dedicated to the public domain. To # the extent that dedication to the public domain is not available, # everyone is granted a worldwide, perpetual, royalty-free, # non-exclusive license to exercise all rights associated with the # contents of this file for any purpose whatsoever. # No rights are reserved. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # =================================================================== """MD5 cryptographic hash algorithm. MD5 is specified in RFC1321_ and produces the 128 bit digest of a message. >>> from cgcloud_Crypto.Hash import MD5 >>> >>> h = MD5.new() >>> h.update(b'Hello') >>> print h.hexdigest() MD5 stand for Message Digest version 5, and it was invented by Rivest in 1991. This algorithm is insecure. Do not use it for new designs. .. _RFC1321: http://tools.ietf.org/html/rfc1321 """ from __future__ import nested_scopes _revision__ = "$Id$" __all__ = ['new', 'block_size', 'digest_size'] from cgcloud_Crypto.Util.py3compat import * if sys.version_info[0] == 2 and sys.version_info[1] == 1: from cgcloud_Crypto.Util.py21compat import * def __make_constructor(): try: # The md5 module is deprecated in Python 2.6, so use hashlib when possible. from hashlib import md5 as _hash_new except ImportError: from md5 import new as _hash_new h = _hash_new() if hasattr(h, 'new') and hasattr(h, 'name') and hasattr(h, 'digest_size') and hasattr(h, 'block_size'): # The module from stdlib has the API that we need. Just use it. return _hash_new else: # Wrap the hash object in something that gives us the expected API. _copy_sentinel = object() class _MD5(object): digest_size = 16 block_size = 64 name = "md5" def __init__(self, *args): if args and args[0] is _copy_sentinel: self._h = args[1] else: self._h = _hash_new(*args) def copy(self): return _MD5(_copy_sentinel, self._h.copy()) def update(self, *args): f = self.update = self._h.update f(*args) def digest(self): f = self.digest = self._h.digest return f() def hexdigest(self): f = self.hexdigest = self._h.hexdigest return f() _MD5.new = _MD5 return _MD5 new = __make_constructor() del __make_constructor #: The size of the resulting hash in bytes. digest_size = new().digest_size #: The internal block size of the hash algorithm in bytes. block_size = new().block_size cgcloud-releases-1.6.0/lib/src/cgcloud_Crypto/Hash/__init__.py000066400000000000000000000102641301512357500242550ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # =================================================================== # The contents of this file are dedicated to the public domain. To # the extent that dedication to the public domain is not available, # everyone is granted a worldwide, perpetual, royalty-free, # non-exclusive license to exercise all rights associated with the # contents of this file for any purpose whatsoever. # No rights are reserved. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. 
# =================================================================== """Hashing algorithms Hash functions take arbitrary binary strings as input, and produce a random-like output of fixed size that is dependent on the input; it should be practically infeasible to derive the original input data given only the hash function's output. In other words, the hash function is *one-way*. It should also not be practically feasible to find a second piece of data (a *second pre-image*) whose hash is the same as the original message (*weak collision resistance*). Finally, it should not be feasible to find two arbitrary messages with the same hash (*strong collision resistance*). The output of the hash function is called the *digest* of the input message. In general, the security of a hash function is related to the length of the digest. If the digest is *n* bits long, its security level is roughly comparable to the the one offered by an *n/2* bit encryption algorithm. Hash functions can be used simply as a integrity check, or, in association with a public-key algorithm, can be used to implement digital signatures. The hashing modules here all support the interface described in `PEP 247`_ , "API for Cryptographic Hash Functions". .. _`PEP 247` : http://www.python.org/dev/peps/pep-0247/ :undocumented: _MD2, _MD4, _RIPEMD160, _SHA224, _SHA256, _SHA384, _SHA512 """ __all__ = [ 'MD5' ] __revision__ = "$Id$" import sys if sys.version_info[0] == 2 and sys.version_info[1] == 1: from cgcloud_Crypto.Util.py21compat import * from cgcloud_Crypto.Util.py3compat import * def new(algo, *args): """Initialize a new hash object. The first argument to this function may be an algorithm name or another hash object. This function has significant overhead. It's recommended that you instead import and use the individual hash modules directly. """ # Try just invoking algo.new() # We do this first so that this is the fastest. try: new_func = algo.new except AttributeError: pass else: return new_func(*args) # Try getting the algorithm name. if isinstance(algo, str): name = algo else: try: name = algo.name except AttributeError: raise ValueError("unsupported hash type %r" % (algo,)) # Got the name. Let's see if we have a PyCrypto implementation. try: new_func = _new_funcs[name] except KeyError: # No PyCrypto implementation. Try hashlib. try: import hashlib except ImportError: # There is no hashlib. raise ValueError("unsupported hash type %s" % (name,)) return hashlib.new(name, *args) else: # We have a PyCrypto implementation. Instantiate it. return new_func(*args) # This dict originally gets the following _*_new methods, but its members get # replaced with the real new() methods of the various hash modules as they are # used. We do it without locks to improve performance, which is safe in # CPython because dict access is atomic in CPython. This might break PyPI. _new_funcs = {} def _md5_new(*args): from cgcloud_Crypto.Hash import MD5 _new_funcs['MD5'] = _new_funcs['md5'] = MD5.new return MD5.new(*args) _new_funcs['MD5'] = _new_funcs['md5'] = _md5_new del _md5_new cgcloud-releases-1.6.0/lib/src/cgcloud_Crypto/IO/000077500000000000000000000000001301512357500215655ustar00rootroot00000000000000cgcloud-releases-1.6.0/lib/src/cgcloud_Crypto/IO/PEM.py000066400000000000000000000061151301512357500225630ustar00rootroot00000000000000# -*- coding: ascii -*- # # Util/PEM.py : Privacy Enhanced Mail utilities # # =================================================================== # The contents of this file are dedicated to the public domain. 
To # the extent that dedication to the public domain is not available, # everyone is granted a worldwide, perpetual, royalty-free, # non-exclusive license to exercise all rights associated with the # contents of this file for any purpose whatsoever. # No rights are reserved. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # =================================================================== """Set of functions for encapsulating data according to the PEM format. PEM (Privacy Enhanced Mail) was an IETF standard for securing emails via a Public Key Infrastructure. It is specified in RFC 1421-1424. Even though it has been abandoned, the simple message encapsulation it defined is still widely used today for encoding *binary* cryptographic objects like keys and certificates into text. """ __all__ = ['encode', 'decode'] import sys if sys.version_info[0] == 2 and sys.version_info[1] == 1: from cgcloud_Crypto.Util.py21compat import * from cgcloud_Crypto.Util.py3compat import * import re from binascii import hexlify, unhexlify, a2b_base64, b2a_base64 from cgcloud_Crypto.Hash import MD5 def decode(pem_data, passphrase=None): """Decode a PEM block into binary. :Parameters: pem_data : string The PEM block. passphrase : byte string If given and the PEM block is encrypted, the key will be derived from the passphrase. :Returns: A tuple with the binary data, the marker string, and a boolean to indicate if decryption was performed. :Raises ValueError: If decoding fails, if the PEM file is encrypted and no passphrase has been provided or if the passphrase is incorrect. """ # Verify Pre-Encapsulation Boundary r = re.compile("\s*-----BEGIN (.*)-----\n") m = r.match(pem_data) if not m: raise ValueError("Not a valid PEM pre boundary") marker = m.group(1) # Verify Post-Encapsulation Boundary r = re.compile("-----END (.*)-----\s*$") m = r.search(pem_data) if not m or m.group(1) != marker: raise ValueError("Not a valid PEM post boundary") # Removes spaces and slit on lines lines = pem_data.replace(" ", '').split() # Decrypts, if necessary if lines[1].startswith('Proc-Type:4,ENCRYPTED'): assert False # code deleted else: objdec = None # Decode body data = a2b_base64(b(''.join(lines[1:-1]))) enc_flag = False if objdec: assert False # code deleted return (data, marker, enc_flag) cgcloud-releases-1.6.0/lib/src/cgcloud_Crypto/IO/PKCS8.py000066400000000000000000000123211301512357500227660ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # PublicKey/PKCS8.py : PKCS#8 functions # # =================================================================== # The contents of this file are dedicated to the public domain. To # the extent that dedication to the public domain is not available, # everyone is granted a worldwide, perpetual, royalty-free, # non-exclusive license to exercise all rights associated with the # contents of this file for any purpose whatsoever. # No rights are reserved. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # =================================================================== """ Module for handling private keys wrapped according to `PKCS#8`_. PKCS8 is a standard for storing and transferring private key information. The wrapped key can either be clear or encrypted. All encryption algorithms are based on passphrase-based key derivation. The following mechanisms are fully supported: * *PBKDF2WithHMAC-SHA1AndAES128-CBC* * *PBKDF2WithHMAC-SHA1AndAES192-CBC* * *PBKDF2WithHMAC-SHA1AndAES256-CBC* * *PBKDF2WithHMAC-SHA1AndDES-EDE3-CBC* The following mechanisms are only supported for importing keys. They are much weaker than the ones listed above, and they are provided for backward compatibility only: * *pbeWithMD5AndRC2-CBC* * *pbeWithMD5AndDES-CBC* * *pbeWithSHA1AndRC2-CBC* * *pbeWithSHA1AndDES-CBC* .. _`PKCS#8`: http://www.ietf.org/rfc/rfc5208.txt """ import sys if sys.version_info[0] == 2 and sys.version_info[1] == 1: from cgcloud_Crypto.Util.py21compat import * from cgcloud_Crypto.Util.py3compat import * from cgcloud_Crypto.Util.asn1 import * # from Crypto.IO._PBES import PBES1, PBES2 __all__ = ['wrap', 'unwrap'] def decode_der(obj_class, binstr): """Instantiate a DER object class, decode a DER binary string in it, and return the object.""" der = obj_class() der.decode(binstr) return der def wrap(private_key, key_oid, passphrase=None, protection=None, prot_params=None, key_params=None, randfunc=None): """Wrap a private key into a PKCS#8 blob (clear or encrypted). :Parameters: private_key : byte string The private key encoded in binary form. The actual encoding is algorithm specific. In most cases, it is DER. key_oid : string The object identifier (OID) of the private key to wrap. It is a dotted string, like "``1.2.840.113549.1.1.1``" (for RSA keys). passphrase : (binary) string The secret passphrase from which the wrapping key is derived. Set it only if encryption is required. protection : string The identifier of the algorithm to use for securely wrapping the key. The default value is '``PBKDF2WithHMAC-SHA1AndDES-EDE3-CBC``'. prot_params : dictionary Parameters for the protection algorithm. +------------------+-----------------------------------------------+ | Key | Description | +==================+===============================================+ | iteration_count | The KDF algorithm is repeated several times to| | | slow down brute force attacks on passwords. | | | The default value is 1 000. | +------------------+-----------------------------------------------+ | salt_size | Salt is used to thwart dictionary and rainbow | | | attacks on passwords. The default value is 8 | | | bytes. | +------------------+-----------------------------------------------+ key_params : DER object The algorithm parameters associated to the private key. It is required for algorithms like DSA, but not for others like RSA. randfunc : callable Random number generation function; it should accept a single integer N and return a string of random data, N bytes long. If not specified, a new RNG will be instantiated from ``Crypto.Random``. :Return: The PKCS#8-wrapped private key (possibly encrypted), as a binary string. 
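    A minimal usage sketch (illustrative only; ``pem_text`` is a placeholder for a
    PEM-encoded RSA private key, and the OID shown is the standard RSA OID mentioned
    above)::

        from cgcloud_Crypto.PublicKey import RSA
        from cgcloud_Crypto.IO import PKCS8

        der_key = RSA.importKey( pem_text ).exportKey( format='DER', pkcs=1 )
        pkcs8_der = PKCS8.wrap( der_key, '1.2.840.113549.1.1.1' )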
""" if key_params is None: key_params = DerNull() # # PrivateKeyInfo ::= SEQUENCE { # version Version, # privateKeyAlgorithm PrivateKeyAlgorithmIdentifier, # privateKey PrivateKey, # attributes [0] IMPLICIT Attributes OPTIONAL # } # pk_info = newDerSequence( 0, newDerSequence( DerObjectId(key_oid), key_params ), newDerOctetString(private_key) ) pk_info_der = pk_info.encode() if not passphrase: return pk_info_der assert False # code deleted cgcloud-releases-1.6.0/lib/src/cgcloud_Crypto/IO/__init__.py000066400000000000000000000027421301512357500237030ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # =================================================================== # The contents of this file are dedicated to the public domain. To # the extent that dedication to the public domain is not available, # everyone is granted a worldwide, perpetual, royalty-free, # non-exclusive license to exercise all rights associated with the # contents of this file for any purpose whatsoever. # No rights are reserved. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # =================================================================== """ Modules for reading and writing cryptographic data. ======================== ============================================= Module Description ======================== ============================================= Crypto.Util.PEM Set of functions for encapsulating data according to the PEM format. Crypto.Util.PKCS8 Set of functions for wrapping/unwrapping private keys. ======================== ============================================= """ __all__ = ['PEM', 'PKCS8'] cgcloud-releases-1.6.0/lib/src/cgcloud_Crypto/PublicKey/000077500000000000000000000000001301512357500231455ustar00rootroot00000000000000cgcloud-releases-1.6.0/lib/src/cgcloud_Crypto/PublicKey/RSA.py000066400000000000000000000456301301512357500241540ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # PublicKey/RSA.py : RSA public key primitive # # Written in 2008 by Dwayne C. Litzenberger # # =================================================================== # The contents of this file are dedicated to the public domain. To # the extent that dedication to the public domain is not available, # everyone is granted a worldwide, perpetual, royalty-free, # non-exclusive license to exercise all rights associated with the # contents of this file for any purpose whatsoever. # No rights are reserved. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # =================================================================== """RSA public-key cryptography algorithm (signature and encryption). RSA_ is the most widespread and used public key algorithm. 
Its security is based on the difficulty of factoring large integers. The algorithm has withstood attacks for 30 years, and it is therefore considered reasonably secure for new designs. The algorithm can be used for both confidentiality (encryption) and authentication (digital signature). It is worth noting that signing and decryption are significantly slower than verification and encryption. The cryptograhic strength is primarily linked to the length of the modulus *n*. In 2012, a sufficient length is deemed to be 2048 bits. For more information, see the most recent ECRYPT_ report. Both RSA ciphertext and RSA signature are as big as the modulus *n* (256 bytes if *n* is 2048 bit long). This module provides facilities for generating fresh, new RSA keys, constructing them from known components, exporting them, and importing them. >>> from cgcloud_Crypto.PublicKey import RSA >>> >>> key = RSA.generate(2048) >>> f = open('mykey.pem','w') >>> f.write(key.exportKey('PEM')) >>> f.close() ... >>> f = open('mykey.pem','r') >>> key = RSA.importKey(f.read()) Even though you may choose to directly use the methods of an RSA key object to perform the primitive cryptographic operations (e.g. `_RSAobj.encrypt`), it is recommended to use one of the standardized schemes instead (like `Crypto.Cipher.PKCS1_v1_5` or `Crypto.Signature.PKCS1_v1_5`). .. _RSA: http://en.wikipedia.org/wiki/RSA_%28algorithm%29 .. _ECRYPT: http://www.ecrypt.eu.org/documents/D.SPA.17.pdf :sort: generate,construct,importKey,error """ __revision__ = "$Id$" __all__ = ['generate', 'construct', 'error', 'importKey', 'RSAImplementation', '_RSAobj', 'oid' , 'algorithmIdentifier' ] import sys if sys.version_info[0] == 2 and sys.version_info[1] == 1: from cgcloud_Crypto.Util.py21compat import * from cgcloud_Crypto.Util.py3compat import * from cgcloud_Crypto.Util.number import bytes_to_long, long_to_bytes from cgcloud_Crypto.PublicKey import _slowmath from cgcloud_Crypto.IO import PKCS8, PEM from cgcloud_Crypto.Util.asn1 import * import binascii import struct from cgcloud_Crypto.Util.number import inverse try: from cgcloud_Crypto.PublicKey import _fastmath except ImportError: _fastmath = None def decode_der(obj_class, binstr): """Instantiate a DER object class, decode a DER binary string in it, and return the object.""" der = obj_class() der.decode(binstr) return der class _RSAobj(): """Class defining an actual RSA key. :undocumented: __getstate__, __setstate__, __repr__, __getattr__ """ #: Dictionary of RSA parameters. #: #: A public key will only have the following entries: #: #: - **n**, the modulus. #: - **e**, the public exponent. #: #: A private key will also have: #: #: - **d**, the private exponent. #: - **p**, the first factor of n. #: - **q**, the second factor of n. #: - **u**, the CRT coefficient (1/p) mod q. keydata = ['n', 'e', 'd', 'p', 'q', 'u'] def __init__(self, implementation, key, randfunc=None): self.implementation = implementation self.key = key # if randfunc is None: # randfunc = Random.new().read # self._randfunc = randfunc def __getattr__(self, attrname): if attrname in self.keydata: # For backward compatibility, allow the user to get (not set) the # RSA key parameters directly from this object. 
return getattr(self.key, attrname) else: raise AttributeError("%s object has no %r attribute" % (self.__class__.__name__, attrname,)) def has_private(self): return self.key.has_private() def size(self): return self.key.size() def can_blind(self): return True def can_encrypt(self): return True def can_sign(self): return True def publickey(self): return self.implementation.construct((self.key.n, self.key.e)) def exportKey(self, format='PEM', passphrase=None, pkcs=1, protection=None): """Export this RSA key. :Parameters: format : string The format to use for wrapping the key: - *'DER'*. Binary encoding. - *'PEM'*. Textual encoding, done according to `RFC1421`_/`RFC1423`_. - *'OpenSSH'*. Textual encoding, done according to OpenSSH specification. Only suitable for public keys (not private keys). passphrase : string For private keys only. The pass phrase used for deriving the encryption key. pkcs : integer For *DER* and *PEM* format only. The PKCS standard to follow for assembling the components of the key. You have two choices: - **1** (default): the public key is embedded into an X.509 ``SubjectPublicKeyInfo`` DER SEQUENCE. The private key is embedded into a `PKCS#1`_ ``RSAPrivateKey`` DER SEQUENCE. - **8**: the private key is embedded into a `PKCS#8`_ ``PrivateKeyInfo`` DER SEQUENCE. This value cannot be used for public keys. protection : string The encryption scheme to use for protecting the private key. If ``None`` (default), the behavior depends on ``format``: - For *DER*, the *PBKDF2WithHMAC-SHA1AndDES-EDE3-CBC* scheme is used. The following operations are performed: 1. A 16 byte Triple DES key is derived from the passphrase using `Crypto.Protocol.KDF.PBKDF2` with 8 bytes salt, and 1 000 iterations of `Crypto.Hash.HMAC`. 2. The private key is encrypted using CBC. 3. The encrypted key is encoded according to PKCS#8. - For *PEM*, the obsolete PEM encryption scheme is used. It is based on MD5 for key derivation, and Triple DES for encryption. Specifying a value for ``protection`` is only meaningful for PKCS#8 (that is, ``pkcs=8``) and only if a pass phrase is present too. The supported schemes for PKCS#8 are listed in the `Crypto.IO.PKCS8` module (see ``wrap_algo`` parameter). :Return: A byte string with the encoded public or private half of the key. :Raise ValueError: When the format is unknown or when you try to encrypt a private key with *DER* format and PKCS#1. :attention: If you don't provide a pass phrase, the private key will be exported in the clear! .. _RFC1421: http://www.ietf.org/rfc/rfc1421.txt .. _RFC1423: http://www.ietf.org/rfc/rfc1423.txt .. _`PKCS#1`: http://www.ietf.org/rfc/rfc3447.txt .. _`PKCS#8`: http://www.ietf.org/rfc/rfc5208.txt """ if passphrase is not None: passphrase = tobytes(passphrase) if format=='OpenSSH': eb = long_to_bytes(self.e) nb = long_to_bytes(self.n) if bord(eb[0]) & 0x80: eb=bchr(0x00)+eb if bord(nb[0]) & 0x80: nb=bchr(0x00)+nb keyparts = [ b('ssh-rsa'), eb, nb ] keystring = b('').join([ struct.pack(">I",len(kp))+kp for kp in keyparts]) return b('ssh-rsa ')+binascii.b2a_base64(keystring)[:-1] # DER format is always used, even in case of PEM, which simply # encodes it into BASE64. 
if self.has_private(): binary_key = newDerSequence( 0, self.n, self.e, self.d, self.p, self.q, self.d % (self.p-1), self.d % (self.q-1), inverse(self.q, self.p) ).encode() if pkcs==1: keyType = 'RSA PRIVATE' if format=='DER' and passphrase: raise ValueError("PKCS#1 private key cannot be encrypted") else: # PKCS#8 if format=='PEM' and protection is None: keyType = 'PRIVATE' binary_key = PKCS8.wrap(binary_key, oid, None) else: keyType = 'ENCRYPTED PRIVATE' if not protection: protection = 'PBKDF2WithHMAC-SHA1AndDES-EDE3-CBC' binary_key = PKCS8.wrap(binary_key, oid, passphrase, protection) passphrase = None else: keyType = "RSA PUBLIC" binary_key = newDerSequence( algorithmIdentifier, newDerBitString( newDerSequence( self.n, self.e ) ) ).encode() if format=='DER': return binary_key if format=='PEM': pem_str = PEM.encode(binary_key, keyType+" KEY", passphrase, self._randfunc) return tobytes(pem_str) raise ValueError("Unknown key format '%s'. Cannot export the RSA key." % format) class RSAImplementation(object): """ An RSA key factory. This class is only internally used to implement the methods of the `Crypto.PublicKey.RSA` module. :sort: __init__,generate,construct,importKey :undocumented: _g*, _i* """ def __init__(self, **kwargs): """Create a new RSA key factory. :Keywords: use_fast_math : bool Specify which mathematic library to use: - *None* (default). Use fastest math available. - *True* . Use fast math. - *False* . Use slow math. default_randfunc : callable Specify how to collect random data: - *None* (default). Use Random.new().read(). - not *None* . Use the specified function directly. :Raise RuntimeError: When **use_fast_math** =True but fast math is not available. """ use_fast_math = kwargs.get('use_fast_math', None) if use_fast_math is None: # Automatic if _fastmath is not None: self._math = _fastmath else: self._math = _slowmath elif use_fast_math: # Explicitly select fast math if _fastmath is not None: self._math = _fastmath else: raise RuntimeError("fast math module not available") else: # Explicitly select slow math self._math = _slowmath self.error = self._math.error self._default_randfunc = kwargs.get('default_randfunc', None) self._current_randfunc = None def construct(self, tup): """Construct an RSA key from a tuple of valid RSA components. The modulus **n** must be the product of two primes. The public exponent **e** must be odd and larger than 1. In case of a private key, the following equations must apply: - e != 1 - p*q = n - e*d = 1 mod (p-1)(q-1) - p*u = 1 mod q :Parameters: tup : tuple A tuple of long integers, with at least 2 and no more than 6 items. The items come in the following order: 1. RSA modulus (n). 2. Public exponent (e). 3. Private exponent (d). Only required if the key is private. 4. First factor of n (p). Optional. 5. Second factor of n (q). Optional. 6. CRT coefficient, (1/p) mod q (u). Optional. :Return: An RSA key object (`_RSAobj`). 
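        For illustration (a textbook-sized toy key, not a secure one -- real moduli
        are 2048 bits or more)::

            pub = RSA.construct( (3233, 17) )  # n = 61 * 53, e = 17
            pub.has_private()                  # False, no private exponent supplied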
""" key = self._math.rsa_construct(*tup) return _RSAobj(self, key) def _importKeyDER(self, extern_key, passphrase=None): """Import an RSA key (public or private half), encoded in DER form.""" try: der = decode_der(DerSequence, extern_key) # Try PKCS#1 first, for a private key if len(der) == 9 and der.hasOnlyInts() and der[0] == 0: # ASN.1 RSAPrivateKey element del der[6:] # Remove d mod (p-1), # d mod (q-1), and # q^{-1} mod p der.append(inverse(der[4], der[5])) # Add p^{-1} mod q del der[0] # Remove version return self.construct(der[:]) # Keep on trying PKCS#1, but now for a public key if len(der) == 2: try: # The DER object is an RSAPublicKey SEQUENCE with # two elements if der.hasOnlyInts(): return self.construct(der[:]) # The DER object is a SubjectPublicKeyInfo SEQUENCE # with two elements: an 'algorithmIdentifier' and a # 'subjectPublicKey'BIT STRING. # 'algorithmIdentifier' takes the value given at the # module level. # 'subjectPublicKey' encapsulates the actual ASN.1 # RSAPublicKey element. if der[0] == algorithmIdentifier: bitmap = decode_der(DerBitString, der[1]) rsaPub = decode_der(DerSequence, bitmap.value) if len(rsaPub) == 2 and rsaPub.hasOnlyInts(): return self.construct(rsaPub[:]) except (ValueError, EOFError): pass # Try PKCS#8 (possibly encrypted) k = PKCS8.unwrap(extern_key, passphrase) if k[0] == oid: return self._importKeyDER(k[1], passphrase) except (ValueError, EOFError): pass raise ValueError("RSA key format is not supported") def importKey(self, extern_key, passphrase=None): """Import an RSA key (public or private half), encoded in standard form. :Parameter extern_key: The RSA key to import, encoded as a string. An RSA public key can be in any of the following formats: - X.509 ``subjectPublicKeyInfo`` DER SEQUENCE (binary or PEM encoding) - `PKCS#1`_ ``RSAPublicKey`` DER SEQUENCE (binary or PEM encoding) - OpenSSH (textual public key only) An RSA private key can be in any of the following formats: - PKCS#1 ``RSAPrivateKey`` DER SEQUENCE (binary or PEM encoding) - `PKCS#8`_ ``PrivateKeyInfo`` or ``EncryptedPrivateKeyInfo`` DER SEQUENCE (binary or PEM encoding) - OpenSSH (textual public key only) For details about the PEM encoding, see `RFC1421`_/`RFC1423`_. The private key may be encrypted by means of a certain pass phrase either at the PEM level or at the PKCS#8 level. :Type extern_key: string :Parameter passphrase: In case of an encrypted private key, this is the pass phrase from which the decryption key is derived. :Type passphrase: string :Return: An RSA key object (`_RSAobj`). :Raise ValueError/IndexError/TypeError: When the given key cannot be parsed (possibly because the pass phrase is wrong). .. _RFC1421: http://www.ietf.org/rfc/rfc1421.txt .. _RFC1423: http://www.ietf.org/rfc/rfc1423.txt .. _`PKCS#1`: http://www.ietf.org/rfc/rfc3447.txt .. _`PKCS#8`: http://www.ietf.org/rfc/rfc5208.txt """ extern_key = tobytes(extern_key) if passphrase is not None: passphrase = tobytes(passphrase) if extern_key.startswith(b('-----')): # This is probably a PEM encoded key. 
(der, marker, enc_flag) = PEM.decode(tostr(extern_key), passphrase) if enc_flag: passphrase = None return self._importKeyDER(der, passphrase) if extern_key.startswith(b('ssh-rsa ')): # This is probably an OpenSSH key keystring = binascii.a2b_base64(extern_key.split(b(' '))[1]) keyparts = [] while len(keystring) > 4: l = struct.unpack(">I", keystring[:4])[0] keyparts.append(keystring[4:4 + l]) keystring = keystring[4 + l:] e = bytes_to_long(keyparts[1]) n = bytes_to_long(keyparts[2]) return self.construct([n, e]) if bord(extern_key[0]) == 0x30: # This is probably a DER encoded key return self._importKeyDER(extern_key, passphrase) raise ValueError("RSA key format is not supported") #: `Object ID`_ for the RSA encryption algorithm. This OID often indicates #: a generic RSA key, even when such key will be actually used for digital #: signatures. #: #: .. _`Object ID`: http://www.alvestrand.no/objectid/1.2.840.113549.1.1.1.html oid = "1.2.840.113549.1.1.1" #: This is the standard DER object that qualifies a cryptographic algorithm #: in ASN.1-based data structures (e.g. X.509 certificates). algorithmIdentifier = DerSequence( [DerObjectId(oid).encode(), # algorithm field DerNull().encode()] # parameters field ).encode() _impl = RSAImplementation() #: #: Construct an RSA key object from a tuple of valid RSA components. #: #: See `RSAImplementation.construct`. #: construct = _impl.construct #: #: Import an RSA key (public or private half), encoded in standard form. #: #: See `RSAImplementation.importKey`. #: importKey = _impl.importKey error = _impl.error # vim:set ts=4 sw=4 sts=4 expandtab: cgcloud-releases-1.6.0/lib/src/cgcloud_Crypto/PublicKey/__init__.py000066400000000000000000000035021301512357500252560ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # =================================================================== # The contents of this file are dedicated to the public domain. To # the extent that dedication to the public domain is not available, # everyone is granted a worldwide, perpetual, royalty-free, # non-exclusive license to exercise all rights associated with the # contents of this file for any purpose whatsoever. # No rights are reserved. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # =================================================================== """Public-key encryption and signature algorithms. Public-key encryption uses two different keys, one for encryption and one for decryption. The encryption key can be made public, and the decryption key is kept private. Many public-key algorithms can also be used to sign messages, and some can *only* be used for signatures. 
======================== ============================================= Module Description ======================== ============================================= Crypto.PublicKey.DSA Digital Signature Algorithm (Signature only) Crypto.PublicKey.ElGamal (Signing and encryption) Crypto.PublicKey.RSA (Signing, encryption, and blinding) ======================== ============================================= :undocumented: _DSA, _RSA, _fastmath, _slowmath, pubkey """ __all__ = ['RSA'] __revision__ = "$Id$" cgcloud-releases-1.6.0/lib/src/cgcloud_Crypto/PublicKey/_slowmath.py000066400000000000000000000043401301512357500255150ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # PubKey/RSA/_slowmath.py : Pure Python implementation of the RSA portions of _fastmath # # Written in 2008 by Dwayne C. Litzenberger # # =================================================================== # The contents of this file are dedicated to the public domain. To # the extent that dedication to the public domain is not available, # everyone is granted a worldwide, perpetual, royalty-free, # non-exclusive license to exercise all rights associated with the # contents of this file for any purpose whatsoever. # No rights are reserved. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # =================================================================== """Pure Python implementation of the RSA-related portions of Crypto.PublicKey._fastmath.""" __revision__ = "$Id$" __all__ = ['rsa_construct'] import sys if sys.version_info[0] == 2 and sys.version_info[1] == 1: from cgcloud_Crypto.Util.py21compat import * from cgcloud_Crypto.Util.number import inverse class error(Exception): pass class _RSAKey(object): def has_private(self): return hasattr(self, 'd') def rsa_construct(n, e, d=None, p=None, q=None, u=None): """Construct an RSAKey object""" assert isinstance(n, long) assert isinstance(e, long) assert isinstance(d, (long, type(None))) assert isinstance(p, (long, type(None))) assert isinstance(q, (long, type(None))) assert isinstance(u, (long, type(None))) obj = _RSAKey() obj.n = n obj.e = e if d is None: return obj obj.d = d if p is not None and q is not None: obj.p = p obj.q = q else: assert False # code deleted if u is not None: obj.u = u else: obj.u = inverse(obj.p, obj.q) return obj # vim:set ts=4 sw=4 sts=4 expandtab: cgcloud-releases-1.6.0/lib/src/cgcloud_Crypto/Util/000077500000000000000000000000001301512357500221735ustar00rootroot00000000000000cgcloud-releases-1.6.0/lib/src/cgcloud_Crypto/Util/__init__.py000066400000000000000000000036541301512357500243140ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # =================================================================== # The contents of this file are dedicated to the public domain. To # the extent that dedication to the public domain is not available, # everyone is granted a worldwide, perpetual, royalty-free, # non-exclusive license to exercise all rights associated with the # contents of this file for any purpose whatsoever. # No rights are reserved. 
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # =================================================================== """Miscellaneous modules Contains useful modules that don't belong into any of the other Crypto.* subpackages. ======================== ============================================= Module Description ======================== ============================================= `Crypto.Util.number` Number-theoretic functions (primality testing, etc.) `Crypto.Util.Counter` Fast counter functions for CTR cipher modes. `Crypto.Util.randpool` Random number generation `Crypto.Util.RFC1751` Converts between 128-bit keys and human-readable strings of words. `Crypto.Util.asn1` Minimal support for ASN.1 DER encoding `Crypto.Util.Padding` Set of functions for adding and removing padding. ======================== ============================================= """ __all__ = ['randpool', 'RFC1751', 'number', 'strxor', 'asn1', 'Counter', 'Padding' ] __revision__ = "$Id$" cgcloud-releases-1.6.0/lib/src/cgcloud_Crypto/Util/asn1.py000066400000000000000000000716231301512357500234200ustar00rootroot00000000000000# -*- coding: ascii -*- # # Util/asn1.py : Minimal support for ASN.1 DER binary encoding. # # =================================================================== # The contents of this file are dedicated to the public domain. To # the extent that dedication to the public domain is not available, # everyone is granted a worldwide, perpetual, royalty-free, # non-exclusive license to exercise all rights associated with the # contents of this file for any purpose whatsoever. # No rights are reserved. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # =================================================================== """ ASN.1 DER encoding and decoding This module provides minimal support for encoding and decoding `ASN.1`_ DER objects. .. 
_`ASN.1`: ftp://ftp.rsasecurity.com/pub/pkcs/ascii/layman.asc """ from __future__ import nested_scopes import sys if sys.version_info[0] == 2 and sys.version_info[1] == 1: from cgcloud_Crypto.Util.py21compat import * from cgcloud_Crypto.Util.py3compat import * if sys.version_info[0] == 2 and sys.version_info[1] == 1: from cgcloud_Crypto.Util.py21compat import * from cgcloud_Crypto.Util.number import long_to_bytes, bytes_to_long __all__ = [ 'DerObject', 'DerInteger', 'DerOctetString', 'DerNull', 'DerSequence', 'DerObjectId', 'DerBitString', 'DerSetOf', 'newDerInteger', 'newDerOctetString', 'newDerSequence', 'newDerObjectId', 'newDerBitString', 'newDerSetOf' ] def _isInt(x, onlyNonNegative=False): test = 0 try: test += x except TypeError: return False return not onlyNonNegative or x>=0 class BytesIO_EOF(BytesIO): """This class differs from BytesIO in that an EOFError exception is raised whenever EOF is reached.""" def __init__(self, *params): BytesIO.__init__(self, *params) self.setRecord(False) def setRecord(self, record): self._record = record self._recording = b("") def read(self, length): s = BytesIO.read(self, length) if len(s)127: encoding = long_to_bytes(payloadLen) return bchr(len(encoding)+128) + encoding return bchr(payloadLen) def encode(self): """Return this DER element, fully encoded as a binary byte string.""" # Concatenate identifier octets, length octets, # and contents octets return bchr(self._idOctet) + self._lengthOctets() + self.payload def _decodeLen(self, s): """Decode DER length octets from a file.""" length = bord(s.read_byte()) if length<=127: return length payloadLength = bytes_to_long(s.read(length & 0x7F)) # According to DER (but not BER) the long form is used # only when the length doesn't fit into 7 bits. if payloadLength<=127: raise ValueError("Not a DER length tag (but still valid BER).") return payloadLength def decode(self, derEle): """Decode a complete DER element, and re-initializes this object with it. :Parameters: derEle : byte string A complete DER element. :Raise ValueError: In case of parsing errors. :Raise EOFError: If the DER element is too short. """ s = BytesIO_EOF(derEle) self._decodeFromStream(s) # There shouldn't be other bytes left try: b = s.read_byte() raise ValueError("Unexpected extra data after the DER structure") except EOFError: pass def _decodeFromStream(self, s): """Decode a complete DER element from a file.""" try: idOctet = bord(s.read_byte()) except EOFError: raise _NoDerElementError if self._idOctet != None: if idOctet != self._idOctet: raise ValueError("Unexpected DER tag") else: self._idOctet = idOctet length = self._decodeLen(s) self.payload = s.read(length) class DerInteger(DerObject): """Class to model a DER INTEGER. An example of encoding is: >>> from cgcloud_Crypto.Util.asn1 import DerInteger >>> from binascii import hexlify, unhexlify >>> int_der = DerInteger(9) >>> print hexlify(int_der.encode()) which will show ``020109``, the DER encoding of 9. And for decoding: >>> s = unhexlify(b'020109') >>> try: >>> int_der = DerInteger() >>> int_der.decode(s) >>> print int_der.value >>> except (ValueError, EOFError): >>> print "Not a valid DER INTEGER" the output will be ``9``. """ def __init__(self, value=0, implicit=None): """Initialize the DER object as an INTEGER. :Parameters: value : integer The value of the integer. implicit : integer The IMPLICIT tag to use for the encoded object. It overrides the universal tag for INTEGER (2). 
""" DerObject.__init__(self, 0x02, b(''), implicit, False) self.value = value #: The integer value def encode(self): """Return the DER INTEGER, fully encoded as a binary string.""" number = self.value self.payload = b('') while True: self.payload = bchr(number&255) + self.payload if 128 <= number <= 255: self.payload = bchr(0x00) + self.payload if -128 <= number <= 255: break number >>= 8 return DerObject.encode(self) def decode(self, derEle): """Decode a complete DER INTEGER DER, and re-initializes this object with it. :Parameters: derEle : byte string A complete INTEGER DER element. :Raise ValueError: In case of parsing errors. :Raise EOFError: If the DER element is too short. """ DerObject.decode(self, derEle) def _decodeFromStream(self, s): """Decode a complete DER INTEGER from a file.""" # Fill up self.payload DerObject._decodeFromStream(self, s) # Derive self.value from self.payload self.value = 0L bits = 1 for i in self.payload: self.value *= 256 self.value += bord(i) bits <<= 8 if self.payload and bord(self.payload[0]) & 0x80: self.value -= bits def newDerInteger(number): """Create a DerInteger object, already initialized with an integer.""" der = DerInteger(number) return der class DerSequence(DerObject): """Class to model a DER SEQUENCE. This object behaves like a dynamic Python sequence. Sub-elements that are INTEGERs behave like Python integers. Any other sub-element is a binary string encoded as a complete DER sub-element (TLV). An example of encoding is: >>> from cgcloud_Crypto.Util.asn1 import DerSequence, DerInteger >>> from binascii import hexlify, unhexlify >>> obj_der = unhexlify('070102') >>> seq_der = DerSequence([4]) >>> seq_der.append(9) >>> seq_der.append(obj_der.encode()) >>> print hexlify(seq_der.encode()) which will show ``3009020104020109070102``, the DER encoding of the sequence containing ``4``, ``9``, and the object with payload ``02``. For decoding: >>> s = unhexlify(b'3009020104020109070102') >>> try: >>> seq_der = DerSequence() >>> seq_der.decode(s) >>> print len(seq_der) >>> print seq_der[0] >>> print seq_der[:] >>> except (ValueError, EOFError): >>> print "Not a valid DER SEQUENCE" the output will be:: 3 4 [4L, 9L, b'\x07\x01\x02'] """ def __init__(self, startSeq=None, implicit=None): """Initialize the DER object as a SEQUENCE. :Parameters: startSeq : Python sequence A sequence whose element are either integers or other DER objects. implicit : integer The IMPLICIT tag to use for the encoded object. It overrides the universal tag for SEQUENCE (16). """ DerObject.__init__(self, 0x10, b(''), implicit, True) if startSeq==None: self._seq = [] else: self._seq = startSeq ## A few methods to make it behave like a python sequence def __delitem__(self, n): del self._seq[n] def __getitem__(self, n): return self._seq[n] def __setitem__(self, key, value): self._seq[key] = value def __setslice__(self,i,j,sequence): self._seq[i:j] = sequence def __delslice__(self,i,j): del self._seq[i:j] def __getslice__(self, i, j): return self._seq[max(0, i):max(0, j)] def __len__(self): return len(self._seq) def __iadd__(self, item): self._seq.append(item) return self def append(self, item): self._seq.append(item) return self def hasInts(self, onlyNonNegative=True): """Return the number of items in this sequence that are integers. :Parameters: onlyNonNegative : boolean If True, negative integers are not counted in. 
""" def _isInt2(x): return _isInt(x, onlyNonNegative) return len(filter(_isInt2, self._seq)) def hasOnlyInts(self, onlyNonNegative=True): """Return True if all items in this sequence are integers or non-negative integers. This function returns False is the sequence is empty, or at least one member is not an integer. :Parameters: onlyNonNegative : boolean If True, the presence of negative integers causes the method to return False.""" return self._seq and self.hasInts(onlyNonNegative)==len(self._seq) def encode(self): """Return this DER SEQUENCE, fully encoded as a binary string. :Raises ValueError: If some elements in the sequence are neither integers nor byte strings. """ self.payload = b('') for item in self._seq: try: self.payload += item except TypeError: try: self.payload += DerInteger(item).encode() except TypeError: raise ValueError("Trying to DER encode an unknown object") return DerObject.encode(self) def decode(self, derEle): """Decode a complete DER SEQUENCE, and re-initializes this object with it. :Parameters: derEle : byte string A complete SEQUENCE DER element. :Raise ValueError: In case of parsing errors. :Raise EOFError: If the DER element is too short. DER INTEGERs are decoded into Python integers. Any other DER element is not decoded. Its validity is not checked. """ DerObject.decode(self, derEle) def _decodeFromStream(self, s): """Decode a complete DER SEQUENCE from a file.""" self._seq = [] # Fill up self.payload DerObject._decodeFromStream(self, s) # Add one item at a time to self.seq, by scanning self.payload p = BytesIO_EOF(self.payload) while True: try: p.setRecord(True) der = DerObject() der._decodeFromStream(p) # Parse INTEGERs differently if der._idOctet != 0x02: self._seq.append(p._recording) else: derInt = DerInteger() derInt.decode(p._recording) self._seq.append(derInt.value) except _NoDerElementError: break # end def newDerSequence(*der_objs): """Create a DerSequence object, already initialized with all objects passed as parameters.""" der = DerSequence() for obj in der_objs: if isinstance(obj, DerObject): der += obj.encode() else: der += obj return der class DerOctetString(DerObject): """Class to model a DER OCTET STRING. An example of encoding is: >>> from cgcloud_Crypto.Util.asn1 import DerOctetString >>> from binascii import hexlify, unhexlify >>> os_der = DerOctetString(b'\\xaa') >>> os_der.payload += b'\\xbb' >>> print hexlify(os_der.encode()) which will show ``0402aabb``, the DER encoding for the byte string ``b'\\xAA\\xBB'``. For decoding: >>> s = unhexlify(b'0402aabb') >>> try: >>> os_der = DerOctetString() >>> os_der.decode(s) >>> print hexlify(os_der.payload) >>> except (ValueError, EOFError): >>> print "Not a valid DER OCTET STRING" the output will be ``aabb``. """ def __init__(self, value=b(''), implicit=None): """Initialize the DER object as an OCTET STRING. :Parameters: value : byte string The initial payload of the object. If not specified, the payload is empty. implicit : integer The IMPLICIT tag to use for the encoded object. It overrides the universal tag for OCTET STRING (4). 
""" DerObject.__init__(self, 0x04, value, implicit, False) def newDerOctetString(binstring): """Create a DerOctetString object, already initialized with the binary string.""" if isinstance(binstring, DerObject): der = DerOctetString(binstring.encode()) else: der = DerOctetString(binstring) return der class DerNull(DerObject): """Class to model a DER NULL element.""" def __init__(self): """Initialize the DER object as a NULL.""" DerObject.__init__(self, 0x05, b(''), False) class DerObjectId(DerObject): """Class to model a DER OBJECT ID. An example of encoding is: >>> from cgcloud_Crypto.Util.asn1 import DerObjectId >>> from binascii import hexlify, unhexlify >>> oid_der = DerObjectId("1.2") >>> oid_der.value += ".840.113549.1.1.1" >>> print hexlify(oid_der.encode()) which will show ``06092a864886f70d010101``, the DER encoding for the RSA Object Identifier ``1.2.840.113549.1.1.1``. For decoding: >>> s = unhexlify(b'06092a864886f70d010101') >>> try: >>> oid_der = DerObjectId() >>> oid_der.decode(s) >>> print oid_der.value >>> except (ValueError, EOFError): >>> print "Not a valid DER OBJECT ID" the output will be ``1.2.840.113549.1.1.1``. """ def __init__(self, value='', implicit=None): """Initialize the DER object as an OBJECT ID. :Parameters: value : string The initial Object Identifier (e.g. "1.2.0.0.6.2"). implicit : integer The IMPLICIT tag to use for the encoded object. It overrides the universal tag for OBJECT ID (6). """ DerObject.__init__(self, 0x06, b(''), implicit, False) self.value = value #: The Object ID, a dot separated list of integers def encode(self): """Return the DER OBJECT ID, fully encoded as a binary string.""" comps = map(int,self.value.split(".")) if len(comps)<2: raise ValueError("Not a valid Object Identifier string") self.payload = bchr(40*comps[0]+comps[1]) for v in comps[2:]: enc = [] while v: enc.insert(0, (v & 0x7F) | 0x80) v >>= 7 enc[-1] &= 0x7F self.payload += b('').join(map(bchr, enc)) return DerObject.encode(self) def decode(self, derEle): """Decode a complete DER OBJECT ID, and re-initializes this object with it. :Parameters: derEle : byte string A complete DER OBJECT ID. :Raise ValueError: In case of parsing errors. :Raise EOFError: If the DER element is too short. """ DerObject.decode(self, derEle) def _decodeFromStream(self, s): """Decode a complete DER OBJECT ID from a file.""" # Fill up self.payload DerObject._decodeFromStream(self, s) # Derive self.value from self.payload p = BytesIO_EOF(self.payload) comps = list(map(str, divmod(bord(p.read_byte()),40))) v = 0 try: while True: c = p.read_byte() v = v*128 + (bord(c) & 0x7F) if not (bord(c) & 0x80): comps.append(str(v)) v = 0 except EOFError: pass self.value = '.'.join(comps) def newDerObjectId(dottedstring): """Create a DerObjectId object, already initialized with the given Object Identifier (a dotted string).""" der = DerObjectId(dottedstring) return der class DerBitString(DerObject): """Class to model a DER BIT STRING. An example of encoding is: >>> from cgcloud_Crypto.Util.asn1 import DerBitString >>> from binascii import hexlify, unhexlify >>> bs_der = DerBitString(b'\\xaa') >>> bs_der.value += b'\\xbb' >>> print hexlify(bs_der.encode()) which will show ``040300aabb``, the DER encoding for the bit string ``b'\\xAA\\xBB'``. For decoding: >>> s = unhexlify(b'040300aabb') >>> try: >>> bs_der = DerBitString() >>> bs_der.decode(s) >>> print hexlify(bs_der.value) >>> except (ValueError, EOFError): >>> print "Not a valid DER OCTET STRING" the output will be ``aabb``. 
""" def __init__(self, value=b(''), implicit=None): """Initialize the DER object as a BIT STRING. :Parameters: value : byte string The initial, packed bit string. If not specified, the bit string is empty. implicit : integer The IMPLICIT tag to use for the encoded object. It overrides the universal tag for OCTET STRING (3). """ DerObject.__init__(self, 0x03, b(''), implicit, False) self.value = value #: The bitstring value (packed) def encode(self): """Return the DER BIT STRING, fully encoded as a binary string.""" # Add padding count byte self.payload = b('\x00') + self.value return DerObject.encode(self) def decode(self, derEle): """Decode a complete DER BIT STRING, and re-initializes this object with it. :Parameters: derEle : byte string A complete DER BIT STRING. :Raise ValueError: In case of parsing errors. :Raise EOFError: If the DER element is too short. """ DerObject.decode(self, derEle) def _decodeFromStream(self, s): """Decode a complete DER BIT STRING DER from a file.""" # Fill-up self.payload DerObject._decodeFromStream(self, s) if self.payload and bord(self.payload[0])!=0: raise ValueError("Not a valid BIT STRING") # Fill-up self.value self.value = b('') # Remove padding count byte if self.payload: self.value = self.payload[1:] def newDerBitString(binstring): """Create a DerStringString object, already initialized with the binary string.""" if isinstance(binstring, DerObject): der = DerBitString(binstring.encode()) else: der = DerBitString(binstring) return der class DerSetOf(DerObject): """Class to model a DER SET OF. An example of encoding is: >>> from cgcloud_Crypto.Util.asn1 import DerBitString >>> from binascii import hexlify, unhexlify >>> so_der = DerSetOf([4,5]) >>> so_der.add(6) >>> print hexlify(so_der.encode()) which will show ``3109020104020105020106``, the DER encoding of a SET OF with items 4,5, and 6. For decoding: >>> s = unhexlify(b'3109020104020105020106') >>> try: >>> so_der = DerSetOf() >>> so_der.decode(s) >>> print [x for x in so_der] >>> except (ValueError, EOFError): >>> print "Not a valid DER SET OF" the output will be ``[4L, 5L, 6L]``. """ def __init__(self, startSet=None, implicit=None): """Initialize the DER object as a SET OF. :Parameters: startSet : container The initial set of integers or DER encoded objects. implicit : integer The IMPLICIT tag to use for the encoded object. It overrides the universal tag for SET OF (17). """ DerObject.__init__(self, 0x11, b(''), implicit, True) self._seq = [] self._elemOctet = None if startSet: for e in startSet: self.add(e) def __getitem__(self, n): return self._seq[n] def __iter__(self): return iter(self._seq) def __len__(self): return len(self._seq) def add(self, elem): """Add an element to the set. :Parameters: elem : byte string or integer An element of the same type of objects already in the set. It can be an integer or a DER encoded object. """ if _isInt(elem): eo = 0x02 else: eo = bord(elem[0]) if self._elemOctet != eo: if self._elemOctet: raise ValueError("New element does not belong to the set") self._elemOctet = eo if not elem in self._seq: self._seq.append(elem) def decode(self, derEle): """Decode a complete SET OF DER element, and re-initializes this object with it. DER INTEGERs are decoded into Python integers. Any other DER element is left undecoded; its validity is not checked. :Parameters: derEle : byte string A complete DER BIT SET OF. :Raise ValueError: In case of parsing errors. :Raise EOFError: If the DER element is too short. 
""" DerObject.decode(self, derEle) def _decodeFromStream(self, s): """Decode a complete DER SET OF from a file.""" self._seq = [] # Fill up self.payload DerObject._decodeFromStream(self, s) # Add one item at a time to self.seq, by scanning self.payload p = BytesIO_EOF(self.payload) setIdOctet = -1 while True: try: p.setRecord(True) der = DerObject() der._decodeFromStream(p) # Verify that all members are of the same type if setIdOctet < 0: setIdOctet = der._idOctet else: if setIdOctet != der._idOctet: raise ValueError("Not all elements are of the same DER type") # Parse INTEGERs differently if setIdOctet != 0x02: self._seq.append(p._recording) else: derInt = DerInteger() derInt.decode(p._recording) self._seq.append(derInt.value) except _NoDerElementError: break # end def encode(self): """Return this SET OF DER element, fully encoded as a binary string. """ # Elements in the set must be ordered in lexicographic order ordered = [] for item in self._seq: if _isInt(item): bys = DerInteger(item).encode() else: bys = item ordered.append(bys) ordered.sort() self.payload = b('').join(ordered) return DerObject.encode(self) def newDerSetOf(*der_objs): """Create a DerSequence object, already initialized with all objects passed as parameters.""" der = DerSetOf() for obj in der_objs: if isinstance(obj, DerObject): der.add(obj.encode()) else: der.add(obj) return der cgcloud-releases-1.6.0/lib/src/cgcloud_Crypto/Util/number.py000066400000000000000000000122671301512357500240450ustar00rootroot00000000000000# # number.py : Number-theoretic functions # # Part of the Python Cryptography Toolkit # # Written by Andrew M. Kuchling, Barry A. Warsaw, and others # # =================================================================== # The contents of this file are dedicated to the public domain. To # the extent that dedication to the public domain is not available, # everyone is granted a worldwide, perpetual, royalty-free, # non-exclusive license to exercise all rights associated with the # contents of this file for any purpose whatsoever. # No rights are reserved. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # =================================================================== # __revision__ = "$Id$" from warnings import warn as _warn import math import sys from cgcloud_Crypto.pct_warnings import GetRandomNumber_DeprecationWarning, PowmInsecureWarning from cgcloud_Crypto.Util.py3compat import * bignum = long try: from cgcloud_Crypto.PublicKey import _fastmath except ImportError: # For production, we are going to let import issues due to gmp/mpir shared # libraries not loading slide silently and use slowmath. If you'd rather # see an exception raised if _fastmath exists but cannot be imported, # uncomment the below # # from distutils.sysconfig import get_config_var # import inspect, os # _fm_path = os.path.normpath(os.path.dirname(os.path.abspath( # inspect.getfile(inspect.currentframe()))) # +"/../../PublicKey/_fastmath"+get_config_var("SO")) # if os.path.exists(_fm_path): # raise ImportError("While the _fastmath module exists, importing "+ # "it failed. 
This may point to the gmp or mpir shared library "+ # "not being in the path. _fastmath was found at "+_fm_path) _fastmath = None # You need libgmp v5 or later to get mpz_powm_sec. Warn if it's not available. if _fastmath is not None and not _fastmath.HAVE_DECL_MPZ_POWM_SEC: _warn("Not using mpz_powm_sec. You should rebuild using libgmp >= 5 to avoid timing attack vulnerability.", PowmInsecureWarning) # New functions # from _number_new import * # Commented out and replaced with faster versions below ## def long2str(n): ## s='' ## while n>0: ## s=chr(n & 255)+s ## n=n>>8 ## return s ## import types ## def str2long(s): ## if type(s)!=types.StringType: return s # Integers will be left alone ## return reduce(lambda x,y : x*256+ord(y), s, 0L) def inverse(u, v): """inverse(u:long, v:long):long Return the inverse of u mod v. """ u3, v3 = long(u), long(v) u1, v1 = 1L, 0L while v3 > 0: q=divmod(u3, v3)[0] u1, v1 = v1, u1 - v1*q u3, v3 = v3, u3 - v3*q while u1<0: u1 = u1 + v return u1 # Improved conversion functions contributed by Barry Warsaw, after # careful benchmarking import struct def long_to_bytes(n, blocksize=0): """long_to_bytes(n:long, blocksize:int) : string Convert a long integer to a byte string. If optional blocksize is given and greater than zero, pad the front of the byte string with binary zeros so that the length is a multiple of blocksize. """ # after much testing, this algorithm was deemed to be the fastest s = b('') n = long(n) pack = struct.pack while n > 0: s = pack('>I', n & 0xffffffffL) + s n = n >> 32 # strip off leading zeros for i in range(len(s)): if s[i] != b('\000')[0]: break else: # only happens when n == 0 s = b('\000') i = 0 s = s[i:] # add back some pad bytes. this could be done more efficiently w.r.t. the # de-padding being done above, but sigh... if blocksize > 0 and len(s) % blocksize: s = (blocksize - len(s) % blocksize) * b('\000') + s return s def bytes_to_long(s): """bytes_to_long(string) : long Convert a byte string to a long integer. This is (essentially) the inverse of long_to_bytes(). """ acc = 0L unpack = struct.unpack length = len(s) if length % 4: extra = (4 - length % 4) s = b('\000') * extra + s length = length + extra for i in range(0, length, 4): acc = (acc << 32) + unpack('>I', s[i:i+4])[0] return acc # For backwards compatibility... import warnings def long2str(n, blocksize=0): warnings.warn("long2str() has been replaced by long_to_bytes()") return long_to_bytes(n, blocksize) def str2long(s): warnings.warn("str2long() has been replaced by bytes_to_long()") return bytes_to_long(s) def _import_Random(): # This is called in a function instead of at the module level in order to # avoid problems with recursive imports global Random, StrongRandom from cgcloud_Crypto import Random from cgcloud_Crypto.Random.random import StrongRandom cgcloud-releases-1.6.0/lib/src/cgcloud_Crypto/Util/py3compat.py000066400000000000000000000100201301512357500244550ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Util/py3compat.py : Compatibility code for handling Py3k / Python 2.x # # Written in 2010 by Thorsten Behrens # # =================================================================== # The contents of this file are dedicated to the public domain. To # the extent that dedication to the public domain is not available, # everyone is granted a worldwide, perpetual, royalty-free, # non-exclusive license to exercise all rights associated with the # contents of this file for any purpose whatsoever. # No rights are reserved. 
# # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # =================================================================== """Compatibility code for handling string/bytes changes from Python 2.x to Py3k In Python 2.x, strings (of type ''str'') contain binary data, including encoded Unicode text (e.g. UTF-8). The separate type ''unicode'' holds Unicode text. Unicode literals are specified via the u'...' prefix. Indexing or slicing either type always produces a string of the same type as the original. Data read from a file is always of '''str'' type. In Python 3.x, strings (type ''str'') may only contain Unicode text. The u'...' prefix and the ''unicode'' type are now redundant. A new type (called ''bytes'') has to be used for binary data (including any particular ''encoding'' of a string). The b'...' prefix allows one to specify a binary literal. Indexing or slicing a string produces another string. Slicing a byte string produces another byte string, but the indexing operation produces an integer. Data read from a file is of '''str'' type if the file was opened in text mode, or of ''bytes'' type otherwise. Since PyCrypto aims at supporting both Python 2.x and 3.x, the following helper functions are used to keep the rest of the library as independent as possible from the actual Python version. In general, the code should always deal with binary strings, and use integers instead of 1-byte character strings. b(s) Take a text string literal (with no prefix or with u'...' prefix) and make a byte string. bchr(c) Take an integer and make a 1-character byte string. bord(c) Take the result of indexing on a byte string and make an integer. tobytes(s) Take a text string, a byte string, or a sequence of character taken from a byte string and make a byte string. """ __revision__ = "$Id$" import sys if sys.version_info[0] == 2: from types import UnicodeType as _UnicodeType # In Python 2.1, 'unicode' is a function, not a type. def b(s): return s def bchr(s): return chr(s) def bstr(s): return str(s) def bord(s): return ord(s) def tobytes(s): if isinstance(s, _UnicodeType): return s.encode("latin-1") else: return ''.join(s) def tostr(bs): return unicode(bs, 'latin-1') # In Pyton 2.x, StringIO is a stand-alone module from StringIO import StringIO as BytesIO else: def b(s): return s.encode("latin-1") # utf-8 would cause some side-effects we don't want def bchr(s): return bytes([s]) def bstr(s): if isinstance(s,str): return bytes(s,"latin-1") else: return bytes(s) def bord(s): return s def tobytes(s): if isinstance(s,bytes): return s else: if isinstance(s,str): return s.encode("latin-1") else: return bytes(s) def tostr(bs): return bs.decode("latin-1") # In Pyton 3.x, StringIO is a sub-module of io from io import BytesIO # vim:set ts=4 sw=4 sts=4 expandtab: cgcloud-releases-1.6.0/lib/src/cgcloud_Crypto/__init__.py000066400000000000000000000037151301512357500233750ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # =================================================================== # The contents of this file are dedicated to the public domain. 
To # the extent that dedication to the public domain is not available, # everyone is granted a worldwide, perpetual, royalty-free, # non-exclusive license to exercise all rights associated with the # contents of this file for any purpose whatsoever. # No rights are reserved. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # =================================================================== """Python Cryptography Toolkit A collection of cryptographic modules implementing various algorithms and protocols. Subpackages: Crypto.Cipher Secret-key (AES, DES, ARC4) and public-key encryption (RSA PKCS#1) algorithms Crypto.Hash Hashing algorithms (MD5, SHA, HMAC) Crypto.Protocol Cryptographic protocols (Chaffing, all-or-nothing transform, key derivation functions). This package does not contain any network protocols. Crypto.PublicKey Public-key encryption and signature algorithms (RSA, DSA) Crypto.Signature Public-key signature algorithms (RSA PKCS#1) Crypto.Util Various useful modules and functions (long-to-string conversion, random number generation, number theoretic functions) """ __all__ = ['Cipher', 'Hash', 'Protocol', 'PublicKey', 'Util', 'Signature', 'IO'] __version__ = '2.7a1' # See also below and setup.py __revision__ = "$Id$" # New software should look at this instead of at __version__ above. version_info = (2, 7, 0, 'alpha', 1) # See also above and setup.py cgcloud-releases-1.6.0/lib/src/cgcloud_Crypto/pct_warnings.py000066400000000000000000000046001301512357500243260ustar00rootroot00000000000000# -*- coding: ascii -*- # # pct_warnings.py : PyCrypto warnings file # # Written in 2008 by Dwayne C. Litzenberger # # =================================================================== # The contents of this file are dedicated to the public domain. To # the extent that dedication to the public domain is not available, # everyone is granted a worldwide, perpetual, royalty-free, # non-exclusive license to exercise all rights associated with the # contents of this file for any purpose whatsoever. # No rights are reserved. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS # BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN # ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. # =================================================================== # # Base classes. All our warnings inherit from one of these in order to allow # the user to specifically filter them. 
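# For example, a user who only wants to silence PyCrypto deprecation warnings
# while keeping everything else could do (illustrative sketch):
#
#     import warnings
#     from cgcloud_Crypto.pct_warnings import CryptoDeprecationWarning
#     warnings.filterwarnings('ignore', category=CryptoDeprecationWarning)
#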
# class CryptoWarning(Warning): """Base class for PyCrypto warnings""" class CryptoDeprecationWarning(DeprecationWarning, CryptoWarning): """Base PyCrypto DeprecationWarning class""" class CryptoRuntimeWarning(RuntimeWarning, CryptoWarning): """Base PyCrypto RuntimeWarning class""" # # Warnings that we might actually use # class RandomPool_DeprecationWarning(CryptoDeprecationWarning): """Issued when Crypto.Util.randpool.RandomPool is instantiated.""" class ClockRewindWarning(CryptoRuntimeWarning): """Warning for when the system clock moves backwards.""" class GetRandomNumber_DeprecationWarning(CryptoDeprecationWarning): """Issued when Crypto.Util.number.getRandomNumber is invoked.""" class DisableShortcut_DeprecationWarning(CryptoDeprecationWarning): """Issued when Counter.new(disable_shortcut=...) is invoked.""" class PowmInsecureWarning(CryptoRuntimeWarning): """Warning for when _fastmath is built without mpz_powm_sec""" # By default, we want this warning to be shown every time we compensate for # clock rewinding. import warnings as _warnings _warnings.filterwarnings('always', category=ClockRewindWarning, append=1) # vim:set ts=4 sw=4 sts=4 expandtab: cgcloud-releases-1.6.0/mesos-tools/000077500000000000000000000000001301512357500172255ustar00rootroot00000000000000cgcloud-releases-1.6.0/mesos-tools/.gitignore000066400000000000000000000000671301512357500212200ustar00rootroot00000000000000/build /dist *.egg-info *.pyc /MANIFEST.in /version.py cgcloud-releases-1.6.0/mesos-tools/setup.cfg000066400000000000000000000002251301512357500210450ustar00rootroot00000000000000[pytest] # Look for any python file, the default of test_*.py wouldn't work for us python_files=*.py # Also run doctests addopts = --doctest-modules cgcloud-releases-1.6.0/mesos-tools/setup.py000066400000000000000000000011731301512357500207410ustar00rootroot00000000000000from __future__ import absolute_import from setuptools import setup, find_packages from version import cgcloud_version, bd2k_python_lib_dep, boto_dep setup( name="cgcloud-mesos-tools", version=cgcloud_version, author='Christopher Ketchum', author_email='cketchum@ucsc.edu', url='https://github.com/BD2KGenomics/cgcloud', description='Setup and manage an Apache Mesos cluster in EC2', package_dir={ '': 'src' }, packages=find_packages( 'src' ), namespace_packages=[ 'cgcloud' ], install_requires=[ bd2k_python_lib_dep, 'cgcloud-lib==' + cgcloud_version, boto_dep ] ) cgcloud-releases-1.6.0/mesos-tools/src/000077500000000000000000000000001301512357500200145ustar00rootroot00000000000000cgcloud-releases-1.6.0/mesos-tools/src/cgcloud/000077500000000000000000000000001301512357500214345ustar00rootroot00000000000000cgcloud-releases-1.6.0/mesos-tools/src/cgcloud/__init__.py000066400000000000000000000000741301512357500235460ustar00rootroot00000000000000__import__( 'pkg_resources' ).declare_namespace( __name__ ) cgcloud-releases-1.6.0/mesos-tools/src/cgcloud/mesos_tools/000077500000000000000000000000001301512357500240025ustar00rootroot00000000000000cgcloud-releases-1.6.0/mesos-tools/src/cgcloud/mesos_tools/__init__.py000066400000000000000000000343421301512357500261210ustar00rootroot00000000000000import errno import fcntl import itertools import logging import os import re import socket import stat import time from collections import OrderedDict from grp import getgrnam from pwd import getpwnam from subprocess import check_call, check_output, CalledProcessError from urllib2 import urlopen import boto.ec2 from bd2k.util import memoize, less_strict_bool from bd2k.util.files import 
mkdir_p from boto.ec2.instance import Instance from cgcloud.lib.ec2 import EC2VolumeHelper from cgcloud.lib.util import volume_label_hash initctl = '/sbin/initctl' sudo = '/usr/bin/sudo' log = logging.getLogger( __name__ ) class MesosTools( object ): """ Tools for master discovery and managing the slaves file for Mesos. All of this happens at boot time when a node (master or slave) starts up as part of a cluster. Master discovery works as follows: All instances in a Mesos cluster are tagged with the instance ID of the master. Each instance will look up the private IP of 1) the master instance using the EC2 API (via boto) and 2) itself using the instance metadata endpoint. An entry for "mesos-master" will be added to /etc/hosts. All configuration files use these names instead of hard-coding the IPs. This is all that's needed to boot a working cluster. Optionally, a persistent EBS volume is attached, formmatted (if needed) and mounted. """ def __init__( self, user, shared_dir, ephemeral_dir, persistent_dir, lazy_dirs ): """ :param user: the user the services run as """ super( MesosTools, self ).__init__( ) self.user = user self.shared_dir = shared_dir self.ephemeral_dir = ephemeral_dir self.persistent_dir = persistent_dir self.uid = getpwnam( self.user ).pw_uid self.gid = getgrnam( self.user ).gr_gid self.lazy_dirs = lazy_dirs self._patch_boto_config( ) def _patch_boto_config( self ): from boto import config def inject_default( name, default ): section = 'Boto' value = config.get( section, name ) if value != default: if not config.has_section( section ): config.add_section( section ) config.set( section, name, default ) # Override the 5xx retry limit default of 6 inject_default( 'num_retries', '12' ) def start( self ): """ Invoked at boot time or when the mesosbox service is started. """ while not os.path.exists( '/tmp/cloud-init.done' ): log.info( "Waiting for cloud-init to finish ..." ) time.sleep( 1 ) log.info( "Starting mesosbox" ) self.__setup_etc_hosts( ) self.__mount_ebs_volume( ) self.__create_lazy_dirs( ) if self.master_ip == self.node_ip: node_type = 'master' self.__publish_host_key( ) else: node_type = 'slave' self.__get_master_host_key( ) self.__wait_for_master_ssh( ) if self.shared_dir: self._copy_dir_from_master( self.shared_dir ) self.__prepare_slave_args( ) log.info( "Starting %s services" % node_type ) check_call( [ initctl, 'emit', 'mesosbox-start-%s' % node_type ] ) def stop( self ): """ Invoked at shutdown time or when the mesosbox service is stopped. 
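        Currently this only removes the 'mesos-master' entry from /etc/hosts
        again, undoing that part of what start() set up.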
""" log.info( "Stopping mesosbox" ) self.__patch_etc_hosts( { 'mesos-master': None } ) @classmethod @memoize def instance_data( cls, path ): return urlopen( 'http://169.254.169.254/latest/' + path ).read( ) @classmethod @memoize def meta_data( cls, path ): return cls.instance_data( 'meta-data/' + path ) @classmethod @memoize def user_data( cls ): user_data = cls.instance_data( 'user-data' ) log.info( "User data is '%s'", user_data ) return user_data @property @memoize def node_ip( self ): ip = self.meta_data( 'local-ipv4' ) log.info( "Local IP is '%s'", ip ) return ip @property @memoize def instance_id( self ): instance_id = self.meta_data( 'instance-id' ) log.info( "Instance ID is '%s'", instance_id ) return instance_id @property @memoize def availability_zone( self ): zone = self.meta_data( 'placement/availability-zone' ) log.info( "Availability zone is '%s'", zone ) return zone @property @memoize def region( self ): m = re.match( r'^([a-z]{2}-[a-z]+-[1-9][0-9]*)([a-z])$', self.availability_zone ) assert m region = m.group( 1 ) log.info( "Region is '%s'", region ) return region @property @memoize def ec2( self ): return boto.ec2.connect_to_region( self.region ) @property @memoize def master_id( self ): master_id = self.instance_tag( 'leader_instance_id' ) if not master_id: raise RuntimeError( "Instance not tagged with master's instance ID" ) log.info( "Master's instance ID is '%s'", master_id ) return master_id @property @memoize def master_ip( self ): if self.master_id == self.instance_id: master_ip = self.node_ip log.info( "I am the master" ) else: log.info( "I am a slave" ) master_ip = self.master_instance.private_ip_address log.info( "Master IP is '%s'", master_ip ) return master_ip @property @memoize def is_spot_instance( self ): result = bool( self.this_instance.spot_instance_request_id ) log.info( "I am %s spot instance", "a" if result else "not a" ) return result @memoize def instance( self, instance_id ): """:rtype: Instance""" instances = self.ec2.get_only_instances( instance_ids=[ instance_id ] ) assert len( instances ) == 1 instance = instances[ 0 ] return instance @property @memoize def this_instance( self ): """:rtype: Instance""" instance = self.instance( self.instance_id ) log.info( "I am running on %r", instance.__dict__ ) return instance @property @memoize def master_instance( self ): """:rtype: Instance""" return self.instance( self.master_id ) @memoize def instance_tag( self, key ): """:rtype: str|None""" return self.this_instance.tags.get( key ) def __mount_ebs_volume( self ): """ Attach, format (if necessary) and mount the EBS volume with the same cluster ordinal as this node. 
""" ebs_volume_size = self.instance_tag( 'ebs_volume_size' ) or '0' ebs_volume_size = int( ebs_volume_size ) if ebs_volume_size: instance_name = self.instance_tag( 'Name' ) cluster_ordinal = int( self.instance_tag( 'cluster_ordinal' ) ) volume_name = '%s__%d' % (instance_name, cluster_ordinal) volume = EC2VolumeHelper( ec2=self.ec2, availability_zone=self.availability_zone, name=volume_name, size=ebs_volume_size, volume_type="gp2" ) # TODO: handle case where volume is already attached device_ext = '/dev/sdf' device = '/dev/xvdf' volume.attach( self.instance_id, device_ext ) # Wait for inode to appear and make sure its a block device while True: try: assert stat.S_ISBLK( os.stat( device ).st_mode ) break except OSError as e: if e.errno == errno.ENOENT: time.sleep( 1 ) else: raise # Only format empty volumes volume_label = volume_label_hash( volume_name ) if check_output( [ 'file', '-sL', device ] ).strip( ) == device + ': data': check_call( [ 'mkfs', '-t', 'ext4', device ] ) check_call( [ 'e2label', device, volume_label ] ) else: # If the volume is not empty, verify the file system label actual_label = check_output( [ 'e2label', device ] ).strip( ) if actual_label != volume_label: raise AssertionError( "Expected volume label '%s' (derived from '%s') but got '%s'" % (volume_label, volume_name, actual_label) ) current_mount_point = self.__mount_point( device ) if current_mount_point is None: mkdir_p( self.persistent_dir ) check_call( [ 'mount', device, self.persistent_dir ] ) elif current_mount_point == self.persistent_dir: pass else: raise RuntimeError( "Can't mount device %s on '%s' since it is already mounted on '%s'" % ( device, self.persistent_dir, current_mount_point) ) else: # No persistent volume is attached and the root volume is off limits, so we will need # to place persistent data on the ephemeral volume. self.persistent_dir = self.ephemeral_dir def __get_master_host_key( self ): log.info( "Getting master's host key" ) master_host_key = self.master_instance.tags.get( 'ssh_host_key' ) if master_host_key: self.__add_host_keys( [ 'mesos-master:' + master_host_key ] ) else: log.warn( "Could not get master's host key" ) def __add_host_keys( self, host_keys, globally=None ): if globally is None: globally = os.geteuid( ) == 0 if globally: known_hosts_path = '/etc/ssh/ssh_known_hosts' else: known_hosts_path = os.path.expanduser( '~/.ssh/known_hosts' ) with open( known_hosts_path, 'a+' ) as f: fcntl.flock( f, fcntl.LOCK_EX ) keys = set( _.strip( ) for _ in f.readlines( ) ) keys.update( ' '.join( _.split( ':' ) ) for _ in host_keys ) if '' in keys: keys.remove( '' ) keys = list( keys ) keys.sort( ) keys.append( '' ) f.seek( 0 ) f.truncate( 0 ) f.write( '\n'.join( keys ) ) def __wait_for_master_ssh( self ): """ Wait until the instance represented by this box is accessible via SSH. 
""" for _ in itertools.count( ): s = socket.socket( socket.AF_INET, socket.SOCK_STREAM ) try: s.settimeout( 5 ) s.connect( ('mesos-master', 22) ) return except socket.error: pass finally: s.close( ) def _copy_dir_from_master( self, path ): log.info( "Copying %s from master" % path ) if not path.endswith( '/' ): path += '/' for tries in range( 5 ): try: check_call( [ sudo, '-u', self.user, 'rsync', '-av', 'mesos-master:' + path, path ] ) except CalledProcessError as e: log.warn( "rsync returned %i, retrying in 5s", e.returncode ) time.sleep( 5 ) else: return raise RuntimeError( "Failed to copy %s from master" ) def __get_host_key( self ): with open( '/etc/ssh/ssh_host_ecdsa_key.pub' ) as f: return ':'.join( f.read( ).split( )[ :2 ] ) def __publish_host_key( self ): master_host_key = self.__get_host_key( ) self.ec2.create_tags( [ self.master_id ], dict( ssh_host_key=master_host_key ) ) def __create_lazy_dirs( self ): log.info( "Bind-mounting directory structure" ) for (parent, name, persistent) in self.lazy_dirs: assert parent[ 0 ] == os.path.sep logical_path = os.path.join( parent, name ) if persistent is None: tag = 'persist' + logical_path.replace( os.path.sep, '_' ) persistent = less_strict_bool( self.instance_tag( tag ) ) location = self.persistent_dir if persistent else self.ephemeral_dir physical_path = os.path.join( location, parent[ 1: ], name ) mkdir_p( physical_path ) os.chown( physical_path, self.uid, self.gid ) check_call( [ 'mount', '--bind', physical_path, logical_path ] ) def __setup_etc_hosts( self ): hosts = self.instance_tag( 'etc_hosts_entries' ) or "" hosts = parse_etc_hosts_entries( hosts ) hosts[ 'mesos-master' ] = self.master_ip self.__patch_etc_hosts( hosts ) def __patch_etc_hosts( self, hosts ): log.info( "Patching /etc/host" ) # FIXME: The handling of /etc/hosts isn't atomic with open( '/etc/hosts', 'r+' ) as etc_hosts: lines = [ line for line in etc_hosts.readlines( ) if not any( host in line for host in hosts.iterkeys( ) ) ] for host, ip in hosts.iteritems( ): if ip: lines.append( "%s %s\n" % (ip, host) ) etc_hosts.seek( 0 ) etc_hosts.truncate( 0 ) etc_hosts.writelines( lines ) def __mount_point( self, device ): with open( '/proc/mounts' ) as f: for line in f: line = line.split( ) if line[ 0 ] == device: return line[ 1 ] return None def __prepare_slave_args( self ): attributes = dict( preemptable=self.is_spot_instance ) with open( '/var/lib/mesos/slave_args', 'w' ) as f: if attributes: attributes = ';'.join( '%s:%r' % i for i in attributes.items( ) ) f.write( "--attributes=%s" % attributes ) def parse_etc_hosts_entries( hosts ): """ >>> parse_etc_hosts_entries("").items() [] >>> parse_etc_hosts_entries("foo:1.2.3.4").items() [('foo', '1.2.3.4')] >>> parse_etc_hosts_entries(" foo : 1.2.3.4 , bar : 2.3.4.5 ").items() [('foo', '1.2.3.4'), ('bar', '2.3.4.5')] """ return OrderedDict( (ip.strip( ), name.strip( )) for ip, name in (entry.split( ':', 1 ) for entry in hosts.split( ',' ) if entry) ) cgcloud-releases-1.6.0/mesos/000077500000000000000000000000001301512357500160675ustar00rootroot00000000000000cgcloud-releases-1.6.0/mesos/.gitignore000066400000000000000000000000671301512357500200620ustar00rootroot00000000000000/build /dist *.egg-info *.pyc /MANIFEST.in /version.py cgcloud-releases-1.6.0/mesos/LICENSE000066400000000000000000000261361301512357500171040ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. 
"License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. 
Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. 
This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "{}" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright {yyyy} {name of copyright owner} Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
cgcloud-releases-1.6.0/mesos/README.rst000066400000000000000000000034711301512357500175630ustar00rootroot00000000000000The CGCloud plugin for Mesos lets you setup a fully configured Apache Mesos cluster in EC2 in just minutes, regardless of the number of nodes. Prerequisites ============= The ``cgcloud-mesos`` package requires that the ``cgcloud-core`` package and its prerequisites_ are present. .. _prerequisites: ../core#prerequisites Installation ============ Read the entire section before pasting any commands and ensure that all prerequisites are installed. It is recommended to install this plugin into the virtualenv you created for CGCloud:: source ~/cgcloud/bin/activate pip install cgcloud-mesos If you get ``DistributionNotFound: No distributions matching the version for cgcloud-mesos``, try running ``pip install --pre cgcloud-mesos``. Be sure to configure_ ``cgcloud-core`` before proceeding. .. _configure: ../core/README.rst#configuration Configuration ============= Modify your ``.profile`` or ``.bash_profile`` by adding the following line:: export CGCLOUD_PLUGINS="cgcloud.mesos:$CGCLOUD_PLUGINS" Login and out (or, on OS X, start a new Terminal tab/window). Verify the installation by running:: cgcloud list-roles The output should include the ``mesos-box`` role. Usage ===== Create a single ``t2.micro`` box to serve as the template for the cluster nodes:: cgcloud create -IT mesos-box The ``I`` option stops the box once it is fully set up and takes an image (AMI) of it. The ``T`` option terminates the box after that. Now create a cluster by booting a master and the slaves from that AMI:: cgcloud create-cluster mesos -s 2 -t m3.large This will launch a master and two slaves using the ``m3.large`` instance type. SSH into the master:: cgcloud ssh mesos-master ... or the first slave:: cgcloud ssh -o 0 mesos-slave ... 
or the second slave:: cgcloud ssh -o 1 mesos-slave cgcloud-releases-1.6.0/mesos/setup.cfg000066400000000000000000000002251301512357500177070ustar00rootroot00000000000000[pytest] # Look for any python file, the default of test_*.py wouldn't work for us python_files=*.py # Also run doctests addopts = --doctest-modules cgcloud-releases-1.6.0/mesos/setup.py000066400000000000000000000013511301512357500176010ustar00rootroot00000000000000from __future__ import absolute_import from setuptools import setup, find_packages from version import cgcloud_version, bd2k_python_lib_dep, fabric_dep setup( name='cgcloud-mesos', version=cgcloud_version, author='Christopher Ketchum', author_email='cketchum@ucsc.edu', url='https://github.com/BD2KGenomics/cgcloud', description='Setup and manage a Apache Mesos cluster in EC2', package_dir={ '': 'src' }, packages=find_packages( 'src' ), namespace_packages=[ 'cgcloud' ], install_requires=[ 'cgcloud-lib==' + cgcloud_version, 'cgcloud-core==' + cgcloud_version, bd2k_python_lib_dep, fabric_dep ] ) cgcloud-releases-1.6.0/mesos/src/000077500000000000000000000000001301512357500166565ustar00rootroot00000000000000cgcloud-releases-1.6.0/mesos/src/cgcloud/000077500000000000000000000000001301512357500202765ustar00rootroot00000000000000cgcloud-releases-1.6.0/mesos/src/cgcloud/__init__.py000066400000000000000000000000741301512357500224100ustar00rootroot00000000000000__import__( 'pkg_resources' ).declare_namespace( __name__ ) cgcloud-releases-1.6.0/mesos/src/cgcloud/mesos/000077500000000000000000000000001301512357500214245ustar00rootroot00000000000000cgcloud-releases-1.6.0/mesos/src/cgcloud/mesos/__init__.py000066400000000000000000000004671301512357500235440ustar00rootroot00000000000000def roles( ): from cgcloud.mesos.mesos_box import MesosBox, MesosMaster, MesosSlave return sorted( locals( ).values( ), key=lambda cls: cls.__name__ ) def cluster_types( ): from cgcloud.mesos.mesos_cluster import MesosCluster return sorted( locals( ).values( ), key=lambda cls: cls.__name__ ) cgcloud-releases-1.6.0/mesos/src/cgcloud/mesos/mesos_box.py000066400000000000000000000317061301512357500240030ustar00rootroot00000000000000import logging from collections import namedtuple from pipes import quote from bd2k.util.iterables import concat from bd2k.util.strings import interpolate as fmt from fabric.context_managers import settings from fabric.operations import run from cgcloud.core.box import fabric_task from cgcloud.core.cluster import ClusterBox, ClusterLeader, ClusterWorker from cgcloud.core.common_iam_policies import ec2_read_only_policy from cgcloud.core.generic_boxes import GenericUbuntuTrustyBox from cgcloud.core.mesos_box import MesosBox as CoreMesosBox from cgcloud.core.ubuntu_box import Python27UpdateUbuntuBox from cgcloud.fabric.operations import sudo, remote_open, pip, sudov from cgcloud.lib.util import abreviated_snake_case_class_name, heredoc log = logging.getLogger( __name__ ) user = 'mesosbox' install_dir = '/opt/mesosbox' log_dir = '/var/log/mesosbox' ephemeral_dir = '/mnt/ephemeral' persistent_dir = '/mnt/persistent' work_dir = '/var/lib/mesos' Service = namedtuple( 'Service', [ 'init_name', 'description', 'command' ] ) def mesos_service( name, *flags ): command = concat( '/usr/sbin/mesos-{name}', '--log_dir={log_dir}/mesos', flags ) return Service( init_name='mesosbox-' + name, description=fmt( 'Mesos {name} service' ), command=fmt( ' '.join( command ) ) ) mesos_services = dict( master=[ mesos_service( 'master', '--registry=in_memory', # would use "--ip mesos-master" here but 
that option only supports # IP addresses, not DNS names or /etc/hosts entries '--ip_discovery_command="hostname -i"', '--credentials=/etc/mesos/credentials' ) ], slave=[ mesos_service( 'slave', '--master=mesos-master:5050', '--no-switch_user', '--work_dir=' + work_dir, '--executor_shutdown_grace_period=60secs', # By default Mesos offers the total disk minus what it reserves for # itself, which is half the total disk or 5GiB whichever is smaller. # The code below mimicks that logic except that it uses available disk # space as opposed to total disk. NB: the default unit is MiB in Mesos. "--resources=disk:$(python -c %s)" % quote( heredoc( """ import os df = os.statvfs( "{work_dir}" ) free = df.f_frsize * df.f_bavail >> 20 print max( 0, free - min( free / 2, 5120 ) )""" ).replace( '\n', ';' ) ), '$(cat /var/lib/mesos/slave_args)' ) ] ) class MesosBoxSupport( GenericUbuntuTrustyBox, Python27UpdateUbuntuBox, CoreMesosBox ): """ A node in a Mesos cluster. Both slaves and masters are based on this initial setup. Those specific roles are determined at boot time. Worker nodes need to be passed the master's IP and port before starting up. """ @classmethod def get_role_options( cls ): return super( MesosBoxSupport, cls ).get_role_options( ) + [ cls.RoleOption( name='etc_hosts_entries', type=str, repr=str, inherited=True, help="Additional entries for /etc/hosts in the form " "'foo:1.2.3.4,bar:2.3.4.5'" ) ] def other_accounts( self ): return super( MesosBoxSupport, self ).other_accounts( ) + [ user ] def default_account( self ): return user def __init__( self, ctx ): super( MesosBoxSupport, self ).__init__( ctx ) self.lazy_dirs = set( ) def _populate_security_group( self, group_id ): return super( MesosBoxSupport, self )._populate_security_group( group_id ) + [ dict( ip_protocol='tcp', from_port=0, to_port=65535, src_security_group_group_id=group_id ), dict( ip_protocol='udp', from_port=0, to_port=65535, src_security_group_group_id=group_id ) ] def _get_iam_ec2_role( self ): iam_role_name, policies = super( MesosBoxSupport, self )._get_iam_ec2_role( ) iam_role_name += '--' + abreviated_snake_case_class_name( MesosBoxSupport ) policies.update( dict( ec2_read_only=ec2_read_only_policy, ec2_mesos_box=dict( Version="2012-10-17", Statement=[ dict( Effect="Allow", Resource="*", Action="ec2:CreateTags" ), dict( Effect="Allow", Resource="*", Action="ec2:CreateVolume" ), dict( Effect="Allow", Resource="*", Action="ec2:AttachVolume" ) ] ) ) ) return iam_role_name, policies def _pre_install_packages( self ): super( MesosBoxSupport, self )._pre_install_packages( ) self.__setup_application_user( ) @fabric_task def __setup_application_user( self ): sudo( fmt( 'useradd ' '--home /home/{user} ' '--create-home ' '--user-group ' '--shell /bin/bash {user}' ) ) def _post_install_packages( self ): super( MesosBoxSupport, self )._post_install_packages( ) self._propagate_authorized_keys( user, user ) self.__setup_shared_dir( ) self.__setup_ssh_config( ) self.__create_mesos_keypair( ) self.__setup_mesos( ) self.__install_tools( ) def _shared_dir( self ): return '/home/%s/shared' % self.default_account( ) @fabric_task def __setup_shared_dir( self ): sudov( 'install', '-d', self._shared_dir( ), '-m', '700', '-o', self.default_account( ) ) @fabric_task def __setup_ssh_config( self ): with remote_open( '/etc/ssh/ssh_config', use_sudo=True ) as f: f.write( heredoc( """ Host spark-master CheckHostIP no HashKnownHosts no""" ) ) @fabric_task( user=user ) def __create_mesos_keypair( self ): self._provide_imported_keypair( 
ec2_keypair_name=self.__ec2_keypair_name( self.ctx ), private_key_path=fmt( "/home/{user}/.ssh/id_rsa" ), overwrite_ec2=True ) # This trick allows us to roam freely within the cluster as the app user while still # being able to have keypairs in authorized_keys managed by cgcloudagent such that # external users can login as the app user, too. The trick depends on AuthorizedKeysFile # defaulting to or being set to .ssh/autorized_keys and .ssh/autorized_keys2 in sshd_config run( "cd .ssh && cat id_rsa.pub >> authorized_keys2" ) def __ec2_keypair_name( self, ctx ): return user + '@' + ctx.to_aws_name( self.role( ) ) @fabric_task def __setup_mesos( self ): sudo( "rm /etc/init/mesos-{master,slave}.conf" ) self._lazy_mkdir( log_dir, 'mesos', persistent=False ) self._lazy_mkdir( '/var/lib', 'mesos', persistent=True ) self.__prepare_credentials( ) self.__register_upstart_jobs( mesos_services ) self._post_install_mesos( ) def _post_install_mesos( self ): pass def __prepare_credentials( self ): # Create the credentials file and transfer ownership to mesosbox sudo( 'mkdir -p /etc/mesos' ) sudo( 'echo toil liot > /etc/mesos/credentials' ) sudo( 'chown mesosbox:mesosbox /etc/mesos/credentials' ) @fabric_task def __install_tools( self ): """ Installs the mesos-master-discovery init script and its companion mesos-tools. The latter is a Python package distribution that's included in cgcloud-mesos as a resource. This is in contrast to the cgcloud agent, which is a standalone distribution. """ tools_dir = install_dir + '/tools' admin = self.admin_account( ) sudo( fmt( 'mkdir -p {tools_dir}' ) ) sudo( fmt( 'chown {admin}:{admin} {tools_dir}' ) ) run( fmt( 'virtualenv --no-pip {tools_dir}' ) ) run( fmt( '{tools_dir}/bin/easy_install pip==1.5.2' ) ) with settings( forward_agent=True ): with self._project_artifacts( 'mesos-tools' ) as artifacts: pip( use_sudo=True, path=tools_dir + '/bin/pip', args=concat( 'install', artifacts ) ) sudo( fmt( 'chown -R root:root {tools_dir}' ) ) mesos_tools = "MesosTools(**%r)" % dict( user=user, shared_dir=self._shared_dir( ), ephemeral_dir=ephemeral_dir, persistent_dir=persistent_dir, lazy_dirs=self.lazy_dirs ) self.lazy_dirs = None # make sure it can't be used anymore once we are done with it self._register_init_script( "mesosbox", heredoc( """ description "Mesos master discovery" console log start on (local-filesystems and net-device-up IFACE!=lo) stop on runlevel [!2345] pre-start script for i in 1 2 3; do if {tools_dir}/bin/python2.7 - <= expiration: self.fail( "Cluster didn't come up in time" ) time.sleep( delay ) else: break cgcloud-releases-1.6.0/mesos/src/cgcloud/mesos/test/conftest.py000066400000000000000000000000701301512357500245770ustar00rootroot00000000000000from cgcloud.core.test.conftest import pytest_configure cgcloud-releases-1.6.0/mesos/src/cgcloud/mesos/test/test_mesos.py000066400000000000000000000043651301512357500251520ustar00rootroot00000000000000import os import logging from cgcloud.mesos.mesos_box import MesosBox, MesosMaster, MesosSlave from cgcloud.mesos.test import MesosTestCase log = logging.getLogger( __name__ ) master = MesosMaster.role( ) slave = MesosSlave.role( ) node = MesosBox.role( ) num_slaves = 2 class MesosClusterTests( MesosTestCase ): """ Covers the creation of a Mesos cluster and running a simple script on it. 
""" cleanup = True create_image = True @classmethod def setUpClass( cls ): os.environ[ 'CGCLOUD_PLUGINS' ] = 'cgcloud.mesos' super( MesosClusterTests, cls ).setUpClass( ) if cls.create_image: cls._cgcloud( 'create', node, '-I', '-T' ) @classmethod def tearDownClass( cls ): if cls.cleanup and cls.create_image: cls._cgcloud( 'delete-image', node ) super( MesosClusterTests, cls ).tearDownClass( ) def test_mesos( self ): self._create_cluster( ) try: self._assert_remote_failure( master ) self._wait_for_mesos_slaves( master, num_slaves ) self._test_mesos( ) finally: if self.cleanup: self._terminate_cluster( ) def _create_cluster( self, *args ): self._cgcloud( 'create-cluster', 'mesos', '-s', str( num_slaves ), *args ) def _terminate_cluster( self ): self._cgcloud( 'terminate-cluster', 'mesos' ) def _test_mesos( self ): for i in xrange( num_slaves ): self._ssh( slave, 'test ! -f cgcloud_test.tmp', ordinal=i ) # This is probabalistic: we hope that if we do ten times as many tasks as there are nodes # chances are that we hit each node at least once. num_tasks = num_slaves * 10 for i in xrange( num_tasks ): self._ssh( master, 'mesos execute ' '--master=mesos-master:5050 ' '--name=cgcloud_test ' '--command="touch $(pwd)/cgcloud_test.tmp" ' '>> mesos_execute.out' ) self._ssh( master, 'test "$(grep -c TASK_FINISHED mesos_execute.out)" = %i' % num_tasks ) for i in xrange( num_slaves ): self._ssh( slave, 'test -f cgcloud_test.tmp', ordinal=i ) cgcloud-releases-1.6.0/run_tests.py000066400000000000000000000056271301512357500173530ustar00rootroot00000000000000import glob import itertools import logging import os import subprocess import sys log = logging.getLogger( __name__ ) # A "keyword" is an argument to pytest's -k option. It acts as a selector for tests. Each of the # keywords in the list below will be run concurrently. Once they are done, everything else will # be run sequentially. Please note that keywords are match as substrings: Foo will match Foo, # FooBar and BarFoo. # try: if not os.getcwd( ) in sys.path: sys.path.append( os.getcwd( ) ) from tests import parallelizable_keywords except ImportError: parallelizable_keywords = [ ] def run_tests( index, keywords=None, args=None ): cmd = [ sys.executable, '-m', 'pytest', '-vv', '--junitxml', 'nosetests-%s.xml' % index ] if keywords: cmd.extend( [ '-k', keywords ] ) if args: cmd.extend( args ) log.info( 'Running %r', cmd ) return subprocess.Popen( cmd ) def main( args ): for name in glob.glob( 'nosetests-*.xml' ): os.unlink( name ) num_failures = 0 index = itertools.count( ) pids = set( ) # PyTest thinks that absence of tests constitutes an error. # Luckily it has a distinct status code (5) for that. 
ok_statuses = (0, 5) try: for keyword in parallelizable_keywords: process = run_tests( index=str( next( index ) ), keywords=keyword, args=args ) pids.add( process.pid ) while pids: pid, status = os.wait( ) pids.remove( pid ) if os.WIFEXITED( status ): status = os.WEXITSTATUS( status ) if status not in ok_statuses: num_failures += 1 else: num_failures += 1 except: for pid in pids: os.kill( pid, 15 ) raise if parallelizable_keywords: everything_else = ' and '.join( 'not ' + keyword for keyword in parallelizable_keywords ) else: everything_else = None process = run_tests( index=str( next( index ) ), keywords=everything_else, args=args ) if process.wait( ) not in ok_statuses: num_failures += 1 import xml.etree.ElementTree as ET testsuites = ET.Element( 'testsuites' ) for name in glob.glob( 'nosetests-*.xml' ): log.info( "Reading test report %s", name ) tree = ET.parse( name ) testsuites.append( tree.getroot( ) ) os.unlink( name ) name = 'nosetests.xml' log.info( 'Writing aggregate test report %s', name ) ET.ElementTree( testsuites ).write( name, xml_declaration=True ) if num_failures: log.error( '%i out %i child processes failed', num_failures, next( index ) ) return num_failures if __name__ == '__main__': logging.basicConfig( level=logging.INFO ) sys.exit( main( sys.argv[ 1: ] ) ) cgcloud-releases-1.6.0/spark-tools/000077500000000000000000000000001301512357500172175ustar00rootroot00000000000000cgcloud-releases-1.6.0/spark-tools/.gitignore000066400000000000000000000000671301512357500212120ustar00rootroot00000000000000/build /dist *.egg-info *.pyc /MANIFEST.in /version.py cgcloud-releases-1.6.0/spark-tools/setup.cfg000066400000000000000000000002251301512357500210370ustar00rootroot00000000000000[pytest] # Look for any python file, the default of test_*.py wouldn't work for us python_files=*.py # Also run doctests addopts = --doctest-modules cgcloud-releases-1.6.0/spark-tools/setup.py000066400000000000000000000011631301512357500207320ustar00rootroot00000000000000from __future__ import absolute_import from setuptools import setup, find_packages from version import cgcloud_version, bd2k_python_lib_dep, boto_dep setup( name="cgcloud-spark-tools", version=cgcloud_version, author='Hannes Schmidt', author_email='hannes@ucsc.edu', url='https://github.com/BD2KGenomics/cgcloud', description='Setup and manage a Apache Spark cluster in EC2', package_dir={ '': 'src' }, packages=find_packages( 'src' ), namespace_packages=[ 'cgcloud' ], install_requires=[ bd2k_python_lib_dep, 'cgcloud-lib==' + cgcloud_version, boto_dep ] ) cgcloud-releases-1.6.0/spark-tools/src/000077500000000000000000000000001301512357500200065ustar00rootroot00000000000000cgcloud-releases-1.6.0/spark-tools/src/cgcloud/000077500000000000000000000000001301512357500214265ustar00rootroot00000000000000cgcloud-releases-1.6.0/spark-tools/src/cgcloud/__init__.py000066400000000000000000000000741301512357500235400ustar00rootroot00000000000000__import__( 'pkg_resources' ).declare_namespace( __name__ ) cgcloud-releases-1.6.0/spark-tools/src/cgcloud/spark_tools/000077500000000000000000000000001301512357500237665ustar00rootroot00000000000000cgcloud-releases-1.6.0/spark-tools/src/cgcloud/spark_tools/__init__.py000066400000000000000000000450161301512357500261050ustar00rootroot00000000000000import errno import fcntl import itertools import logging import os import re import socket import stat import time from collections import OrderedDict from grp import getgrnam from pwd import getpwnam from subprocess import check_call, check_output, CalledProcessError, 
STDOUT from urllib2 import urlopen import boto.ec2 from bd2k.util import memoize, less_strict_bool from bd2k.util.files import mkdir_p from boto.ec2.instance import Instance from cgcloud.lib.ec2 import EC2VolumeHelper from cgcloud.lib.util import volume_label_hash initctl = '/sbin/initctl' sudo = '/usr/bin/sudo' log = logging.getLogger( __name__ ) class SparkTools( object ): """ Tools for master discovery and managing the slaves file for Hadoop and Spark. All of this happens at boot time when a node (master or slave) starts up as part of a cluster. Master discovery works as follows: All instances in a Spark cluster are tagged with the instance ID of the master. Each instance will look up the private IP of 1) the master instance using the EC2 API (via boto) and 2) itself using the instance metadata endpoint. An entry for "spark-master" will be added to /etc/hosts. All configuration files use these names instead of hard-coding the IPs. This is all that's needed to boot a working cluster. In order to facilitate the start-all.sh and stop-all.sh scripts in Hadoop and Spark, the slaves file needs to be populated as well. The master seeds the slaves file by listing all instances tagged with its own instance ID. Additionally, the slaves ssh into the master to have their own IP added to the master's slaves file, thereby enabling the dynamic addition of slaves to a cluster. Both actions are managed by the spark-manage-slaves script. The slaves file in spark/conf and hadoop/etc/hadoop is actually a symlink to a file in /tmp whose name ends in the IP of the master. This is to ensure that a fresh slaves file is used for every incarnation of the AMI and after each restart of the master instance. Optionally, a persistent EBS volume is attached, formmatted (if needed) and mounted. """ def __init__( self, user, shared_dir, install_dir, ephemeral_dir, persistent_dir, lazy_dirs ): """ :param user: the user the services run as :param install_dir: root installation directory, e.g. /opt """ super( SparkTools, self ).__init__( ) self.user = user self.shared_dir = shared_dir self.install_dir = install_dir self.ephemeral_dir = ephemeral_dir self.persistent_dir = persistent_dir self.uid = getpwnam( self.user ).pw_uid self.gid = getgrnam( self.user ).gr_gid self.lazy_dirs = lazy_dirs self._patch_boto_config( ) def _patch_boto_config( self ): from boto import config def inject_default( name, default ): section = 'Boto' value = config.get( section, name ) if value != default: if not config.has_section( section ): config.add_section( section ) config.set( section, name, default ) # Override the 5xx retry limit default of 6 inject_default( 'num_retries', '12' ) def start( self ): """ Invoked at boot time or when the sparkbox service is started. """ while not os.path.exists( '/tmp/cloud-init.done' ): log.info( "Waiting for cloud-init to finish ..." ) time.sleep( 1 ) log.info( "Starting sparkbox" ) self.__setup_etc_hosts( ) self.__mount_ebs_volume( ) self.__create_lazy_dirs( ) if self.master_ip == self.node_ip: node_type = 'master' self.__publish_host_key( ) self.__prepare_slaves_file( ) self.__format_namenode( ) else: node_type = 'slave' self.__get_master_host_key( ) self.__wait_for_master_ssh( ) self.__register_with_master( ) if self.shared_dir: self._copy_dir_from_master( self.shared_dir ) log.info( "Starting %s services" % node_type ) check_call( [ initctl, 'emit', 'sparkbox-start-%s' % node_type ] ) def stop( self ): """ Invoked at shutdown time or when the sparkbox service is stopped. 
""" log.info( "Stopping sparkbox" ) self.__patch_etc_hosts( { 'spark-master': None } ) def manage_slaves( self, slaves_to_add=None ): """ This method is invoked when the sparkbox-manage-slaves script is run. It has two modes: the first mode initializes the slaves file when the master starts up. All currently running slaves will be added to the slaves file. The second mode adds specific slaves to the slaves, typically just one. This happens when the sparkbox-manage-slaves script is invoked from a slave on the master via ssh. :param slaves_to_add: an iterable yielding strings containing the IP address of a slave. The format is IP : SSH_KEY_ALGO : SSH_HOST_KEY without the spaces. If this parameter is empty or None, all slaves belonging to this master will be listed via EC2 and then added. """ log.info( "Managing slaves file" ) slaves_path = "/tmp/slaves-" + self.master_ip with open( slaves_path, 'a+' ) as f: fcntl.flock( f, fcntl.LOCK_EX ) if slaves_to_add: log.info( "Adding slaves: %r", slaves_to_add ) slaves = set( _.strip( ) for _ in f.readlines( ) ) slaves.update( _.split( ':' )[ 0 ] for _ in slaves_to_add ) else: log.info( "Initializing slaves file" ) reservations = self.ec2.get_all_reservations( filters={ 'tag:leader_instance_id': self.master_id } ) slaves = set( i.private_ip_address for r in reservations for i in r.instances if i.id != self.master_id ) log.info( "Found %i slave.", len( slaves ) ) if '' in slaves: slaves.remove( '' ) slaves = list( slaves ) slaves.sort( ) slaves.append( '' ) f.seek( 0 ) f.truncate( 0 ) f.write( '\n'.join( slaves ) ) if slaves_to_add: log.info( "Adding host keys for slaves" ) self.__add_host_keys( slaves_to_add ) @classmethod @memoize def instance_data( cls, path ): return urlopen( 'http://169.254.169.254/latest/' + path ).read( ) @classmethod @memoize def meta_data( cls, path ): return cls.instance_data( 'meta-data/' + path ) @classmethod @memoize def user_data( cls ): user_data = cls.instance_data( 'user-data' ) log.info( "User data is '%s'", user_data ) return user_data @property @memoize def node_ip( self ): ip = self.meta_data( 'local-ipv4' ) log.info( "Local IP is '%s'", ip ) return ip @property @memoize def instance_id( self ): instance_id = self.meta_data( 'instance-id' ) log.info( "Instance ID is '%s'", instance_id ) return instance_id @property @memoize def availability_zone( self ): zone = self.meta_data( 'placement/availability-zone' ) log.info( "Availability zone is '%s'", zone ) return zone @property @memoize def region( self ): m = re.match( r'^([a-z]{2}-[a-z]+-[1-9][0-9]*)([a-z])$', self.availability_zone ) assert m region = m.group( 1 ) log.info( "Region is '%s'", region ) return region @property @memoize def ec2( self ): return boto.ec2.connect_to_region( self.region ) @property @memoize def master_id( self ): master_id = self.instance_tag( 'leader_instance_id' ) if not master_id: raise RuntimeError( "Instance not tagged with master's instance ID" ) log.info( "Master's instance ID is '%s'", master_id ) return master_id @property @memoize def master_ip( self ): if self.master_id == self.instance_id: master_ip = self.node_ip log.info( "I am the master" ) else: log.info( "I am a slave" ) master_ip = self.master_instance.private_ip_address log.info( "Master IP is '%s'", master_ip ) return master_ip @property @memoize def is_spot_instance( self ): result = bool( self.this_instance.spot_instance_request_id ) log.info( "I am %s spot instance", "a" if result else "not a" ) return result @memoize def instance( self, instance_id ): """:rtype: 
Instance""" instances = self.ec2.get_only_instances( instance_ids=[ instance_id ] ) assert len( instances ) == 1 instance = instances[ 0 ] return instance @property @memoize def this_instance( self ): """:rtype: Instance""" instance = self.instance( self.instance_id ) log.info( "I am running on %r", instance.__dict__ ) return instance @property @memoize def master_instance( self ): """:rtype: Instance""" return self.instance( self.master_id ) @memoize def instance_tag( self, key ): """:rtype: str|None""" return self.this_instance.tags.get( key ) def __mount_ebs_volume( self ): """ Attach, format (if necessary) and mount the EBS volume with the same cluster ordinal as this node. """ ebs_volume_size = self.instance_tag( 'ebs_volume_size' ) or '0' ebs_volume_size = int( ebs_volume_size ) if ebs_volume_size: instance_name = self.instance_tag( 'Name' ) cluster_ordinal = int( self.instance_tag( 'cluster_ordinal' ) ) volume_name = '%s__%d' % (instance_name, cluster_ordinal) volume = EC2VolumeHelper( ec2=self.ec2, availability_zone=self.availability_zone, name=volume_name, size=ebs_volume_size, volume_type="gp2" ) # TODO: handle case where volume is already attached device_ext = '/dev/sdf' device = '/dev/xvdf' volume.attach( self.instance_id, device_ext ) # Wait for inode to appear and make sure its a block device while True: try: assert stat.S_ISBLK( os.stat( device ).st_mode ) break except OSError as e: if e.errno == errno.ENOENT: time.sleep( 1 ) else: raise # Only format empty volumes volume_label = volume_label_hash( volume_name ) if check_output( [ 'file', '-sL', device ] ).strip( ) == device + ': data': check_call( [ 'mkfs', '-t', 'ext4', device ] ) check_call( [ 'e2label', device, volume_label ] ) else: # If the volume is not empty, verify the file system label actual_label = check_output( [ 'e2label', device ] ).strip( ) if actual_label != volume_label: raise AssertionError( "Expected volume label '%s' (derived from '%s') but got '%s'" % (volume_label, volume_name, actual_label) ) current_mount_point = self.__mount_point( device ) if current_mount_point is None: mkdir_p( self.persistent_dir ) check_call( [ 'mount', device, self.persistent_dir ] ) elif current_mount_point == self.persistent_dir: pass else: raise RuntimeError( "Can't mount device %s on '%s' since it is already mounted on '%s'" % ( device, self.persistent_dir, current_mount_point) ) else: # No persistent volume is attached and the root volume is off limits, so we will need # to place persistent data on the ephemeral volume. self.persistent_dir = self.ephemeral_dir def __get_master_host_key( self ): log.info( "Getting master's host key" ) master_host_key = self.master_instance.tags.get( 'ssh_host_key' ) if master_host_key: self.__add_host_keys( [ 'spark-master:' + master_host_key ] ) else: log.warn( "Could not get master's host key" ) def __add_host_keys( self, host_keys, globally=None ): if globally is None: globally = os.geteuid( ) == 0 if globally: known_hosts_path = '/etc/ssh/ssh_known_hosts' else: known_hosts_path = os.path.expanduser( '~/.ssh/known_hosts' ) with open( known_hosts_path, 'a+' ) as f: fcntl.flock( f, fcntl.LOCK_EX ) keys = set( _.strip( ) for _ in f.readlines( ) ) keys.update( ' '.join( _.split( ':' ) ) for _ in host_keys ) if '' in keys: keys.remove( '' ) keys = list( keys ) keys.sort( ) keys.append( '' ) f.seek( 0 ) f.truncate( 0 ) f.write( '\n'.join( keys ) ) def __wait_for_master_ssh( self ): """ Wait until the instance represented by this box is accessible via SSH. 
""" for _ in itertools.count( ): s = socket.socket( socket.AF_INET, socket.SOCK_STREAM ) try: s.settimeout( 5 ) s.connect( ('spark-master', 22) ) return except socket.error: pass finally: s.close( ) def _copy_dir_from_master( self, path ): log.info( "Copying %s from master" % path ) if not path.endswith( '/' ): path += '/' for tries in range( 5 ): try: check_call( [ sudo, '-u', self.user, 'rsync', '-av', 'spark-master:' + path, path ] ) except CalledProcessError as e: log.warn( "rsync returned %i, retrying in 5s", e.returncode ) time.sleep( 5 ) else: return raise RuntimeError( "Failed to copy %s from master" ) def __register_with_master( self ): log.info( "Registering with master" ) for tries in range( 5 ): try: check_call( [ sudo, '-u', self.user, 'ssh', 'spark-master', 'sparkbox-manage-slaves', self.node_ip + ":" + self.__get_host_key( ) ] ) except CalledProcessError as e: log.warn( "rsync returned %i, retrying in 5s", e.returncode ) time.sleep( 5 ) else: return raise RuntimeError( "Failed to register with master" ) def __get_host_key( self ): with open( '/etc/ssh/ssh_host_ecdsa_key.pub' ) as f: return ':'.join( f.read( ).split( )[ :2 ] ) def __publish_host_key( self ): master_host_key = self.__get_host_key( ) self.ec2.create_tags( [ self.master_id ], dict( ssh_host_key=master_host_key ) ) def __create_lazy_dirs( self ): log.info( "Bind-mounting directory structure" ) for (parent, name, persistent) in self.lazy_dirs: assert parent[ 0 ] == os.path.sep logical_path = os.path.join( parent, name ) if persistent is None: tag = 'persist' + logical_path.replace( os.path.sep, '_' ) persistent = less_strict_bool( self.instance_tag( tag ) ) location = self.persistent_dir if persistent else self.ephemeral_dir physical_path = os.path.join( location, parent[ 1: ], name ) mkdir_p( physical_path ) os.chown( physical_path, self.uid, self.gid ) logical_path = os.path.join( parent, name ) check_call( [ 'mount', '--bind', physical_path, logical_path ] ) def __prepare_slaves_file( self ): log.info( "Preparing slaves file" ) tmp_slaves = "/tmp/slaves-" + self.master_ip open( tmp_slaves, "a" ).close( ) os.chown( tmp_slaves, self.uid, self.gid ) self.__symlink( self.install_dir + "/hadoop/etc/hadoop/slaves", tmp_slaves ) self.__symlink( self.install_dir + "/spark/conf/slaves", tmp_slaves ) def __format_namenode( self ): log.info( "Formatting namenode" ) try: check_output( [ 'sudo', '-u', self.user, self.install_dir + '/hadoop/bin/hdfs', 'namenode', '-format', '-nonInteractive' ], stderr=STDOUT ) except CalledProcessError as e: if e.returncode == 1 and 'data appears to exist in Storage Directory' in e.output: pass else: raise def __setup_etc_hosts( self ): hosts = self.instance_tag( 'etc_hosts_entries' ) or "" hosts = parse_etc_hosts_entries( hosts ) hosts[ 'spark-master' ] = self.master_ip self.__patch_etc_hosts( hosts ) def __patch_etc_hosts( self, hosts ): log.info( "Patching /etc/host" ) # FIXME: The handling of /etc/hosts isn't atomic with open( '/etc/hosts', 'r+' ) as etc_hosts: lines = [ line for line in etc_hosts.readlines( ) if not any( host in line for host in hosts.iterkeys( ) ) ] for host, ip in hosts.iteritems( ): if ip: lines.append( "%s %s\n" % (ip, host) ) etc_hosts.seek( 0 ) etc_hosts.truncate( 0 ) etc_hosts.writelines( lines ) def __symlink( self, symlink, target ): if os.path.lexists( symlink ): os.unlink( symlink ) os.symlink( target, symlink ) def __mount_point( self, device ): with open( '/proc/mounts' ) as f: for line in f: line = line.split( ) if line[ 0 ] == device: return line[ 1 ] 
return None def parse_etc_hosts_entries( hosts ): """ >>> parse_etc_hosts_entries("").items() [] >>> parse_etc_hosts_entries("foo:1.2.3.4").items() [('foo', '1.2.3.4')] >>> parse_etc_hosts_entries(" foo : 1.2.3.4 , bar : 2.3.4.5 ").items() [('foo', '1.2.3.4'), ('bar', '2.3.4.5')] """ return OrderedDict( (ip.strip( ), name.strip( )) for ip, name in (entry.split( ':', 1 ) for entry in hosts.split( ',' ) if entry) ) cgcloud-releases-1.6.0/spark/000077500000000000000000000000001301512357500160615ustar00rootroot00000000000000cgcloud-releases-1.6.0/spark/.gitignore000066400000000000000000000000671301512357500200540ustar00rootroot00000000000000/build /dist *.egg-info *.pyc /MANIFEST.in /version.py cgcloud-releases-1.6.0/spark/README.rst000066400000000000000000000064301301512357500175530ustar00rootroot00000000000000The CGCloud plugin for Spark lets you setup a fully configured Apache Spark cluster in EC2 in just minutes, regardless of the number of nodes. While Apache Spark already comes with a script called ``spark-ec2`` that lets you build a cluster in EC2, CGCloud Spark differs from ``spark-ec2`` in the following ways: * Tachyon or Yarn are not included * Setup time does not scale linearly with the number of nodes. Setting up a 100 node cluster takes just as long as setting up a 10 node cluster (2-3 min, as opposed to 45min with ``spark-ec2``). This is made possible by baking all required software into a single AMI. All slave nodes boot up concurrently and join the cluster autonomously in just a few minutes. * Unlike with ``spark-ec2``, the cluster can be stopped and started via the EC2 API or the EC2 console, without involvement of cgcloud. * The Spark services (master and worker) run as an unprivileged user, not root as with spark-ec2. Ditto for the HDFS services (namenode, datanode and secondarynamenode). * The Spark and Hadoop services are started automatically as the instance boots up, via a regular init script. * Nodes can be added easily, simply by booting up new instances from the AMI. They will join the cluster automatically. HDFS may have to be rebalanced after that. * You can customize the AMI that cluster nodes boot from by subclassing the SparkMaster and SparkSlave classes. * CGCloud Spark uses the CGCLoud Agent which takes care of maintaining a list of authorized keypairs on each node. * CGCloud Spark is based on the official Ubuntu Trusty 14.04 LTS, not the Amazon Linux AMI. Prerequisites ============= The ``cgcloud-spark`` package requires that the ``cgcloud-core`` package and its prerequisites_ are present. .. _prerequisites: ../core#prerequisites Installation ============ Read the entire section before pasting any commands and ensure that all prerequisites are installed. It is recommended to install this plugin into the virtualenv you created for CGCloud:: source ~/cgcloud/bin/activate pip install cgcloud-spark If you get ``DistributionNotFound: No distributions matching the version for cgcloud-spark``, try running ``pip install --pre cgcloud-spark``. Be sure to configure_ ``cgcloud-core`` before proceeding. .. _configure: ../core/README.rst#configuration Configuration ============= Modify your ``.profile`` or ``.bash_profile`` by adding the following line:: export CGCLOUD_PLUGINS="cgcloud.spark:$CGCLOUD_PLUGINS" Login and out (or, on OS X, start a new Terminal tab/window). Verify the installation by running:: cgcloud list-roles The output should include the ``spark-box`` role. 
Usage ===== Create a single ``t2.micro`` box to serve as the template for the cluster nodes:: cgcloud create -IT spark-box The ``I`` option stops the box once it is fully set up and takes an image (AMI) of it. The ``T`` option terminates the box after that. Now create a cluster by booting a master and the slaves from that AMI:: cgcloud create-cluster spark -s 2 -t m3.large This will launch a master and two slaves using the ``m3.large`` instance type. SSH into the master:: cgcloud ssh spark-master ... or the first slave:: cgcloud ssh -o 0 spark-slave ... or the second slave:: cgcloud ssh -o 1 spark-slave cgcloud-releases-1.6.0/spark/setup.cfg000066400000000000000000000002251301512357500177010ustar00rootroot00000000000000[pytest] # Look for any python file, the default of test_*.py wouldn't work for us python_files=*.py # Also run doctests addopts = --doctest-modules cgcloud-releases-1.6.0/spark/setup.py000066400000000000000000000013421301512357500175730ustar00rootroot00000000000000from __future__ import absolute_import from setuptools import setup, find_packages from version import cgcloud_version, bd2k_python_lib_dep, fabric_dep setup( name='cgcloud-spark', version=cgcloud_version, author='Hannes Schmidt', author_email='hannes@ucsc.edu', url='https://github.com/BD2KGenomics/cgcloud', description='Setup and manage a Apache Spark cluster in EC2', package_dir={ '': 'src' }, packages=find_packages( 'src' ), namespace_packages=[ 'cgcloud' ], install_requires=[ bd2k_python_lib_dep, 'cgcloud-lib==' + cgcloud_version, 'cgcloud-core==' + cgcloud_version, fabric_dep ] ) cgcloud-releases-1.6.0/spark/src/000077500000000000000000000000001301512357500166505ustar00rootroot00000000000000cgcloud-releases-1.6.0/spark/src/cgcloud/000077500000000000000000000000001301512357500202705ustar00rootroot00000000000000cgcloud-releases-1.6.0/spark/src/cgcloud/__init__.py000066400000000000000000000000741301512357500224020ustar00rootroot00000000000000__import__( 'pkg_resources' ).declare_namespace( __name__ ) cgcloud-releases-1.6.0/spark/src/cgcloud/spark/000077500000000000000000000000001301512357500214105ustar00rootroot00000000000000cgcloud-releases-1.6.0/spark/src/cgcloud/spark/__init__.py000066400000000000000000000004671301512357500235300ustar00rootroot00000000000000def roles( ): from cgcloud.spark.spark_box import SparkBox, SparkSlave, SparkMaster return sorted( locals( ).values( ), key=lambda cls: cls.__name__ ) def cluster_types( ): from cgcloud.spark.spark_cluster import SparkCluster return sorted( locals( ).values( ), key=lambda cls: cls.__name__ ) cgcloud-releases-1.6.0/spark/src/cgcloud/spark/spark_box.py000066400000000000000000000456751301512357500237730ustar00rootroot00000000000000import logging from StringIO import StringIO from collections import namedtuple from bd2k.util.iterables import concat from bd2k.util.strings import interpolate as fmt from fabric.context_managers import settings from fabric.operations import run, put from cgcloud.core.apache import ApacheSoftwareBox from cgcloud.core.box import fabric_task from cgcloud.core.cluster import ClusterBox, ClusterLeader, ClusterWorker from cgcloud.core.common_iam_policies import ec2_read_only_policy from cgcloud.core.generic_boxes import GenericUbuntuTrustyBox from cgcloud.core.ubuntu_box import Python27UpdateUbuntuBox from cgcloud.fabric.operations import sudo, remote_open, pip, sudov from cgcloud.lib.util import abreviated_snake_case_class_name, heredoc log = logging.getLogger( __name__ ) user = 'sparkbox' install_dir = '/opt/sparkbox' log_dir = 
"/var/log/sparkbox" ephemeral_dir = '/mnt/ephemeral' persistent_dir = '/mnt/persistent' var_dir = '/var/lib/sparkbox' hdfs_replication = 1 hadoop_version = '2.6.0' spark_version = '1.6.2' # The major version of Hadoop that the Spark binaries were built against spark_hadoop_version = '2.6' Service = namedtuple( 'Service', [ 'init_name', 'description', 'start_script', 'stop_script' ] ) def hdfs_service( name ): script = '{install_dir}/hadoop/sbin/hadoop-daemon.sh {action} {name}' return Service( init_name='hdfs-' + name, description=fmt( "Hadoop DFS {name} service" ), start_script=fmt( script, action='start' ), stop_script=fmt( script, action='stop' ) ) def spark_service( name, script_suffix=None ): if script_suffix is None: script_suffix = name script = '{install_dir}/spark/sbin/{action}-{script_suffix}.sh' return Service( init_name='spark-' + name, description=fmt( "Spark {name} service" ), start_script=fmt( script, action='start' ), stop_script=fmt( script, action='stop' ) ) hadoop_services = dict( master=[ hdfs_service( 'namenode' ), hdfs_service( 'secondarynamenode' ) ], slave=[ hdfs_service( 'datanode' ) ] ) spark_services = dict( master=[ spark_service( 'master' ) ], # FIXME: The start-slaves.sh script actually does ssh localhost on a slave so I am not sure # this is the right thing to do. OTOH, it is the only script starts Tachyon and sets up the # spark:// URL pointing at the master. We would need to duplicate some of its functionality # if we wanted to eliminate the ssh call. slave=[ spark_service( 'slave', 'slaves' ) ] ) class SparkBox( ApacheSoftwareBox, ClusterBox, GenericUbuntuTrustyBox, Python27UpdateUbuntuBox ): """ A node in a Spark cluster; used only to create an image for master and worker boxes Workers and the master undergo the same setup. Whether a node acts as a master or a slave is determined at boot time, via user data. All slave nodes will be passed the IP of the master node. This implies that the master is started first. As soon as its private IP is assigned, typically seconds after the reservation has been submitted, the slaves can be started up. 
""" @classmethod def get_role_options( cls ): return super( SparkBox, cls ).get_role_options( ) + [ cls.RoleOption( name='etc_hosts_entries', type=str, repr=str, inherited=True, help="Additional entries for /etc/hosts in the form " "'foo:1.2.3.4,bar:2.3.4.5'" ) ] def other_accounts( self ): return super( SparkBox, self ).other_accounts( ) + [ user ] def default_account( self ): return user def __init__( self, ctx ): super( SparkBox, self ).__init__( ctx ) self.lazy_dirs = set( ) def _populate_security_group( self, group_id ): return super( SparkBox, self )._populate_security_group( group_id ) + [ dict( ip_protocol='tcp', from_port=0, to_port=65535, src_security_group_group_id=group_id ), dict( ip_protocol='udp', from_port=0, to_port=65535, src_security_group_group_id=group_id ) ] def _get_iam_ec2_role( self ): iam_role_name, policies = super( SparkBox, self )._get_iam_ec2_role( ) iam_role_name += '--' + abreviated_snake_case_class_name( SparkBox ) policies.update( dict( ec2_read_only=ec2_read_only_policy, ec2_spark_box=dict( Version="2012-10-17", Statement=[ dict( Effect="Allow", Resource="*", Action="ec2:CreateTags" ), dict( Effect="Allow", Resource="*", Action="ec2:CreateVolume" ), dict( Effect="Allow", Resource="*", Action="ec2:AttachVolume" ) ] ) ) ) return iam_role_name, policies @fabric_task def _setup_package_repos( self ): super( SparkBox, self )._setup_package_repos( ) sudo( 'add-apt-repository -y ppa:webupd8team/java' ) def _list_packages_to_install( self ): return super( SparkBox, self )._list_packages_to_install( ) + [ 'oracle-java8-set-default' ] def _get_debconf_selections( self ): return super( SparkBox, self )._get_debconf_selections( ) + [ 'debconf shared/accepted-oracle-license-v1-1 select true', 'debconf shared/accepted-oracle-license-v1-1 seen true' ] def _pre_install_packages( self ): super( SparkBox, self )._pre_install_packages( ) self.__setup_application_user( ) @fabric_task def __setup_application_user( self ): sudo( fmt( 'useradd ' '--home /home/{user} ' '--create-home ' '--user-group ' '--shell /bin/bash {user}' ) ) def _post_install_packages( self ): super( SparkBox, self )._post_install_packages( ) self._propagate_authorized_keys( user, user ) self.__setup_shared_dir( ) self.__setup_ssh_config( ) self.__create_spark_keypair( ) self.__install_hadoop( ) self.__install_spark( ) self.__setup_path( ) self.__install_tools( ) def _shared_dir( self ): return '/home/%s/shared' % self.default_account( ) @fabric_task def __setup_shared_dir( self ): sudov( 'install', '-d', self._shared_dir( ), '-m', '700', '-o', self.default_account( ) ) @fabric_task def __setup_ssh_config( self ): with remote_open( '/etc/ssh/ssh_config', use_sudo=True ) as f: f.write( heredoc( """ Host spark-master CheckHostIP no HashKnownHosts no""" ) ) @fabric_task( user=user ) def __create_spark_keypair( self ): self._provide_imported_keypair( ec2_keypair_name=self.__ec2_keypair_name( self.ctx ), private_key_path=fmt( "/home/{user}/.ssh/id_rsa" ), overwrite_ec2=True ) # This trick allows us to roam freely within the cluster as the app user while still # being able to have keypairs in authorized_keys managed by cgcloudagent such that # external users can login as the app user, too. 
The trick depends on AuthorizedKeysFile # defaulting to or being set to .ssh/autorized_keys and .ssh/autorized_keys2 in sshd_config run( "cd .ssh && cat id_rsa.pub >> authorized_keys2" ) def __ec2_keypair_name( self, ctx ): return user + '@' + ctx.to_aws_name( self.role( ) ) @fabric_task def __install_hadoop( self ): # Download and extract Hadoop path = fmt( 'hadoop/common/hadoop-{hadoop_version}/hadoop-{hadoop_version}.tar.gz' ) self._install_apache_package( path, install_dir ) # Add environment variables to hadoop_env.sh hadoop_env = dict( HADOOP_LOG_DIR=self._lazy_mkdir( log_dir, "hadoop" ), JAVA_HOME='/usr/lib/jvm/java-8-oracle' ) hadoop_env_sh_path = fmt( "{install_dir}/hadoop/etc/hadoop/hadoop-env.sh" ) with remote_open( hadoop_env_sh_path, use_sudo=True ) as hadoop_env_sh: hadoop_env_sh.write( '\n' ) for name, value in hadoop_env.iteritems( ): hadoop_env_sh.write( fmt( 'export {name}="{value}"\n' ) ) # Configure HDFS hdfs_dir = var_dir + "/hdfs" put( use_sudo=True, remote_path=fmt( '{install_dir}/hadoop/etc/hadoop/hdfs-site.xml' ), local_path=StringIO( self.__to_hadoop_xml_config( { 'dfs.replication': str( hdfs_replication ), 'dfs.permissions': 'false', 'dfs.name.dir': self._lazy_mkdir( hdfs_dir, 'name', persistent=True ), 'dfs.data.dir': self._lazy_mkdir( hdfs_dir, 'data', persistent=True ), 'fs.checkpoint.dir': self._lazy_mkdir( hdfs_dir, 'checkpoint', persistent=True ), 'dfs.namenode.http-address': 'spark-master:50070', 'dfs.namenode.secondary.http-address': 'spark-master:50090' } ) ) ) # Configure Hadoop put( use_sudo=True, remote_path=fmt( '{install_dir}/hadoop/etc/hadoop/core-site.xml' ), local_path=StringIO( self.__to_hadoop_xml_config( { 'fs.default.name': 'hdfs://spark-master:8020' } ) ) ) # Make shell auto completion easier sudo( fmt( 'find {install_dir}/hadoop -name "*.cmd" | xargs rm' ) ) # Install upstart jobs self.__register_upstart_jobs( hadoop_services ) @staticmethod def __to_hadoop_xml_config( properties ): """ >>> print SparkBox._SparkBox__to_hadoop_xml_config( {'foo' : 'bar'} ) foo bar """ s = StringIO( ) s.write( heredoc( """ """ ) ) for name, value in properties.iteritems( ): s.write( heredoc( """ {name} {value} """, indent=' ' ) ) s.write( "\n" ) return s.getvalue( ) @fabric_task def __install_spark( self ): # Download and extract Spark path = fmt( 'spark/spark-{spark_version}/spark-{spark_version}-bin-hadoop{spark_hadoop_version}.tgz' ) self._install_apache_package( path, install_dir ) spark_dir = var_dir + "/spark" # Add environment variables to spark_env.sh spark_env_sh_path = fmt( "{install_dir}/spark/conf/spark-env.sh" ) sudo( fmt( "cp {spark_env_sh_path}.template {spark_env_sh_path}" ) ) spark_env = dict( SPARK_LOG_DIR=self._lazy_mkdir( log_dir, "spark" ), SPARK_WORKER_DIR=self._lazy_mkdir( spark_dir, "work" ), SPARK_LOCAL_DIRS=self._lazy_mkdir( spark_dir, "local" ), JAVA_HOME='/usr/lib/jvm/java-8-oracle', SPARK_MASTER_IP='spark-master', HADOOP_CONF_DIR=fmt( "{install_dir}/hadoop/etc/hadoop" ) ) with remote_open( spark_env_sh_path, use_sudo=True ) as spark_env_sh: spark_env_sh.write( '\n' ) for name, value in spark_env.iteritems( ): spark_env_sh.write( fmt( 'export {name}="{value}"\n' ) ) # Configure Spark properties spark_defaults = { 'spark.eventLog.enabled': 'true', 'spark.eventLog.dir': self._lazy_mkdir( spark_dir, "history" ), 'spark.master': 'spark://spark-master:7077' } spark_defaults_conf_path = fmt( "{install_dir}/spark/conf/spark-defaults.conf" ) sudo( fmt( "cp {spark_defaults_conf_path}.template {spark_defaults_conf_path}" ) ) with 
remote_open( spark_defaults_conf_path, use_sudo=True ) as spark_defaults_conf: for name, value in spark_defaults.iteritems( ): spark_defaults_conf.write( fmt( "{name}\t{value}\n" ) ) # Make shell auto completion easier sudo( fmt( 'find {install_dir}/spark -name "*.cmd" | xargs rm' ) ) # Install upstart jobs self.__register_upstart_jobs( spark_services ) @fabric_task def __install_tools( self ): """ Installs the spark-master-discovery init script and its companion spark-tools. The latter is a Python package distribution that's included in cgcloud-spark as a resource. This is in contrast to the cgcloud agent, which is a standalone distribution. """ tools_dir = install_dir + '/tools' admin = self.admin_account( ) sudo( fmt( 'mkdir -p {tools_dir}' ) ) sudo( fmt( 'chown {admin}:{admin} {tools_dir}' ) ) run( fmt( 'virtualenv --no-pip {tools_dir}' ) ) run( fmt( '{tools_dir}/bin/easy_install pip==1.5.2' ) ) with settings( forward_agent=True ): with self._project_artifacts( 'spark-tools' ) as artifacts: pip( use_sudo=True, path=tools_dir + '/bin/pip', args=concat( 'install', artifacts ) ) sudo( fmt( 'chown -R root:root {tools_dir}' ) ) spark_tools = "SparkTools(**%r)" % dict( user=user, shared_dir=self._shared_dir( ), install_dir=install_dir, ephemeral_dir=ephemeral_dir, persistent_dir=persistent_dir, lazy_dirs=self.lazy_dirs ) self.lazy_dirs = None # make sure it can't be used anymore once we are done with it self._register_init_script( "sparkbox", heredoc( """ description "Spark/HDFS master discovery" console log start on (local-filesystems and net-device-up IFACE!=lo) stop on runlevel [!2345] pre-start script for i in 1 2 3; do if {tools_dir}/bin/python2.7 - <(md5sum > test.bin.md5) ' '| hdfs dfs -put -f - /test.bin' % test_file_size_mb ) self._ssh( master, 'hdfs dfs -put -f test.bin.md5 /' ) finally: self._terminate_cluster( ) self._create_cluster( '--ebs-volume-size', str( volume_size_gb ) ) try: self._wait_for_slaves( ) self._ssh( master, 'test "$(hdfs dfs -cat /test.bin.md5)" ' '== "$(hdfs dfs -cat /test.bin | md5sum)"' ) finally: if self.cleanup: self._terminate_cluster( ) finally: if self.cleanup: self._delete_volumes( ) def _create_cluster( self, *args ): self._cgcloud( 'create-cluster', 'spark', '-t=m3.medium', '-s', str( num_slaves ), *args ) def _terminate_cluster( self ): self._cgcloud( 'terminate-cluster', 'spark' ) def _wait_for_slaves( self ): delay = 5 expiration = time.time( ) + 10 * 60 commands = [ 'test $(cat %s/spark/conf/slaves | wc -l) = %s' % (install_dir, num_slaves), "hdfs dfsadmin -report -live | fgrep 'Live datanodes (%s)'" % num_slaves ] for command in commands: while True: try: self._ssh( master, command ) except SystemExit: if time.time( ) + delay >= expiration: self.fail( "Cluster didn't come up in time" ) time.sleep( delay ) else: break @unittest.skip( 'Only for interactive invocation' ) def test_word_count_only( self ): self._word_count( ) def _word_count( self ): self._ssh( master, 'hdfs dfs -rm -r -f -skipTrash /test.txt /test.txt.counts' ) self._ssh( master, 'rm -rf test.txt test.txt.counts' ) self._ssh( master, 'curl -o test.txt https://www.apache.org/licenses/LICENSE-2.0.txt' ) self._ssh( master, 'hdfs dfs -put -f test.txt /' ) def word_count( ): # noinspection PyUnresolvedReferences from pyspark import SparkContext sc = SparkContext( appName='PythonPi' ) input = sc.textFile( '/test.txt' ) counts = (input .flatMap( lambda line: line.split( " " ) ) .map( lambda word: (word, 1) ) .reduceByKey( lambda a, b: a + b )) counts.saveAsTextFile( '/test.txt.counts' ) 
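        # --- Illustrative sketch (editorial addition, not part of the original test) ---
        # The statements below ship word_count() to the master: inspect.getsource() grabs the
        # nested function's source, the first line (the 'def' header) is dropped, and
        # textwrap.dedent() turns what remains into a standalone top-level script. A minimal,
        # side-effect-free demonstration of the same pattern (the helper name is hypothetical):
        def _shipping_demo( ):
            from inspect import getsource
            from textwrap import dedent

            def job( ):
                import platform
                print 'hello from', platform.node( )

            # Dropping the 'def' header and dedenting yields a script body that runs at top level.
            return dedent( '\n'.join( getsource( job ).split( '\n' )[ 1: ] ) )
        # --- end sketch ---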
script = 'wordcount.py' body = dedent( '\n'.join( getsource( word_count ).split( '\n' )[ 1: ] ) ) self._send_file( master, body, script ) self._ssh( master, 'spark-submit ' + script ) self._ssh( master, 'hdfs dfs -get /test.txt.counts' ) self._ssh( master, 'test -f test.txt.counts/_SUCCESS' ) for i in xrange( num_slaves ): self._ssh( master, 'test -s test.txt.counts/part-%05d' % i ) def _delete_volumes( self ): pass cgcloud-releases-1.6.0/toil/000077500000000000000000000000001301512357500157105ustar00rootroot00000000000000cgcloud-releases-1.6.0/toil/.gitignore000066400000000000000000000000671301512357500177030ustar00rootroot00000000000000/build /dist *.egg-info *.pyc /MANIFEST.in /version.py cgcloud-releases-1.6.0/toil/README.rst000066400000000000000000000036321301512357500174030ustar00rootroot00000000000000The CGCloud plugin for Toil lets you setup a fully configured Toil/Mesos cluster in EC2 in just minutes, regardless of the number of nodes. Prerequisites ============= The ``cgcloud-toil`` package requires that the ``cgcloud-core`` package and its prerequisites_ are present. .. _prerequisites: ../core#prerequisites Installation ============ Read the entire section before pasting any commands and ensure that all prerequisites are installed. It is recommended to install this plugin into the virtualenv you created for CGCloud:: source ~/cgcloud/bin/activate pip install cgcloud-toil If you get ``DistributionNotFound: No distributions matching the version for cgcloud-toil``, try running ``pip install --pre cgcloud-toil``. Be sure to configure_ ``cgcloud-core`` before proceeding. .. _configure: ../core/README.rst#configuration Configuration ============= Modify your ``.profile`` or ``.bash_profile`` by adding the following line:: export CGCLOUD_PLUGINS="cgcloud.toil:$CGCLOUD_PLUGINS" Login and out (or, on OS X, start a new Terminal tab/window). Verify the installation by running:: cgcloud list-roles The output should include the ``toil-box`` role. Usage ===== Create a single ``t2.micro`` box to serve as the template for the cluster nodes:: cgcloud create -IT toil-box The ``I`` option stops the box once it is fully set up and takes an image (AMI) of it. The ``T`` option terminates the box after that. Substitute ``toil-latest-box`` for ``toil-box`` if you want to use the latest unstable release of Toil. Now create a cluster by booting a leader and the workers from that AMI:: cgcloud create-cluster toil -s 2 -t m3.large This will launch a leader and two workers using the ``m3.large`` instance type. SSH into the leader:: cgcloud ssh toil-leader ... or the first worker:: cgcloud ssh -o 0 toil-worker ... 
or the second worker:: cgcloud ssh -o 1 toil-worker cgcloud-releases-1.6.0/toil/setup.py000066400000000000000000000014561301512357500174300ustar00rootroot00000000000000from __future__ import absolute_import from setuptools import setup, find_packages from version import cgcloud_version, bd2k_python_lib_dep, fabric_dep setup( name='cgcloud-toil', version=cgcloud_version, author='Christopher Ketchum', author_email='cketchum@ucsc.edu', url='https://github.com/BD2KGenomics/cgcloud', description='Setup and manage a toil and Apache Mesos cluster in EC2', package_dir={ '': 'src' }, packages=find_packages( 'src' ), namespace_packages=[ 'cgcloud' ], install_requires=[ 'cgcloud-lib==' + cgcloud_version, 'cgcloud-core==' + cgcloud_version, 'cgcloud-mesos==' + cgcloud_version, bd2k_python_lib_dep, fabric_dep ] ) cgcloud-releases-1.6.0/toil/src/000077500000000000000000000000001301512357500164775ustar00rootroot00000000000000cgcloud-releases-1.6.0/toil/src/cgcloud/000077500000000000000000000000001301512357500201175ustar00rootroot00000000000000cgcloud-releases-1.6.0/toil/src/cgcloud/__init__.py000066400000000000000000000000741301512357500222310ustar00rootroot00000000000000__import__( 'pkg_resources' ).declare_namespace( __name__ ) cgcloud-releases-1.6.0/toil/src/cgcloud/toil/000077500000000000000000000000001301512357500210665ustar00rootroot00000000000000cgcloud-releases-1.6.0/toil/src/cgcloud/toil/__init__.py000066400000000000000000000007541301512357500232050ustar00rootroot00000000000000def roles( ): from cgcloud.toil.toil_box import (ToilLegacyBox, ToilBox, ToilLatestBox, ToilLeader, ToilWorker) return sorted( locals( ).values( ), key=lambda cls: cls.__name__ ) def cluster_types( ): from cgcloud.toil.toil_cluster import ToilCluster return sorted( locals( ).values( ), key=lambda cls: cls.__name__ ) cgcloud-releases-1.6.0/toil/src/cgcloud/toil/test/000077500000000000000000000000001301512357500220455ustar00rootroot00000000000000cgcloud-releases-1.6.0/toil/src/cgcloud/toil/test/__init__.py000066400000000000000000000000261301512357500241540ustar00rootroot00000000000000__author__ = 'hannes' cgcloud-releases-1.6.0/toil/src/cgcloud/toil/test/conftest.py000066400000000000000000000000701301512357500242410ustar00rootroot00000000000000from cgcloud.core.test.conftest import pytest_configure cgcloud-releases-1.6.0/toil/src/cgcloud/toil/test/test_toil.py000066400000000000000000000142631301512357500244330ustar00rootroot00000000000000import logging import os import tempfile import time import unittest from inspect import getsource from textwrap import dedent from bd2k.util.exceptions import panic from cgcloud.mesos.test import MesosTestCase from cgcloud.toil.toil_box import ToilLeader, ToilBox from cgcloud.toil.toil_box import ToilWorker log = logging.getLogger( __name__ ) leader = ToilLeader.role( ) worker = ToilWorker.role( ) node = ToilBox.role( ) num_workers = 2 class ToilClusterTests( MesosTestCase ): """ Covers the creation of a Toil cluster from scratch and running a simple Toil job that invokes Docker on it. 
""" cleanup = True create_image = True @classmethod def setUpClass( cls ): os.environ[ 'CGCLOUD_PLUGINS' ] = 'cgcloud.toil:cgcloud.mesos' super( ToilClusterTests, cls ).setUpClass( ) if cls.create_image: cls._cgcloud( 'create', node, '-IT' ) @classmethod def tearDownClass( cls ): if cls.cleanup and cls.create_image: cls._cgcloud( 'delete-image', node ) super( ToilClusterTests, cls ).tearDownClass( ) def test_hello_world( self ): shared_dir = self._prepare_shared_dir( ) self._create_cluster( 1, '--share', shared_dir ) try: self._assert_remote_failure( leader ) self._wait_for_workers( ) self._assert_shared_dir( ) self._assert_s3am( ) self._hello_world( ) finally: if self.cleanup: self._terminate_cluster( ) @unittest.skip( 'Only for interactive invocation' ) def test_hello_world_only( self ): self._hello_world( ) def _prepare_shared_dir( self ): shared_dir = tempfile.mkdtemp( ) with open( os.path.join( shared_dir, 'foo' ), 'w' ) as f: f.write( 'bar' ) # Append / so rsync transfers the content of directory not the directory itself shared_dir = os.path.join( shared_dir, '' ) return shared_dir def _assert_shared_dir( self ): command = 'test "$(cat shared/foo)" == bar' self._ssh( leader, command ) for i in xrange( num_workers ): self._ssh( worker, command, ordinal=i ) def _assert_s3am( self ): self._ssh( leader, 's3am --help' ) def _create_cluster( self, growth, *args ): self._cgcloud( 'create-cluster', 'toil', '-s=%d' % (num_workers - growth), '--ssh-opts', self.ssh_opts_str( ), *args ) if growth: self._cgcloud( 'grow-cluster', 'toil', '-s=%d' % growth ) def _terminate_cluster( self ): self._cgcloud( 'terminate-cluster', 'toil' ) def _hello_world( self ): script = 'hello_world.py' def hello_world( ): # noinspection PyUnresolvedReferences from toil.job import Job from subprocess import check_output import os def hello( name ): assert os.environ[ 'TOIL_WORKDIR' ] == '/var/lib/toil' return check_output( [ 'docker', 'run', '-e', 'FOO=' + name, 'ubuntu', 'bash', '-c', 'echo -n Hello, $FOO!' ] ) if __name__ == '__main__': options = Job.Runner.getDefaultArgumentParser( ).parse_args( ) job = Job.wrapFn( hello, "world", cores=1, memory=1e6, disk=1e6 ) result = Job.Runner.startToil( job, options ) assert result == 'Hello, world!' body = dedent( '\n'.join( getsource( hello_world ).split( '\n' )[ 1: ] ) ) self._send_file( leader, body, script ) def hex64( x ): return hex( int( x ) )[ 2: ].zfill( 8 ) # Could use UUID but prefer historical ordering. Time in s plus PID is sufficiently unique. job_store = 'test-%s%s-toil-job-store' % (hex64( time.time( ) ), hex64( os.getpid( ) )) job_store = ':'.join( ('aws', self.ctx.region, job_store) ) self._ssh( leader, 'toil', 'clean', job_store ) try: self._ssh( leader, 'python2.7', script, '--batchSystem=mesos', '--mesosMaster=mesos-master:5050', job_store ) except: with panic( log ): self._ssh( leader, 'toil', 'clean', job_store ) def test_persistence( self ): # Check that /var/lib/docker is on the persistent volume and that /var/lib/toil can be # switched between ephemeral and persistent. 
[ Would use docstring but confuses pytest ] foo = '/var/lib/docker/foo' bar = '/var/lib/toil/bar' def compare_device( oper ): return "test $(stat -c '%d' " + foo + ") " + oper + " $(stat -c '%d' " + bar + ")" volume_size_gb = 1 self._create_cluster( 0, '--ebs-volume-size', str( volume_size_gb ), '-O', 'persist_var_lib_toil=True' ) try: try: self._wait_for_workers( ) for ordinal in range( num_workers ): self._ssh( worker, 'sudo touch ' + foo, admin=True, o=ordinal ) self._ssh( worker, 'touch ' + bar, o=ordinal ) # Ensure both files are on the same device (/mnt/persistent) self._ssh( worker, compare_device( "==" ) ) finally: self._terminate_cluster( ) self._create_cluster( 0, '--ebs-volume-size', str( volume_size_gb ), '-O', 'persist_var_lib_toil=False' ) try: self._wait_for_workers( ) for ordinal in range( num_workers ): self._ssh( worker, 'sudo test -f ' + foo, admin=True, o=ordinal ) self._ssh( worker, 'touch ' + bar, o=ordinal ) # Ensure both files are on different devices (/mnt/persistent) self._ssh( worker, compare_device( "!=" ) ) finally: if self.cleanup: self._terminate_cluster( ) finally: if self.cleanup: self._delete_volumes( ) def _wait_for_workers( self ): self._wait_for_mesos_slaves( leader, num_workers ) def _delete_volumes( self ): pass cgcloud-releases-1.6.0/toil/src/cgcloud/toil/toil_box.py000066400000000000000000000175361301512357500232730ustar00rootroot00000000000000import logging import os import re from abc import abstractmethod from bd2k.util import strict_bool from bd2k.util.iterables import concat from fabric.operations import put from cgcloud.core.box import fabric_task from cgcloud.core.cluster import ClusterBox, ClusterWorker, ClusterLeader from cgcloud.core.common_iam_policies import ec2_full_policy, s3_full_policy, sdb_full_policy from cgcloud.core.docker_box import DockerBox from cgcloud.core.version import s3am_dep from cgcloud.fabric.operations import pip, remote_sudo_popen, sudo, virtualenv from cgcloud.lib.util import abreviated_snake_case_class_name, heredoc, UserError from cgcloud.mesos.mesos_box import MesosBoxSupport, user, persistent_dir log = logging.getLogger( __name__ ) class ToilBoxSupport( MesosBoxSupport, DockerBox, ClusterBox ): """ A box with Mesos, Toil and their dependencies installed. """ def _list_packages_to_install( self ): return super( ToilBoxSupport, self )._list_packages_to_install( ) + [ 'python-dev', 'gcc', 'make', 'libcurl4-openssl-dev', # Only for S3AM 'libffi-dev' ] # pynacl -> toil, Azure client-side encryption def _post_install_mesos( self ): super( ToilBoxSupport, self )._post_install_mesos( ) # Override this method instead of _post_install_packages() such that this is run before self.__install_toil( ) self.__install_s3am( ) def _docker_users( self ): return super( ToilBoxSupport, self )._docker_users( ) + [ user ] def _docker_data_prefixes( self ): # We prefer Docker to be stored on the persistent volume if there is one return concat( persistent_dir, super( ToilBoxSupport, self )._docker_data_prefixes( ) ) @fabric_task def _setup_docker( self ): super( ToilBoxSupport, self )._setup_docker( ) # The docker and dockerbox init jobs depend on /mnt/persistent which is set up by the # mesosbox job. Adding a dependency of the docker job on mesosbox should satsify that # dependency. 
with remote_sudo_popen( 'patch -d /etc/init' ) as patch: patch.write( heredoc( """ --- docker.conf.orig 2015-12-18 23:28:48.693072560 +0000 +++ docker.conf 2015-12-18 23:40:30.553072560 +0000 @@ -1,6 +1,6 @@ description "Docker daemon" -start on (local-filesystems and net-device-up IFACE!=lo) +start on (local-filesystems and net-device-up IFACE!=lo and started mesosbox) stop on runlevel [!2345] limit nofile 524288 1048576 limit nproc 524288 1048576""" ) ) def _enable_agent_metrics( self ): return True @classmethod def get_role_options( cls ): return super( ToilBoxSupport, cls ).get_role_options( ) + [ cls.RoleOption( name='persist_var_lib_toil', type=strict_bool, repr=repr, inherited=True, help='True if /var/lib/toil should be persistent.' ) ] def _get_iam_ec2_role( self ): iam_role_name, policies = super( ToilBoxSupport, self )._get_iam_ec2_role( ) iam_role_name += '--' + abreviated_snake_case_class_name( ToilBoxSupport ) policies.update( dict( toil_iam_pass_role=dict( Version="2012-10-17", Statement=[ dict( Effect="Allow", Resource=self._role_arn( ), Action="iam:PassRole" ) ] ), ec2_full=ec2_full_policy, s3_full=s3_full_policy, sbd_full=sdb_full_policy, ec2_toil_box=dict( Version="2012-10-17", Statement=[ dict( Effect="Allow", Resource="*", Action="ec2:CreateTags" ), dict( Effect="Allow", Resource="*", Action="ec2:CreateVolume" ), dict( Effect="Allow", Resource="*", Action="ec2:AttachVolume" ) ] ) ) ) return iam_role_name, policies @abstractmethod def _toil_pip_args( self ): raise NotImplementedError() @fabric_task def __install_toil( self ): # FIXME: consider using a virtualenv for Toil like we do for s3am # Older versions of pip don't support the 'extra' mechanism used by Toil's setup.py pip( 'install --upgrade pip', use_sudo=True ) pip( concat( 'install', self._toil_pip_args( ) ), use_sudo=True ) self._lazy_mkdir( '/var/lib', 'toil', persistent=None ) sudo( 'echo "TOIL_WORKDIR=/var/lib/toil" >> /etc/environment' ) @fabric_task def __install_s3am( self ): virtualenv( name='s3am', distributions=[ s3am_dep ], pip_distribution='pip==8.0.2', executable='s3am' ) class ToilLegacyBox( ToilBoxSupport ): """ A box with Mesos, Toil 3.1.6 and their dependencies installed. """ def _toil_pip_args( self ): return [ 'toil[aws,mesos,encryption]==3.1.6' ] class ToilBox( ToilBoxSupport ): """ A box with Mesos, the latest stable Toil release and their dependencies installed. """ default_spec = 'toil[aws,mesos,encryption,cwl]==3.3.3' @classmethod def get_role_options( cls ): return super( ToilBox, cls ).get_role_options( ) + [ cls.RoleOption( name='toil_sdists', type=cls.parse_sdists, repr=cls.unparse_sdists, inherited=False, help="A space-separated list of paths to sdists. If this option is " "present, pip will be used to install the specified sdists " "instead of %s. Each path may be immediately followed by a list " "of extras enclosed in square brackets. The Toil sdist should " "come last. An sdist is a .tar.gz file containing the source " "distribution of a Python project. It is typically created by " "running 'python setup.py sdist' from the project root, or, " "in the case of Toil and CGCloud, running 'make sdist'. Example: " "'%s'. " % (cls.default_spec, cls.unparse_sdists( [ ('../cgcloud-lib-1.4a1.dev0.tar.gz', ''), ('dist/toil-3.2.0a2.tar.gz', '[aws,mesos,cgcloud]') ] )) ) ] # Accepts "foo", "foo[bar]" and "foo[bar,bla]". 
Rejects "foo[]", "foo[bar]x" sdist_re = re.compile( r'([^\[\]]+)((?:\[[^\]]+\])?)$' ) @classmethod def parse_sdists( cls, s ): try: return [ cls.sdist_re.match( sdist ).groups( ) for sdist in s.split( ) ] except: raise UserError( "'%s' is not a valid value for the toil_sdists option." % s ) @classmethod def unparse_sdists( cls, sdists ): return ' '.join( path + extra for path, extra in sdists ) @fabric_task def _toil_pip_args( self ): sdists = self.role_options.get( 'toil_sdists' ) if sdists: result = [ ] for path, extra in sdists: put( local_path=path ) result.append( os.path.basename( path ) + extra ) return result else: return [ '--pre', self.default_spec ] class ToilLatestBox( ToilBox ): """ A box with Mesos, the latest unstable release of Toil and their dependencies installed """ default_spec = 'toil[aws,mesos,encryption,cwl]<=3.5.0' class ToilLeader( ToilBox, ClusterLeader ): """ Leader of a cluster of boxes booted from a toil-box, toil-latest-box or toil-legacy-box image """ pass class ToilWorker( ToilBox, ClusterWorker ): """ Worker in a cluster of boxes booted from a toil-box, toil-latest-box or toil-legacy-box image """ pass cgcloud-releases-1.6.0/toil/src/cgcloud/toil/toil_cluster.py000066400000000000000000000004151301512357500241500ustar00rootroot00000000000000from cgcloud.core.cluster import Cluster from cgcloud.toil.toil_box import ToilLeader, ToilWorker class ToilCluster( Cluster ): @property def worker_role( self ): return ToilWorker @property def leader_role( self ): return ToilLeader cgcloud-releases-1.6.0/version.py000066400000000000000000000012241301512357500167770ustar00rootroot00000000000000cgcloud_version = '1.6.0' bd2k_python_lib_dep = 'bd2k-python-lib>=1.14a1.dev37' boto_dep = 'boto==2.38.0' fabric_dep = 'Fabric==1.10.3' s3am_dep = 's3am==2.0a1.dev105' def main( ): import os from pkg_resources import parse_version is_release_build = not parse_version( cgcloud_version ).is_prerelease suffix = '' if is_release_build else '.dev' + os.environ.get( 'BUILD_NUMBER', '0' ) for name, value in globals( ).items( ): if name.startswith( 'cgcloud_' ): value += suffix if name.split( '_' )[ -1 ] in ('dep', 'version'): print "%s='%s'" % (name, value) if __name__ == '__main__': main( )