bioblend-0.7.0/.gitignore
*.py[co]
*~

# Packages
*.egg
*.egg-info
dist
build
eggs
parts
bin
var
sdist
develop-eggs
.installed.cfg
.eggs

# Installer logs
pip-log.txt

# Unit test / coverage reports
.coverage
.tox

# Translations
*.mo

# Mr Developer
.mr.developer.cfg

# Vim
*.swp

# Code coverage
cover

# eclipse/pydev
.project
.pydevproject

# compiled docs
docs/_build

# Python virtualenv
.venv

bioblend-0.7.0/.travis.yml
sudo: false
language: python
python:
  - "2.7"
env:
  - TOX_ENV=py34 GALAXY_VERSION=dev
  - TOX_ENV=py33 GALAXY_VERSION=dev
  - TOX_ENV=py27 GALAXY_VERSION=dev
  - TOX_ENV=py27 GALAXY_VERSION=release_15.07
  - TOX_ENV=py27 GALAXY_VERSION=release_15.05
  - TOX_ENV=py27 GALAXY_VERSION=release_15.03
  - TOX_ENV=py27 GALAXY_VERSION=release_15.01
  - TOX_ENV=py27 GALAXY_VERSION=release_14.10
  - TOX_ENV=py27 GALAXY_VERSION=release_14.08
  - TOX_ENV=py27 GALAXY_VERSION=release_14.06
  - TOX_ENV=py27 GALAXY_VERSION=release_14.04
  - TOX_ENV=py27 GALAXY_VERSION=release_14.02
  - TOX_ENV=py26 GALAXY_VERSION=dev

install:
  - python setup.py install
  - cp run_galaxy.sh $HOME
  - pip install "tox>=1.8.0"

before_script:
  # Install Galaxy
  - cd $HOME
  - wget https://github.com/galaxyproject/galaxy/archive/${GALAXY_VERSION}.tar.gz
  - tar xvzf ${GALAXY_VERSION}.tar.gz | tail
  # Releases/dev branch are named differently
  - cd galaxy-${GALAXY_VERSION}/
  # Create a PostgreSQL database for Galaxy. The default SQLite3 database makes tests fail randomly because of "database locked" errors.
- createdb -U postgres galaxy-travis - if [ -f universe_wsgi.ini.sample ]; then GALAXY_CONFIG_FILE=universe_wsgi.ini; else GALAXY_CONFIG_FILE=config/galaxy.ini; fi - if [ -f universe_wsgi.ini.sample ]; then GALAXY_CONFIG_DIR=.; else GALAXY_CONFIG_DIR=config; fi # Change Galaxy configuration to use the newly created PostgreSQL database - sed -e 's|^#database_connection.*|database_connection = postgresql://postgres:@localhost/galaxy-travis|' $GALAXY_CONFIG_FILE.sample > $GALAXY_CONFIG_FILE # Setup Galaxy master API key and admin user - GALAXY_MASTER_API_KEY=`date --rfc-3339=ns | md5sum | cut -f 1 -d ' '` - GALAXY_USER_EMAIL=${USER}@localhost.localdomain - sed -i -e "s/^#master_api_key.*/master_api_key = $GALAXY_MASTER_API_KEY/" -e "s/^#admin_users.*/admin_users = $GALAXY_USER_EMAIL/" $GALAXY_CONFIG_FILE # Change configuration needed by many tests - sed -i -e 's/^#allow_user_dataset_purge.*/allow_user_dataset_purge = True/' $GALAXY_CONFIG_FILE # Change Galaxy configuration needed by some library tests - sed -i -e 's/^#allow_library_path_paste.*/allow_library_path_paste = True/' $GALAXY_CONFIG_FILE - sed -i -e 's/^#enable_beta_workflow_modules.*/enable_beta_workflow_modules = True/' $GALAXY_CONFIG_FILE - if [ -f test/functional/tools/samples_tool_conf.xml ]; then sed -i -e "s/^#tool_config_file.*/tool_config_file = $GALAXY_CONFIG_DIR\/tool_conf.xml.sample,$GALAXY_CONFIG_DIR\/shed_tool_conf.xml.sample,test\/functional\/tools\/samples_tool_conf.xml/" $GALAXY_CONFIG_FILE; fi # Start Galaxy and wait for successful server start - GALAXY_RUN_ALL=1 ../run_galaxy.sh --daemon --wait # Use the master API key to create the admin user and get its API key - export BIOBLEND_GALAXY_URL=http://localhost:8080 - GALAXY_USER=$USER - GALAXY_USER_PASSWD=`date --rfc-3339=ns | md5sum | cut -f 1 -d ' '` - export BIOBLEND_GALAXY_API_KEY=`python $TRAVIS_BUILD_DIR/docs/examples/create_user_get_api_key.py $BIOBLEND_GALAXY_URL $GALAXY_MASTER_API_KEY $GALAXY_USER $GALAXY_USER_EMAIL $GALAXY_USER_PASSWD` - echo "Created new Galaxy user $GALAXY_USER with email $GALAXY_USER_EMAIL , password $GALAXY_USER_PASSWD and API key $BIOBLEND_GALAXY_API_KEY" script: # Run nosetests through setuptools, so it will install test depedencies - cd $TRAVIS_BUILD_DIR && tox -e $TOX_ENV bioblend-0.7.0/ABOUT.rst000066400000000000000000000044121261571066300147100ustar00rootroot00000000000000`BioBlend `_ is a Python library for interacting with `CloudMan`_ and `Galaxy`_'s API. BioBlend is supported and tested on: - Python 2.6, 2.7, 3.3 and 3.4 - Galaxy release_14.02 and later. Conceptually, it makes it possible to script and automate the process of cloud infrastructure provisioning and scaling via CloudMan, and running of analyses via Galaxy. 
In reality, it makes it possible to do things like this:

- Create a CloudMan compute cluster, via an API and directly from your local machine::

    from bioblend.cloudman import CloudManConfig
    from bioblend.cloudman import CloudManInstance
    cfg = CloudManConfig('<your cloud access key>', '<your cloud secret key>', 'My CloudMan', 'ami-<ID>', 'm1.small', '<password>')
    cmi = CloudManInstance.launch_instance(cfg)
    cmi.get_status()

- Reconnect to an existing CloudMan instance and manipulate it::

    from bioblend.cloudman import CloudManInstance
    cmi = CloudManInstance("<CloudMan URL>", "<password>")
    cmi.add_nodes(3)
    cluster_status = cmi.get_status()
    cmi.remove_nodes(2)

- Interact with Galaxy via a straightforward API::

    from bioblend.galaxy import GalaxyInstance
    gi = GalaxyInstance('<Galaxy IP>', key='your API key')
    libs = gi.libraries.get_libraries()
    gi.workflows.show_workflow('workflow ID')
    gi.workflows.run_workflow('workflow ID', input_dataset_map)

- Interact with Galaxy via an object-oriented API::

    from bioblend.galaxy.objects import GalaxyInstance
    gi = GalaxyInstance("URL", "API_KEY")
    wf = gi.workflows.list()[0]
    hist = gi.histories.list()[0]
    inputs = hist.get_datasets()[:2]
    input_map = dict(zip(wf.input_labels, inputs))
    params = {"Paste1": {"delimiter": "U"}}
    wf.run(input_map, "wf_output", params=params)

.. note::
    Although this library allows you to blend these two services into a
    cohesive unit, the library itself can be used with either service
    irrespective of the other. For example, you can use it to just manipulate
    CloudMan clusters or to script the interactions with an instance of Galaxy
    running on your laptop.

.. References/hyperlinks used above
.. _CloudMan: http://usecloudman.org/
.. _Galaxy: http://usegalaxy.org/
.. _Git repository: https://github.com/afgane/bioblend

bioblend-0.7.0/CHANGELOG.md
### BioBlend v0.7.0 - November 2, 2015

* BioBlend.objects: enable import of workflows containing dataset collection inputs.
* Implement support for the modern Galaxy workflow APIs (i.e. delayed scheduling).
* Implement APIs to search Tool Shed repositories and tools.
* Add support for uploading (importing) from FTP (thanks to Eric Rasche).
* Add ``to_posix_lines`` and ``space_to_tab`` params to ``upload_file()``, ``upload_from_ftp()`` and ``paste_content()`` methods of ``ToolClient``.
* BioBlend.objects: add ``upload_from_ftp()`` method to ``History``.
* Updated the testing framework to work with Galaxy wheels; use TravisCI's container infrastructure; test Galaxy release 15.07.
* Updated ``CloudManLauncher``'s ``launch`` method to accept a ``subnet_id`` parameter, for VPC support (thanks to Matthew Ralston).
* Properly pass extra parameters to cloud instance userdata.
* Update placement finding methods and the `get_clusters_pd` method to return a dict instead of a list so error messages can be included.
* A number of documentation improvements and minor updates/fixes (see individual commits).

### BioBlend v0.6.1 - July 27, 2015

* BioBlend.objects: Rename ``ObjDatasetClient`` abstract class to ``ObjDatasetContainerClient``.
* BioBlend.objects: Add ``ABCMeta`` metaclass and ``list()`` method to ``ObjClient``.
* BioBlend.objects: Add ``io_details`` and ``link_details`` parameters to ``ObjToolClient.get()`` method.
* Open port 8800 when launching cloud instances for use by the NodeJS proxy for Galaxy IPython Interactive Environments.
* When launching cloud instances, propagate error messages back to the caller.
The return types for methods ``create_cm_security_group``, ``create_key_pair`` in ``CloudManLauncher`` class have changed as a result of this. ### BioBlend v0.6.0 - June 30, 2015 * Add support for Python >= 3.3. * Add ``get_library_permissions()`` method to ``LibraryClient``. * Add ``update_group()``, ``get_group_users()``, ``get_group_roles()``, ``add_group_user()``, ``add_group_role()``, ``delete_group_user()`` and ``delete_group_role()`` methods to ``GroupsClient``. * Add ``full_details`` parameter to ``JobsClient.show_job()`` (thanks to Rossano Atzeni). * BioBlend.objects: add ``ObjJobClient`` and ``Job`` wrapper (thanks to Rossano Atzeni). * BioBlend.objects: add check to verify that all tools in a workflow are installed on the Galaxy instance (thanks to Gianmauro Cuccuru). * Remove several deprecated parameters: see commits [19e168f](https://github.com/galaxyproject/bioblend/commit/19e168f5342f4c791d37694d7039a85f2669df71) and [442ae98](https://github.com/galaxyproject/bioblend/commit/442ae98037be7455d57be15542553dc848d99431). * Verify SSL certificates by default. * Add documentation about the Tool Shed and properly link all the docs on ReadTheDocs. * Solidify automated testing by using [tox](https://tox.readthedocs.org/) and [flake8](https://gitlab.com/pycqa/flake8). ### BioBlend v0.5.3 - March 18, 2015 * Project source moved to new URL - https://github.com/galaxyproject/bioblend * Huge improvements to automated testing, tests now run against Galaxy release_14.02 and all later versions to ensure backward compatibility (see `.travis.yml` for details). * Many documentation improvements (thanks to Eric Rasche). * Add Galaxy clients for the tool data tables, the roles, and library folders (thanks to Anthony Bretaudeau). * Add method to get the standard error and standard output for the job corresponding to a Galaxy dataset (thanks to Anthony Bretaudeau). * Add ``get_state()`` method to ``JobsClient``. * Add ``copy_from_dataset()`` method to ``LibraryClient``. * Add ``create_repository()`` method to ``ToolShedClient`` (thanks to Eric Rasche). * Fix ``DatasetClient.download_dataset()`` for certain proxied Galaxy deployments. * Make ``LibraryClient._get_root_folder_id()`` method safer and faster for Galaxy release_13.06 and later. * Deprecate and ignore invalid ``deleted`` parameter to ``WorkflowClient.get_workflows()``. * CloudMan: Add method to fetch instance types. * CloudMan: Update cluster options to reflect change to SLURM. * BioBlend.objects: Deprecate and ignore invalid ``deleted`` parameter to ``ObjWorkflowClient.list()``. * BioBlend.objects: Add ``paste_content()`` method to ``History`` objects. * BioBlend.objects: Add ``copy_from_dataset()`` method and ``root_folder`` property to ``Library`` objects. * BioBlend.objects: Add ``container`` and ``deleted`` attributes to ``Folder`` objects. * BioBlend.objects: Set the ``parent`` attribute of a ``Folder`` object to its parent folder object (thanks to John M. Eppley). * BioBlend.objects: Add ``deleted`` parameter to ``list()`` method of libraries and histories. * BioBlend.objects: Add ``state`` and ``state_details`` attributes to ``History`` objects (thanks to Gianmauro Cuccuru). * BioBlend.objects: Rename ``upload_dataset()`` method to ``upload_file()`` for ``History`` objects. * BioBlend.objects: Rename ``input_ids`` and ``output_ids`` attributes of ``Workflow`` objects to ``source_ids`` and ``sink_ids`` respectively. * Add ``run_bioblend_tests.sh`` script (useful for Continuous Integration testing). 
### BioBlend v0.5.2 - October 17, 2014

* BioBlend.objects: enable email&password auth
* Enable Tool Shed tar ball uploads
* BioBlend.objects: allow deletion of history and library datasets
* BioBlend.objects: fixed library dataset downloads
* Fixed the Tool Shed tool installation method
* Add 'deleted' attribute to DatasetContainer
* Handle `data_type` changes in the Oct 2014 Galaxy release
* Renamed `get_current_history()` to `get_most_recently_used_history()`
* A number of documentation improvements and other small fixes (see the commit messages for more details)

### BioBlend v0.5.1 - August 19, 2014

* Fixed url joining problem described in issue #82
* Enabled Travis Continuous Integration testing
* Added script to create a user and get its API key
* Deprecated ``create_user()`` method in favor of clearer ``create_remote_user()``. Added ``create_local_user()``.
* Skip instead of fail tests when ``BIOBLEND_GALAXY_URL`` and ``BIOBLEND_GALAXY_API_KEY`` environment variables are not defined.
* Added export and download to objects API
* Added export/download history
* GalaxyClient: changed ``make_put_request`` to return whole ``requests`` response object
* Added Tool wrapper to *BioBlend.objects* plus methods to list tools and get one
* Added ``show_tool()`` method to ``ToolClient`` class
* Added ``name``, ``in_panel`` and ``trackster`` filters to ``get_tools()``
* Added ``upload_dataset()`` method to ``History`` class.
* Removed ``DataInput`` and ``Tool`` classes for workflow steps. ``Tool`` is to be used for running single tools.

bioblend-0.7.0/MANIFEST.in
global-exclude *.swp .gitignore
include *.rst
include setup.py
graft bioblend
graft docs
graft tests

bioblend-0.7.0/README.rst
.. image:: https://img.shields.io/pypi/v/bioblend.svg
    :target: https://pypi.python.org/pypi/bioblend/
    :alt: latest version available on PyPI

.. image:: https://img.shields.io/pypi/dm/bioblend.svg
    :target: https://pypi.python.org/pypi/bioblend/
    :alt: PyPI downloads in the last month

.. image:: https://readthedocs.org/projects/bioblend/badge/
    :alt: Documentation Status
    :target: https://bioblend.readthedocs.org/

.. image:: https://travis-ci.org/galaxyproject/bioblend.png
    :target: https://travis-ci.org/galaxyproject/bioblend
    :alt: Build Status

.. image:: https://landscape.io/github/galaxyproject/bioblend/master/landscape.svg?style=flat
    :target: https://landscape.io/github/galaxyproject/bioblend/master
    :alt: Code Health

BioBlend is a Python library for interacting with `CloudMan`_ and `Galaxy`_'s API.

BioBlend is supported and tested on:

- Python 2.6, 2.7, 3.3 and 3.4
- Galaxy release_14.02 and later.

Full docs are available at http://bioblend.readthedocs.org with a quick
library overview also available in `ABOUT.rst <./ABOUT.rst>`_.

.. References/hyperlinks used above
.. _CloudMan: http://usecloudman.org/
..
_Galaxy: http://usegalaxy.org/ bioblend-0.7.0/bioblend/000077500000000000000000000000001261571066300150615ustar00rootroot00000000000000bioblend-0.7.0/bioblend/__init__.py000066400000000000000000000043051261571066300171740ustar00rootroot00000000000000import logging import os from bioblend.config import Config, BioBlendConfigLocations # Current version of the library __version__ = '0.7.0' # default chunk size (in bytes) for reading remote data try: import resource CHUNK_SIZE = resource.getpagesize() except Exception: CHUNK_SIZE = 4096 config = Config() def get_version(): """ Returns a string with the current version of the library (e.g., "0.2.0") """ return __version__ def init_logging(): """ Initialize BioBlend's logging from a configuration file. """ for config_file in BioBlendConfigLocations: try: logging.config.fileConfig(os.path.expanduser(config_file)) except: pass class NullHandler(logging.Handler): def emit(self, record): pass # By default, do not force any logging by the library. If you want to see the # log messages in your scripts, add the following to the top of your script: # import logging # logging.basicConfig(filename="bioblend.log", level=logging.DEBUG) default_format_string = "%(asctime)s %(name)s [%(levelname)s]: %(message)s" log = logging.getLogger('bioblend') log.addHandler(NullHandler()) init_logging() # Convenience functions to set logging to a particular file or stream # To enable either of these, simply add the following at the top of a # bioblend module: # import bioblend # bioblend.set_stream_logger(__name__) def set_file_logger(name, filepath, level=logging.INFO, format_string=None): global log if not format_string: format_string = default_format_string logger = logging.getLogger(name) logger.setLevel(level) fh = logging.FileHandler(filepath) fh.setLevel(level) formatter = logging.Formatter(format_string) fh.setFormatter(formatter) logger.addHandler(fh) log = logger def set_stream_logger(name, level=logging.DEBUG, format_string=None): global log if not format_string: format_string = default_format_string logger = logging.getLogger(name) logger.setLevel(level) fh = logging.StreamHandler() fh.setLevel(level) formatter = logging.Formatter(format_string) fh.setFormatter(formatter) logger.addHandler(fh) log = logger bioblend-0.7.0/bioblend/cloudman/000077500000000000000000000000001261571066300166635ustar00rootroot00000000000000bioblend-0.7.0/bioblend/cloudman/__init__.py000066400000000000000000000712701261571066300210030ustar00rootroot00000000000000""" API for interacting with a CloudMan instance. """ import functools import json import time import requests from six.moves import range from six.moves.urllib.parse import urlparse import bioblend from bioblend.cloudman.launch import CloudManLauncher from bioblend.util import Bunch def block_until_vm_ready(func): """ This decorator exists to make sure that a launched VM is ready and has received a public IP before allowing the wrapped function call to continue. If the VM is not ready, the function will block until the VM is ready. If the VM does not become ready until the vm_ready_timeout elapses or the VM status returns an error, a VMLaunchException will be thrown. This decorator relies on the wait_until_instance_ready method defined in class GenericVMInstance. All methods to which this decorator is applied must be members of a class which inherit from GenericVMInstance. 
The following two optional keyword arguments are recognized by this decorator: :type vm_ready_timeout: int :param vm_ready_timeout: Maximum length of time to block before timing out. Once the timeout is reached, a VMLaunchException will be thrown. :type vm_ready_check_interval: int :param vm_ready_check_interval: The number of seconds to pause between consecutive calls when polling the VM's ready status. """ @functools.wraps(func) def wrapper(*args, **kwargs): obj = args[0] timeout = kwargs.pop('vm_ready_timeout', 300) interval = kwargs.pop('vm_ready_check_interval', 10) try: obj.wait_until_instance_ready(timeout, interval) except AttributeError: raise VMLaunchException("Decorated object does not define a wait_until_instance_ready method." "Make sure that the object is of type GenericVMInstance.") return func(*args, **kwargs) return wrapper class VMLaunchException(Exception): def __init__(self, value): self.value = value def __str__(self): return repr(self.value) class CloudManConfig(object): def __init__(self, access_key=None, secret_key=None, cluster_name=None, image_id=None, instance_type='m1.medium', password=None, cloud_metadata=None, cluster_type=None, galaxy_data_option='', initial_storage_size=10, key_name='cloudman_key_pair', security_groups=['CloudMan'], placement='', kernel_id=None, ramdisk_id=None, block_until_ready=False, **kwargs): """ Initializes a CloudMan launch configuration object. :type access_key: str :param access_key: Access credentials. :type secret_key: str :param secret_key: Access credentials. :type cluster_name: str :param cluster_name: Name used to identify this CloudMan cluster. :type image_id: str :param image_id: Machine image ID to use when launching this CloudMan instance. :type instance_type: str :param instance_type: The type of the machine instance, as understood by the chosen cloud provider. (e.g., ``m1.medium``) :type password: str :param password: The administrative password for this CloudMan instance. :type cloud_metadata: Bunch :param cloud_metadata: This object must define the properties required to establish a `boto `_ connection to that cloud. See this method's implementation for an example of the required fields. Note that as long the as provided object defines the required fields, it can really by implemented as anything (e.g., a Bunch, a database object, a custom class). If no value for the ``cloud`` argument is provided, the default is to use the Amazon cloud. :type kernel_id: str :param kernel_id: The ID of the kernel with which to launch the instances :type ramdisk_id: str :param ramdisk_id: The ID of the RAM disk with which to launch the instances :type key_name: str :param key_name: The name of the key pair with which to launch instances :type security_groups: list of str :param security_groups: The IDs of the security groups with which to associate instances :type placement: str :param placement: The availability zone in which to launch the instances :type cluster_type: str :param cluster_type: The ``type``, either 'Galaxy', 'Data', or 'Test', defines the type of cluster platform to initialize. :type galaxy_data_option: str :param galaxy_data_option: The storage type to use for this instance. May be 'transient', 'custom_size' or ''. The default is '', which will result in ignoring the bioblend specified initial_storage_size. 'custom_size' must be used for initial_storage_size to come into effect. :type initial_storage_size: int :param initial_storage_size: The initial storage to allocate for the instance. 
This only applies if ``cluster_type`` is set to either ``Galaxy`` or ``Data`` and ``galaxy_data_option`` is set to ``custom_size`` :type block_until_ready: bool :param block_until_ready: Specifies whether the launch method will block until the instance is ready and only return once all initialization is complete. The default is False. If False, the launch method will return immediately without blocking. However, any subsequent calls made will automatically block if the instance is not ready and initialized. The blocking timeout and polling interval can be configured by providing extra parameters to the ``CloudManInstance.launch_instance`` method. """ self.set_connection_parameters(access_key, secret_key, cloud_metadata) self.set_pre_launch_parameters( cluster_name, image_id, instance_type, password, kernel_id, ramdisk_id, key_name, security_groups, placement, block_until_ready) self.set_post_launch_parameters(cluster_type, galaxy_data_option, initial_storage_size) self.set_extra_parameters(**kwargs) def set_connection_parameters(self, access_key, secret_key, cloud_metadata=None): self.access_key = access_key self.secret_key = secret_key self.cloud_metadata = cloud_metadata def set_pre_launch_parameters( self, cluster_name, image_id, instance_type, password, kernel_id=None, ramdisk_id=None, key_name='cloudman_key_pair', security_groups=['CloudMan'], placement='', block_until_ready=False): self.cluster_name = cluster_name self.image_id = image_id self.instance_type = instance_type self.password = password self.kernel_id = kernel_id self.ramdisk_id = ramdisk_id self.key_name = key_name self.security_groups = security_groups self.placement = placement self.block_until_ready = block_until_ready def set_post_launch_parameters(self, cluster_type=None, galaxy_data_option='', initial_storage_size=10): self.cluster_type = cluster_type self.galaxy_data_option = galaxy_data_option self.initial_storage_size = initial_storage_size def set_extra_parameters(self, **kwargs): self.kwargs = kwargs class CustomTypeEncoder(json.JSONEncoder): def default(self, obj): if isinstance(obj, (CloudManConfig, Bunch)): key = '__%s__' % obj.__class__.__name__ return {key: obj.__dict__} return json.JSONEncoder.default(self, obj) @staticmethod def CustomTypeDecoder(dct): if '__CloudManConfig__' in dct: return CloudManConfig(**dct['__CloudManConfig__']) elif '__Bunch__' in dct: return Bunch(**dct['__Bunch__']) else: return dct @staticmethod def load_config(fp): return json.load(fp, object_hook=CloudManConfig.CustomTypeDecoder) def save_config(self, fp): json.dump(self, fp, cls=self.CustomTypeEncoder) def validate(self): if self.access_key is None: return "Access key must not be null" elif self.secret_key is None: return "Secret key must not be null" elif self.cluster_name is None: return "Cluster name must not be null" elif self.image_id is None: return "Image ID must not be null" elif self.instance_type is None: return "Instance type must not be null" elif self.password is None: return "Password must not be null" elif self.cluster_type not in [None, 'Test', 'Data', 'Galaxy', 'Shared_cluster']: return "Unrecognized cluster type ({0})".format(self.cluster_type) elif self.galaxy_data_option not in [None, '', 'custom-size', 'transient']: return "Unrecognized galaxy data option ({0})".format(self.galaxy_data_option) elif self.key_name is None: return "Key-pair name must not be null" else: return None class GenericVMInstance(object): def __init__(self, launcher, launch_result): """ Create an instance of the CloudMan API class, 
which is to be used when manipulating that given CloudMan instance. The ``url`` is a string defining the address of CloudMan, for example "http://115.146.92.174". The ``password`` is CloudMan's password, as defined in the user data sent to CloudMan on instance creation. """ # Make sure the url scheme is defined (otherwise requests will not work) self.vm_error = None self.vm_status = None self.host_name = None self.launcher = launcher self.launch_result = launch_result def _update_host_name(self, host_name): if self.host_name != host_name: self.host_name = host_name @property def instance_id(self): """ Returns the ID of this instance (e.g., ``i-87ey32dd``) if launch was successful or ``None`` otherwise. """ return None if self.launch_result is None else self.launch_result['instance_id'] @property def key_pair_name(self): """ Returns the name of the key pair used by this instance. If instance was not launched properly, returns ``None``. """ return None if self.launch_result is None else self.launch_result['kp_name'] @property def key_pair_material(self): """ Returns the private portion of the generated key pair. It does so only if the instance was properly launched and key pair generated; ``None`` otherwise. """ return None if self.launch_result is None else self.launch_result['kp_material'] def get_machine_status(self): """ Check on the underlying VM status of an instance. This can be used to determine whether the VM has finished booting up and if CloudMan is up and running. Return a ``state`` dict with the current ``instance_state``, ``public_ip``, ``placement``, and ``error`` keys, which capture the current state (the values for those keys default to empty string if no data is available from the cloud). """ if self.launcher: return self.launcher.get_status(self.instance_id) # elif self.host_name: else: state = {'instance_state': "", 'public_ip': "", 'placement': "", 'error': "No reference to the instance object"} return state def _init_instance(self, host_name): self._update_host_name(host_name) def wait_until_instance_ready(self, vm_ready_timeout=300, vm_ready_check_interval=10): """ Wait until the VM state changes to ready/error or timeout elapses. Updates the host name once ready. """ assert vm_ready_timeout > 0 assert vm_ready_timeout > vm_ready_check_interval assert vm_ready_check_interval > 0 if self.host_name: # Host name available. Therefore, instance is ready return for time_left in range(vm_ready_timeout, 0, -vm_ready_check_interval): status = self.get_machine_status() if status['public_ip'] != '' and status['error'] == '': self._init_instance(status['public_ip']) return elif status['error'] != '': msg = "Error launching an instance: {0}".format(status['error']) bioblend.log.error(msg) raise VMLaunchException(msg) else: bioblend.log.warn("Instance not ready yet (it's in state '{0}'); waiting another {1} seconds..." .format(status['instance_state'], time_left)) time.sleep(vm_ready_check_interval) raise VMLaunchException("Waited too long for instance to become ready. Instance Id: %s" % self.instance_id) class CloudManInstance(GenericVMInstance): def __init__(self, url, password, **kwargs): """ Create an instance of the CloudMan API class, which is to be used when manipulating that given CloudMan instance. The ``url`` is a string defining the address of CloudMan, for example "http://115.146.92.174". The ``password`` is CloudMan's password, as defined in the user data sent to CloudMan on instance creation. 
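        A minimal usage sketch (the URL and password below are placeholders
        standing in for a real CloudMan instance and the password set in its
        user data)::

            from bioblend.cloudman import CloudManInstance
            cmi = CloudManInstance("http://115.146.92.174", "<password>")
            cmi.get_status()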
""" self.initialized = False if kwargs.get('launch_result', None) is not None: # Used internally by the launch_instance method super(CloudManInstance, self).__init__(kwargs['launcher'], kwargs['launch_result']) else: super(CloudManInstance, self).__init__(None, None) self.config = kwargs.pop('cloudman_config', None) if not self.config: self.password = password else: self.password = self.config.password self._set_url(url) def __repr__(self): if self.cloudman_url: return "CloudMan instance at {0}".format(self.cloudman_url) else: return "Waiting for this CloudMan instance to start..." def _update_host_name(self, host_name): """ Overrides the super-class method and makes sure that the ``cloudman_url`` is kept in sync with the host name. """ self._set_url(host_name) def _init_instance(self, hostname): super(CloudManInstance, self)._init_instance(hostname) if self.config.cluster_type: self.initialize(self.config.cluster_type, galaxy_data_option=self.config.galaxy_data_option, initial_storage_size=self.config.initial_storage_size) def _set_url(self, url): """ Keeps the CloudMan URL as well and the hostname in sync. """ if url: parse_result = urlparse(url) # Make sure the URL scheme is defined (otherwise requests will not work) if not parse_result.scheme: url = "http://" + url # Parse the corrected URL again to extract the hostname parse_result = urlparse(url) super(CloudManInstance, self)._update_host_name(parse_result.hostname) self.url = url @property def galaxy_url(self): """ Returns the base URL for this instance, which by default happens to be the URL for Galaxy application. """ return self.url @property def cloudman_url(self): """ Returns the URL for accessing this instance of CloudMan. """ if self.url: return '/'.join([self.url, 'cloud']) return None @staticmethod def launch_instance(cfg, **kwargs): """ Launches a new instance of CloudMan on the specified cloud infrastructure. :type cfg: CloudManConfig :param cfg: A CloudManConfig object containing the initial parameters for this launch. """ validation_result = cfg.validate() if validation_result is not None: raise VMLaunchException( "Invalid CloudMan configuration provided: {0}" .format(validation_result)) launcher = CloudManLauncher(cfg.access_key, cfg.secret_key, cfg.cloud_metadata) result = launcher.launch( cfg.cluster_name, cfg.image_id, cfg.instance_type, cfg.password, cfg.kernel_id, cfg.ramdisk_id, cfg.key_name, cfg.security_groups, cfg.placement, **cfg.kwargs) if result['error'] is not None: raise VMLaunchException("Error launching cloudman instance: {0}".format(result['error'])) instance = CloudManInstance(None, None, launcher=launcher, launch_result=result, cloudman_config=cfg) if cfg.block_until_ready and cfg.cluster_type: instance.get_status() # this will indirect result in initialize being invoked return instance def update(self): """ Update the local object's fields to be in sync with the actual state of the CloudMan instance the object points to. This method should be called periodically to ensure you are looking at the current data. .. 
versionadded:: 0.2.2 """ ms = self.get_machine_status() # Check if the machine is running and update IP and state self.vm_status = ms.get('instance_state', None) self.vm_error = ms.get('error', None) public_ip = ms.get('public_ip', None) # Update url if we don't have it or is different than what we have if not self.url and (public_ip and self.url != public_ip): self._set_url(public_ip) # See if the cluster has been initialized if self.vm_status == 'running' or self.url: ct = self.get_cluster_type() if ct.get('cluster_type', None): self.initialized = True if self.vm_error: bioblend.log.error(self.vm_error) @block_until_vm_ready def get_cloudman_version(self): """ Returns the cloudman version from the server. Versions prior to Cloudman 2 does not support this call, and therefore, the default is to return 1 """ try: r = self._make_get_request("cloudman_version") return r['version'] except: return 1 @block_until_vm_ready def initialize(self, cluster_type, galaxy_data_option='', initial_storage_size=None, shared_bucket=None): """ Initialize CloudMan platform. This needs to be done before the cluster can be used. The ``cluster_type``, either 'Galaxy', 'Data', or 'Test', defines the type of cluster platform to initialize. """ if not self.initialized: if self.get_cloudman_version() < 2: r = self._make_get_request( "initialize_cluster", parameters={ 'startup_opt': cluster_type, 'g_pss': initial_storage_size, 'shared_bucket': shared_bucket }) else: r = self._make_get_request( "initialize_cluster", parameters={ 'startup_opt': cluster_type, 'galaxy_data_option': galaxy_data_option, 'pss': initial_storage_size, 'shared_bucket': shared_bucket }) self.initialized = True return r @block_until_vm_ready def get_cluster_type(self): """ Get the ``cluster type`` for this CloudMan instance. See the CloudMan docs about the available types. Returns a dictionary, for example: ``{u'cluster_type': u'Test'}``. """ cluster_type = self._make_get_request("cluster_type") if cluster_type['cluster_type']: self.initialized = True return cluster_type @block_until_vm_ready def get_status(self): """ Get status information on this CloudMan instance. """ return self._make_get_request("instance_state_json") @block_until_vm_ready def get_nodes(self): """ Get a list of nodes currently running in this CloudMan cluster. """ instance_feed_json = self._make_get_request("instance_feed_json") return instance_feed_json['instances'] @block_until_vm_ready def get_cluster_size(self): """ Get the size of the cluster in terms of the number of nodes; this count includes the master node. """ return len(self.get_nodes()) @block_until_vm_ready def get_static_state(self): """ Get static information on this CloudMan instance. i.e. state that doesn't change over the lifetime of the cluster """ return self._make_get_request("static_instance_state_json") @block_until_vm_ready def get_master_ip(self): """ Returns the public IP of the master node in this CloudMan cluster """ status_json = self.get_static_state() return status_json['master_ip'] @block_until_vm_ready def get_master_id(self): """ Returns the instance ID of the master node in this CloudMan cluster """ status_json = self.get_static_state() return status_json['master_id'] @block_until_vm_ready def add_nodes(self, num_nodes, instance_type='', spot_price=''): """ Add a number of worker nodes to the cluster, optionally specifying the type for new instances. If ``instance_type`` is not specified, instance(s) of the same type as the master instance will be started. 
Note that the ``instance_type`` must match the type of instance available on the given cloud. ``spot_price`` applies only to AWS and, if set, defines the maximum price for Spot instances, thus turning this request for more instances into a Spot request. """ payload = {'number_nodes': num_nodes, 'instance_type': instance_type, 'spot_price': spot_price} return self._make_get_request("add_instances", parameters=payload) @block_until_vm_ready def remove_nodes(self, num_nodes, force=False): """ Remove worker nodes from the cluster. The ``num_nodes`` parameter defines the number of worker nodes to remove. The ``force`` parameter (defaulting to False), is a boolean indicating whether the nodes should be forcibly removed rather than gracefully removed. """ payload = {'number_nodes': num_nodes, 'force_termination': force} result = self._make_get_request("remove_instances", parameters=payload) return result @block_until_vm_ready def remove_node(self, instance_id, force=False): """ Remove a specific worker node from the cluster. The ``instance_id`` parameter defines the ID, as a string, of a worker node to remove from the cluster. The ``force`` parameter (defaulting to False), is a boolean indicating whether the node should be forcibly removed rather than gracefully removed. """ payload = {'instance_id': instance_id} return self._make_get_request("remove_instance", parameters=payload) @block_until_vm_ready def reboot_node(self, instance_id): """ Reboot a specific worker node. The ``instance_id`` parameter defines the ID, as a string, of a worker node to reboot. """ payload = {'instance_id': instance_id} return self._make_get_request("reboot_instance", parameters=payload) @block_until_vm_ready def autoscaling_enabled(self): """ Returns a boolean indicating whether autoscaling is enabled. """ return bool(self.get_status()['autoscaling']['use_autoscaling']) @block_until_vm_ready def enable_autoscaling(self, minimum_nodes=0, maximum_nodes=19): """ Enable cluster autoscaling, allowing the cluster to automatically add, or remove, worker nodes, as needed. The number of worker nodes in the cluster is bounded by the ``minimum_nodes`` (default is 0) and ``maximum_nodes`` (default is 19) parameters. """ if not self.autoscaling_enabled(): payload = {'as_min': minimum_nodes, 'as_max': maximum_nodes} self._make_get_request("toggle_autoscaling", parameters=payload) @block_until_vm_ready def disable_autoscaling(self): """ Disable autoscaling, meaning that worker nodes will need to be manually added and removed. """ if self.autoscaling_enabled(): self._make_get_request("toggle_autoscaling") @block_until_vm_ready def adjust_autoscaling(self, minimum_nodes=None, maximum_nodes=None): """ Adjust the autoscaling configuration parameters. The number of worker nodes in the cluster is bounded by the optional ``minimum_nodes`` and ``maximum_nodes`` parameters. If a parameter is not provided then its configuration value does not change. """ if self.autoscaling_enabled(): payload = {'as_min_adj': minimum_nodes, 'as_max_adj': maximum_nodes} self._make_get_request("adjust_autoscaling", parameters=payload) @block_until_vm_ready def is_master_execution_host(self): """ Checks whether the master node has job execution enabled. """ status = self._make_get_request("get_all_services_status") return bool(status['master_is_exec_host']) @block_until_vm_ready def set_master_as_execution_host(self, enable): """ Enables/disables master as execution host. 
""" if not self.is_master_execution_host(): self._make_get_request("toggle_master_as_exec_host") @block_until_vm_ready def get_galaxy_state(self): """ Get the current status of Galaxy running on the cluster. """ payload = {'srvc': 'Galaxy'} status = self._make_get_request("get_srvc_status", parameters=payload) return {'status': status['status']} @block_until_vm_ready def terminate(self, terminate_master_instance=True, delete_cluster=False): """ Terminate this CloudMan cluster. There is an option to also terminate the master instance (all worker instances will be terminated in the process of cluster termination), and delete the whole cluster. .. warning:: Deleting a cluster is irreversible - all of the data will be permanently deleted. """ payload = {'terminate_master_instance': terminate_master_instance, 'delete_cluster': delete_cluster} result = self._make_get_request("kill_all", parameters=payload, timeout=15) return result def _make_get_request(self, url, parameters={}, timeout=None): """ Private function that makes a GET request to the nominated ``url``, with the provided GET ``parameters``. Optionally, set the ``timeout`` to stop waiting for a response after a given number of seconds. This is particularly useful when terminating a cluster as it may terminate before sending a response. """ req_url = '/'.join([self.cloudman_url, 'root', url]) r = requests.get(req_url, params=parameters, auth=("", self.password), timeout=timeout) try: json = r.json() return json except: return r.text bioblend-0.7.0/bioblend/cloudman/launch.py000066400000000000000000001072041261571066300205130ustar00rootroot00000000000000""" Setup and launch a CloudMan instance. """ import datetime import yaml import socket import boto from boto.compat import http_client from boto.ec2.regioninfo import RegionInfo from boto.exception import EC2ResponseError, S3ResponseError from boto.s3.connection import OrdinaryCallingFormat, S3Connection, SubdomainCallingFormat import six from six.moves.http_client import HTTPConnection from six.moves.urllib.parse import urlparse import bioblend from bioblend.util import Bunch # Uncomment the following line if no logging from boto is desired # bioblend.logging.getLogger('boto').setLevel(bioblend.logging.CRITICAL) # Uncomment the following line if logging at the prompt is desired # bioblend.set_stream_logger(__name__) def instance_types(cloud_name='generic'): """ Return a list of dictionaries containing details about the available instance types for the given `cloud_name`. :type cloud_name: str :param cloud_name: A name of the cloud for which the list of instance types will be returned. Valid values are: `aws`, `nectar`, `generic`. :rtype: list :return: A list of dictionaries describing instance types. Each dict will contain the following keys: `name`, `model`, and `description`. 
""" instance_list = [] if cloud_name.lower() == 'aws': instance_list.append({"model": "c3.large", "name": "Compute optimized Large", "description": "2 vCPU/4GB RAM"}) instance_list.append({"model": "c3.2xlarge", "name": "Compute optimized 2xLarge", "description": "8 vCPU/15GB RAM"}) instance_list.append({"model": "c3.8xlarge", "name": "Compute optimized 8xLarge", "description": "32 vCPU/60GB RAM"}) elif cloud_name.lower() in ['nectar', 'generic']: instance_list.append({"model": "m1.small", "name": "Small", "description": "1 vCPU / 4GB RAM"}) instance_list.append({"model": "m1.medium", "name": "Medium", "description": "2 vCPU / 8GB RAM"}) instance_list.append({"model": "m1.large", "name": "Large", "description": "4 vCPU / 16GB RAM"}) instance_list.append({"model": "m1.xlarge", "name": "Extra Large", "description": "8 vCPU / 32GB RAM"}) instance_list.append({"model": "m1.xxlarge", "name": "Extra-extra Large", "description": "16 vCPU / 64GB RAM"}) return instance_list class CloudManLauncher(object): def __init__(self, access_key, secret_key, cloud=None): """ Define the environment in which this instance of CloudMan will be launched. Besides providing the credentials, optionally provide the ``cloud`` object. This object must define the properties required to establish a `boto `_ connection to that cloud. See this method's implementation for an example of the required fields. Note that as long the as provided object defines the required fields, it can really by implemented as anything (e.g., a Bunch, a database object, a custom class). If no value for the ``cloud`` argument is provided, the default is to use the Amazon cloud. """ self.access_key = access_key self.secret_key = secret_key if cloud is None: # Default to an EC2-compatible object self.cloud = Bunch(id='1', # for compatibility w/ DB representation name="Amazon", cloud_type="ec2", bucket_default="cloudman", region_name="us-east-1", region_endpoint="ec2.amazonaws.com", ec2_port="", ec2_conn_path="/", cidr_range="", is_secure=True, s3_host="s3.amazonaws.com", s3_port="", s3_conn_path='/') else: self.cloud = cloud self.ec2_conn = self.connect_ec2(self.access_key, self.secret_key, self.cloud) # Define exceptions from http_client that we want to catch and retry self.http_exceptions = (http_client.HTTPException, socket.error, socket.gaierror, http_client.BadStatusLine) def __repr__(self): return "Cloud: {0}; acct ID: {1}".format(self.cloud.name, self.access_key) def launch(self, cluster_name, image_id, instance_type, password, kernel_id=None, ramdisk_id=None, key_name='cloudman_key_pair', security_groups=['CloudMan'], placement='', subnet_id=None, **kwargs): """ Check all the prerequisites (key pair and security groups) for launching a CloudMan instance, compose the user data based on the parameters specified in the arguments and the cloud properties as defined in the object's ``cloud`` field. 
For the current list of user data fields that can be provided via ``kwargs``, see ``_ Return a dict containing the properties and info with which an instance was launched, namely: ``sg_names`` containing the names of the security groups, ``kp_name`` containing the name of the key pair, ``kp_material`` containing the private portion of the key pair (*note* that this portion of the key is available and can be retrieved *only* at the time the key is created, which will happen only if no key with the name provided in the ``key_name`` argument exists), ``rs`` containing the `boto `_ ``ResultSet`` object, ``instance_id`` containing the ID of a started instance, and ``error`` containing an error message if there was one. """ ret = {'sg_names': [], 'sg_ids': [], 'kp_name': '', 'kp_material': '', 'rs': None, 'instance_id': '', 'error': None} security_group_ids = [] # First satisfy the prerequisites for sg in security_groups: cmsg = self.create_cm_security_group(sg) ret['error'] = cmsg['error'] if ret['error']: return ret if cmsg['name']: ret['sg_names'].append(cmsg['name']) ret['sg_ids'].append(cmsg['sg_id']) security_group_ids.append(cmsg['sg_id']) kp_info = self.create_key_pair(key_name) ret['error'] = kp_info['error'] if ret['error']: return ret ret['kp_name'] = kp_info['name'] ret['kp_material'] = kp_info['material'] # If not provided, try to find a placement # TODO: Should placement always be checked? To make sure it's correct # for existing clusters. if not placement: placement = self._find_placement(cluster_name).get('placement', None) # Compose user data for launching an instance, ensuring we have the required fields kwargs['access_key'] = self.access_key kwargs['secret_key'] = self.secret_key kwargs['cluster_name'] = cluster_name kwargs['password'] = password kwargs['cloud_name'] = self.cloud.name ud = self._compose_user_data(kwargs) # Now launch an instance try: rs = None rs = self.ec2_conn.run_instances(image_id=image_id, instance_type=instance_type, key_name=key_name, security_group_ids=security_group_ids, user_data=ud, kernel_id=kernel_id, ramdisk_id=ramdisk_id, subnet_id=subnet_id, placement=placement) ret['rs'] = rs except EC2ResponseError as e: err_msg = "Problem launching an instance: {0} (code {1}; status {2})" \ .format(e.message, e.error_code, e.status) bioblend.log.exception(err_msg) ret['error'] = err_msg return ret else: if rs: try: bioblend.log.info("Launched an instance with ID %s" % rs.instances[0].id) ret['instance_id'] = rs.instances[0].id ret['instance_ip'] = rs.instances[0].ip_address except EC2ResponseError as e: err_msg = "Problem with the launched instance object: {0} " \ "(code {1}; status {2})" \ .format(e.message, e.error_code, e.status) bioblend.log.exception(err_msg) ret['error'] = err_msg else: ret['error'] = ("No response after launching an instance. Check " "your account permissions and try again.") return ret def create_cm_security_group(self, sg_name='CloudMan'): """ Create a security group with all authorizations required to run CloudMan. If the group already exists, check its rules and add the missing ones. :type sg_name: str :param sg_name: A name for the security group to be created. :rtype: dict :return: A dictionary containing keys ``name`` (with the value being the name of the security group that was created), ``error`` (with the value being the error message if there was an error or ``None`` if no error was encountered), and ``ports`` (containing the list of tuples with port ranges that were opened or attempted to be opened). .. 
versionchanged:: 0.6.1 The return value changed from a string to a dict """ ports = (('20', '21'), # FTP ('22', '22'), # SSH ('80', '80'), # Web UI ('443', '443'), # SSL Web UI ('8800', '8800'), # NodeJS Proxy for Galaxy IPython IE ('9600', '9700'), # HTCondor ('30000', '30100')) # FTP transfer progress = {'name': None, 'sg_id': None, 'error': None, 'ports': ports} cmsg = None # Check if this security group already exists try: sgs = self.ec2_conn.get_all_security_groups() except EC2ResponseError as e: err_msg = ("Problem getting security groups. This could indicate a " "problem with your account credentials or permissions: " "{0} (code {1}; status {2})" .format(e.message, e.error_code, e.status)) bioblend.log.exception(err_msg) progress['error'] = err_msg return progress for sg in sgs: if sg.name == sg_name: cmsg = sg bioblend.log.debug("Security group '%s' already exists; will " "add authorizations next." % sg_name) break # If it does not exist, create security group if cmsg is None: bioblend.log.debug("Creating Security Group %s" % sg_name) try: cmsg = self.ec2_conn.create_security_group(sg_name, 'A security ' 'group for CloudMan') except EC2ResponseError as e: err_msg = "Problem creating security group '{0}': {1} (code {2}; " \ "status {3})" \ .format(sg_name, e.message, e.error_code, e.status) bioblend.log.exception(err_msg) progress['error'] = err_msg if cmsg: progress['name'] = cmsg.name progress['sg_id'] = cmsg.id # Add appropriate authorization rules # If these rules already exist, nothing will be changed in the SG for port in ports: try: if not self.rule_exists(cmsg.rules, from_port=port[0], to_port=port[1]): cmsg.authorize(ip_protocol='tcp', from_port=port[0], to_port=port[1], cidr_ip='0.0.0.0/0') else: bioblend.log.debug("Rule (%s:%s) already exists in the SG" % (port[0], port[1])) except EC2ResponseError as e: err_msg = "A problem adding security group authorizations: {0} " \ "(code {1}; status {2})" \ .format(e.message, e.error_code, e.status) bioblend.log.exception(err_msg) progress['error'] = err_msg # Add ICMP (i.e., ping) rule required by HTCondor try: if not self.rule_exists(cmsg.rules, from_port='-1', to_port='-1', ip_protocol='icmp'): cmsg.authorize(ip_protocol='icmp', from_port=-1, to_port=-1, cidr_ip='0.0.0.0/0') else: bioblend.log.debug("ICMP rule already exists in {0} SG.".format(sg_name)) except EC2ResponseError as e: err_msg = "A problem with security ICMP rule authorization: {0} " \ "(code {1}; status {2})" \ .format(e.message, e.error_code, e.status) bioblend.log.exception(err_msg) progress['err_msg'] = err_msg # Add rule that allows communication between instances in the same SG g_rule_exists = False # A flag to indicate if group rule already exists for rule in cmsg.rules: for grant in rule.grants: if grant.name == cmsg.name: g_rule_exists = True bioblend.log.debug("Group rule already exists in the SG.") if g_rule_exists: break if not g_rule_exists: try: cmsg.authorize(src_group=cmsg, ip_protocol='tcp', from_port=0, to_port=65535) except EC2ResponseError as e: err_msg = "A problem with security group authorization: {0} " \ "(code {1}; status {2})" \ .format(e.message, e.error_code, e.status) bioblend.log.exception(err_msg) progress['err_msg'] = err_msg bioblend.log.info("Done configuring '%s' security group" % cmsg.name) else: bioblend.log.warning("Did not create security group '{0}'".format(sg_name)) return progress def rule_exists(self, rules, from_port, to_port, ip_protocol='tcp', cidr_ip='0.0.0.0/0'): """ A convenience method to check if an authorization 
rule in a security group already exists. """ for rule in rules: if rule.ip_protocol == ip_protocol and rule.from_port == from_port and \ rule.to_port == to_port and cidr_ip in [ip.cidr_ip for ip in rule.grants]: return True return False def create_key_pair(self, key_name='cloudman_key_pair'): """ If a key pair with the provided ``key_name`` does not exist, create it. :type sg_name: str :param sg_name: A name for the key pair to be created. :rtype: dict :return: A dictionary containing keys ``name`` (with the value being the name of the key pair that was created), ``error`` (with the value being the error message if there was an error or ``None`` if no error was encountered), and ``material`` (containing the unencrypted PEM encoded RSA private key if the key was created or ``None`` if the key already eixsted). .. versionchanged:: 0.6.1 The return value changed from a tuple to a dict """ progress = {'name': None, 'material': None, 'error': None} kp = None # Check if a key pair under the given name already exists. If it does not, # create it, else return. try: kps = self.ec2_conn.get_all_key_pairs() except EC2ResponseError as e: err_msg = "Problem getting key pairs: {0} (code {1}; status {2})" \ .format(e.message, e.error_code, e.status) bioblend.log.exception(err_msg) progress['error'] = err_msg return progress for akp in kps: if akp.name == key_name: bioblend.log.info("Key pair '%s' already exists; reusing it." % key_name) progress['name'] = akp.name return progress try: kp = self.ec2_conn.create_key_pair(key_name) except EC2ResponseError as e: err_msg = "Problem creating key pair '{0}': {1} (code {2}; status {3})" \ .format(key_name, e.message, e.error_code, e.status) bioblend.log.exception(err_msg) progress['error'] = err_msg return progress bioblend.log.info("Created key pair '%s'" % kp.name) progress['name'] = kp.name progress['material'] = kp.material return progress def get_status(self, instance_id): """ Check on the status of an instance. ``instance_id`` needs to be a ``boto``-library copatible instance ID (e.g., ``i-8fehrdss``).If ``instance_id`` is not provided, the ID obtained when launching *the most recent* instance is used. Note that this assumes the instance being checked on was launched using this class. Also note that the same class may be used to launch multiple instances but only the most recent ``instance_id`` is kept while any others will to be explicitly specified. This method also allows the required ``ec2_conn`` connection object to be provided at invocation time. If the object is not provided, credentials defined for the class are used (ability to specify a custom ``ec2_conn`` helps in case of stateless method invocations). Return a ``state`` dict containing the following keys: ``instance_state``, ``public_ip``, ``placement``, and ``error``, which capture CloudMan's current state. For ``instance_state``, expected values are: ``pending``, ``booting``, ``running``, or ``error`` and represent the state of the underlying instance. Other keys will return an empty value until the ``instance_state`` enters ``running`` state. """ ec2_conn = self.ec2_conn rs = None state = {'instance_state': "", 'public_ip': "", 'placement': "", 'error': ""} # Make sure we have an instance ID if instance_id is None: err = "Missing instance ID, cannot check the state." 
bioblend.log.error(err) state['error'] = err return state try: rs = ec2_conn.get_all_instances([instance_id]) if rs is not None: inst_state = rs[0].instances[0].update() public_ip = rs[0].instances[0].ip_address state['public_ip'] = public_ip if inst_state == 'running': cm_url = "http://{dns}/cloud".format(dns=public_ip) # Wait until the CloudMan URL is accessible to return the data if self._checkURL(cm_url) is True: state['instance_state'] = inst_state state['placement'] = rs[0].instances[0].placement else: state['instance_state'] = 'booting' else: state['instance_state'] = inst_state except Exception as e: err = "Problem updating instance '%s' state: %s" % (instance_id, e) bioblend.log.error(err) state['error'] = err return state def get_clusters_pd(self, include_placement=True): """ Return *persistent data* of all existing clusters for this account. :type include_placement: bool :param include_placement: Whether or not to include region placement for the clusters. Setting this option will lead to a longer function runtime. :rtype: dict :return: A dictionary containing keys ``clusters`` and ``error``. The value of ``clusters`` will be a dictionary with the following keys ``cluster_name``, ``persistent_data``, ``bucket_name`` and optionally ``placement`` or an empty list if no clusters were found or an error was encountered. ``persistent_data`` key value is yet another dictionary containing given cluster's persistent data. The value for the ``error`` key will contain a string with the error message. .. versionadded:: 0.3 .. versionchanged:: 0.7.0 The return value changed from a list to a dictionary. """ clusters = [] response = {'clusters': clusters, 'error': None} s3_conn = self.connect_s3(self.access_key, self.secret_key, self.cloud) try: buckets = s3_conn.get_all_buckets() except S3ResponseError as e: response['error'] = "S3ResponseError getting buckets: %s" % e except self.http_exceptions as ex: response['error'] = "Exception getting buckets: %s" % ex if response['error']: bioblend.log.exception(response['error']) return response for bucket in [b for b in buckets if b.name.startswith('cm-')]: try: # TODO: first lookup if persistent_data.yaml key exists pd = bucket.get_key('persistent_data.yaml') except S3ResponseError: # This can fail for a number of reasons for non-us and/or # CNAME'd buckets but it is not a terminal error bioblend.log.warning("Problem fetching persistent_data.yaml " "from bucket %s" % bucket) continue if pd: # We are dealing with a CloudMan bucket pd_contents = pd.get_contents_as_string() pd = yaml.load(pd_contents) if 'cluster_name' in pd: cluster_name = pd['cluster_name'] else: for key in bucket.list(): if key.name.endswith('.clusterName'): cluster_name = key.name.split('.clusterName')[0] cluster = {'cluster_name': cluster_name, 'persistent_data': pd, 'bucket_name': bucket.name} # Look for cluster's placement too if include_placement: placement = self._find_placement(cluster_name, cluster) cluster['placement'] = placement clusters.append(cluster) response['clusters'] = clusters return response def get_cluster_pd(self, cluster_name): """ Return *persistent data* (as a dict) associated with a cluster with the given ``cluster_name``. If a cluster with the given name is not found, return an empty dict. .. 
versionadded:: 0.3 """ cluster = {} clusters = self.get_clusters_pd().get('clusters', []) for c in clusters: if c['cluster_name'] == cluster_name: cluster = c break return cluster def connect_ec2(self, a_key, s_key, cloud=None): """ Create and return an EC2-compatible connection object for the given cloud. See ``_get_cloud_info`` method for more details on the requirements for the ``cloud`` parameter. If no value is provided, the class field is used. """ if cloud is None: cloud = self.cloud ci = self._get_cloud_info(cloud) r = RegionInfo(name=ci['region_name'], endpoint=ci['region_endpoint']) ec2_conn = boto.connect_ec2(aws_access_key_id=a_key, aws_secret_access_key=s_key, # api_version is needed for availability zone support for EC2 api_version='2012-06-01' if ci['cloud_type'] == 'ec2' else None, is_secure=ci['is_secure'], region=r, port=ci['ec2_port'], path=ci['ec2_conn_path'], validate_certs=False) return ec2_conn def connect_s3(self, a_key, s_key, cloud=None): """ Create and return an S3-compatible connection object for the given cloud. See ``_get_cloud_info`` method for more details on the requirements for the ``cloud`` parameter. If no value is provided, the class field is used. """ if cloud is None: cloud = self.cloud ci = self._get_cloud_info(cloud) if ci['cloud_type'] == 'amazon': calling_format = SubdomainCallingFormat() else: calling_format = OrdinaryCallingFormat() s3_conn = S3Connection( aws_access_key_id=a_key, aws_secret_access_key=s_key, is_secure=ci['is_secure'], port=ci['s3_port'], host=ci['s3_host'], path=ci['s3_conn_path'], calling_format=calling_format) return s3_conn def _compose_user_data(self, user_provided_data): """ A convenience method used to compose and properly format the user data required when requesting an instance. ``user_provided_data`` is the data provided by a user required to identify a cluster and user other user requirements. 
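# Illustrative usage sketch (not part of the original file): driving the cluster
# helpers documented above from user code. It assumes these methods belong to
# bioblend.cloudman.launch.CloudManLauncher and that valid cloud credentials are
# available; the credential placeholders below are hypothetical.
from bioblend.cloudman.launch import CloudManLauncher

launcher = CloudManLauncher('<access-key>', '<secret-key>', cloud=None)  # cloud=None -> default cloud
clusters = launcher.get_clusters_pd(include_placement=False)
if clusters['error']:
    print("Could not list clusters: %s" % clusters['error'])
else:
    for c in clusters['clusters']:
        # Each entry carries the cluster name, its bucket and its persistent data
        print(c['cluster_name'], c['bucket_name'])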
""" form_data = {} # Do not include the following fields in the user data but do include # any 'advanced startup fields' that might be added in the future excluded_fields = ['sg_name', 'image_id', 'instance_id', 'kp_name', 'cloud', 'cloud_type', 'public_dns', 'cidr_range', 'kp_material', 'placement', 'flavor_id'] for key, value in six.iteritems(user_provided_data): if key not in excluded_fields: form_data[key] = value # If the following user data keys are empty, do not include them in the request user data udkeys = ['post_start_script_url', 'worker_post_start_script_url', 'bucket_default', 'share_string'] for udkey in udkeys: if udkey in form_data and form_data[udkey] == '': del form_data[udkey] # If bucket_default was not provided, add a default value to the user data # (missing value does not play nicely with CloudMan's ec2autorun.py) if not form_data.get('bucket_default', None) and self.cloud.bucket_default: form_data['bucket_default'] = self.cloud.bucket_default # Reuse the ``password`` for the ``freenxpass`` user data option if 'freenxpass' not in form_data and 'password' in form_data: form_data['freenxpass'] = form_data['password'] # Convert form_data into the YAML format ud = yaml.dump(form_data, default_flow_style=False, allow_unicode=False) # Also include connection info about the selected cloud ci = self._get_cloud_info(self.cloud, as_str=True) return ud + "\n" + ci def _get_cloud_info(self, cloud, as_str=False): """ Get connection information about a given cloud """ ci = {} ci['cloud_type'] = cloud.cloud_type ci['region_name'] = cloud.region_name ci['region_endpoint'] = cloud.region_endpoint ci['is_secure'] = cloud.is_secure ci['ec2_port'] = cloud.ec2_port if cloud.ec2_port != '' else None ci['ec2_conn_path'] = cloud.ec2_conn_path # Include cidr_range only if not empty if cloud.cidr_range != '': ci['cidr_range'] = cloud.cidr_range ci['s3_host'] = cloud.s3_host ci['s3_port'] = cloud.s3_port if cloud.s3_port != '' else None ci['s3_conn_path'] = cloud.s3_conn_path if as_str: ci = yaml.dump(ci, default_flow_style=False, allow_unicode=False) return ci def _get_volume_placement(self, vol_id): """ Returns the placement of a volume (or None, if it cannot be determined) """ try: vol = self.ec2_conn.get_all_volumes(volume_ids=[vol_id]) except EC2ResponseError as ec2e: bioblend.log.error("EC2ResponseError querying for volume {0}: {1}" .format(vol_id, ec2e)) vol = None if vol: return vol[0].zone else: bioblend.log.error("Requested placement of a volume '%s' that does not exist." % vol_id) return None def _find_placement(self, cluster_name, cluster=None): """ Find a placement zone for a cluster with the name ``cluster_name``. By default, this method will search for and fetch given cluster's *persistent data*; alternatively, *persistent data* can be provided via the ``cluster`` parameter. This dict needs to have ``persistent_data`` key with the contents of cluster's *persistent data*. If the cluster or the volume associated with the cluster cannot be found, cluster placement is set to ``None``. :rtype: dict :return: A dictionary with ``placement`` and ``error`` keywords. .. versionchanged:: 0.7.0 The return value changed from a list to a dictionary. 
""" placement = None response = {'placement': placement, 'error': None} cluster = cluster or self.get_cluster_pd(cluster_name) if cluster and 'persistent_data' in cluster: pd = cluster['persistent_data'] try: if 'placement' in pd: response['placement'] = pd['placement'] elif 'data_filesystems' in pd: # We have v1 format persistent data so get the volume first and # then the placement zone vol_id = pd['data_filesystems']['galaxyData'][0]['vol_id'] response['placement'] = self._get_volume_placement(vol_id) elif 'filesystems' in pd: # V2 format. for fs in [fs for fs in pd['filesystems'] if fs.get('kind', None) == 'volume' and 'ids' in fs]: vol_id = fs['ids'][0] # All volumes must be in the same zone response['placement'] = self._get_volume_placement(vol_id) # No need to continue to iterate through # filesystems, if we found one with a volume. break except Exception as exc: response['error'] = ("Exception while finding placement for " "cluster '{0}'. This can indicate malformed " "instance data. Or that this method is " "broken: {1}".format(cluster_name, exc)) bioblend.log.error(response['error']) response['placement'] = None else: bioblend.log.debug("Insufficient info about cluster {0} to get placement." .format(cluster_name)) return response def find_placements(self, ec2_conn, instance_type, cloud_type, cluster_name=None): """ Find a list of placement zones that support the specified instance type. If ``cluster_name`` is given and a cluster with the given name exist, return a list with only one entry where the given cluster lives. Searching for available zones for a given instance type is done by checking the spot prices in the potential availability zones for support before deciding on a region: http://blog.piefox.com/2011/07/ec2-availability-zones-and-instance.html Note that, currently, instance-type based zone selection applies only to AWS. For other clouds, all the available zones are returned (unless a cluster is being recreated, in which case the cluster's placement zone is returned sa stored in its persistent data. :rtype: dict :return: A dictionary with ``zones`` and ``error`` keywords. .. versionchanged:: 0.3 Changed method name from ``_find_placements`` to ``find_placements``. Also added ``cluster_name`` parameter. .. versionchanged:: 0.7.0 The return value changed from a list to a dictionary. 
""" # First look for a specific zone a given cluster is bound to zones = [] response = {'zones': zones, 'error': None} if cluster_name: placement = self._find_placement(cluster_name) if placement.get('error'): response['error'] = placement['error'] return response response['zones'] = placement.get('placement', []) # If placement is not found, look for a list of available zones if not response['zones']: in_the_past = datetime.datetime.now() - datetime.timedelta(hours=1) back_compatible_zone = "us-east-1e" for zone in [z for z in ec2_conn.get_all_zones() if z.state == 'available']: # Non EC2 clouds may not support get_spot_price_history if instance_type is None or cloud_type != 'ec2': zones.append(zone.name) elif ec2_conn.get_spot_price_history(instance_type=instance_type, end_time=in_the_past.isoformat(), availability_zone=zone.name): zones.append(zone.name) zones.sort(reverse=True) # Higher-lettered zones seem to have more availability currently if back_compatible_zone in zones: zones = [back_compatible_zone] + [z for z in zones if z != back_compatible_zone] if len(zones) == 0: response['error'] = ("Did not find availabilty zone for {1}" .format(instance_type)) bioblend.log.error(response['error']) zones.append(back_compatible_zone) return response def _checkURL(self, url): """ Check if the ``url`` is *alive* (i.e., remote server returns code 200(OK) or 401 (unauthorized)). """ try: p = urlparse(url) h = HTTPConnection(p[1]) h.putrequest('HEAD', p[2]) h.endheaders() r = h.getresponse() if r.status in (200, 401): # CloudMan UI is pwd protected so include 401 return True except Exception: # No response or no good response pass return False bioblend-0.7.0/bioblend/config.py000066400000000000000000000040601261571066300167000ustar00rootroot00000000000000import os from six.moves import configparser BioBlendConfigPath = '/etc/bioblend.cfg' BioBlendConfigLocations = [BioBlendConfigPath] UserConfigPath = os.path.join(os.path.expanduser('~'), '.bioblend') BioBlendConfigLocations.append(UserConfigPath) class Config(configparser.SafeConfigParser): """ BioBlend allows library-wide configuration to be set in external files. These configuration files can be used to specify access keys, for example. 
By default we use two locations for the BioBlend configurations: * System wide: ``/etc/bioblend.cfg`` * Individual user: ``~/.bioblend`` (which works on both Windows and Unix) """ def __init__(self, path=None, fp=None, do_load=True): configparser.SafeConfigParser.__init__(self, {'working_dir': '/mnt/pyami', 'debug': '0'}) if do_load: if path: self.load_from_path(path) elif fp: self.readfp(fp) else: self.read(BioBlendConfigLocations) def get_value(self, section, name, default=None): return self.get(section, name, default) def get(self, section, name, default=None): try: val = configparser.SafeConfigParser.get(self, section, name) except: val = default return val def getint(self, section, name, default=0): try: val = configparser.SafeConfigParser.getint(self, section, name) except: val = int(default) return val def getfloat(self, section, name, default=0.0): try: val = configparser.SafeConfigParser.getfloat(self, section, name) except: val = float(default) return val def getbool(self, section, name, default=False): if self.has_option(section, name): val = self.get(section, name) if val.lower() == 'true': val = True else: val = False else: val = default return val bioblend-0.7.0/bioblend/galaxy/000077500000000000000000000000001261571066300163465ustar00rootroot00000000000000bioblend-0.7.0/bioblend/galaxy/__init__.py000066400000000000000000000070571261571066300204700ustar00rootroot00000000000000""" A base representation of an instance of Galaxy """ from bioblend.galaxy.client import Client from bioblend.galaxy import (libraries, histories, workflows, datasets, users, genomes, tools, toolshed, config, visual, quotas, groups, datatypes, jobs, forms, ftpfiles, folders, roles, tool_data) from bioblend.galaxyclient import GalaxyClient class GalaxyInstance(GalaxyClient): def __init__(self, url, key=None, email=None, password=None): """ A base representation of an instance of Galaxy, identified by a URL and a user's API key. After you have created an ``GalaxyInstance`` object, access various modules via the class fields (see the source for the most up-to-date list): ``libraries``, ``histories``, ``workflows``, ``datasets``, and ``users`` are the minimum set supported. For example, to work with histories, and get a list of all the user's histories, the following should be done:: from bioblend import galaxy gi = galaxy.GalaxyInstance(url='http://127.0.0.1:8000', key='your_api_key') hl = gi.histories.get_histories() :type url: str :param url: A FQDN or IP for a given instance of Galaxy. For example: http://127.0.0.1:8080 :type key: str :param key: User's API key for the given instance of Galaxy, obtained from the user preferences. If a key is not supplied, an email address and password must be and key will automatically be created for the user. :type email: str :param email: Galaxy e-mail address corresponding to the user. Ignored if key is supplied directly. :type password: str :param password: Password of Galaxy account corresponding to the above e-mail address. Ignored if key is supplied directly. 
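# Hedged example (not part of the original docstring): creating a GalaxyInstance
# without an API key, letting BioBlend obtain one from an email/password pair as
# described above. The URL and credentials are placeholders.
from bioblend.galaxy import GalaxyInstance

gi = GalaxyInstance(url='http://127.0.0.1:8080',
                    email='user@example.org', password='changeme')
print(gi.histories.get_histories())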
""" super(GalaxyInstance, self).__init__(url, key, email, password) self.libraries = libraries.LibraryClient(self) self.histories = histories.HistoryClient(self) self.workflows = workflows.WorkflowClient(self) self.datasets = datasets.DatasetClient(self) self.users = users.UserClient(self) self.genomes = genomes.GenomeClient(self) self.tools = tools.ToolClient(self) self.toolShed = toolshed.ToolShedClient(self) self.config = config.ConfigClient(self) self.visual = visual.VisualClient(self) self.quotas = quotas.QuotaClient(self) self.groups = groups.GroupsClient(self) self.roles = roles.RolesClient(self) self.datatypes = datatypes.DatatypesClient(self) self.jobs = jobs.JobsClient(self) self.forms = forms.FormsClient(self) self.ftpfiles = ftpfiles.FTPFilesClient(self) self.tool_data = tool_data.ToolDataClient(self) self.folders = folders.FoldersClient(self) @property def max_get_attempts(self): return Client.max_get_retries() @max_get_attempts.setter def max_get_attempts(self, v): Client.set_max_get_retries(v) @property def get_retry_delay(self): return Client.get_retry_delay() @get_retry_delay.setter def get_retry_delay(self, v): Client.set_get_retry_delay(v) def __repr__(self): """ A nicer representation of this GalaxyInstance object """ return "GalaxyInstance object for Galaxy at {0}".format(self.base_url) bioblend-0.7.0/bioblend/galaxy/client.py000066400000000000000000000172421261571066300202040ustar00rootroot00000000000000""" An interface the clients should implement. This class is primarily a helper for the library and user code should not use it directly. """ import json import time import requests try: # The following import will work only for Requests >= 2.4.0 and is # needed to workaround its "urllib3.exceptions.ProtocolError not # wrapped" bug: https://github.com/kennethreitz/requests/issues/2192 # pylint: disable=E0611,F0401 from requests.packages.urllib3.exceptions import ProtocolError # pylint: enable=E0611,F0401 except ImportError: ProtocolError = None # pylint: disable=C0103 import bioblend as bb class ConnectionError(Exception): """ An exception class that is raised when unexpected HTTP responses come back. Should make it easier to debug when strange HTTP things happen such as a proxy server getting in the way of the request etc. @see: body attribute to see the content of the http response """ def __init__(self, message, body=None): super(ConnectionError, self).__init__(message) self.body = body def __str__(self): return "{0}: {1}".format(self.args[0], self.body) class Client(object): # Class variables that configure GET request retries. Note that since these # are class variables their values are shared by all Client instances -- # i.e., HistoryClient, WorkflowClient, etc. # # Number of attempts before giving up on a GET request. _max_get_retries = 1 # Delay in seconds between subsequent retries. _get_retry_delay = 10 @classmethod def max_get_retries(cls): """ The maximum number of attempts for a GET request. """ return cls._max_get_retries @classmethod def set_max_get_retries(cls, value): """ Set the maximum number of attempts for GET requests. A value greater than one causes failed GET requests to be retried `value` - 1 times. Default: 1 """ if value < 1: raise ValueError("Number of retries must be >= 1 (got: %s)" % value) cls._max_get_retries = value return cls @classmethod def get_retry_delay(cls): """ The delay (in seconds) to wait before retrying a failed GET request. 
""" return cls._get_retry_delay @classmethod def set_get_retry_delay(cls, value): """ Set the delay (in seconds) to wait before retrying a failed GET request. Default: 10 """ if value < 0: raise ValueError("Retry delay must be >= 0 (got: %s)" % value) cls._get_retry_delay = value return cls def __init__(self, galaxy_instance): """ A generic Client interface defining the common fields. All clients *must* define the following field (which will be used as part of the URL composition (e.g., ``http:///api/libraries``): ``self.module = 'workflows' | 'libraries' | 'histories' | ...`` """ self.gi = galaxy_instance self.url = '/'.join([self.gi.url, self.module]) def _get(self, id=None, deleted=False, contents=None, url=None, params=None, json=True): """ Do a GET request, composing the URL from ``id``, ``deleted`` and ``contents``. Alternatively, an explicit ``url`` can be provided. If ``json`` is set to ``True``, return a decoded JSON object (and treat an empty or undecodable response as an error). The request will optionally be retried as configured by ``max_get_retries`` and ``get_retry_delay``: this offers some resilience in the presence of temporary failures. """ if not url: url = self.gi._make_url(self, module_id=id, deleted=deleted, contents=contents) attempts_left = self.max_get_retries() retry_delay = self.get_retry_delay() bb.log.debug("GET - attempts left: %s; retry delay: %s", attempts_left, retry_delay) msg = '' while attempts_left > 0: attempts_left -= 1 try: r = self.gi.make_get_request(url, params=params) except (requests.exceptions.ConnectionError, ProtocolError) as e: msg = str(e) else: if r is None: msg = "GET: no response" if r.status_code == 200: if not json: return r elif not r.content: msg = "GET: empty response" else: try: return r.json() except ValueError: msg = "GET: invalid JSON : %r" % (r.content,) else: msg = "GET: error %s: %r" % (r.status_code, r.content) msg = "%s, %d attempts left" % (msg, attempts_left) if attempts_left <= 0: bb.log.error(msg) raise ConnectionError(msg) else: bb.log.warn(msg) time.sleep(retry_delay) def _post(self, payload, id=None, deleted=False, contents=None, url=None, files_attached=False): """ Do a generic POST request, composing the url from the contents of the arguments. Alternatively, an explicit ``url`` can be provided to use for the request. ``payload`` must be a dict that contains additional request arguments which will be sent along with the request body. The payload dict may contain file handles (in which case the ``files_attached`` flag must be set to true). If ``files_attached`` is set to ``False``, the request body will be JSON-encoded; otherwise, it will be encoded as multipart/form-data. The return value will contain the response body as a JSON object. """ if not url: url = self.gi._make_url(self, module_id=id, deleted=deleted, contents=contents) return self.gi.make_post_request(url, payload=payload, files_attached=files_attached) def _put(self, payload, id=None, url=None, params=None): """ Do a generic PUT request, composing the url from the contents of the arguments. Alternatively, an explicit ``url`` can be provided to use for the request. ``payload`` must be a dict that contains additional request arguments which will be sent along with the request body. This method returns the HTTP request object. 
""" if not url: url = self.gi._make_url(self, module_id=id) return self.gi.make_put_request(url, payload=payload, params=params) def _delete(self, payload=None, id=None, deleted=False, contents=None, url=None): """ Do a generic DELETE request, composing the url from the contents of the arguments. Alternatively, an explicit ``url`` can be provided to use for the request. ``payload`` must be a dict that can be converted into a JSON object (which will be done within this method) """ if not url: url = self.gi._make_url(self, module_id=id, deleted=deleted, contents=contents) if payload is not None: payload = json.dumps(payload) r = self.gi.make_delete_request(url, payload=payload) if r.status_code == 200: return r.json() # @see self.body for HTTP response body raise ConnectionError( "Unexpected HTTP status code: %s" % r.status_code, body=r.text ) bioblend-0.7.0/bioblend/galaxy/config/000077500000000000000000000000001261571066300176135ustar00rootroot00000000000000bioblend-0.7.0/bioblend/galaxy/config/__init__.py000066400000000000000000000022741261571066300217310ustar00rootroot00000000000000""" Contains possible interaction dealing with Galaxy configuration. """ from bioblend.galaxy.client import Client class ConfigClient(Client): def __init__(self, galaxy_instance): self.module = 'configuration' super(ConfigClient, self).__init__(galaxy_instance) def get_config(self): """ Get a list of attributes about the Galaxy instance. More attributes will be present if the user is an admin. :rtype: list :return: A list of attributes. For example:: {u'allow_library_path_paste': False, u'allow_user_creation': True, u'allow_user_dataset_purge': True, u'allow_user_deletion': False, u'enable_unique_workflow_defaults': False, u'ftp_upload_dir': u'/SOMEWHERE/galaxy/ftp_dir', u'ftp_upload_site': u'galaxy.com', u'library_import_dir': u'None', u'logo_url': None, u'support_url': u'http://wiki.g2.bx.psu.edu/Support', u'terms_url': None, u'user_library_import_dir': None, u'wiki_url': u'http://g2.trac.bx.psu.edu/'} """ return Client._get(self) bioblend-0.7.0/bioblend/galaxy/dataset_collections/000077500000000000000000000000001261571066300223715ustar00rootroot00000000000000bioblend-0.7.0/bioblend/galaxy/dataset_collections/__init__.py000066400000000000000000000036311261571066300245050ustar00rootroot00000000000000import six class HasElements(object): def __init__(self, name, type="list", elements=[]): self.name = name self.type = type if isinstance(elements, dict): self.elements = [dict(name=key, id=value, src="hda") for key, value in six.itervalues(elements)] elif elements: self.elements = elements def add(self, element): self.elements.append(element) return self class CollectionDescription(HasElements): def to_dict(self): return dict( name=self.name, collection_type=self.type, element_identifiers=[e.to_dict() for e in self.elements] ) class CollectionElement(HasElements): def to_dict(self): return dict( src="new_collection", name=self.name, collection_type=self.type, element_identifiers=[e.to_dict() for e in self.elements] ) class SimpleElement(object): def __init__(self, value): self.value = value def to_dict(self): return self.value class HistoryDatasetElement(SimpleElement): def __init__(self, name, id): super(HistoryDatasetElement, self).__init__(dict( name=name, src="hda", id=id, )) class HistoryDatasetCollectionElement(SimpleElement): def __init__(self, name, id): super(HistoryDatasetCollectionElement, self).__init__(dict( name=name, src="hdca", id=id, )) class LibraryDatasetElement(SimpleElement): def 
__init__(self, name, id): super(LibraryDatasetElement, self).__init__(dict( name=name, src="ldda", id=id, )) __all__ = [ "CollectionDescription", "CollectionElement", "HistoryDatasetElement", "HistoryDatasetCollectionElement", "LibraryDatasetElement", ] bioblend-0.7.0/bioblend/galaxy/datasets/000077500000000000000000000000001261571066300201565ustar00rootroot00000000000000bioblend-0.7.0/bioblend/galaxy/datasets/__init__.py000066400000000000000000000170131261571066300222710ustar00rootroot00000000000000""" Contains possible interactions with the Galaxy Datasets """ import logging import os import shlex import time import requests from six.moves import range from six.moves.urllib.parse import urljoin from six.moves.urllib.request import urlopen from bioblend.galaxy.client import Client log = logging.getLogger(__name__) class DatasetClient(Client): def __init__(self, galaxy_instance): self.module = 'datasets' super(DatasetClient, self).__init__(galaxy_instance) def show_dataset(self, dataset_id, deleted=False, hda_ldda='hda'): """ Display information about and/or content of a dataset. This can be a history or a library dataset. :type dataset_id: str :param dataset_id: Encoded dataset ID :type deleted: bool :param deleted: Whether to return results for a deleted dataset :type hda_ldda: str :param hda_ldda: Whether to show a history dataset ('hda' - the default) or library dataset ('ldda'). """ params = dict( hda_ldda=hda_ldda, ) return Client._get(self, id=dataset_id, deleted=deleted, params=params) def download_dataset(self, dataset_id, file_path=None, use_default_filename=True, wait_for_completion=False, maxwait=12000): """ Downloads the dataset identified by 'id'. :type dataset_id: str :param dataset_id: Encoded dataset ID :type file_path: str :param file_path: If the file_path argument is provided, the dataset will be streamed to disk at that path (Should not contain filename if use_default_name=True). If the file_path argument is not provided, the dataset content is loaded into memory and returned by the method (Memory consumption may be heavy as the entire file will be in memory). :type use_default_filename: bool :param use_default_filename: If the use_default_name parameter is True, the exported file will be saved as file_path/%s, where %s is the dataset name. If use_default_name is False, file_path is assumed to contain the full file path including filename. :type wait_for_completion: bool :param wait_for_completion: If wait_for_completion is True, this call will block until the dataset is ready. If the dataset state becomes invalid, a DatasetStateException will be thrown. :type maxwait: float :param maxwait: Time (in seconds) to wait for dataset to complete. If the dataset state is not complete within this time, a DatasetTimeoutException will be thrown. :rtype: dict :return: If a file_path argument is not provided, returns a dict containing the file_content. Otherwise returns nothing. """ if wait_for_completion: self._block_until_dataset_ready(dataset_id, maxwait=maxwait) dataset = self.show_dataset(dataset_id) if not dataset['state'] == 'ok': raise DatasetStateException("Dataset not ready. Dataset id: %s, current state: %s" % (dataset_id, dataset['state'])) # Galaxy release_13.01 and earlier does not have file_ext in the dataset # dict, so resort to data_type. 
# N.B.: data_type cannot be used for Galaxy release_14.10 and later # because it was changed to the Galaxy datatype class file_ext = dataset.get('file_ext', dataset['data_type']) # The preferred download URL is # '/api/histories//contents//display?to_ext=' # since the old URL: # '/dataset//display/to_ext=' # does not work when using REMOTE_USER with access disabled to # everything but /api without auth if 'url' in dataset: # This is Galaxy release_15.03 or later download_url = dataset['download_url'] + '?to_ext=' + file_ext else: # This is Galaxy release_15.01 or earlier, for which the preferred # URL does not work without a key, so resort to the old URL download_url = 'datasets/' + dataset_id + '/display?to_ext=' + file_ext url = urljoin(self.gi.base_url, download_url) # Don't use self.gi.make_get_request as currently the download API does # not require a key r = requests.get(url, verify=self.gi.verify) if file_path is None: return r.content else: if use_default_filename: try: # First try to get the filename from the response headers # We expect tokens 'filename' '=' to be followed by the quoted filename tokens = [x for x in shlex.shlex(r.headers['content-disposition'], posix=True)] header_filepath = tokens[tokens.index('filename') + 2] filename = os.path.basename(header_filepath) except (ValueError, IndexError): # If the filename was not in the header, build a useable filename ourselves. filename = dataset['name'] + '.' + file_ext file_local_path = os.path.join(file_path, filename) else: file_local_path = file_path with open(file_local_path, 'wb') as fp: fp.write(r.content) # Return location file was saved to return file_local_path def _is_dataset_complete(self, dataset_id): dataset = self.show_dataset(dataset_id) state = dataset['state'] return (state == 'ok' or state == 'error') def _block_until_dataset_ready(self, dataset_id, maxwait=12000, interval=30, raise_on_timeout=True): """ Wait until the dataset state changes to ok or error. Based on: https://github.com/salimfadhley/jenkinsapi/blob/master/jenkinsapi/api.py """ assert maxwait > 0 assert maxwait > interval assert interval > 0 for time_left in range(maxwait, 0, -interval): if self._is_dataset_complete(dataset_id): return log.warn("Waiting for dataset %s to complete. Will wait another %is" % (dataset_id, time_left)) time.sleep(interval) if raise_on_timeout: # noinspection PyUnboundLocalVariable raise DatasetTimeoutException("Waited too long for dataset to complete: %s" % dataset_id) def show_stderr(self, dataset_id): """ Display stderr output of a dataset. :type dataset_id: str :param dataset_id: Encoded dataset ID """ res = urlopen(self.url[:-len("/api/datasets/") + 1] + "/datasets/" + dataset_id + "/stderr") return res.read() def show_stdout(self, dataset_id): """ Display stdout output of a dataset. 
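# Hedged usage sketch (not in the original file): downloading a dataset with the
# download_dataset() method documented above. IDs and paths are placeholders.
from bioblend.galaxy import GalaxyInstance

gi = GalaxyInstance('http://127.0.0.1:8080', key='<api-key>')
saved_path = gi.datasets.download_dataset('<dataset-id>',
                                          file_path='/tmp',
                                          use_default_filename=True,
                                          wait_for_completion=True)
print(saved_path)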
:type dataset_id: str :param dataset_id: Encoded dataset ID """ res = urlopen(self.url[:-len("/api/datasets/") + 1] + "/datasets/" + dataset_id + "/stdout") return res.read() class DatasetStateException(Exception): def __init__(self, value): self.value = value def __str__(self): return repr(self.value) class DatasetTimeoutException(Exception): def __init__(self, value): self.value = value def __str__(self): return repr(self.value) bioblend-0.7.0/bioblend/galaxy/datatypes/000077500000000000000000000000001261571066300203445ustar00rootroot00000000000000bioblend-0.7.0/bioblend/galaxy/datatypes/__init__.py000066400000000000000000000032341261571066300224570ustar00rootroot00000000000000""" Contains possible interactions with the Galaxy Datatype """ from bioblend.galaxy.client import Client class DatatypesClient(Client): def __init__(self, galaxy_instance): self.module = 'datatypes' super(DatatypesClient, self).__init__(galaxy_instance) def get_datatypes(self, extension_only=False, upload_only=False): """ Get the list of all installed datatypes. :rtype: list :return: A list of datatype names. For example:: [u'snpmatrix', u'snptest', u'tabular', u'taxonomy', u'twobit', u'txt', u'vcf', u'wig', u'xgmml', u'xml'] """ params = {} if extension_only: params['extension_only'] = True if upload_only: params['upload_only'] = True return Client._get(self, params=params) def get_sniffers(self): """ Get the list of all installed sniffers. :rtype: list :return: A list of sniffer names. For example:: [u'galaxy.datatypes.tabular:Vcf', u'galaxy.datatypes.binary:TwoBit', u'galaxy.datatypes.binary:Bam', u'galaxy.datatypes.binary:Sff', u'galaxy.datatypes.xml:Phyloxml', u'galaxy.datatypes.xml:GenericXml', u'galaxy.datatypes.sequence:Maf', u'galaxy.datatypes.sequence:Lav', u'galaxy.datatypes.sequence:csFasta'] """ url = self.gi._make_url(self) url = '/'.join([url, "sniffers"]) return Client._get(self, url=url) bioblend-0.7.0/bioblend/galaxy/folders/000077500000000000000000000000001261571066300200045ustar00rootroot00000000000000bioblend-0.7.0/bioblend/galaxy/folders/__init__.py000066400000000000000000000023611261571066300221170ustar00rootroot00000000000000""" Contains possible interactions with the Galaxy library folders """ from bioblend.galaxy.client import Client class FoldersClient(Client): def __init__(self, galaxy_instance): self.module = 'folders' super(FoldersClient, self).__init__(galaxy_instance) def show_folder(self, folder_id): """ Display information about a folder. :type folder_id: str :param folder_id: the folder's encoded id, prefixed by 'F' :rtype: dict :return: dictionary including details of the folder """ return Client._get(self, id=folder_id) def delete_folder(self, folder_id, undelete=False): """ Marks the folder with the given ``id`` as `deleted` (or removes the `deleted` mark if the `undelete` param is True). :type folder_id: str :param folder_id: the folder's encoded id, prefixed by 'F' :type undelete: bool :param undelete: If set to True, the folder will be undeleted (i.e. 
the `deleted` mark will be removed) :returns: detailed folder information :rtype: dict """ payload = {'undelete': undelete} return Client._delete(self, payload, id=folder_id) bioblend-0.7.0/bioblend/galaxy/forms/000077500000000000000000000000001261571066300174745ustar00rootroot00000000000000bioblend-0.7.0/bioblend/galaxy/forms/__init__.py000066400000000000000000000035311261571066300216070ustar00rootroot00000000000000""" Contains possible interactions with the Galaxy Forms """ from bioblend.galaxy.client import Client class FormsClient(Client): def __init__(self, galaxy_instance): self.module = 'forms' super(FormsClient, self).__init__(galaxy_instance) def get_forms(self): """ Get the list of all forms. :rtype: list :returns: Displays a collection (list) of forms. For example:: [{u'id': u'f2db41e1fa331b3e', u'model_class': u'FormDefinition', u'name': u'First form', u'url': u'/api/forms/f2db41e1fa331b3e'}, {u'id': u'ebfb8f50c6abde6d', u'model_class': u'FormDefinition', u'name': u'second form', u'url': u'/api/forms/ebfb8f50c6abde6d'}] """ return Client._get(self) def show_form(self, form_id): """ Get details of a given form. :type form_id: str :param form_id: Encoded form ID :rtype: dict :return: A description of the given form. For example:: {u'desc': u'here it is ', u'fields': [], u'form_definition_current_id': u'f2db41e1fa331b3e', u'id': u'f2db41e1fa331b3e', u'layout': [], u'model_class': u'FormDefinition', u'name': u'First form', u'url': u'/api/forms/f2db41e1fa331b3e'} """ return Client._get(self, id=form_id) def create_form(self, form_xml_text): """ Create a new form. :type form_xml_text: str :param form_xml_text: Form xml to create a form on galaxy instance :rtype: str :returns: Unique url of newly created form with encoded id """ payload = form_xml_text return Client._post(self, payload=payload) bioblend-0.7.0/bioblend/galaxy/ftpfiles/000077500000000000000000000000001261571066300201625ustar00rootroot00000000000000bioblend-0.7.0/bioblend/galaxy/ftpfiles/__init__.py000066400000000000000000000007751261571066300223040ustar00rootroot00000000000000""" Contains possible interactions with the Galaxy FTP Files """ from bioblend.galaxy.client import Client class FTPFilesClient(Client): def __init__(self, galaxy_instance): self.module = 'ftp_files' super(FTPFilesClient, self).__init__(galaxy_instance) def get_ftp_files(self, deleted=False): """ Get a list of local files. 
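# Hedged sketch (not in the original source): listing form definitions and the
# user's FTP uploads through the clients defined above. URL and key are placeholders.
from bioblend.galaxy import GalaxyInstance

gi = GalaxyInstance('http://127.0.0.1:8080', key='<api-key>')
for form in gi.forms.get_forms():
    print(form['id'], form['name'])
print(gi.ftpfiles.get_ftp_files())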
:rtype: list :return: A list of dicts with details on individual files on FTP """ return Client._get(self) bioblend-0.7.0/bioblend/galaxy/genomes/000077500000000000000000000000001261571066300200035ustar00rootroot00000000000000bioblend-0.7.0/bioblend/galaxy/genomes/__init__.py000066400000000000000000000055671261571066300221310ustar00rootroot00000000000000""" Contains possible interactions with the Galaxy Histories """ from bioblend.galaxy.client import Client class GenomeClient(Client): def __init__(self, galaxy_instance): self.module = 'genomes' super(GenomeClient, self).__init__(galaxy_instance) def get_genomes(self): """ Returns a list of installed genomes """ genomes = Client._get(self) return genomes def show_genome(self, id, num=None, chrom=None, low=None, high=None): """ Returns information about build :type id: str :param id: Genome build ID to use :type num: str :param num: num :type chrom: str :param chrom: chrom :type low: str :param low: low :type high: str :param high: high """ params = {} if num: params['num'] = num if chrom: params['chrom'] = chrom if low: params['low'] = low if high: params['high'] = high return Client._get(self, id, params) def install_genome(self, func='download', source=None, dbkey=None, ncbi_name=None, ensembl_dbkey=None, url_dbkey=None, indexers=None): """ Download and/or index a genome. :type dbkey: str :param dbkey: DB key of the build to download, ignored unless 'UCSC' is specified as the source :type ncbi_name: str :param ncbi_name: NCBI's genome identifier, ignored unless NCBI is specified as the source :type ensembl_dbkey: str :param ensembl_dbkey: Ensembl's genome identifier, ignored unless Ensembl is specified as the source :type url_dbkey: str :param url_dbkey: DB key to use for this build, ignored unless URL is specified as the source :type source: str :param source: Data source for this build. Can be: UCSC, Ensembl, NCBI, URL :type indexers: list :param indexers: POST array of indexers to run after downloading (indexers[] = first, indexers[] = second, ...) :type func: str :param func: Allowed values: 'download', Download and index; 'index', Index only :rtype: dict :return: dict( status: 'ok', job: ) If error: dict( status: 'error', error: ) """ payload = {} if source: payload['source'] = source if func: payload['func'] = func if dbkey: payload['dbkey'] = dbkey if ncbi_name: payload['ncbi_name'] = ncbi_name if ensembl_dbkey: payload['ensembl_dbkey'] = ensembl_dbkey if url_dbkey: payload['url_dbkey'] = url_dbkey if indexers: payload['indexers'] = indexers return Client._post(self, payload) bioblend-0.7.0/bioblend/galaxy/groups/000077500000000000000000000000001261571066300176655ustar00rootroot00000000000000bioblend-0.7.0/bioblend/galaxy/groups/__init__.py000066400000000000000000000133001261571066300217730ustar00rootroot00000000000000""" Contains possible interactions with the Galaxy Groups """ from bioblend.galaxy.client import Client class GroupsClient(Client): def __init__(self, galaxy_instance): self.module = 'groups' super(GroupsClient, self).__init__(galaxy_instance) def get_groups(self): """ Get all (not deleted) groups. :rtype: list :return: A list of dicts with details on individual groups. For example:: [ {"name": "Listeria", "url": "/api/groups/33abac023ff186c2", "model_class": "Group", "id": "33abac023ff186c2"}, {"name": "LPN", "url": "/api/groups/73187219cd372cf8", "model_class": "Group", "id": "73187219cd372cf8"} ] """ return Client._get(self) def show_group(self, group_id): """ Get details of a given group. 
:type group_id: str :param group_id: Encoded group ID :rtype: dict :return: A description of group For example:: {"roles_url": "/api/groups/33abac023ff186c2/roles", "name": "Listeria", "url": "/api/groups/33abac023ff186c2", "users_url": "/api/groups/33abac023ff186c2/users", "model_class": "Group", "id": "33abac023ff186c2"} """ return Client._get(self, id=group_id) def create_group(self, group_name, user_ids=[], role_ids=[]): """ Create a new group. :type group_name: str :param group_name: A name for the new group :type user_ids: list :param user_ids: A list of encoded user IDs to add to the new group :type role_ids: list :param role_ids: A list of encoded role IDs to add to the new group :rtype: list :return: A (size 1) list with newly created group details, like:: [{u'id': u'7c9636938c3e83bf', u'model_class': u'Group', u'name': u'My Group Name', u'url': u'/api/groups/7c9636938c3e83bf'}] """ payload = {} payload['name'] = group_name payload['user_ids'] = user_ids payload['role_ids'] = role_ids return Client._post(self, payload) def update_group(self, group_id, group_name=None, user_ids=[], role_ids=[]): """ Update a group. :type group_id: str :param group_id: Encoded group ID :type group_name: str :param group_name: A new name for the group. If None, the group name is not changed. :type user_ids: list :param user_ids: New list of encoded user IDs for the group. It will substitute the previous list of users (with [] if not specified) :type role_ids: list :param role_ids: New list of encoded role IDs for the group. It will substitute the previous list of roles (with [] if not specified) :rtype: int :return: status code """ payload = {} payload['name'] = group_name payload['user_ids'] = user_ids payload['role_ids'] = role_ids return Client._put(self, payload, id=group_id).status_code def get_group_users(self, group_id): """ Get the list of users associated to the given group. :type group_id: str :param group_id: Encoded group ID :rtype: list of dicts :return: List of group users' info """ url = '/'.join([self.gi._make_url(self, group_id), 'users']) return Client._get(self, url=url) def get_group_roles(self, group_id): """ Get the list of roles associated to the given group. :type group_id: str :param group_id: Encoded group ID :rtype: list of dicts :return: List of group roles' info """ url = '/'.join([self.gi._make_url(self, group_id), 'roles']) return Client._get(self, url=url) def add_group_user(self, group_id, user_id): """ Add a user to the given group. :type group_id: str :param group_id: Encoded group ID :type user_id: str :param user_id: Encoded user ID to add to the group :rtype: dict :return: Added group user's info """ url = '/'.join([self.gi._make_url(self, group_id), 'users', user_id]) return Client._put(self, dict(), url=url).json() def add_group_role(self, group_id, role_id): """ Add a role to the given group. :type group_id: str :param group_id: Encoded group ID :type role_id: str :param role_id: Encoded role ID to add to the group :rtype: dict :return: Added group role's info """ url = '/'.join([self.gi._make_url(self, group_id), 'roles', role_id]) return Client._put(self, {}, url=url).json() def delete_group_user(self, group_id, user_id): """ Remove a user from the given group. 
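# Hedged sketch (not in the original source): creating a group and attaching a user
# to it with the methods documented above. IDs are placeholders, and group creation
# is assumed to require an admin API key.
from bioblend.galaxy import GalaxyInstance

gi = GalaxyInstance('http://127.0.0.1:8080', key='<admin-api-key>')
new_group = gi.groups.create_group('My Group Name')[0]  # create_group returns a one-item list
gi.groups.add_group_user(new_group['id'], '<user-id>')
print(gi.groups.get_group_users(new_group['id']))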
:type group_id: str :param group_id: Encoded group ID :type user_id: str :param user_id: Encoded user ID to remove from the group """ url = '/'.join([self.gi._make_url(self, group_id), 'users', user_id]) return Client._delete(self, {}, url=url) def delete_group_role(self, group_id, role_id): """ Remove a role from the given group. :type group_id: str :param group_id: Encoded group ID :type role_id: str :param role_id: Encoded role ID to remove from the group """ url = '/'.join([self.gi._make_url(self, group_id), 'roles', role_id]) return Client._delete(self, {}, url=url) bioblend-0.7.0/bioblend/galaxy/histories/000077500000000000000000000000001261571066300203575ustar00rootroot00000000000000bioblend-0.7.0/bioblend/galaxy/histories/__init__.py000066400000000000000000000440351261571066300224760ustar00rootroot00000000000000""" Contains possible interactions with the Galaxy Histories """ import os import re import time import six import bioblend from bioblend.galaxy.client import Client class HistoryClient(Client): def __init__(self, galaxy_instance): self.module = 'histories' super(HistoryClient, self).__init__(galaxy_instance) def create_history(self, name=None): """ Create a new history, optionally setting the ``name``. :type name: str :param name: Optional name for new history :rtype: dict :return: Dictionary containing information about newly created history """ payload = {} if name is not None: payload['name'] = name return Client._post(self, payload) def get_histories(self, history_id=None, name=None, deleted=False): """ Get all histories or filter the specific one(s) via the provided ``name`` or ``history_id``. Provide only one argument, ``name`` or ``history_id``, but not both. If ``deleted`` is set to ``True``, return histories that have been deleted. :type history_id: str :param history_id: Encoded history ID to filter on :type name: str :param name: Name of history to filter on :rtype: list :return: Return a list of history element dicts. If more than one history matches the given ``name``, return the list of all the histories with the given name """ if history_id is not None and name is not None: raise ValueError('Provide only one argument between name or history_id, but not both') histories = Client._get(self, deleted=deleted) if history_id is not None: history = next((_ for _ in histories if _['id'] == history_id), None) histories = [history] if history is not None else [] elif name is not None: histories = [_ for _ in histories if _['name'] == name] return histories def show_history(self, history_id, contents=False, deleted=None, visible=None, details=None, types=None): """ Get details of a given history. By default, just get the history meta information. :type history_id: str :param history_id: Encoded history ID to filter on :type contents: bool :param contents: When ``True``, the complete list of datasets in the given history. :type deleted: str :param deleted: Used when contents=True, includes deleted datasets in history dataset list :type visible: str :param visible: Used when contents=True, includes only visible datasets in history dataset list :type details: str :param details: Used when contents=True, includes dataset details. Set to 'all' for the most information :type types: str :param types: ??? 
:rtype: dict :return: details of the given history """ params = {} if contents: if details: params['details'] = details if deleted is not None: params['deleted'] = deleted if visible is not None: params['visible'] = visible if types is not None: params['types'] = types.join(",") return Client._get(self, id=history_id, contents=contents, params=params) def delete_dataset(self, history_id, dataset_id): """ Mark corresponding dataset as deleted. :type history_id: str :param history_id: Encoded history ID :type dataset_id: str :param dataset_id: Encoded dataset ID """ url = self.gi._make_url(self, history_id, contents=True) # Append the dataset_id to the base history contents URL url = '/'.join([url, dataset_id]) Client._delete(self, payload={}, url=url) def delete_dataset_collection(self, history_id, dataset_collection_id): """ Mark corresponding dataset collection as deleted. :type history_id: str :param history_id: Encoded history ID :type dataset_collection_id: str :param dataset_collection_id: Encoded dataset collection ID """ url = self.gi._make_url(self, history_id, contents=True) # Append the dataset_id to the base history contents URL url = '/'.join([url, "dataset_collections", dataset_collection_id]) Client._delete(self, payload={}, url=url) def show_dataset(self, history_id, dataset_id): """ Get details about a given history dataset. :type history_id: str :param history_id: Encoded history ID :type dataset_id: str :param dataset_id: Encoded dataset ID """ url = self.gi._make_url(self, history_id, contents=True) # Append the dataset_id to the base history contents URL url = '/'.join([url, dataset_id]) return Client._get(self, url=url) def show_dataset_collection(self, history_id, dataset_collection_id): """ Get details about a given history dataset collection. :type history_id: str :param history_id: Encoded history ID :type dataset_collection_id: str :param dataset_collection_id: Encoded dataset collection ID """ url = self.gi._make_url(self, history_id, contents=True) url = '/'.join([url, "dataset_collections", dataset_collection_id]) return Client._get(self, url=url) def show_matching_datasets(self, history_id, name_filter=None): """ Get dataset details for matching datasets within a history. :type history_id: str :param history_id: Encoded history ID :type name_filter: str :param name_filter: Only datasets whose name matches the ``name_filter`` regular expression will be returned; use plain strings for exact matches and None to match all datasets in the history """ if isinstance(name_filter, six.string_types): name_filter = re.compile(name_filter + '$') return [self.show_dataset(history_id, h['id']) for h in self.show_history(history_id, contents=True) if name_filter is None or name_filter.match(h['name'])] def show_dataset_provenance(self, history_id, dataset_id, follow=False): """ Get details related to how dataset was created (``id``, ``job_id``, ``tool_id``, ``stdout``, ``stderr``, ``parameters``, ``inputs``, etc...). :type history_id: str :param history_id: Encoded history ID :type dataset_id: str :param dataset_id: Encoded dataset ID :type follow: bool :param follow: If ``follow`` is ``True``, recursively fetch dataset provenance information for all inputs and their inputs, etc... """ url = self.gi._make_url(self, history_id, contents=True) url = '/'.join([url, dataset_id, "provenance"]) return Client._get(self, url=url) def update_history(self, history_id, name=None, annotation=None, **kwds): """ Update history metadata information. 
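# Hedged sketch (not in the original source): basic history handling with the
# client documented above. The dataset name filter is just an illustrative regex.
from bioblend.galaxy import GalaxyInstance

gi = GalaxyInstance('http://127.0.0.1:8080', key='<api-key>')
hist = gi.histories.create_history(name='bioblend test')
contents = gi.histories.show_history(hist['id'], contents=True, details='all')
matching = gi.histories.show_matching_datasets(hist['id'], name_filter='.*fastq')
print(len(contents), len(matching))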
Some of the attributes that can be modified are documented below. :type history_id: str :param history_id: Encoded history ID :type name: str :param name: Replace history name with the given string :type annotation: str :param annotation: Replace history annotation with given string :type deleted: bool :param deleted: Mark or unmark history as deleted :type published: bool :param published: Mark or unmark history as published :type importable: bool :param importable: Mark or unmark history as importable :type tags: list :param tags: Replace history tags with the given list :rtype: int :return: status code """ kwds['name'] = name kwds['annotation'] = annotation return Client._put(self, kwds, id=history_id).status_code def update_dataset(self, history_id, dataset_id, **kwds): """ Update history dataset metadata. Some of the attributes that can be modified are documented below. :type history_id: str :param history_id: Encoded history ID :type dataset_id: str :param dataset_id: Id of the dataset :type name: str :param name: Replace history dataset name with the given string :type annotation: str :param annotation: Replace history dataset annotation with given string :type deleted: bool :param deleted: Mark or unmark history dataset as deleted :type visible: bool :param visible: Mark or unmark history dataset as visible :rtype: int :return: status code """ url = self.gi._make_url(self, history_id, contents=True) # Append the dataset_id to the base history contents URL url = '/'.join([url, dataset_id]) return Client._put(self, payload=kwds, url=url).status_code def update_dataset_collection(self, history_id, dataset_collection_id, **kwds): """ Update history dataset collection metadata. Some of the attributes that can be modified are documented below. :type history_id: str :param history_id: Encoded history ID :type dataset_collection_id: str :param dataset_collection_id: Encoded dataset_collection ID :type name: str :param name: Replace history dataset collection name with the given string :type deleted: bool :param deleted: Mark or unmark history dataset collection as deleted :type visible: bool :param visible: Mark or unmark history dataset collection as visible :rtype: int :return: status code """ url = self.gi._make_url(self, history_id, contents=True) url = '/'.join([url, "dataset_collections", dataset_collection_id]) return Client._put(self, payload=kwds, url=url).status_code def create_history_tag(self, history_id, tag): """ Create history tag :type history_id: str :param history_id: Encoded history ID :type tag: str :param tag: Add tag to history :rtype: dict :return: A dictionary with information regarding the tag. For example:: {'model_class':'HistoryTagAssociation', 'user_tname': 'NGS_PE_RUN', 'id': 'f792763bee8d277a', 'user_value': None} """ # empty payload since we are adding the new tag using the url payload = {} # creating the url url = self.url url = '/'.join([url, history_id, 'tags', tag]) return Client._post(self, payload, url=url) def upload_dataset_from_library(self, history_id, lib_dataset_id): """ Upload a dataset into the history from a library. Requires the library dataset ID, which can be obtained from the library contents. 
:type history_id: str :param history_id: Encoded history ID :type lib_dataset_id: str :param lib_dataset_id: Encoded library dataset ID """ payload = { 'content': lib_dataset_id, 'source': 'library', 'from_ld_id': lib_dataset_id, # compatibility with old API } return Client._post(self, payload, id=history_id, contents=True) def create_dataset_collection(self, history_id, collection_description): """ Create a new dataset collection :type history_id: str :param history_id: Encoded history ID :type collection_description: str :param collection_description: a description of the dataset collection """ try: collection_description = collection_description.to_dict() except AttributeError: pass payload = dict( name=collection_description["name"], type="dataset_collection", collection_type=collection_description["collection_type"], element_identifiers=collection_description["element_identifiers"], ) return Client._post(self, payload, id=history_id, contents=True) def download_dataset(self, history_id, dataset_id, file_path, use_default_filename=True): """ Download a ``dataset_id`` from history with ``history_id`` to a file on the local file system, saving it to ``file_path``. Refer to ``bioblend.galaxy.dataset.DatasetClient.download_dataset()`` for the other available parameters. """ meta = self.show_dataset(history_id, dataset_id) if use_default_filename: file_local_path = os.path.join(file_path, meta['name']) else: file_local_path = file_path return self.gi.datasets.download_dataset(dataset_id, file_path=file_local_path, use_default_filename=False) def delete_history(self, history_id, purge=False): """ Delete a history. :type history_id: str :param history_id: Encoded history ID :type purge: bool :param purge: if ``True``, also purge (permanently delete) the history .. note:: For the purge option to work, the Galaxy instance must have the ``allow_user_dataset_purge`` option set to ``True`` in the ``config/galaxy.ini`` configuration file. """ payload = {} if purge is True: payload['purge'] = purge return Client._delete(self, payload, id=history_id) def undelete_history(self, history_id): """ Undelete a history :type history_id: str :param history_id: Encoded history ID """ url = self.gi._make_url(self, history_id, deleted=True) # Append the 'undelete' action to the history URL url = '/'.join([url, 'undelete']) return Client._post(self, payload={}, url=url) def get_status(self, history_id): """ Returns the state of this history :type history_id: str :param history_id: Encoded history ID :rtype: dict :return: A dict documenting the current state of the history. Has the following keys: 'state' = This is the current state of the history, such as ok, error, new etc. 'state_details' = Contains individual statistics for various dataset states. 'percent_complete' = The overall number of datasets processed to completion. """ state = {} history = self.show_history(history_id) state['state'] = history['state'] if history.get('state_details') is not None: state['state_details'] = history['state_details'] total_complete = sum(six.itervalues(history['state_details'])) if total_complete > 0: state['percent_complete'] = 100 * history['state_details']['ok'] / total_complete else: state['percent_complete'] = 0 return state def get_current_history(self): """ Deprecated method. Just an alias for get_most_recently_used_history(). """ return self.get_most_recently_used_history() def get_most_recently_used_history(self): """ Returns the current user's most recently used history (not deleted). 
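# Hedged sketch (not in the original source): building a list collection from two
# existing history datasets using the helpers defined in
# bioblend.galaxy.dataset_collections together with create_dataset_collection()
# documented above. All IDs are placeholders.
from bioblend.galaxy import GalaxyInstance
from bioblend.galaxy.dataset_collections import (CollectionDescription,
                                                 HistoryDatasetElement)

gi = GalaxyInstance('http://127.0.0.1:8080', key='<api-key>')
description = CollectionDescription(
    name='my collection',
    elements=[
        HistoryDatasetElement(name='sample1', id='<dataset-id-1>'),
        HistoryDatasetElement(name='sample2', id='<dataset-id-2>'),
    ])
collection = gi.histories.create_dataset_collection('<history-id>', description)
print(collection)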
""" url = self.gi._make_url(self, None) url = '/'.join([url, 'most_recently_used']) return Client._get(self, url=url) def export_history(self, history_id, gzip=True, include_hidden=False, include_deleted=False, wait=False): """ Start a job to create an export archive for the given history. :type history_id: str :param history_id: history ID :type gzip: bool :param gzip: create .tar.gz archive if ``True``, else .tar :type include_hidden: bool :param include_hidden: whether to include hidden datasets in the export :type include_deleted: bool :param include_deleted: whether to include deleted datasets in the export :type wait: bool :param wait: if ``True``, block until the export is ready; else, return immediately :rtype: str :return: ``jeha_id`` of the export, or empty if ``wait`` is ``False`` and the export is not ready. """ params = { 'gzip': gzip, 'include_hidden': include_hidden, 'include_deleted': include_deleted, } url = '%s/exports' % self.gi._make_url(self, history_id) while True: r = Client._put(self, {}, url=url, params=params) if not wait or r.status_code == 200: break time.sleep(1) contents = r.json() if contents: jeha_id = contents['download_url'].rsplit('/', 1)[-1] else: jeha_id = '' # export is not ready return jeha_id def download_history(self, history_id, jeha_id, outf, chunk_size=bioblend.CHUNK_SIZE): """ Download a history export archive. Use :meth:`export_history` to create an export. :type history_id: str :param history_id: history ID :type jeha_id: str :param jeha_id: jeha ID (this should be obtained via :meth:`export_history`) :type outf: file :param outf: output file object, open for writing in binary mode :type chunk_size: int :param chunk_size: how many bytes at a time should be read into memory """ url = '%s/exports/%s' % ( self.gi._make_url(self, module_id=history_id), jeha_id) r = self.gi.make_get_request(url, stream=True) r.raise_for_status() for chunk in r.iter_content(chunk_size): outf.write(chunk) bioblend-0.7.0/bioblend/galaxy/jobs/000077500000000000000000000000001261571066300173035ustar00rootroot00000000000000bioblend-0.7.0/bioblend/galaxy/jobs/__init__.py000066400000000000000000000073501261571066300214210ustar00rootroot00000000000000""" Contains possible interactions with the Galaxy Jobs """ from bioblend.galaxy.client import Client class JobsClient(Client): def __init__(self, galaxy_instance): self.module = 'jobs' super(JobsClient, self).__init__(galaxy_instance) def get_jobs(self): """ Get the list of jobs of the current user. :rtype: list :returns: list of dictionaries containing summary job information. For example:: [{u'create_time': u'2014-03-01T16:16:48.640550', u'exit_code': 0, u'id': u'ebfb8f50c6abde6d', u'model_class': u'Job', u'state': u'ok', u'tool_id': u'fasta2tab', u'update_time': u'2014-03-01T16:16:50.657399'}, {u'create_time': u'2014-03-01T16:05:34.851246', u'exit_code': 0, u'id': u'1cd8e2f6b131e891', u'model_class': u'Job', u'state': u'ok', u'tool_id': u'upload1', u'update_time': u'2014-03-01T16:05:39.558458'}] """ return Client._get(self) def show_job(self, job_id, full_details=False): """ Get details of a given job of the current user. :type job_id: str :param job_id: job ID :type full_details: bool :param full_details: when ``True``, the complete list of details for the given job. :rtype: dict :return: A description of the given job. 
For example:: {u'create_time': u'2014-03-01T16:17:29.828624', u'exit_code': 0, u'id': u'a799d38679e985db', u'inputs': {u'input': {u'id': u'ebfb8f50c6abde6d', u'src': u'hda'}}, u'model_class': u'Job', u'outputs': {u'output': {u'id': u'a799d38679e985db', u'src': u'hda'}}, u'params': {u'chromInfo': u'"/opt/galaxy-central/tool-data/shared/ucsc/chrom/?.len"', u'dbkey': u'"?"', u'seq_col': u'"2"', u'title_col': u'["1"]'}, u'state': u'ok', u'tool_id': u'tab2fasta', u'update_time': u'2014-03-01T16:17:31.930728'} """ params = {} if full_details: params['full'] = full_details return Client._get(self, id=job_id, params=params) def get_state(self, job_id): """ Display the current state for a given job of the current user. :type job_id: str :param job_id: job ID :rtype: str :return: state of the given job among the following values: `new`, `queued`, `running`, `waiting`, `ok`. If the state cannot be retrieved, an empty string is returned. .. versionadded:: 0.5.3 """ return self.show_job(job_id).get('state', '') def search_jobs(self, job_info): """ Return jobs for the current user based payload content. :type job_info: dict :param job_info: dictionary containing description of the requested job. This is in the same format as a request to POST /api/tools would take to initiate a job :rtype: list :returns: list of dictionaries containing summary job information of the jobs that match the requested job run This method is designed to scan the list of previously run jobs and find records of jobs that had the exact some input parameters and datasets. This can be used to minimize the amount of repeated work, and simply recycle the old results. """ payload = job_info url = self.gi._make_url(self) url = '/'.join([url, "search"]) return Client._post(self, url=url, payload=payload) bioblend-0.7.0/bioblend/galaxy/libraries/000077500000000000000000000000001261571066300203225ustar00rootroot00000000000000bioblend-0.7.0/bioblend/galaxy/libraries/__init__.py000066400000000000000000000463301261571066300224410ustar00rootroot00000000000000""" Contains possible interactions with the Galaxy Data Libraries """ from bioblend.galaxy.client import Client from bioblend.util import attach_file class LibraryClient(Client): def __init__(self, galaxy_instance): self.module = 'libraries' super(LibraryClient, self).__init__(galaxy_instance) def create_library(self, name, description=None, synopsis=None): """ Create a data library with the properties defined in the arguments. Return a list of JSON dicts, looking like so:: :type name: str :param name: Name of the new data library :type description: str :param description: Optional data library description :type synopsis: str :param synopsis: Optional data library synopsis :rtype: dict :return: details of the created library: {"id": "f740ab636b360a70", "name": "Library from bioblend", "url": "/api/libraries/f740ab636b360a70"} """ payload = {'name': name} if description: payload['description'] = description if synopsis: payload['synopsis'] = synopsis return Client._post(self, payload) def delete_library(self, library_id): """ Delete a data library. :type library_id: str :param library_id: Encoded data library ID identifying the library to be deleted .. warning:: Deleting a data library is irreversible - all of the data from the library will be permanently deleted. """ payload = {} return Client._delete(self, payload, id=library_id) def __show_item(self, library_id, item_id): """ Get details about a given library item. 
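This helper backs :meth:`show_dataset` and :meth:`show_folder`; a sketch of the equivalent public call (the URL, API key and IDs are placeholders)::

    from bioblend.galaxy import GalaxyInstance

    gi = GalaxyInstance('http://127.0.0.1:8080', 'your-api-key')
    ds_info = gi.libraries.show_dataset('a-library-id', 'a-dataset-id')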
""" url = self.gi._make_url(self, library_id, contents=True) url = '/'.join([url, item_id]) return Client._get(self, url=url) def delete_library_dataset(self, library_id, dataset_id, purged=False): """ Delete a library dataset in a data library. :type library_id: str :param library_id: library id where dataset is found in :type dataset_id: str :param dataset_id: id of the dataset to be deleted :type purged: bool :param purged: Indicate that the dataset should be purged (permanently deleted) :rtype: dict :return: A dictionary containing the dataset id and whether the dataset has been deleted For example:: {u'deleted': True, u'id': u'60e680a037f41974'} """ url = self.gi._make_url(self, library_id, contents=True) # Append the dataset_id to the base history contents URL url = '/'.join([url, dataset_id]) return Client._delete(self, url=url, payload={'purged': purged}) def show_dataset(self, library_id, dataset_id): """ Get details about a given library dataset. The required ``library_id`` can be obtained from the datasets's library content details. :type library_id: str :param library_id: library id where dataset is found in :type dataset_id: str :param dataset_id: id of the dataset to be inspected :rtype: dict :return: A dictionary containing information about the dataset in the library """ return self.__show_item(library_id, dataset_id) def show_folder(self, library_id, folder_id): """ Get details about a given folder. The required ``folder_id`` can be obtained from the folder's library content details. :type library_id: str :param library_id: library id to inspect folders in :type folder_id: str :param folder_id: id of the folder to be inspected """ return self.__show_item(library_id, folder_id) def _get_root_folder_id(self, library_id): """ Find the root folder (i.e. '/') of a library. :type library_id: str :param library_id: library id to find root of """ l = self.show_library(library_id=library_id) if 'root_folder_id' in l: return l['root_folder_id'] # Galaxy previous to release_13.04 does not have root_folder_id in # library dictionary, so resort to find the folder with name '/' library_contents = self.show_library(library_id=library_id, contents=True) for f in library_contents: if f['name'] == '/': return f['id'] def create_folder(self, library_id, folder_name, description=None, base_folder_id=None): """ Create a folder in a library. :type library_id: str :param library_id: library id to use :type folder_name: str :param folder_name: name of the new folder in the data library :type description: str :param description: description of the new folder in the data library :type base_folder_id: str :param base_folder_id: id of the folder where to create the new folder. If not provided, the root folder will be used """ # Get root folder ID if no ID was provided if base_folder_id is None: base_folder_id = self._get_root_folder_id(library_id) # Compose the payload payload = {} payload['name'] = folder_name payload['folder_id'] = base_folder_id payload['create_type'] = 'folder' if description is not None: payload['description'] = description return Client._post(self, payload, id=library_id, contents=True) def get_folders(self, library_id, folder_id=None, name=None): """ Get all the folders or filter specific one(s) via the provided ``name`` or ``folder_id`` in data library with id ``library_id``. Provide only one argument: ``name`` or ``folder_id``, but not both. :type folder_id: str :param folder_id: filter for folder by folder id :type name: str :param name: filter for folder by name. 
For ``name`` specify the full path of the folder starting from the library's root folder, e.g. ``/subfolder/subsubfolder``. :rtype: list :return: list of dicts each containing basic information about a folder. """ if folder_id is not None and name is not None: raise ValueError('Provide only one argument between name or folder_id, but not both') library_contents = self.show_library(library_id=library_id, contents=True) if folder_id is not None: folder = next((_ for _ in library_contents if _['type'] == 'folder' and _['id'] == folder_id), None) folders = [folder] if folder is not None else [] elif name is not None: folders = [_ for _ in library_contents if _['type'] == 'folder' and _['name'] == name] else: folders = [_ for _ in library_contents if _['type'] == 'folder'] return folders def get_libraries(self, library_id=None, name=None, deleted=False): """ Get all the libraries or filter for specific one(s) via the provided name or ID. Provide only one argument: ``name`` or ``library_id``, but not both. :type library_id: str :param library_id: filter for library by library id :type name: str :param name: If ``name`` is set and multiple names match the given name, all the libraries matching the argument will be returned. :type deleted: bool :param deleted: If set to ``True``, return libraries that have been deleted. :rtype: list :return: list of dicts each containing basic information about a library. """ if library_id is not None and name is not None: raise ValueError('Provide only one argument between name or library_id, but not both') libraries = Client._get(self, deleted=deleted) if library_id is not None: library = next((_ for _ in libraries if _['id'] == library_id), None) libraries = [library] if library is not None else [] if name is not None: libraries = [_ for _ in libraries if _['name'] == name] return libraries def show_library(self, library_id, contents=False): """ Get information about a library. :type library_id: str :param library_id: filter for library by library id :type contents: bool :param contents: True if want to get contents of the library (rather than just the library details). :rtype: dict :return: details of the given library """ return Client._get(self, id=library_id, contents=contents) def _do_upload(self, library_id, **keywords): """ Set up the POST request and do the actual data upload to a data library. This method should not be called directly but instead refer to the methods specific for the desired type of data upload. 
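For instance, uploading a local file goes through :meth:`upload_file_from_local_path`, which delegates to this method; a sketch (the URL, API key, library id and file path are placeholders)::

    from bioblend.galaxy import GalaxyInstance

    gi = GalaxyInstance('http://127.0.0.1:8080', 'your-api-key')
    gi.libraries.upload_file_from_local_path(
        'a-library-id', '/tmp/reads.fastq', file_type='fastqsanger')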
""" folder_id = keywords.get('folder_id', None) if folder_id is None: folder_id = self._get_root_folder_id(library_id) files_attached = False # Compose the payload dict payload = {} payload['folder_id'] = folder_id payload['file_type'] = keywords.get('file_type', 'auto') payload['dbkey'] = keywords.get('dbkey', '?') payload['create_type'] = 'file' if keywords.get("roles", None): payload["roles"] = keywords["roles"] if keywords.get("link_data_only", None) and keywords['link_data_only'] != 'copy_files': payload["link_data_only"] = 'link_to_files' # upload options if keywords.get('file_url', None) is not None: payload['upload_option'] = 'upload_file' payload['files_0|url_paste'] = keywords['file_url'] elif keywords.get('pasted_content', None) is not None: payload['upload_option'] = 'upload_file' payload['files_0|url_paste'] = keywords['pasted_content'] elif keywords.get('server_dir', None) is not None: payload['upload_option'] = 'upload_directory' payload['server_dir'] = keywords['server_dir'] elif keywords.get('file_local_path', None) is not None: payload['upload_option'] = 'upload_file' payload['files_0|file_data'] = attach_file(keywords['file_local_path']) files_attached = True elif keywords.get("filesystem_paths", None) is not None: payload["upload_option"] = "upload_paths" payload["filesystem_paths"] = keywords["filesystem_paths"] try: return Client._post(self, payload, id=library_id, contents=True, files_attached=files_attached) finally: if payload.get('files_0|file_data', None) is not None: payload['files_0|file_data'].close() def upload_file_from_url(self, library_id, file_url, folder_id=None, file_type='auto', dbkey='?'): """ Upload a file to a library from a URL. :type library_id: str :param library_id: id of the library where to place the uploaded file :type file_url: str :param file_url: URL of the file to upload :type folder_id: str :param folder_id: id of the folder where to place the uploaded file. If not provided, the root folder will be used :type file_type: str :param file_type: Galaxy file format name :type dbkey: str :param dbkey: Dbkey """ return self._do_upload(library_id, file_url=file_url, folder_id=folder_id, file_type=file_type, dbkey=dbkey) def upload_file_contents(self, library_id, pasted_content, folder_id=None, file_type='auto', dbkey='?'): """ Upload pasted_content to a data library as a new file. :type library_id: str :param library_id: id of the library where to place the uploaded file :type pasted_content: str :param pasted_content: Content to upload into the library :type folder_id: str :param folder_id: id of the folder where to place the uploaded file. If not provided, the root folder will be used :type file_type: str :param file_type: Galaxy file format name :type dbkey: str :param dbkey: Dbkey """ return self._do_upload(library_id, pasted_content=pasted_content, folder_id=folder_id, file_type=file_type, dbkey=dbkey) def upload_file_from_local_path(self, library_id, file_local_path, folder_id=None, file_type='auto', dbkey='?'): """ Read local file contents from file_local_path and upload data to a library. :type library_id: str :param library_id: id of the library where to place the uploaded file :type file_local_path: str :param file_local_path: path of local file to upload :type folder_id: str :param folder_id: id of the folder where to place the uploaded file. 
If not provided, the root folder will be used :type file_type: str :param file_type: Galaxy file format name :type dbkey: str :param dbkey: Dbkey """ return self._do_upload(library_id, file_local_path=file_local_path, folder_id=folder_id, file_type=file_type, dbkey=dbkey) def upload_file_from_server(self, library_id, server_dir, folder_id=None, file_type='auto', dbkey='?', link_data_only=None, roles=""): """ Upload all files in the specified subdirectory of the Galaxy library import directory to a library. .. note:: For this method to work, the Galaxy instance must have the ``library_import_dir`` option configured in the ``config/galaxy.ini`` configuration file. :type library_id: str :param library_id: id of the library where to place the uploaded file :type server_dir: str :param server_dir: relative path of the subdirectory of ``library_import_dir`` to upload. All and only the files (i.e. no subdirectories) contained in the specified directory will be uploaded. :type folder_id: str :param folder_id: id of the folder where to place the uploaded files. If not provided, the root folder will be used :type file_type: str :param file_type: Galaxy file format name :type dbkey: str :param dbkey: Dbkey :type link_data_only: str :param link_data_only: either 'copy_files' (default) or 'link_to_files'. Setting to 'link_to_files' symlinks instead of copying the files :type roles: str :param roles: ??? """ return self._do_upload(library_id, server_dir=server_dir, folder_id=folder_id, file_type=file_type, dbkey=dbkey, link_data_only=link_data_only, roles=roles) def upload_from_galaxy_filesystem(self, library_id, filesystem_paths, folder_id=None, file_type="auto", dbkey="?", link_data_only=None, roles=""): """ Upload a set of files already present on the filesystem of the Galaxy server to a library. .. note:: For this method to work, the Galaxy instance must have the ``allow_library_path_paste`` option set to ``True`` in the ``config/galaxy.ini`` configuration file. :type library_id: str :param library_id: id of the library where to place the uploaded file :type filesystem_paths: str :param filesystem_paths: file paths on the Galaxy server to upload to the library, one file per line :type folder_id: str :param folder_id: id of the folder where to place the uploaded files. If not provided, the root folder will be used :type file_type: str :param file_type: Galaxy file format name :type dbkey: str :param dbkey: Dbkey :type link_data_only: str :param link_data_only: either 'copy_files' (default) or 'link_to_files'. Setting to 'link_to_files' symlinks instead of copying the files :type roles: str :param roles: ??? """ return self._do_upload(library_id, filesystem_paths=filesystem_paths, folder_id=folder_id, file_type=file_type, dbkey=dbkey, link_data_only=link_data_only, roles=roles) def copy_from_dataset(self, library_id, dataset_id, folder_id=None, message=''): """ Copy a Galaxy dataset into a library. :type library_id: str :param library_id: id of the library where to place the uploaded file :type dataset_id: str :param dataset_id: id of the dataset to copy from :type folder_id: str :param folder_id: id of the folder where to place the uploaded files. 
If not provided, the root folder will be used :type message: str :param message: message for copying action """ if folder_id is None: folder_id = self._get_root_folder_id(library_id) payload = {} payload['folder_id'] = folder_id payload['create_type'] = 'file' payload['from_hda_id'] = dataset_id payload['ldda_message'] = message return Client._post(self, payload, id=library_id, contents=True) def get_library_permissions(self, library_id): """ Get the permessions for a library. :type library_id: str :param library_id: id of the library :rtype: dict :return: dictionary with all applicable permissions' values """ url = '/'.join([self.gi._make_url(self, library_id), 'permissions']) return Client._get(self, url=url) def set_library_permissions(self, library_id, access_in=None, modify_in=None, add_in=None, manage_in=None): """ Set the permissions for a library. Note: it will override all security for this library even if you leave out a permission type. :type library_id: str :param library_id: id of the library :type access_in: list :param access_in: list of role ids :type modify_in: list :param modify_in: list of role ids :type add_in: list :param add_in: list of role ids :type manage_in: list :param manage_in: list of role ids """ payload = {} if access_in: payload['LIBRARY_ACCESS_in'] = access_in if modify_in: payload['LIBRARY_MODIFY_in'] = modify_in if add_in: payload['LIBRARY_ADD_in'] = add_in if manage_in: payload['LIBRARY_MANAGE_in'] = manage_in url = '/'.join([self.gi._make_url(self, library_id), 'permissions']) return Client._post(self, payload, url=url) bioblend-0.7.0/bioblend/galaxy/objects/000077500000000000000000000000001261571066300177775ustar00rootroot00000000000000bioblend-0.7.0/bioblend/galaxy/objects/__init__.py000066400000000000000000000001241261571066300221050ustar00rootroot00000000000000from .wrappers import * # noqa from .galaxy_instance import GalaxyInstance # noqa bioblend-0.7.0/bioblend/galaxy/objects/client.py000066400000000000000000000341431261571066300216340ustar00rootroot00000000000000""" Clients for interacting with specific Galaxy entity types. Classes in this module should not be instantiated directly, but used via their handles in :class:`~.galaxy_instance.GalaxyInstance`. """ import abc import collections import json import six import bioblend from . import wrappers @six.add_metaclass(abc.ABCMeta) class ObjClient(object): @abc.abstractmethod def __init__(self, obj_gi): self.obj_gi = obj_gi self.gi = self.obj_gi.gi self.log = bioblend.log @abc.abstractmethod def get_previews(self, **kwargs): """ Get a list of object previews. Previews entity summaries provided by REST collection URIs, e.g. ``http://host:port/api/libraries``. Being the most lightweight objects associated to the various entities, these are the ones that should be used to retrieve their basic info. :rtype: list :return: a list of object previews """ pass @abc.abstractmethod def list(self, **kwargs): """ Get a list of objects. This method first gets the entity summaries, then gets the complete description for each entity with an additional GET call, so may be slow. :rtype: list :return: a list of objects """ pass def _select_ids(self, id_=None, name=None): """ Return the id list that corresponds to the given id or name info. 
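For example, the ``delete()`` methods of the entity clients rely on this resolution, so callers can pass either an id or a name; a sketch (the URL, API key, library name and id are placeholders)::

    from bioblend.galaxy.objects import GalaxyInstance

    gi = GalaxyInstance('http://127.0.0.1:8080', 'your-api-key')
    gi.libraries.delete(name='Obsolete library')   # name resolved to one or more ids
    gi.libraries.delete(id_='f740ab636b360a70')    # id used as-is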
""" if id_ is None and name is None: self._error('neither id nor name provided', err_type=TypeError) if id_ is not None and name is not None: self._error('both id and name provided', err_type=TypeError) if id_ is None: return [_.id for _ in self.get_previews(name=name)] else: return [id_] def _error(self, msg, err_type=RuntimeError): self.log.error(msg) raise err_type(msg) def _get_dict(self, meth_name, reply): if reply is None: self._error('%s: no reply' % meth_name) elif isinstance(reply, collections.Mapping): return reply try: return reply[0] except (TypeError, IndexError): self._error('%s: unexpected reply: %r' % (meth_name, reply)) class ObjDatasetContainerClient(ObjClient): def _get_container(self, id_, ctype): show_fname = 'show_%s' % ctype.__name__.lower() gi_client = getattr(self.gi, ctype.API_MODULE) show_f = getattr(gi_client, show_fname) res = show_f(id_) cdict = self._get_dict(show_fname, res) cdict['id'] = id_ # overwrite unencoded id c_infos = show_f(id_, contents=True) if not isinstance(c_infos, collections.Sequence): self._error('%s: unexpected reply: %r' % (show_fname, c_infos)) c_infos = [ctype.CONTENT_INFO_TYPE(_) for _ in c_infos] return ctype(cdict, content_infos=c_infos, gi=self.obj_gi) class ObjLibraryClient(ObjDatasetContainerClient): """ Interacts with Galaxy libraries. """ def __init__(self, obj_gi): super(ObjLibraryClient, self).__init__(obj_gi) def create(self, name, description=None, synopsis=None): """ Create a data library with the properties defined in the arguments. :rtype: :class:`~.wrappers.Library` :return: the library just created """ res = self.gi.libraries.create_library(name, description, synopsis) lib_info = self._get_dict('create_library', res) return self.get(lib_info['id']) def get(self, id_): """ Retrieve the data library corresponding to the given id. :rtype: :class:`~.wrappers.Library` :return: the library corresponding to ``id_`` """ return self._get_container(id_, wrappers.Library) def get_previews(self, name=None, deleted=False): dicts = self.gi.libraries.get_libraries(name=name, deleted=deleted) return [wrappers.LibraryPreview(_, gi=self.obj_gi) for _ in dicts] def list(self, name=None, deleted=False): """ Get libraries owned by the user of this Galaxy instance. :type name: str :param name: return only libraries with this name :type deleted: bool :param deleted: if ``True``, return libraries that have been deleted :rtype: list of :class:`~.wrappers.Library` """ dicts = self.gi.libraries.get_libraries(name=name, deleted=deleted) if not deleted: # return Library objects only for not-deleted libraries since Galaxy # does not filter them out and Galaxy release_14.08 and earlier # crashes when trying to get a deleted library return [self.get(_['id']) for _ in dicts if not _['deleted']] else: return [self.get(_['id']) for _ in dicts] def delete(self, id_=None, name=None): """ Delete the library with the given id or name. Note that the same name can map to multiple libraries. .. warning:: Deleting a data library is irreversible - all of the data from the library will be permanently deleted. """ for id_ in self._select_ids(id_=id_, name=name): res = self.gi.libraries.delete_library(id_) if not isinstance(res, collections.Mapping): self._error('delete_library: unexpected reply: %r' % (res,)) class ObjHistoryClient(ObjDatasetContainerClient): """ Interacts with Galaxy histories. """ def __init__(self, obj_gi): super(ObjHistoryClient, self).__init__(obj_gi) def create(self, name=None): """ Create a new Galaxy history, optionally setting its name. 
:rtype: :class:`~.wrappers.History` :return: the history just created """ res = self.gi.histories.create_history(name=name) hist_info = self._get_dict('create_history', res) return self.get(hist_info['id']) def get(self, id_): """ Retrieve the history corresponding to the given id. :rtype: :class:`~.wrappers.History` :return: the history corresponding to ``id_`` """ return self._get_container(id_, wrappers.History) def get_previews(self, name=None, deleted=False): dicts = self.gi.histories.get_histories(name=name, deleted=deleted) return [wrappers.HistoryPreview(_, gi=self.obj_gi) for _ in dicts] def list(self, name=None, deleted=False): """ Get histories owned by the user of this Galaxy instance. :type name: str :param name: return only histories with this name :type deleted: bool :param deleted: if ``True``, return histories that have been deleted :rtype: list of :class:`~.wrappers.History` """ dicts = self.gi.histories.get_histories(name=name, deleted=deleted) return [self.get(_['id']) for _ in dicts] def delete(self, id_=None, name=None, purge=False): """ Delete the history with the given id or name. Note that the same name can map to multiple histories. :type purge: bool :param purge: if ``True``, also purge (permanently delete) the history .. note:: For the purge option to work, the Galaxy instance must have the ``allow_user_dataset_purge`` option set to ``True`` in the ``config/galaxy.ini`` configuration file. """ for id_ in self._select_ids(id_=id_, name=name): res = self.gi.histories.delete_history(id_, purge=purge) if not isinstance(res, collections.Mapping): self._error('delete_history: unexpected reply: %r' % (res,)) class ObjWorkflowClient(ObjClient): """ Interacts with Galaxy workflows. """ def __init__(self, obj_gi): super(ObjWorkflowClient, self).__init__(obj_gi) def import_new(self, src): """ Imports a new workflow into Galaxy. :type src: dict or str :param src: deserialized (dictionary) or serialized (str) JSON dump of the workflow (this is normally obtained by exporting a workflow from Galaxy). :rtype: :class:`~.wrappers.Workflow` :return: the workflow just imported """ if isinstance(src, collections.Mapping): wf_dict = src else: try: wf_dict = json.loads(src) except (TypeError, ValueError): self._error('src not supported: %r' % (src,)) wf_info = self.gi.workflows.import_workflow_json(wf_dict) return self.get(wf_info['id']) def import_shared(self, id_): """ Imports a shared workflow to the user's space. :type id_: str :param id_: workflow id :rtype: :class:`~.wrappers.Workflow` :return: the workflow just imported """ wf_info = self.gi.workflows.import_shared_workflow(id_) return self.get(wf_info['id']) def get(self, id_): """ Retrieve the workflow corresponding to the given id. :rtype: :class:`~.wrappers.Workflow` :return: the workflow corresponding to ``id_`` """ res = self.gi.workflows.show_workflow(id_) wf_dict = self._get_dict('show_workflow', res) return wrappers.Workflow(wf_dict, gi=self.obj_gi) # the 'deleted' option is not available for workflows def get_previews(self, name=None, published=False): dicts = self.gi.workflows.get_workflows(name=name, published=published) return [wrappers.WorkflowPreview(_, gi=self.obj_gi) for _ in dicts] # the 'deleted' option is not available for workflows def list(self, name=None, published=False): """ Get workflows owned by the user of this Galaxy instance. 
:type name: str :param name: return only workflows with this name :type published: bool :param published: if ``True``, return also published workflows :rtype: list of :class:`~.wrappers.Workflow` """ dicts = self.gi.workflows.get_workflows(name=name, published=published) return [self.get(_['id']) for _ in dicts] def delete(self, id_=None, name=None): """ Delete the workflow with the given id or name. Note that the same name can map to multiple workflows. .. warning:: Deleting a workflow is irreversible - all of the data from the workflow will be permanently deleted. """ for id_ in self._select_ids(id_=id_, name=name): res = self.gi.workflows.delete_workflow(id_) if not isinstance(res, six.string_types): self._error('delete_workflow: unexpected reply: %r' % (res,)) class ObjToolClient(ObjClient): """ Interacts with Galaxy tools. """ def __init__(self, obj_gi): super(ObjToolClient, self).__init__(obj_gi) def get(self, id_, io_details=False, link_details=False): """ Retrieve the tool corresponding to the given id. :type io_details: bool :param io_details: if True, get also input and output details :type link_details: bool :param link_details: if True, get also link details :rtype: :class:`~.wrappers.Tool` :return: the tool corresponding to ``id_`` """ res = self.gi.tools.show_tool(id_, io_details=io_details, link_details=link_details) tool_dict = self._get_dict('show_tool', res) return wrappers.Tool(tool_dict, gi=self.obj_gi) def get_previews(self, name=None, trackster=None): """ Get the list of tools installed on the Galaxy instance. :type name: str :param name: return only tools with this name :type trackster: bool :param trackster: if True, only tools that are compatible with Trackster are returned :rtype: list of :class:`~.wrappers.Tool` """ dicts = self.gi.tools.get_tools(name=name, trackster=trackster) return [wrappers.Tool(_, gi=self.obj_gi) for _ in dicts] # the 'deleted' option is not available for tools def list(self, name=None, trackster=None): """ Get the list of tools installed on the Galaxy instance. :type name: str :param name: return only tools with this name :type trackster: bool :param trackster: if True, only tools that are compatible with Trackster are returned :rtype: list of :class:`~.wrappers.Tool` """ # dicts = self.gi.tools.get_tools(name=name, trackster=trackster) # return [self.get(_['id']) for _ in dicts] # As of 2015/04/15, GET /api/tools returns also data manager tools for # non-admin users, see # https://trello.com/c/jyl0cvFP/2633-api-tool-list-filtering-doesn-t-filter-data-managers-for-non-admins # Trying to get() a data manager tool would then return a 404 Not Found # error. # Moreover, the dicts returned by gi.tools.get_tools() are richer than # those returned by get(), so make this an alias for get_previews(). return self.get_previews(name, trackster) class ObjJobClient(ObjClient): """ Interacts with Galaxy jobs. """ def __init__(self, obj_gi): super(ObjJobClient, self).__init__(obj_gi) def get(self, id_, full_details=False): """ Retrieve the job corresponding to the given id. :type full_details: bool :param full_details: if ``True``, return the complete list of details for the given job. 
:rtype: :class:`~.wrappers.Job` :return: the job corresponding to ``id_`` """ res = self.gi.jobs.show_job(id_, full_details) job_dict = self._get_dict('job_tool', res) return wrappers.Job(job_dict, gi=self.obj_gi) def get_previews(self): dicts = self.gi.jobs.get_jobs() return [wrappers.JobPreview(_, gi=self.obj_gi) for _ in dicts] def list(self): """ Get the list of jobs of the current user. :rtype: list of :class:`~.wrappers.Job` """ dicts = self.gi.jobs.get_jobs() return [self.get(_['id']) for _ in dicts] bioblend-0.7.0/bioblend/galaxy/objects/galaxy_instance.py000066400000000000000000000072741261571066300235340ustar00rootroot00000000000000""" A representation of a Galaxy instance based on oo wrappers. """ import time import bioblend import bioblend.galaxy from . import client # dataset states corresponding to a 'pending' condition _PENDING_DS_STATES = set( ["new", "upload", "queued", "running", "setting_metadata"] ) def _get_error_info(hda): msg = hda.id try: msg += ' (%s): ' % hda.name msg += hda.wrapped['misc_info'] except Exception: # avoid 'error while generating an error report' msg += ': error' return msg class GalaxyInstance(object): """ A representation of an instance of Galaxy, identified by a URL and a user's API key. :type url: str :param url: a FQDN or IP for a given instance of Galaxy. For example: ``http://127.0.0.1:8080`` :type api_key: str :param api_key: user's API key for the given instance of Galaxy, obtained from the Galaxy web UI. This is actually a factory class which instantiates the entity-specific clients. Example: get a list of all histories for a user with API key 'foo':: from bioblend.galaxy.objects import * gi = GalaxyInstance('http://127.0.0.1:8080', 'foo') histories = gi.histories.list() """ def __init__(self, url, api_key=None, email=None, password=None): self.gi = bioblend.galaxy.GalaxyInstance(url, api_key, email, password) self.log = bioblend.log self.__histories = client.ObjHistoryClient(self) self.__libraries = client.ObjLibraryClient(self) self.__workflows = client.ObjWorkflowClient(self) self.__tools = client.ObjToolClient(self) self.__jobs = client.ObjJobClient(self) @property def histories(self): """ Client module for Galaxy histories. """ return self.__histories @property def libraries(self): """ Client module for Galaxy libraries. """ return self.__libraries @property def workflows(self): """ Client module for Galaxy workflows. """ return self.__workflows @property def tools(self): """ Client module for Galaxy tools. """ return self.__tools @property def jobs(self): """ Client module for Galaxy jobs. """ return self.__jobs def _wait_datasets(self, datasets, polling_interval, break_on_error=True): """ Wait for datasets to come out of the pending states. :type datasets: :class:`~collections.Iterable` of :class:`~.wrappers.Dataset` :param datasets: datasets :type polling_interval: float :param polling_interval: polling interval in seconds :type break_on_error: bool :param break_on_error: if ``True``, raise a RuntimeError exception as soon as at least one of the datasets is in the 'error' state. .. warning:: This is a blocking operation that can take a very long time. Also, note that this method does not return anything; however, each input dataset is refreshed (possibly multiple times) during the execution. 
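This method is normally reached indirectly, e.g. via :meth:`~.wrappers.Workflow.run` with ``wait=True`` or :meth:`~.wrappers.Dataset.wait`; a sketch (the URL, API key and history name are placeholders)::

    from bioblend.galaxy.objects import GalaxyInstance

    gi = GalaxyInstance('http://127.0.0.1:8080', 'your-api-key')
    hist = gi.histories.list(name='RNA-seq run')[0]
    for ds in hist.get_datasets():
        ds.wait()  # blocks until the dataset leaves the pending states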
""" def poll(ds_list): pending = [] for ds in ds_list: ds.refresh() self.log.info('{0.id}: {0.state}'.format(ds)) if break_on_error and ds.state == 'error': raise RuntimeError(_get_error_info(ds)) if ds.state in _PENDING_DS_STATES: pending.append(ds) return pending self.log.info('waiting for datasets') while datasets: datasets = poll(datasets) time.sleep(polling_interval) bioblend-0.7.0/bioblend/galaxy/objects/wrappers.py000066400000000000000000001306631261571066300222250ustar00rootroot00000000000000# pylint: disable=W0622,E1101 """ A basic object-oriented interface for Galaxy entities. """ import abc import collections import json from six.moves import http_client import six import bioblend __all__ = [ 'Wrapper', 'Step', 'Workflow', 'ContentInfo', 'LibraryContentInfo', 'HistoryContentInfo', 'DatasetContainer', 'History', 'Library', 'Folder', 'Dataset', 'HistoryDatasetAssociation', 'LibraryDatasetDatasetAssociation', 'LibraryDataset', 'Tool', 'Job', 'Preview', 'LibraryPreview', 'HistoryPreview', 'WorkflowPreview', ] @six.add_metaclass(abc.ABCMeta) class Wrapper(object): """ Abstract base class for Galaxy entity wrappers. Wrapper instances wrap deserialized JSON dictionaries such as the ones obtained by the Galaxy web API, converting key-based access to attribute-based access (e.g., ``library['name'] -> library.name``). Dict keys that are converted to attributes are listed in the ``BASE_ATTRS`` class variable: this is the 'stable' interface. Note that the wrapped dictionary is accessible via the ``wrapped`` attribute. """ BASE_ATTRS = ('id', 'name') @abc.abstractmethod def __init__(self, wrapped, parent=None, gi=None): """ :type wrapped: dict :param wrapped: JSON-serializable dictionary :type parent: :class:`Wrapper` :param parent: the parent of this wrapper :type gi: :class:`GalaxyInstance` :param gi: the GalaxyInstance through which we can access this wrapper """ if not isinstance(wrapped, collections.Mapping): raise TypeError('wrapped object must be a mapping type') # loads(dumps(x)) is a bit faster than deepcopy and allows type checks try: dumped = json.dumps(wrapped) except (TypeError, ValueError): raise ValueError('wrapped object must be JSON-serializable') object.__setattr__(self, 'wrapped', json.loads(dumped)) for k in self.BASE_ATTRS: object.__setattr__(self, k, self.wrapped.get(k)) object.__setattr__(self, '_cached_parent', parent) object.__setattr__(self, 'is_modified', False) object.__setattr__(self, 'gi', gi) @abc.abstractproperty def gi_module(self): """ The GalaxyInstance module that deals with objects of this type. """ pass @property def parent(self): """ The parent of this wrapper. """ return self._cached_parent @property def is_mapped(self): """ ``True`` if this wrapper is mapped to an actual Galaxy entity. """ return self.id is not None def unmap(self): """ Disconnect this wrapper from Galaxy. """ object.__setattr__(self, 'id', None) def clone(self): """ Return an independent copy of this wrapper. """ return self.__class__(self.wrapped) def touch(self): """ Mark this wrapper as having been modified since its creation. """ object.__setattr__(self, 'is_modified', True) if self.parent: self.parent.touch() def to_json(self): """ Return a JSON dump of this wrapper. """ return json.dumps(self.wrapped) @classmethod def from_json(cls, jdef): """ Build a new wrapper from a JSON dump. 
""" return cls(json.loads(jdef)) # FIXME: things like self.x[0] = 'y' do NOT call self.__setattr__ def __setattr__(self, name, value): if name not in self.wrapped: raise AttributeError("can't set attribute") else: self.wrapped[name] = value object.__setattr__(self, name, value) self.touch() def __repr__(self): return "%s(%r)" % (self.__class__.__name__, self.wrapped) class Step(Wrapper): """ Abstract base class for workflow steps. Steps are the main building blocks of a Galaxy workflow. A step can be: an input (type 'data_collection_input` or 'data_input`), a computational tool (type 'tool`) or a pause (type 'pause`). """ BASE_ATTRS = Wrapper.BASE_ATTRS + ( 'input_steps', 'tool_id', 'tool_inputs', 'tool_version', 'type' ) def __init__(self, step_dict, parent): super(Step, self).__init__(step_dict, parent=parent, gi=parent.gi) try: stype = step_dict['type'] except KeyError: raise ValueError('not a step dict') if stype not in set(['data_collection_input', 'data_input', 'pause', 'tool']): raise ValueError('Unknown step type: %r' % stype) if self.type == 'tool' and self.tool_inputs: for k, v in six.iteritems(self.tool_inputs): self.tool_inputs[k] = json.loads(v) @property def gi_module(self): return self.gi.workflows class Workflow(Wrapper): """ Workflows represent ordered sequences of computations on Galaxy. A workflow defines a sequence of steps that produce one or more results from an input dataset. """ BASE_ATTRS = Wrapper.BASE_ATTRS + ( 'deleted', 'inputs', 'published', 'steps', 'tags' ) POLLING_INTERVAL = 10 # for output state monitoring def __init__(self, wf_dict, gi=None): super(Workflow, self).__init__(wf_dict, gi=gi) missing_ids = [] if gi: tools_list_by_id = [t.id for t in gi.tools.get_previews()] else: tools_list_by_id = [] for k, v in six.iteritems(self.steps): # convert step ids to str for consistency with outer keys v['id'] = str(v['id']) for i in six.itervalues(v['input_steps']): i['source_step'] = str(i['source_step']) step = Step(v, self) self.steps[k] = step if step.type == 'tool': if not step.tool_inputs or step.tool_id not in tools_list_by_id: missing_ids.append(k) input_labels_to_ids = {} for id_, d in six.iteritems(self.inputs): input_labels_to_ids.setdefault(d['label'], set()).add(id_) tool_labels_to_ids = {} for s in six.itervalues(self.steps): if s.type == 'tool': tool_labels_to_ids.setdefault(s.tool_id, set()).add(s.id) object.__setattr__(self, 'input_labels_to_ids', input_labels_to_ids) object.__setattr__(self, 'tool_labels_to_ids', tool_labels_to_ids) dag, inv_dag = self._get_dag() heads, tails = set(dag), set(inv_dag) object.__setattr__(self, 'dag', dag) object.__setattr__(self, 'inv_dag', inv_dag) object.__setattr__(self, 'source_ids', heads - tails) # In Galaxy release_14.06 (the first to support dataset collection # inputs) `inputs` does not contain dataset collections, so test if it # is a subset of (instead of equal to) the union of dataset collection # and dataset input ids. assert set(self.inputs) <= self.data_collection_input_ids | self.data_input_ids, \ "inputs is %r, while data_collection_input_ids is %r and data_input_ids is %r" % (self.inputs, self.data_collection_input_ids, self.data_input_ids) object.__setattr__(self, 'sink_ids', tails - heads) object.__setattr__(self, 'missing_ids', missing_ids) @property def gi_module(self): return self.gi.workflows def _get_dag(self): """ Return the workflow's DAG. For convenience, this method computes a 'direct' (step => successors) and an 'inverse' (step => predecessors) representation of the same DAG. 
For instance, a workflow with a single tool *c*, two inputs *a, b* and three outputs *d, e, f* is represented by (direct):: {'a': {'c'}, 'b': {'c'}, 'c': set(['d', 'e', 'f'])} and by (inverse):: {'c': set(['a', 'b']), 'd': {'c'}, 'e': {'c'}, 'f': {'c'}} """ dag, inv_dag = {}, {} for s in six.itervalues(self.steps): for i in six.itervalues(s.input_steps): head, tail = i['source_step'], s.id dag.setdefault(head, set()).add(tail) inv_dag.setdefault(tail, set()).add(head) return dag, inv_dag def sorted_step_ids(self): """ Return a topological sort of the workflow's DAG. """ ids = [] source_ids = self.source_ids.copy() inv_dag = dict((k, v.copy()) for k, v in six.iteritems(self.inv_dag)) while source_ids: head = source_ids.pop() ids.append(head) for tail in self.dag.get(head, []): incoming = inv_dag[tail] incoming.remove(head) if not incoming: source_ids.add(tail) return ids @property def data_input_ids(self): """ Return the list of data input steps for this workflow. """ return set(id_ for id_, s in six.iteritems(self.steps) if s.type == 'data_input') @property def data_collection_input_ids(self): """ Return the list of data collection input steps for this workflow. """ return set(id_ for id_, s in six.iteritems(self.steps) if s.type == 'data_collection_input') @property def tool_ids(self): """ Return the list of tool steps for this workflow. """ return set(id_ for id_, s in six.iteritems(self.steps) if s.type == 'tool') @property def input_labels(self): """ Return the labels of this workflow's input steps. """ return set(self.input_labels_to_ids) @property def is_runnable(self): """ Return True if the workflow can be run on Galaxy. A workflow is considered runnable on a Galaxy instance if all of the tools it uses are installed in that instance. """ return not self.missing_ids def convert_input_map(self, input_map): """ Convert ``input_map`` to the format required by the Galaxy web API. :type input_map: dict :param input_map: a mapping from input labels to datasets :rtype: dict :return: a mapping from input slot ids to dataset ids in the format required by the Galaxy web API. """ m = {} for label, slot_ids in six.iteritems(self.input_labels_to_ids): datasets = input_map.get(label, []) if not isinstance(datasets, collections.Iterable): datasets = [datasets] if len(datasets) < len(slot_ids): raise RuntimeError('not enough datasets for "%s"' % label) for id_, ds in zip(slot_ids, datasets): m[id_] = {'id': ds.id, 'src': ds.SRC} return m def preview(self): getf = self.gi.workflows.get_previews try: p = [_ for _ in getf(published=True) if _.id == self.id][0] except IndexError: raise ValueError('no object for id %s' % self.id) return p def run(self, input_map=None, history='', params=None, import_inputs=False, replacement_params=None, wait=False, polling_interval=POLLING_INTERVAL, break_on_error=True): """ Run the workflow in the current Galaxy instance. :type input_map: dict :param input_map: a mapping from workflow input labels to datasets, e.g.: ``dict(zip(workflow.input_labels, library.get_datasets()))`` :type history: :class:`History` or str :param history: either a valid history object (results will be stored there) or a string (a new history will be created with the given name). :type params: :class:`~collections.Mapping` :param params: parameter settings for workflow steps (see below) :type import_inputs: bool :param import_inputs: If ``True``, workflow inputs will be imported into the history; if ``False``, only workflow outputs will be visible in the history. 
:type replacement_params: :class:`~collections.Mapping` :param replacement_params: pattern-based replacements for post-job actions (see the docs for :meth:`~bioblend.galaxy.workflows.WorkflowClient.run_workflow`) :type wait: bool :param wait: whether to wait while the returned datasets are in a pending state :type polling_interval: float :param polling_interval: polling interval in seconds :type break_on_error: bool :param break_on_error: whether to break as soon as at least one of the returned datasets is in the 'error' state :rtype: tuple :return: list of output datasets, output history The ``params`` dict should be structured as follows:: PARAMS = {STEP_ID: PARAM_DICT, ...} PARAM_DICT = {NAME: VALUE, ...} For backwards compatibility, the following (deprecated) format is also supported:: PARAMS = {TOOL_ID: PARAM_DICT, ...} in which case PARAM_DICT affects all steps with the given tool id. If both by-tool-id and by-step-id specifications are used, the latter takes precedence. Finally (again, for backwards compatibility), PARAM_DICT can also be specified as:: PARAM_DICT = {'param': NAME, 'value': VALUE} Note that this format allows only one parameter to be set per step. Example: set 'a' to 1 for the third workflow step:: params = {workflow.steps[2].id: {'a': 1}} .. warning:: This is a blocking operation that can take a very long time. If ``wait`` is set to ``False``, the method will return as soon as the workflow has been *scheduled*, otherwise it will wait until the workflow has been *run*. With a large number of steps, however, the delay may not be negligible even in the former case (e.g. minutes for 100 steps). """ if not self.is_mapped: raise RuntimeError('workflow is not mapped to a Galaxy object') if not self.is_runnable: raise RuntimeError('workflow has missing tools: %s' % ', '.join( '%s[%s]' % (self.steps[_].tool_id, _) for _ in self.missing_ids)) kwargs = { 'dataset_map': self.convert_input_map(input_map or {}), 'params': params, 'import_inputs_to_history': import_inputs, 'replacement_params': replacement_params, } if isinstance(history, History): try: kwargs['history_id'] = history.id except AttributeError: raise RuntimeError('history does not have an id') elif isinstance(history, six.string_types): kwargs['history_name'] = history else: raise TypeError( 'history must be either a history wrapper or a string') res = self.gi.gi.workflows.run_workflow(self.id, **kwargs) # res structure: {'history': HIST_ID, 'outputs': [DS_ID, DS_ID, ...]} out_hist = self.gi.histories.get(res['history']) assert set(res['outputs']).issubset(out_hist.dataset_ids) outputs = [out_hist.get_dataset(_) for _ in res['outputs']] if wait: self.gi._wait_datasets(outputs, polling_interval=polling_interval, break_on_error=break_on_error) return outputs, out_hist def export(self): """ Export a re-importable representation of the workflow. :rtype: dict :return: a JSON-serializable dump of the workflow """ return self.gi.gi.workflows.export_workflow_json(self.id) def delete(self): """ Delete this workflow. .. warning:: Deleting a workflow is irreversible - all of the data from the workflow will be permanently deleted. """ self.gi.workflows.delete(id_=self.id) self.unmap() @six.add_metaclass(abc.ABCMeta) class Dataset(Wrapper): """ Abstract base class for Galaxy datasets. 
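Concrete subclasses share the streaming helpers defined below; a download sketch (the URL, API key, history and dataset names are placeholders)::

    from bioblend.galaxy.objects import GalaxyInstance

    gi = GalaxyInstance('http://127.0.0.1:8080', 'your-api-key')
    hist = gi.histories.list(name='RNA-seq run')[0]
    ds = hist.get_datasets(name='reads.fastq')[0]
    with open('reads.fastq', 'wb') as f:
        ds.download(f)  # stream the dataset contents to a local file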
""" BASE_ATTRS = Wrapper.BASE_ATTRS + ( 'data_type', 'file_name', 'file_size', 'state', 'deleted', 'file_ext' ) POLLING_INTERVAL = 1 # for state monitoring @abc.abstractmethod def __init__(self, ds_dict, container, gi=None): super(Dataset, self).__init__(ds_dict, gi=gi) object.__setattr__(self, 'container', container) @property def container_id(self): """ Deprecated property. Id of the dataset container. Use :attr:`.container.id` instead. """ return self.container.id @abc.abstractproperty def _stream_url(self): """ Return the URL to stream this dataset. """ pass def get_stream(self, chunk_size=bioblend.CHUNK_SIZE): """ Open dataset for reading and return an iterator over its contents. :type chunk_size: int :param chunk_size: read this amount of bytes at a time .. warning:: Due to a change in the Galaxy API endpoint, this method does not work on :class:`LibraryDataset` instances with Galaxy ``release_2014.06.02``. Methods that delegate work to this one are also affected: :meth:`peek`, :meth:`download` and :meth:`get_contents`. """ kwargs = {'stream': True} if isinstance(self, LibraryDataset): kwargs['params'] = {'ld_ids%5B%5D': self.id} r = self.gi.gi.make_get_request(self._stream_url, **kwargs) if isinstance(self, LibraryDataset) and r.status_code == 500: # compatibility with older Galaxy releases kwargs['params'] = {'ldda_ids%5B%5D': self.id} r = self.gi.gi.make_get_request(self._stream_url, **kwargs) r.raise_for_status() return r.iter_content(chunk_size) # FIXME: client can't close r def peek(self, chunk_size=bioblend.CHUNK_SIZE): """ Open dataset for reading and return the first chunk. See :meth:`.get_stream` for param info. """ try: return next(self.get_stream(chunk_size=chunk_size)) except StopIteration: return b'' def download(self, file_object, chunk_size=bioblend.CHUNK_SIZE): """ Open dataset for reading and save its contents to ``file_object``. :type file_object: file :param file_object: output file object See :meth:`.get_stream` for info on other params. """ for chunk in self.get_stream(chunk_size=chunk_size): file_object.write(chunk) def get_contents(self, chunk_size=bioblend.CHUNK_SIZE): """ Open dataset for reading and return its **full** contents. See :meth:`.get_stream` for param info. """ return b''.join(self.get_stream(chunk_size=chunk_size)) def refresh(self): """ Re-fetch the attributes pertaining to this object. Returns: self """ gi_client = getattr(self.gi.gi, self.container.API_MODULE) ds_dict = gi_client.show_dataset(self.container.id, self.id) self.__init__(ds_dict, self.container, self.gi) return self def wait(self, polling_interval=POLLING_INTERVAL, break_on_error=True): """ Wait for this dataset to come out of the pending states. :type polling_interval: float :param polling_interval: polling interval in seconds :type break_on_error: bool :param break_on_error: if ``True``, raise a RuntimeError exception if the dataset ends in the 'error' state. .. warning:: This is a blocking operation that can take a very long time. Also, note that this method does not return anything; however, this dataset is refreshed (possibly multiple times) during the execution. """ self.gi._wait_datasets([self], polling_interval=polling_interval, break_on_error=break_on_error) class HistoryDatasetAssociation(Dataset): """ Maps to a Galaxy ``HistoryDatasetAssociation``. 
""" BASE_ATTRS = Dataset.BASE_ATTRS + ('tags', 'visible') SRC = 'hda' def __init__(self, ds_dict, container, gi=None): super(HistoryDatasetAssociation, self).__init__( ds_dict, container, gi=gi) @property def gi_module(self): return self.gi.histories @property def _stream_url(self): base_url = self.gi.gi._make_url( self.gi.gi.histories, module_id=self.container.id, contents=True) return "%s/%s/display" % (base_url, self.id) def delete(self): """ Delete this dataset. """ self.gi.gi.histories.delete_dataset(self.container.id, self.id) self.container.refresh() self.refresh() class LibRelatedDataset(Dataset): """ Base class for LibraryDatasetDatasetAssociation and LibraryDataset classes. """ def __init__(self, ds_dict, container, gi=None): super(LibRelatedDataset, self).__init__(ds_dict, container, gi=gi) @property def gi_module(self): return self.gi.libraries @property def _stream_url(self): base_url = self.gi.gi._make_url(self.gi.gi.libraries) return "%s/datasets/download/uncompressed" % base_url class LibraryDatasetDatasetAssociation(LibRelatedDataset): """ Maps to a Galaxy ``LibraryDatasetDatasetAssociation``. """ SRC = 'ldda' class LibraryDataset(LibRelatedDataset): """ Maps to a Galaxy ``LibraryDataset``. """ SRC = 'ld' def delete(self, purged=False): """ Delete this library dataset. :type purged: bool :param purged: if ``True``, also purge (permanently delete) the dataset """ self.gi.gi.libraries.delete_library_dataset( self.container.id, self.id, purged=purged) self.container.refresh() self.refresh() @six.add_metaclass(abc.ABCMeta) class ContentInfo(Wrapper): """ Instances of this class wrap dictionaries obtained by getting ``/api/{histories,libraries}//contents`` from Galaxy. """ BASE_ATTRS = Wrapper.BASE_ATTRS + ('type',) @abc.abstractmethod def __init__(self, info_dict, gi=None): super(ContentInfo, self).__init__(info_dict, gi=gi) class LibraryContentInfo(ContentInfo): """ Instances of this class wrap dictionaries obtained by getting ``/api/libraries//contents`` from Galaxy. """ def __init__(self, info_dict, gi=None): super(LibraryContentInfo, self).__init__(info_dict, gi=gi) @property def gi_module(self): return self.gi.libraries class HistoryContentInfo(ContentInfo): """ Instances of this class wrap dictionaries obtained by getting ``/api/histories//contents`` from Galaxy. """ BASE_ATTRS = ContentInfo.BASE_ATTRS + ('deleted', 'state', 'visible') def __init__(self, info_dict, gi=None): super(HistoryContentInfo, self).__init__(info_dict, gi=gi) @property def gi_module(self): return self.gi.histories @six.add_metaclass(abc.ABCMeta) class DatasetContainer(Wrapper): """ Abstract base class for dataset containers (histories and libraries). """ BASE_ATTRS = Wrapper.BASE_ATTRS + ('deleted',) @abc.abstractmethod def __init__(self, c_dict, content_infos=None, gi=None): """ :type content_infos: list of :class:`ContentInfo` :param content_infos: info objects for the container's contents """ super(DatasetContainer, self).__init__(c_dict, gi=gi) if content_infos is None: content_infos = [] object.__setattr__(self, 'content_infos', content_infos) @property def dataset_ids(self): """ Return the ids of the contained datasets. 
""" return [_.id for _ in self.content_infos if _.type == 'file'] def preview(self): getf = self.gi_module.get_previews # self.state could be stale: check both regular and deleted containers try: p = [_ for _ in getf() if _.id == self.id][0] except IndexError: try: p = [_ for _ in getf(deleted=True) if _.id == self.id][0] except IndexError: raise ValueError('no object for id %s' % self.id) return p def refresh(self): """ Re-fetch the attributes pertaining to this object. Returns: self """ fresh = self.gi_module.get(self.id) self.__init__( fresh.wrapped, content_infos=fresh.content_infos, gi=self.gi) return self def get_dataset(self, ds_id): """ Retrieve the dataset corresponding to the given id. :type ds_id: str :param ds_id: dataset id :rtype: :class:`~.HistoryDatasetAssociation` or :class:`~.LibraryDataset` :return: the dataset corresponding to ``ds_id`` """ gi_client = getattr(self.gi.gi, self.API_MODULE) ds_dict = gi_client.show_dataset(self.id, ds_id) return self.DS_TYPE(ds_dict, self, gi=self.gi) def get_datasets(self, name=None): """ Get all datasets contained inside this dataset container. :type name: str :param name: return only datasets with this name :rtype: list of :class:`~.HistoryDatasetAssociation` or list of :class:`~.LibraryDataset` :return: datasets with the given name contained inside this container .. note:: when filtering library datasets by name, specify their full paths starting from the library's root folder, e.g., ``/seqdata/reads.fastq``. Full paths are available through the ``content_infos`` attribute of :class:`~.Library` objects. """ if name is None: ds_ids = self.dataset_ids else: ds_ids = [_.id for _ in self.content_infos if _.name == name] return [self.get_dataset(_) for _ in ds_ids] class History(DatasetContainer): """ Maps to a Galaxy history. """ BASE_ATTRS = DatasetContainer.BASE_ATTRS + ('annotation', 'state', 'state_ids', 'state_details', 'tags') DS_TYPE = HistoryDatasetAssociation CONTENT_INFO_TYPE = HistoryContentInfo API_MODULE = 'histories' def __init__(self, hist_dict, content_infos=None, gi=None): super(History, self).__init__( hist_dict, content_infos=content_infos, gi=gi) @property def gi_module(self): return self.gi.histories def update(self, name=None, annotation=None, **kwds): """ Update history metadata information. Some of the attributes that can be modified are documented below. :type name: str :param name: Replace history name with the given string :type annotation: str :param annotation: Replace history annotation with the given string :type deleted: bool :param deleted: Mark or unmark history as deleted :type published: bool :param published: Mark or unmark history as published :type importable: bool :param importable: Mark or unmark history as importable :type tags: list :param tags: Replace history tags with the given list """ # TODO: wouldn't it be better if name and annotation were attributes? # TODO: do we need to ensure the attributes of `self` are the same as # the ones returned by the call to `update_history` below? res = self.gi.gi.histories.update_history( self.id, name=name, annotation=annotation, **kwds) if res != http_client.OK: raise RuntimeError('failed to update history') self.refresh() return self def delete(self, purge=False): """ Delete this history. :type purge: bool :param purge: if ``True``, also purge (permanently delete) the history .. note:: For the purge option to work, the Galaxy instance must have the ``allow_user_dataset_purge`` option set to ``True`` in the ``config/galaxy.ini`` configuration file. 
""" self.gi.histories.delete(id_=self.id, purge=purge) self.unmap() def import_dataset(self, lds): """ Import a dataset into the history from a library. :type lds: :class:`~.LibraryDataset` :param lds: the library dataset to import :rtype: :class:`~.HistoryDatasetAssociation` :return: the imported history dataset """ if not self.is_mapped: raise RuntimeError('history is not mapped to a Galaxy object') if not isinstance(lds, LibraryDataset): raise TypeError('lds is not a LibraryDataset') res = self.gi.gi.histories.upload_dataset_from_library(self.id, lds.id) if not isinstance(res, collections.Mapping): raise RuntimeError( 'upload_dataset_from_library: unexpected reply: %r' % res) self.refresh() return self.get_dataset(res['id']) def upload_file(self, path, **kwargs): """ Upload the file specified by ``path`` to this history. :type path: str :param path: path of the file to upload See :meth:`~bioblend.galaxy.tools.ToolClient.upload_file` for the optional parameters. :rtype: :class:`~.HistoryDatasetAssociation` :return: the uploaded dataset """ out_dict = self.gi.gi.tools.upload_file(path, self.id, **kwargs) self.refresh() return self.get_dataset(out_dict['outputs'][0]['id']) upload_dataset = upload_file def upload_from_ftp(self, path, **kwargs): """ Upload the file specified by ``path`` from the user's FTP directory to this history. :type path: str :param path: path of the file in the user's FTP directory See :meth:`~bioblend.galaxy.tools.ToolClient.upload_file` for the optional parameters. :rtype: :class:`~.HistoryDatasetAssociation` :return: the uploaded dataset """ out_dict = self.gi.gi.tools.upload_from_ftp(path, self.id, **kwargs) self.refresh() return self.get_dataset(out_dict['outputs'][0]['id']) def paste_content(self, content, **kwargs): """ Upload a string to a new dataset in this history. :type content: str :param content: content of the new dataset to upload See :meth:`~bioblend.galaxy.tools.ToolClient.upload_file` for the optional parameters (except file_name). :rtype: :class:`~.HistoryDatasetAssociation` :return: the uploaded dataset """ out_dict = self.gi.gi.tools.paste_content(content, self.id, **kwargs) self.refresh() return self.get_dataset(out_dict['outputs'][0]['id']) def export(self, gzip=True, include_hidden=False, include_deleted=False, wait=False): """ Start a job to create an export archive for this history. See :meth:`~bioblend.galaxy.histories.HistoryClient.export_history` for parameter and return value info. """ return self.gi.gi.histories.export_history( self.id, gzip=gzip, include_hidden=include_hidden, include_deleted=include_deleted, wait=wait) def download(self, jeha_id, outf, chunk_size=bioblend.CHUNK_SIZE): """ Download an export archive for this history. Use :meth:`export` to create an export and get the required ``jeha_id``. See :meth:`~bioblend.galaxy.histories.HistoryClient.download_history` for parameter and return value info. """ return self.gi.gi.histories.download_history( self.id, jeha_id, outf, chunk_size=chunk_size) class Library(DatasetContainer): """ Maps to a Galaxy library. """ BASE_ATTRS = DatasetContainer.BASE_ATTRS + ('description', 'synopsis') DS_TYPE = LibraryDataset CONTENT_INFO_TYPE = LibraryContentInfo API_MODULE = 'libraries' def __init__(self, lib_dict, content_infos=None, gi=None): super(Library, self).__init__( lib_dict, content_infos=content_infos, gi=gi) @property def gi_module(self): return self.gi.libraries @property def folder_ids(self): """ Return the ids of the contained folders. 
""" return [_.id for _ in self.content_infos if _.type == 'folder'] def delete(self): """ Delete this library. """ self.gi.libraries.delete(id_=self.id) self.unmap() def __pre_upload(self, folder): """ Return the id of the given folder, after sanity checking. """ if not self.is_mapped: raise RuntimeError('library is not mapped to a Galaxy object') return None if folder is None else folder.id def upload_data(self, data, folder=None, **kwargs): """ Upload data to this library. :type data: str :param data: dataset contents :type folder: :class:`~.Folder` :param folder: a folder object, or ``None`` to upload to the root folder :rtype: :class:`~.LibraryDataset` :return: the dataset object that represents the uploaded content Optional keyword arguments: ``file_type``, ``dbkey``. """ fid = self.__pre_upload(folder) res = self.gi.gi.libraries.upload_file_contents( self.id, data, folder_id=fid, **kwargs) self.refresh() return self.get_dataset(res[0]['id']) def upload_from_url(self, url, folder=None, **kwargs): """ Upload data to this library from the given URL. :type url: str :param url: URL from which data should be read See :meth:`.upload_data` for info on other params. """ fid = self.__pre_upload(folder) res = self.gi.gi.libraries.upload_file_from_url( self.id, url, folder_id=fid, **kwargs) self.refresh() return self.get_dataset(res[0]['id']) def upload_from_local(self, path, folder=None, **kwargs): """ Upload data to this library from a local file. :type path: str :param path: local file path from which data should be read See :meth:`.upload_data` for info on other params. """ fid = self.__pre_upload(folder) res = self.gi.gi.libraries.upload_file_from_local_path( self.id, path, folder_id=fid, **kwargs) self.refresh() return self.get_dataset(res[0]['id']) def upload_from_galaxy_fs(self, paths, folder=None, link_data_only=None, **kwargs): """ Upload data to this library from filesystem paths on the server. .. note:: For this method to work, the Galaxy instance must have the ``allow_library_path_paste`` option set to ``True`` in the ``config/galaxy.ini`` configuration file. :type paths: str or :class:`~collections.Iterable` of str :param paths: server-side file paths from which data should be read :type link_data_only: str :param link_data_only: either 'copy_files' (default) or 'link_to_files'. Setting to 'link_to_files' symlinks instead of copying the files :rtype: list of :class:`~.LibraryDataset` :return: the dataset objects that represent the uploaded content See :meth:`.upload_data` for info on other params. """ fid = self.__pre_upload(folder) if isinstance(paths, six.string_types): paths = (paths,) paths = '\n'.join(paths) res = self.gi.gi.libraries.upload_from_galaxy_filesystem( self.id, paths, folder_id=fid, link_data_only=link_data_only, **kwargs) if res is None: raise RuntimeError('upload_from_galaxy_filesystem: no reply') if not isinstance(res, collections.Sequence): raise RuntimeError( 'upload_from_galaxy_filesystem: unexpected reply: %r' % res) new_datasets = [ self.get_dataset(ds_info['id']) for ds_info in res ] self.refresh() return new_datasets def copy_from_dataset(self, hda, folder=None, message=''): """ Copy a history dataset into this library. :type hda: :class:`~.HistoryDatasetAssociation` :param hda: history dataset to copy into the library See :meth:`.upload_data` for info on other params. 
""" fid = self.__pre_upload(folder) res = self.gi.gi.libraries.copy_from_dataset( self.id, hda.id, folder_id=fid, message=message) self.refresh() return self.get_dataset(res['library_dataset_id']) def create_folder(self, name, description=None, base_folder=None): """ Create a folder in this library. :type name: str :param name: folder name :type description: str :param description: optional folder description :type base_folder: :class:`~.Folder` :param base_folder: parent folder, or ``None`` to create in the root folder :rtype: :class:`~.Folder` :return: the folder just created """ bfid = None if base_folder is None else base_folder.id res = self.gi.gi.libraries.create_folder( self.id, name, description=description, base_folder_id=bfid) self.refresh() return self.get_folder(res[0]['id']) def get_folder(self, f_id): """ Retrieve the folder corresponding to the given id. :rtype: :class:`~.Folder` :return: the folder corresponding to ``f_id`` """ f_dict = self.gi.gi.libraries.show_folder(self.id, f_id) return Folder(f_dict, self, gi=self.gi) @property def root_folder(self): """ The root folder of this library. :rtype: :class:`~.Folder` :return: the root folder of this library """ return self.get_folder(self.gi.gi.libraries._get_root_folder_id(self.id)) class Folder(Wrapper): """ Maps to a folder in a Galaxy library. """ BASE_ATTRS = Wrapper.BASE_ATTRS + ('description', 'deleted', 'item_count') def __init__(self, f_dict, container, gi=None): super(Folder, self).__init__(f_dict, gi=gi) object.__setattr__(self, 'container', container) @property def parent(self): """ The parent folder of this folder. The parent of the root folder is ``None``. :rtype: :class:`~.Folder` :return: the parent of this folder """ if self._cached_parent is None: object.__setattr__(self, '_cached_parent', self._get_parent()) return self._cached_parent def _get_parent(self): """ Return the parent folder of this folder. """ # Galaxy release_13.04 and earlier does not have parent_id in the folder # dictionary, may be implemented by searching for the folder with the # correct name if 'parent_id' not in self.wrapped: raise NotImplementedError('This method has not been implemented for Galaxy release_13.04 and earlier') parent_id = self.wrapped['parent_id'] if parent_id is None: return None # Galaxy from release_14.02 to release_15.01 returns a dummy parent_id # for the root folder instead of None, so check if this is the root if self.id == self.gi.gi.libraries._get_root_folder_id(self.container.id): return None # Galaxy release_13.11 and earlier returns a parent_id without the # initial 'F' if not parent_id.startswith('F'): parent_id = 'F' + parent_id return self.container.get_folder(parent_id) @property def gi_module(self): return self.gi.libraries @property def container_id(self): """ Deprecated property. Id of the folder container. Use :attr:`.container.id` instead. """ return self.container.id def refresh(self): """ Re-fetch the attributes pertaining to this object. Returns: self """ f_dict = self.gi.gi.libraries.show_folder(self.container.id, self.id) self.__init__(f_dict, self.container, gi=self.gi) return self class Tool(Wrapper): """ Maps to a Galaxy tool. 
""" BASE_ATTRS = Wrapper.BASE_ATTRS + ('version',) POLLING_INTERVAL = 10 # for output state monitoring def __init__(self, t_dict, gi=None): super(Tool, self).__init__(t_dict, gi=gi) @property def gi_module(self): return self.gi.tools def run(self, inputs, history, wait=False, polling_interval=POLLING_INTERVAL): """ Execute this tool in the given history with inputs from dict ``inputs``. :type inputs: dict :param inputs: dictionary of input datasets and parameters for the tool (see below) :type history: :class:`History` :param history: the history where to execute the tool :type wait: bool :param wait: whether to wait while the returned datasets are in a pending state :type polling_interval: float :param polling_interval: polling interval in seconds :rtype: list of :class:`HistoryDatasetAssociation` :return: list of output datasets The ``inputs`` dict should contain input datasets and parameters in the (largely undocumented) format used by the Galaxy API. Some examples can be found in `Galaxy's API test suite `_. The value of an input dataset can also be a :class:`Dataset` object, which will be automatically converted to the needed format. """ for k, v in six.iteritems(inputs): if isinstance(v, Dataset): inputs[k] = {'src': v.SRC, 'id': v.id} out_dict = self.gi.gi.tools.run_tool(history.id, self.id, inputs) outputs = [history.get_dataset(_['id']) for _ in out_dict['outputs']] if wait: self.gi._wait_datasets(outputs, polling_interval=polling_interval) return outputs class Job(Wrapper): """ Maps to a Galaxy job. """ BASE_ATTRS = ('id', 'state') def __init__(self, j_dict, gi=None): super(Job, self).__init__(j_dict, gi=gi) @property def gi_module(self): return self.gi.jobs @six.add_metaclass(abc.ABCMeta) class Preview(Wrapper): """ Abstract base class for Galaxy entity 'previews'. Classes derived from this one model the short summaries returned by global getters such as ``/api/libraries``. """ BASE_ATTRS = Wrapper.BASE_ATTRS + ('deleted',) @abc.abstractmethod def __init__(self, pw_dict, gi=None): super(Preview, self).__init__(pw_dict, gi=gi) class LibraryPreview(Preview): """ Models Galaxy library 'previews'. Instances of this class wrap dictionaries obtained by getting ``/api/libraries`` from Galaxy. """ def __init__(self, pw_dict, gi=None): super(LibraryPreview, self).__init__(pw_dict, gi=gi) @property def gi_module(self): return self.gi.libraries class HistoryPreview(Preview): """ Models Galaxy history 'previews'. Instances of this class wrap dictionaries obtained by getting ``/api/histories`` from Galaxy. """ BASE_ATTRS = Preview.BASE_ATTRS + ('tags',) def __init__(self, pw_dict, gi=None): super(HistoryPreview, self).__init__(pw_dict, gi=gi) @property def gi_module(self): return self.gi.histories class WorkflowPreview(Preview): """ Models Galaxy workflow 'previews'. Instances of this class wrap dictionaries obtained by getting ``/api/workflows`` from Galaxy. """ BASE_ATTRS = Preview.BASE_ATTRS + ('published', 'tags') def __init__(self, pw_dict, gi=None): super(WorkflowPreview, self).__init__(pw_dict, gi=gi) @property def gi_module(self): return self.gi.workflows class JobPreview(Preview): """ Models Galaxy job 'previews'. Instances of this class wrap dictionaries obtained by getting ``/api/jobs`` from Galaxy. 
""" BASE_ATTRS = ('id', 'state') def __init__(self, pw_dict, gi=None): super(JobPreview, self).__init__(pw_dict, gi=gi) @property def gi_module(self): return self.gi.jobs bioblend-0.7.0/bioblend/galaxy/quotas/000077500000000000000000000000001261571066300176625ustar00rootroot00000000000000bioblend-0.7.0/bioblend/galaxy/quotas/__init__.py000066400000000000000000000035561261571066300220040ustar00rootroot00000000000000""" Contains possible interactions with the Galaxy Quota """ from bioblend.galaxy.client import Client class QuotaClient(Client): def __init__(self, galaxy_instance): self.module = 'quotas' super(QuotaClient, self).__init__(galaxy_instance) def get_quotas(self, deleted=False): """ Get a list of quotas :type deleted: bool :param deleted: Only return quota(s) that have been deleted :rtype: list :return: A list of dicts with details on individual quotas. For example:: [{ u'id': u'0604c8a56abe9a50', u'model_class': u'Quota', u'name': u'test ', u'url': u'/api/quotas/0604c8a56abe9a50'}, { u'id': u'1ee267091d0190af', u'model_class': u'Quota', u'name': u'workshop', u'url': u'/api/quotas/1ee267091d0190af'}] """ return Client._get(self, deleted=deleted) def show_quota(self, quota_id, deleted=False): """ Display information on a quota :type quota_id: str :param quota_id: Encoded quota ID :type deleted: bool :param deleted: Search for quota in list of ones already marked as deleted :rtype: dict :return: A description of quota For example:: { u'bytes': 107374182400, u'default': [], u'description': u'just testing', u'display_amount': u'100.0 GB', u'groups': [], u'id': u'0604c8a56abe9a50', u'model_class': u'Quota', u'name': u'test ', u'operation': u'=', u'users': []} """ return Client._get(self, id=quota_id, deleted=deleted) bioblend-0.7.0/bioblend/galaxy/roles/000077500000000000000000000000001261571066300174725ustar00rootroot00000000000000bioblend-0.7.0/bioblend/galaxy/roles/__init__.py000066400000000000000000000027121261571066300216050ustar00rootroot00000000000000""" Contains possible interactions with the Galaxy Roles """ from bioblend.galaxy.client import Client class RolesClient(Client): def __init__(self, galaxy_instance): self.module = 'roles' super(RolesClient, self).__init__(galaxy_instance) def get_roles(self): """ Displays a collection (list) of roles. :rtype: list :return: A list of dicts with details on individual roles. 
For example:: [ {"id": "f2db41e1fa331b3e", "model_class": "Role", "name": "Foo", "url": "/api/roles/f2db41e1fa331b3e"}, {"id": "f597429621d6eb2b", "model_class": "Role", "name": "Bar", "url": "/api/roles/f597429621d6eb2b"} ] """ return Client._get(self) def show_role(self, role_id): """ Display information on a single role :type role_id: str :param role_id: Encoded role ID :rtype: dict :return: A description of role For example:: {"description": "Private Role for Foo", "id": "f2db41e1fa331b3e", "model_class": "Role", "name": "Foo", "type": "private", "url": "/api/roles/f2db41e1fa331b3e"} """ return Client._get(self, id=role_id) bioblend-0.7.0/bioblend/galaxy/tool_data/000077500000000000000000000000001261571066300203145ustar00rootroot00000000000000bioblend-0.7.0/bioblend/galaxy/tool_data/__init__.py000066400000000000000000000034711261571066300224320ustar00rootroot00000000000000""" Contains possible interactions with the Galaxy Tool data tables """ from bioblend.galaxy.client import Client class ToolDataClient(Client): def __init__(self, galaxy_instance): self.module = 'tool_data' super(ToolDataClient, self).__init__(galaxy_instance) def get_data_tables(self): """ Get the list of all data tables. :rtype: list :return: A list of dicts with details on individual data tables. For example:: [{"model_class": "TabularToolDataTable", "name": "fasta_indexes"}, {"model_class": "TabularToolDataTable", "name": "bwa_indexes"}] """ return Client._get(self) def show_data_table(self, data_table_id): """ Get details of a given data table. :type data_table_id: str :param data_table_id: ID of the data table :rtype: dict :return: A description of the given data table and its content. For example:: {"columns": ["value", "dbkey", "name", "path"], "fields": [["test id", "test", "test name", "/opt/galaxy-dist/tool-data/test/seq/test id.fa"]], "model_class": "TabularToolDataTable", "name": "all_fasta"} """ return Client._get(self, id=data_table_id) def delete_data_table(self, data_table_id, values): """ Delete an item from a data table. :type data_table_id: str :param data_table_id: ID of the data table :type values: str :param values: a "|" separated list of column contents, there must be a value for all the columns of the data table """ payload = {} payload['values'] = values return Client._delete(self, payload, id=data_table_id) bioblend-0.7.0/bioblend/galaxy/tools/000077500000000000000000000000001261571066300175065ustar00rootroot00000000000000bioblend-0.7.0/bioblend/galaxy/tools/__init__.py000066400000000000000000000177661261571066300216400ustar00rootroot00000000000000""" Contains possible interaction dealing with Galaxy tools. """ from bioblend.galaxy.client import Client from bioblend.util import attach_file from os.path import basename from json import dumps class ToolClient(Client): def __init__(self, galaxy_instance): self.module = 'tools' super(ToolClient, self).__init__(galaxy_instance) def get_tools(self, tool_id=None, name=None, trackster=None): """ Get all tools or filter the specific one(s) via the provided ``name`` or ``tool_id``. Provide only one argument, ``name`` or ``tool_id``, but not both. If ``name`` is set and multiple names match the given name, all the tools matching the argument will be returned. :type tool_id: str :param tool_id: id of the requested tool :type name: str :param name: name of the requested tool(s) :type trackster: bool :param trackster: if True, only tools that are compatible with Trackster are returned :rtype: list :return: List of tool descriptions. .. 
seealso:: bioblend.galaxy.toolshed.get_repositories() """ if tool_id is not None and name is not None: raise ValueError('Provide only one argument between name or tool_id, but not both') tools = self._raw_get_tool(in_panel=False, trackster=trackster) if tool_id is not None: tool = next((_ for _ in tools if _['id'] == tool_id), None) tools = [tool] if tool is not None else [] elif name is not None: tools = [_ for _ in tools if _['name'] == name] return tools def get_tool_panel(self): """ Get a list of available tool elements in Galaxy's configured toolbox. :rtype: list :return: List containing tools (if not in sections) or tool sections with nested tool descriptions. .. seealso:: bioblend.galaxy.toolshed.get_repositories() """ return self._raw_get_tool(in_panel=True) def _raw_get_tool(self, in_panel=None, trackster=None): params = {} params['in_panel'] = in_panel params['trackster'] = trackster return Client._get(self, params=params) def show_tool(self, tool_id, io_details=False, link_details=False): """ Get details of a given tool. :type tool_id: str :param tool_id: id of the requested tool :type io_details: bool :param io_details: if True, get also input and output details :type link_details: bool :param link_details: if True, get also link details """ params = {} params['io_details'] = io_details params['link_details'] = link_details return Client._get(self, id=tool_id, params=params) def run_tool(self, history_id, tool_id, tool_inputs): """ Runs tool specified by ``tool_id`` in history indicated by ``history_id`` with inputs from ``dict`` ``tool_inputs``. :type history_id: str :param history_id: encoded ID of the history in which to run the tool :type tool_id: str :param tool_id: ID of the tool to be run :type tool_inputs: dict :param tool_inputs: dictionary of input datasets and parameters for the tool (see below) The ``tool_inputs`` dict should contain input datasets and parameters in the (largely undocumented) format used by the Galaxy API. Some examples can be found in https://bitbucket.org/galaxy/galaxy-central/src/tip/test/api/test_tools.py . """ payload = {} payload["history_id"] = history_id payload["tool_id"] = tool_id try: payload["inputs"] = tool_inputs.to_dict() except AttributeError: payload["inputs"] = tool_inputs return self._tool_post(payload) def upload_file(self, path, history_id, **keywords): """ Upload the file specified by ``path`` to the history specified by ``history_id``. :type path: str :param path: path of the file to upload :type history_id: str :param history_id: id of the history where to upload the file :type file_name: str :param file_name: (optional) name of the new history dataset :type file_type: str :param file_type: Galaxy datatype for the new dataset, default is auto :type dbkey: str :param dbkey: (optional) genome dbkey :type to_posix_lines: bool :param to_posix_lines: if True, convert universal line endings to POSIX line endings. Default is True. Set to False if you upload a gzip, bz2 or zip archive containing a binary file :type space_to_tab: bool :param space_to_tab: whether to convert spaces to tabs. Default is False. 
Applicable only if to_posix_lines is True """ if "file_name" not in keywords: keywords["file_name"] = basename(path) payload = self._upload_payload(history_id, **keywords) payload["files_0|file_data"] = attach_file(path, name=keywords["file_name"]) try: return self._tool_post(payload, files_attached=True) finally: payload["files_0|file_data"].close() def upload_from_ftp(self, path, history_id, **keywords): """ Upload the file specified by ``path`` from the user's FTP directory to the history specified by ``history_id``. :type path: str :param path: path of the file in the user's FTP directory :type history_id: str :param history_id: id of the history where to upload the file See :meth:`upload_file` for the optional parameters. """ payload = self._upload_payload(history_id, **keywords) payload['files_0|ftp_files'] = path return self._tool_post(payload) def paste_content(self, content, history_id, **kwds): """ Upload a string to a new dataset in the history specified by ``history_id``. :type content: str :param content: content of the new dataset to upload or a list of URLs (one per line) to upload :type history_id: str :param history_id: id of the history where to upload the content See :meth:`upload_file` for the optional parameters (except file_name). """ payload = self._upload_payload(history_id, **kwds) payload["files_0|url_paste"] = content return self._tool_post(payload, files_attached=False) put_url = paste_content def _upload_payload(self, history_id, **keywords): payload = {} payload["history_id"] = history_id payload["tool_id"] = keywords.get("tool_id", "upload1") tool_input = {} tool_input["file_type"] = keywords.get('file_type', 'auto') tool_input["dbkey"] = keywords.get("dbkey", "?") if not keywords.get('to_posix_lines', True): tool_input['files_0|to_posix_lines'] = False elif keywords.get('space_to_tab', False): tool_input['files_0|space_to_tab'] = 'Yes' if 'file_name' in keywords: tool_input["files_0|NAME"] = keywords['file_name'] tool_input["files_0|type"] = "upload_dataset" payload["inputs"] = tool_input return payload def _tool_post(self, payload, files_attached=False): if files_attached: # If files_attached - this will be posted as multi-part form data # and so each individual parameter needs to be encoded so can be # decoded as JSON by Galaxy (hence dumping complex parameters). # If no files are attached, the whole thing is posted as # application/json and dumped/loaded all at once by requests and # Galaxy. 
complex_payload_params = ["inputs"] for key in complex_payload_params: if key in payload: payload[key] = dumps(payload[key]) return Client._post(self, payload, files_attached=files_attached) bioblend-0.7.0/bioblend/galaxy/tools/inputs.py000066400000000000000000000036131261571066300214050ustar00rootroot00000000000000import six class InputsBuilder(object): """ """ def __init__(self): self._input_dict = {} def set(self, name, input): self._input_dict[name] = input return self def set_param(self, name, value): return self.set(name, param(value=value)) def set_dataset_param(self, name, value, src="hda"): return self.set(name, dataset(value, src=src)) def to_dict(self): values = {} for key, value in self.flat_iter(): if hasattr(value, "value"): value = value.value values[key] = value return values def flat_iter(self, prefix=None): for key, value in six.iteritems(self._input_dict): effective_key = key if prefix is None else "%s|%s" % (prefix, key) if hasattr(value, "flat_iter"): for flattened_key, flattened_value in value.flat_iter(effective_key): yield flattened_key, flattened_value else: yield effective_key, value class RepeatBuilder(object): def __init__(self): self._instances = [] def instance(self, inputs): self._instances.append(inputs) return self def flat_iter(self, prefix=None): for index, instance in enumerate(self._instances): index_prefix = "%s_%d" % (prefix, index) for key, value in instance.flat_iter(index_prefix): yield key, value class Param(object): def __init__(self, value): self.value = value class DatasetParam(Param): def __init__(self, value, src="hda"): if not isinstance(value, dict): value = dict(src=src, id=value) super(DatasetParam, self).__init__(value) inputs = InputsBuilder repeat = RepeatBuilder conditional = InputsBuilder param = Param dataset = DatasetParam __all__ = ["inputs", "repeat", "conditional", "param"] bioblend-0.7.0/bioblend/galaxy/toolshed/000077500000000000000000000000001261571066300201675ustar00rootroot00000000000000bioblend-0.7.0/bioblend/galaxy/toolshed/__init__.py000066400000000000000000000134221261571066300223020ustar00rootroot00000000000000""" Interaction with a Galaxy Tool Shed """ from bioblend.galaxy.client import Client class ToolShedClient(Client): def __init__(self, galaxy_instance): self.module = 'tool_shed_repositories' super(ToolShedClient, self).__init__(galaxy_instance) def get_repositories(self): """ Get the list of all installed Tool Shed repositories on this Galaxy instance. :rtype: list :return: a list of dictionaries containing information about repositories present in the Tool Shed. For example:: [{u'changeset_revision': u'4afe13ac23b6', u'deleted': False, u'dist_to_shed': False, u'error_message': u'', u'name': u'velvet_toolsuite', u'owner': u'edward-kirton', u'status': u'Installed'}] .. versionchanged:: 0.4.1 Changed method name from ``get_tools`` to ``get_repositories`` to better align with the Tool Shed concepts .. seealso:: bioblend.galaxy.tools.get_tool_panel() """ return Client._get(self) def show_repository(self, toolShed_id): """ Get details of a given Tool Shed repository as it is installed on this Galaxy instance. :type toolShed_id: str :param toolShed_id: Encoded toolShed ID :rtype: dict :return: Information about the tool For example:: {u'changeset_revision': u'b17455fb6222', u'ctx_rev': u'8', u'owner': u'aaron', u'status': u'Installed', u'url': u'/api/tool_shed_repositories/82de4a4c7135b20a'} .. 
versionchanged:: 0.4.1 Changed method name from ``show_tool`` to ``show_repository`` to better align with the Tool Shed concepts """ return Client._get(self, id=toolShed_id) def install_repository_revision(self, tool_shed_url, name, owner, changeset_revision, install_tool_dependencies=False, install_repository_dependencies=False, tool_panel_section_id=None, new_tool_panel_section_label=None): """ Install a specified repository revision from a specified Tool Shed into this Galaxy instance. This example demonstrates installation of a repository that contains valid tools, loading them into a section of the Galaxy tool panel or creating a new tool panel section. You can choose if tool dependencies or repository dependencies should be installed, use ``install_tool_dependencies`` or ``install_repository_dependencies``. Installing the repository into an existing tool panel section requires the tool panel config file (e.g., tool_conf.xml, shed_tool_conf.xml, etc) to contain the given tool panel section:
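For example (the section id and name are illustrative)::

    <section id="from_test_tool_shed" name="From Test Tool Shed" version="">
    </section>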
:type tool_shed_url: str :param tool_shed_url: URL of the Tool Shed from which the repository should be installed from (e.g., ``http://testtoolshed.g2.bx.psu.edu``) :type name: str :param name: The name of the repository that should be installed :type owner: str :param owner: The name of the repository owner :type changeset_revision: str :param changeset_revision: The revision of the repository to be installed :type install_tool_dependencies: bool :param install_tool_dependencies: Whether or not to automatically handle tool dependencies (see http://wiki.galaxyproject.org/AToolOrASuitePerRepository for more details) :type install_repository_dependencies: bool :param install_repository_dependencies: Whether or not to automatically handle repository dependencies (see http://wiki.galaxyproject.org/DefiningRepositoryDependencies for more details) :type tool_panel_section_id: str :param tool_panel_section_id: The ID of the Galaxy tool panel section where the tool should be insterted under. Note that you should specify either this parameter or the ``new_tool_panel_section_label``. If both are specified, this one will take precedence. :type new_tool_panel_section_label: str :param new_tool_panel_section_label: The name of a Galaxy tool panel section that should be created and the repository installed into. """ payload = {} payload['tool_shed_url'] = tool_shed_url payload['name'] = name payload['owner'] = owner payload['changeset_revision'] = changeset_revision payload['install_tool_dependencies'] = install_tool_dependencies payload['install_repository_dependencies'] = install_repository_dependencies if tool_panel_section_id: payload['tool_panel_section_id'] = tool_panel_section_id elif new_tool_panel_section_label: payload['new_tool_panel_section_label'] = new_tool_panel_section_label url = "%s%s" % (self.gi.url, '/tool_shed_repositories/new/install_repository_revision') return Client._post(self, url=url, payload=payload) bioblend-0.7.0/bioblend/galaxy/users/000077500000000000000000000000001261571066300175075ustar00rootroot00000000000000bioblend-0.7.0/bioblend/galaxy/users/__init__.py000066400000000000000000000100121261571066300216120ustar00rootroot00000000000000""" Contains possible interaction dealing with Galaxy users. These methods must be executed by a registered Galaxy admin user. """ from bioblend.galaxy.client import Client class UserClient(Client): def __init__(self, galaxy_instance): self.module = 'users' super(UserClient, self).__init__(galaxy_instance) def get_users(self, deleted=False): """ Get a list of all registered users. If ``deleted`` is set to ``True``, get a list of deleted users. :rtype: list :return: A list of dicts with user details. For example:: [{u'email': u'a_user@example.com', u'id': u'dda47097d9189f15', u'url': u'/api/users/dda47097d9189f15'}] """ return Client._get(self, deleted=deleted) def show_user(self, user_id, deleted=False): """ Display information about a user. If ``deleted`` is set to ``True``, display information about a deleted user. :type user_id: str :param user_id: User ID to inspect :type deleted: bool :param deleted: Whether to return results for a deleted user :rtype: dict :return: dictionary containing information about the user """ return Client._get(self, id=user_id, deleted=deleted) def create_user(self, user_email): """ Deprecated method. Just an alias for create_remote_user(). """ return self.create_remote_user(user_email) def create_remote_user(self, user_email): """ Create a new Galaxy remote user. .. 
note:: For this method to work, the Galaxy instance must have the ``allow_user_creation`` and ``use_remote_user`` options set to ``True`` in the ``config/galaxy.ini`` configuration file. Also note that setting ``use_remote_user`` will require an upstream authentication proxy server; however, if you do not have one, access to Galaxy via a browser will not be possible. :type user_email: str :param user_email: Email of user to be created :rtype: dict :return: dictionary containing information about the created user """ payload = {} payload['remote_user_email'] = user_email return Client._post(self, payload) def create_local_user(self, username, user_email, password): """ Create a new Galaxy user. .. note:: For this method to work, the Galaxy instance must have the ``allow_user_creation`` option set to ``True`` and ``use_remote_user`` option set to ``False`` in the ``config/galaxy.ini`` configuration file. :type username: str :param username: Username of user to be created :type user_email: str :param user_email: Email of user to be created :type password: str :param password: password of user to be created :rtype: dict :return: dictionary containing information about the created user """ payload = {} payload['username'] = username payload['email'] = user_email payload['password'] = password return Client._post(self, payload) def get_current_user(self): """ Returns the user id associated with this Galaxy connection :rtype: dict :return: dictionary containing information about the current user """ url = self.gi._make_url(self, None) url = '/'.join([url, 'current']) return Client._get(self, url=url) def create_user_apikey(self, user_id): """ Create a new api key for a user :type user_id: str :param user_id: Encoded user ID :rtype: str :return: The api key for the user """ url = self.gi._make_url(self, None) url = '/'.join([url, user_id, 'api_key']) payload = {} payload['user_id'] = user_id return Client._post(self, payload, url=url) bioblend-0.7.0/bioblend/galaxy/visual/000077500000000000000000000000001261571066300176515ustar00rootroot00000000000000bioblend-0.7.0/bioblend/galaxy/visual/__init__.py000066400000000000000000000035151261571066300217660ustar00rootroot00000000000000""" Contains possible interactions with the Galaxy visualization """ from bioblend.galaxy.client import Client class VisualClient(Client): def __init__(self, galaxy_instance): self.module = 'visualizations' super(VisualClient, self).__init__(galaxy_instance) def get_visualizations(self): """ Get the list of all visualizations. :rtype: list :return: A list of dicts with details on individual visualizations. For example:: [{u'dbkey': u'eschColi_K12', u'id': u'df1c7c96fc427c2d', u'title': u'AVTest1', u'type': u'trackster', u'url': u'/api/visualizations/df1c7c96fc427c2d'}, {u'dbkey': u'mm9', u'id': u'a669f50f8bf55b02', u'title': u'Bam to Bigwig', u'type': u'trackster', u'url': u'/api/visualizations/a669f50f8bf55b02'}] """ results = Client._get(self) return results def show_visualization(self, visual_id): """ Get details of a given visualization. :type visual_id: str :param visual_id: Encoded visualization ID :rtype: dict :return: A description of the given visualization. For example:: {u'annotation': None, u'dbkey': u'mm9', u'id': u'18df9134ea75e49c', u'latest_revision': { ... 
}, u'model_class': u'Visualization', u'revisions': [u'aa90649bb3ec7dcb', u'20622bc6249c0c71'], u'slug': u'visualization-for-grant-1', u'title': u'Visualization For Grant', u'type': u'trackster', u'url': u'/u/azaron/v/visualization-for-grant-1', u'user_id': u'21e4aed91386ca8b'} """ return Client._get(self, id=visual_id) bioblend-0.7.0/bioblend/galaxy/workflows/000077500000000000000000000000001261571066300204035ustar00rootroot00000000000000bioblend-0.7.0/bioblend/galaxy/workflows/__init__.py000066400000000000000000000576461261571066300225360ustar00rootroot00000000000000""" Contains possible interactions with the Galaxy Workflows """ from bioblend.galaxy.client import Client import json import os class WorkflowClient(Client): def __init__(self, galaxy_instance): self.module = 'workflows' super(WorkflowClient, self).__init__(galaxy_instance) # the 'deleted' option is not available for workflows def get_workflows(self, workflow_id=None, name=None, published=False): """ Get all workflows or filter the specific one(s) via the provided ``name`` or ``workflow_id``. Provide only one argument, ``name`` or ``workflow_id``, but not both. :type workflow_id: str :param workflow_id: Encoded workflow ID (incompatible with ``name``) :type name: str :param name: Filter by name of workflow (incompatible with ``workflow_id``). If multiple names match the given name, all the workflows matching the argument will be returned. :type published: bool :param published: if ``True``, return also published workflows :rtype: list :return: A list of workflow dicts. For example:: [{u'id': u'92c56938c2f9b315', u'name': u'Simple', u'url': u'/api/workflows/92c56938c2f9b315'}] """ if workflow_id is not None and name is not None: raise ValueError('Provide only one argument between name or workflow_id, but not both') kwargs = {} if published: kwargs['params'] = {'show_published': 'True'} workflows = Client._get(self, **kwargs) if workflow_id is not None: workflow = next((_ for _ in workflows if _['id'] == workflow_id), None) workflows = [workflow] if workflow is not None else [] elif name is not None: workflows = [_ for _ in workflows if _['name'] == name] return workflows def show_workflow(self, workflow_id): """ Display information needed to run a workflow :type workflow_id: str :param workflow_id: Encoded workflow ID :rtype: dict :return: A description of the workflow and its inputs as a JSON object. For example:: {u'id': u'92c56938c2f9b315', u'inputs': {u'23': {u'label': u'Input Dataset', u'value': u''}}, u'name': u'Simple', u'url': u'/api/workflows/92c56938c2f9b315'} """ return Client._get(self, id=workflow_id) def get_workflow_inputs(self, workflow_id, label): """ Get a list of workflow input IDs that match the given label. If no input matches the given label, an empty list is returned. :type workflow_id: str :param workflow_id: Encoded workflow ID :type label: str :param label: label to filter workflow inputs on :rtype: list :return: list of workflow inputs matching the label query """ wf = Client._get(self, id=workflow_id) inputs = wf['inputs'] return [id for id in inputs if inputs[id]['label'] == label] def import_workflow_json(self, workflow_json): """ Imports a new workflow given a json representation of a previously exported workflow. 
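Example (a sketch; assumes ``wf_dict`` is the workflow representation
returned by :meth:`export_workflow_json`)::

    gi.workflows.import_workflow_json(wf_dict)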
:type workflow_json: str :param workflow_json: JSON string representing the workflow to be imported """ payload = {} payload['workflow'] = workflow_json url = self.gi._make_url(self) url = _join(url, "upload") return Client._post(self, url=url, payload=payload) def import_workflow_from_local_path(self, file_local_path): """ Imports a new workflow given the path to a file containing a previously exported workflow. :type file_local_path: str :param file_local_path: File to upload to the server for new workflow """ with open(file_local_path, 'r') as fp: workflow_json = json.load(fp) return self.import_workflow_json(workflow_json) def import_shared_workflow(self, workflow_id): """ Imports a new workflow from the shared published workflows. :type workflow_id: str :param workflow_id: Encoded workflow ID :rtype: dict :return: A description of the workflow. For example:: {u'id': u'ee0e2b4b696d9092', u'model_class': u'StoredWorkflow', u'name': u'Super workflow that solves everything!', u'published': False, u'tags': [], u'url': u'/api/workflows/ee0e2b4b696d9092'} """ payload = {} payload['workflow_id'] = workflow_id url = self.gi._make_url(self) url = _join(url, 'import') return Client._post(self, url=url, payload=payload) def export_workflow_json(self, workflow_id): """ Exports a workflow :type workflow_id: str :param workflow_id: Encoded workflow ID :rtype: dict :return: Dict representing the workflow requested """ url = self.gi._make_url(self) url = _join(url, "download", workflow_id) return Client._get(self, url=url) def export_workflow_to_local_path(self, workflow_id, file_local_path, use_default_filename=True): """ Exports a workflow in json format to a given local path. :type workflow_id: str :param workflow_id: Encoded workflow ID :type file_local_path: str :param file_local_path: Local path to which the exported file will be saved. (Should not contain filename if use_default_name=True) :type use_default_filename: bool :param use_default_filename: If the use_default_name parameter is True, the exported file will be saved as file_local_path/Galaxy-Workflow-%s.ga, where %s is the workflow name. If use_default_name is False, file_local_path is assumed to contain the full file path including filename. """ workflow_json = self.export_workflow_json(workflow_id) if use_default_filename: filename = 'Galaxy-Workflow-%s.ga' % workflow_json['name'] file_local_path = os.path.join(file_local_path, filename) with open(file_local_path, 'w') as fp: json.dump(workflow_json, fp) def run_workflow(self, workflow_id, dataset_map=None, params=None, history_id=None, history_name=None, import_inputs_to_history=False, replacement_params=None): """ Run the workflow identified by ``workflow_id``. This method is deprecated please use ``invoke_workflow`` instead. :type workflow_id: str :param workflow_id: Encoded workflow ID :type dataset_map: str or dict :param dataset_map: A mapping of workflow inputs to datasets. The datasets source can be a LibraryDatasetDatasetAssociation (``ldda``), LibraryDataset (``ld``), or HistoryDatasetAssociation (``hda``). The map must be in the following format: ``{'': {'id': , 'src': '[ldda, ld, hda]'}}`` (e.g. ``{'23': {'id': '29beef4fadeed09f', 'src': 'ld'}}``) :type params: str or dict :param params: A mapping of tool parameters that are non-datasets parameters. The map must be in the following format: ``{'blastn': {'param': 'evalue', 'value': '1e-06'}}`` :type history_id: str :param history_id: The encoded history ID where to store the workflow output. 
Alternatively, ``history_name`` may be specified to create a new history. :type history_name: str :param history_name: Create a new history with the given name to store the workflow output. If both ``history_id`` and ``history_name`` are provided, ``history_name`` is ignored. If neither is specified, a new 'Unnamed history' is created. :type import_inputs_to_history: bool :param import_inputs_to_history: If ``True``, used workflow inputs will be imported into the history. If ``False``, only workflow outputs will be visible in the given history. :type replacement_params: dict :param replacement_params: pattern-based replacements for post-job actions (see below) :rtype: dict :return: A dict containing the history ID where the outputs are placed as well as output dataset IDs. For example:: {u'history': u'64177123325c9cfd', u'outputs': [u'aa4d3084af404259']} The ``replacement_params`` dict should map parameter names in post-job actions (PJAs) to their runtime values. For instance, if the final step has a PJA like the following:: {u'RenameDatasetActionout_file1': { u'action_arguments': {u'newname': u'${output}'}, u'action_type': u'RenameDatasetAction', u'output_name': u'out_file1'}} then the following renames the output dataset to 'foo':: replacement_params = {'output': 'foo'} see also `this email thread `_. .. warning:: This method is deprecated, please use ``invoke_workflow`` instead. ``run_workflow`` will wait for the whole workflow to be scheduled before returning and will not scale to large workflows as a result. ``invoke_workflow`` also features improved default behavior for dataset input handling. """ payload = {} payload['workflow_id'] = workflow_id if dataset_map: payload['ds_map'] = dataset_map if params: payload['parameters'] = params if replacement_params: payload['replacement_params'] = replacement_params if history_id: payload['history'] = 'hist_id={0}'.format(history_id) elif history_name: payload['history'] = history_name if import_inputs_to_history is False: payload['no_add_to_history'] = True return Client._post(self, payload) def invoke_workflow(self, workflow_id, inputs=None, params=None, history_id=None, history_name=None, import_inputs_to_history=False, replacement_params=None, allow_tool_state_corrections=None): """ Invoke the workflow identified by ``workflow_id``. This will cause a workflow to be scheduled and return an object describing the workflow invocation. :type workflow_id: str :param workflow_id: Encoded workflow ID :type inputs: dict :param inputs: A mapping of workflow inputs to datasets and dataset collections. The datasets source can be a LibraryDatasetDatasetAssociation (``ldda``), LibraryDataset (``ld``), HistoryDatasetAssociation (``hda``), or HistoryDatasetCollectionAssociation (``hdca``). The map must be in the following format: ``{'': {'id': , 'src': '[ldda, ld, hda, hdca]'}}`` (e.g. ``{'2': {'id': '29beef4fadeed09f', 'src': 'hda'}}``) This map may also be indexed by the UUIDs of the workflow steps, as indicated by the ``uuid`` property of steps returned from the Galaxy API. :type params: str or dict :param params: A mapping of tool parameters that are non-datasets parameters. The map must be in the following format: ``{'blastn': {'param': 'evalue', 'value': '1e-06'}}`` :type history_id: str :param history_id: The encoded history ID where to store the workflow output. Alternatively, ``history_name`` may be specified to create a new history. 
:type history_name: str :param history_name: Create a new history with the given name to store the workflow output. If both ``history_id`` and ``history_name`` are provided, ``history_name`` is ignored. If neither is specified, a new 'Unnamed history' is created. :type import_inputs_to_history: bool :param import_inputs_to_history: If ``True``, used workflow inputs will be imported into the history. If ``False``, only workflow outputs will be visible in the given history. :type allow_tool_state_corrections: bool :param allow_tool_state_corrections: If True, allow Galaxy to fill in missing tool state when running workflows. This may be useful for workflows using tools that have changed over time or for workflows built outside of Galaxy with only a subset of inputs defined. :type replacement_params: dict :param replacement_params: pattern-based replacements for post-job actions (see below) :rtype: dict :return: A dict containing the workflow invocation describing the scheduling of the workflow. For example:: {u'inputs': {u'0': {u'src': u'hda', u'id': u'a7db2fac67043c7e', u'uuid': u'7932ffe0-2340-4952-8857-dbaa50f1f46a'}}, u'update_time': u'2015-10-31T22:00:26', u'uuid': u'c8aa2b1c-801a-11e5-a9e5-8ca98228593c', u'history_id': u'2f94e8ae9edff68a', u'workflow_id': u'03501d7626bd192f', u'state': u'ready', u'steps': [{u'workflow_step_uuid': u'b81250fd-3278-4e6a-b269-56a1f01ef485', u'update_time': u'2015-10-31T22:00:26', u'job_id': None, u'state': None, u'workflow_step_label': None, u'order_index': 0, u'action': None, u'model_class': u'WorkflowInvocationStep', u'workflow_step_id': u'cbbbf59e8f08c98c', u'id': u'd413a19dec13d11e'}, {u'workflow_step_uuid': u'e62440b8-e911-408b-b124-e05435d3125e', u'update_time': u'2015-10-31T22:00:26', u'job_id': u'e89067bb68bee7a0', u'state': u'new', u'workflow_step_label':None, u'order_index': 1, u'action': None, u'model_class': u'WorkflowInvocationStep', u'workflow_step_id': u'964b37715ec9bd22', u'id': u'2f94e8ae9edff68a'}, ], u'model_class': u'WorkflowInvocation', u'id': u'df7a1f0c02a5b08e' } The ``replacement_params`` dict should map parameter names in post-job actions (PJAs) to their runtime values. For instance, if the final step has a PJA like the following:: {u'RenameDatasetActionout_file1': { u'action_arguments': {u'newname': u'${output}'}, u'action_type': u'RenameDatasetAction', u'output_name': u'out_file1'}} then the following renames the output dataset to 'foo':: replacement_params = {'output': 'foo'} see also `this email thread `_. .. warning:: Historically, the ``run_workflow`` method consumed a ``dataset_map`` data structure that was indexed by unencoded workflow step IDs. These IDs would not be stable across Galaxy instances. The new ``inputs`` property is instead indexed by either the ``order_index`` property which is stable across workflow imports or the step UUID which is also stable. 
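A minimal invocation sketch (the workflow and dataset IDs below are the
illustrative values from the example above)::

    inputs = {'0': {'id': 'a7db2fac67043c7e', 'src': 'hda'}}
    invocation = gi.workflows.invoke_workflow(
        '03501d7626bd192f', inputs=inputs, history_name='Invocation output')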
""" payload = {} payload['workflow_id'] = workflow_id if inputs: payload['inputs'] = inputs if params: payload['parameters'] = params if replacement_params: payload['replacement_params'] = replacement_params if history_id: payload['history'] = 'hist_id={0}'.format(history_id) elif history_name: payload['history'] = history_name if import_inputs_to_history is False: payload['no_add_to_history'] = True if allow_tool_state_corrections is not None: payload['allow_tool_state_corrections'] = allow_tool_state_corrections url = self.gi._make_url(self) url = _join(url, workflow_id, "invocations") return Client._post(self, payload, url=url) def show_invocation(self, workflow_id, invocation_id): """ Get a workflow invocation object representing the scheduling of a workflow. This object may be sparse at first (missing inputs and invocation steps) and will become more populated as the workflow is actually scheduled. :type workflow_id: str :param workflow_id: Encoded workflow ID :type invocation_id: str :param invocation_id: Encoded workflow invocation ID :rtype: dict :return: The workflow invocation. For example:: {u'inputs': {u'0': {u'src': u'hda', u'id': u'a7db2fac67043c7e', u'uuid': u'7932ffe0-2340-4952-8857-dbaa50f1f46a'}}, u'update_time': u'2015-10-31T22:00:26', u'uuid': u'c8aa2b1c-801a-11e5-a9e5-8ca98228593c', u'history_id': u'2f94e8ae9edff68a', u'workflow_id': u'03501d7626bd192f', u'state': u'ready', u'steps': [{u'workflow_step_uuid': u'b81250fd-3278-4e6a-b269-56a1f01ef485', u'update_time': u'2015-10-31T22:00:26', u'job_id': None, u'state': None, u'workflow_step_label': None, u'order_index': 0, u'action': None, u'model_class': u'WorkflowInvocationStep', u'workflow_step_id': u'cbbbf59e8f08c98c', u'id': u'd413a19dec13d11e'}, {u'workflow_step_uuid': u'e62440b8-e911-408b-b124-e05435d3125e', u'update_time': u'2015-10-31T22:00:26', u'job_id': u'e89067bb68bee7a0', u'state': u'new', u'workflow_step_label':None, u'order_index': 1, u'action': None, u'model_class': u'WorkflowInvocationStep', u'workflow_step_id': u'964b37715ec9bd22', u'id': u'2f94e8ae9edff68a'}, ], u'model_class': u'WorkflowInvocation', u'id': u'df7a1f0c02a5b08e' } """ url = self._invocation_url(workflow_id, invocation_id) return Client._get(self, url=url) def get_invocations(self, workflow_id): """ Get a list containing all the workflow invocations corresponding to the specified workflow. :type workflow_id: str :param workflow_id: Encoded workflow ID :rtype: list :return: A list of workflow invocations. For example:: [{u'update_time': u'2015-10-31T22:00:22', u'uuid': u'c8aa2b1c-801a-11e5-a9e5-8ca98228593c', u'history_id': u'2f94e8ae9edff68a', u'workflow_id': u'03501d7626bd192f', u'state': u'new', u'model_class': u'WorkflowInvocation', u'id': u'df7a1f0c02a5b08e'} ] """ url = self._invocations_url(workflow_id) return Client._get(self, url=url) def cancel_invocation(self, workflow_id, invocation_id): """ Cancel the scheduling of a workflow. :type workflow_id: str :param workflow_id: Encoded workflow ID :type invocation_id: str :param invocation_id: Encoded workflow invocation ID """ url = self._invocation_url(workflow_id, invocation_id) return Client._delete(self, url=url) def show_invocation_step(self, workflow_id, invocation_id, step_id): """ See the details of a particular workflow invocation step. 
:type workflow_id: str :param workflow_id: Encoded workflow ID :type invocation_id: str :param invocation_id: Encoded workflow invocation ID :type step_id: str :param step_id: Encoded workflow invocation step ID :rtype: dict :return: The workflow invocation step. For example:: {u'workflow_step_uuid': u'4060554c-1dd5-4287-9040-8b4f281cf9dc', u'update_time': u'2015-10-31T22:11:14', u'job_id': None, u'state': None, u'workflow_step_label': None, u'order_index': 2, u'action': None, u'model_class': u'WorkflowInvocationStep', u'workflow_step_id': u'52e496b945151ee8', u'id': u'63cd3858d057a6d1'} """ url = self._invocation_step_url(workflow_id, invocation_id, step_id) return Client._get(self, url=url) def run_invocation_step_action(self, workflow_id, invocation_id, step_id, action): """ Execute an action for an active workflow invocation step. The nature of this action and what is expected will vary based on the the type of workflow step (the only currently valid action is True/False for pause steps). :type workflow_id: str :param workflow_id: Encoded workflow ID :type invocation_id: str :param invocation_id: Encoded workflow invocation ID :type step_id: str :param step_id: Encoded workflow invocation step ID :type action: object :param action: Action to use when updating state, semantics depends on step type. """ url = self._invocation_step_url(workflow_id, invocation_id, step_id) payload = {"action": action} return Client._put(self, payload, url=url) def delete_workflow(self, workflow_id): """ Delete a workflow identified by `workflow_id`. :type workflow_id: str :param workflow_id: Encoded workflow ID .. warning:: Deleting a workflow is irreversible - all workflow data will be permanently deleted. """ payload = {} return Client._delete(self, payload, id=workflow_id) def _invocation_step_url(self, workflow_id, invocation_id, step_id): return _join(self._invocation_url(workflow_id, invocation_id), "steps", step_id) def _invocation_url(self, workflow_id, invocation_id): return _join(self._invocations_url(workflow_id), invocation_id) def _invocations_url(self, workflow_id): return _join(self._workflow_url(workflow_id), "invocations") def _workflow_url(self, workflow_id): url = self.gi._make_url(self) url = _join(url, workflow_id) return url def _join(*args): return "/".join(args) __all__ = ['WorkflowClient'] bioblend-0.7.0/bioblend/galaxyclient.py000066400000000000000000000166341261571066300201310ustar00rootroot00000000000000""" Helper class for Galaxy and ToolShed Instance object This class is primarily a helper for the library and user code should not use it directly. A base representation of an instance """ import base64 import json import requests from requests_toolbelt import MultipartEncoder import six from six.moves.urllib.parse import urljoin, urlparse from .galaxy.client import ConnectionError class GalaxyClient(object): def __init__(self, url, key=None, email=None, password=None): # Make sure the url scheme is defined (otherwise requests will not work) if not urlparse(url).scheme: url = "http://" + url # All of Galaxy's and ToolShed's API's are rooted at /api so make that the url self.base_url = url self.url = urljoin(url, 'api') # If key has been supplied, use it; otherwise just set email and # password and grab user's key before first request. 
if key: self._key = key else: self._key = None self.email = email self.password = password self.json_headers = {'Content-Type': 'application/json'} self.verify = True # Should SSL verification be done def _make_url(self, module, module_id=None, deleted=False, contents=False): """ Compose a URL based on the provided arguments. :type module: :class:`~.galaxy.Client` subclass :param module: The base module for which to make the URL. For example: an object of class LibraryClient, WorkflowClient, HistoryClient, ToolShedClient :type module_id: str :param module_id: The encoded ID for a specific module (eg, library ID) :type deleted: bool :param deleted: If ``True``, include ``deleted`` in the URL, after the module name (eg, ``/api/libraries/deleted``) :type contents: bool :param contents: If ``True``, include 'contents' in the URL, after the module ID: ``/api/libraries//contents`` """ c_url = self.url c_url = '/'.join([c_url, module.module]) if deleted is True: c_url = '/'.join([c_url, 'deleted']) if module_id is not None: c_url = '/'.join([c_url, module_id]) if contents is True: c_url = '/'.join([c_url, 'contents']) return c_url def make_get_request(self, url, **kwargs): """ Make a GET request using the provided ``url``. Keyword arguments are the same as in requests.request. If ``verify`` is not provided, ``self.verify`` will be used. If the ``params`` are not provided, use ``default_params`` class field. If params are provided and the provided dict does not have ``key`` key, the default ``self.key`` value will be included in what's passed to the server via the request. """ params = kwargs.get('params') if params is not None and params.get('key', False) is False: params['key'] = self.key else: params = self.default_params kwargs['params'] = params kwargs.setdefault('verify', self.verify) r = requests.get(url, **kwargs) return r def make_post_request(self, url, payload, params=None, files_attached=False): """ Make a POST request using the provided ``url`` and ``payload``. The ``payload`` must be a dict that contains the request values. The payload dict may contain file handles (in which case the files_attached flag must be set to true). If the ``params`` are not provided, use ``default_params`` class field. If params are provided and the provided dict does not have ``key`` key, the default ``self.key`` value will be included in what's passed to the server via the request. The return value will contain the response body as a JSON object. """ if params is not None and params.get('key', False) is False: params['key'] = self.key else: params = self.default_params # Compute data, headers, params arguments for request.post, # leveraging the requests-toolbelt library if any files have # been attached. if files_attached: payload.update(params) payload = MultipartEncoder(fields=payload) headers = self.json_headers.copy() headers['Content-Type'] = payload.content_type post_params = {} else: payload = json.dumps(payload) headers = self.json_headers post_params = params r = requests.post(url, data=payload, headers=headers, verify=self.verify, params=post_params) if r.status_code == 200: return r.json() # @see self.body for HTTP response body raise ConnectionError("Unexpected response from galaxy: %s" % r.status_code, body=r.text) def make_delete_request(self, url, payload=None, params=None): """ Make a DELETE request using the provided ``url`` and the optional arguments. 
The ``payload`` must be a dict that can be converted into a JSON object (via ``json.dumps``) If the ``params`` are not provided, use ``default_params`` class field. If params are provided and the provided dict does not have ``key`` key, the default ``self.key`` value will be included in what's passed to the server via the request. """ if params is not None and params.get('key', False) is False: params['key'] = self.key else: params = self.default_params r = requests.delete(url, verify=self.verify, data=payload, params=params) return r def make_put_request(self, url, payload=None, params=None): """ Make a PUT request using the provided ``url`` with required payload. The ``payload`` must be a dict that can be converted into a JSON object (via ``json.dumps``) """ if params is not None and params.get('key', False) is False: params['key'] = self.key else: params = self.default_params payload = json.dumps(payload) r = requests.put(url, verify=self.verify, data=payload, params=params) return r @property def key(self): if not self._key and self.email is not None and self.password is not None: unencoded_credentials = "%s:%s" % (self.email, self.password) authorization = base64.b64encode(unencoded_credentials) headers = self.json_headers.copy() headers["Authorization"] = authorization auth_url = "%s/authenticate/baseauth" % self.url # make_post_request uses default_params, which uses this and # sets wrong headers - so using lower level method. r = requests.get(auth_url, verify=self.verify, headers=headers) if r.status_code != 200: raise Exception("Failed to authenticate user.") response = r.json() if isinstance(response, (six.string_types, six.text_type)): # bug in Tool Shed response = json.loads(response) self._key = response["api_key"] return self._key @property def default_params(self): return {'key': self.key} bioblend-0.7.0/bioblend/toolshed/000077500000000000000000000000001261571066300167025ustar00rootroot00000000000000bioblend-0.7.0/bioblend/toolshed/__init__.py000066400000000000000000000027461261571066300210240ustar00rootroot00000000000000""" A base representation of an instance of Tool Shed """ from bioblend.toolshed import (repositories) from bioblend.toolshed import (tools) from bioblend.galaxyclient import GalaxyClient class ToolShedInstance(GalaxyClient): def __init__(self, url, key='', email=None, password=None): """ A base representation of an instance of ToolShed, identified by a URL and a user's API key. After you have created an ``ToolShed`` object, access various modules via the class fields (see the source for the most up-to-date list): ``repositories`` are the minimum set supported. For example, to work with a repositories, and get a list of all the public repositories, the following should be done:: from bioblend import toolshed ts = toolshed.ToolShedInstance(url='http://testtoolshed.g2.bx.psu.edu') rl = ts.repositories.get_repositories() tools = ts.tools.search_tools('fastq') :type url: str :param url: A FQDN or IP for a given instance of ToolShed. For example: http://testtoolshed.g2.bx.psu.edu :type key: str :param key: If required, user's API key for the given instance of ToolShed, obtained from the user preferences. 
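Alternatively (a sketch; the credentials shown are placeholders), a
registered user's ``email`` and ``password`` may be supplied instead of
``key``, in which case the API key is fetched automatically on first use::

    ts = toolshed.ToolShedInstance(
        url='http://testtoolshed.g2.bx.psu.edu',
        email='user@example.org', password='secret')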
""" super(ToolShedInstance, self).__init__(url, key, email, password) self.repositories = repositories.ToolShedClient(self) self.tools = tools.ToolShedClient(self) bioblend-0.7.0/bioblend/toolshed/repositories/000077500000000000000000000000001261571066300214315ustar00rootroot00000000000000bioblend-0.7.0/bioblend/toolshed/repositories/__init__.py000066400000000000000000000442611261571066300235510ustar00rootroot00000000000000""" Interaction with a Tool Shed instance repositories """ from six.moves.urllib.parse import urljoin from bioblend.galaxy.client import Client from bioblend.util import attach_file class ToolShedClient(Client): def __init__(self, toolshed_instance): self.module = 'repositories' super(ToolShedClient, self).__init__(toolshed_instance) def get_repositories(self): """ Get a list of all the repositories in a Galaxy Tool Shed :rtype: list :return: Returns a list of dictionaries containing information about repositories present in the Tool Shed. For example:: [{u'times_downloaded': 0, u'user_id': u'5cefd48bc04af6d4', u'description': u'Order Contigs', u'deleted': False, u'deprecated': False, u'private': False, u'url': u'/api/repositories/287bd69f724b99ce', u'owner': u'billybob', u'id': u'287bd69f724b99ce', u'name': u'best_tool_ever'}] .. versionchanged:: 0.4.1 Changed method name from ``get_tools`` to ``get_repositories`` to better align with the Tool Shed concepts """ return Client._get(self) def search_repositories(self, q, page=1, page_size=10): """ Search for repositories in a Galaxy Tool Shed :type q: str :param q: query string for searching purposes :type page: int :param page: page requested :type page_size: int :param page_size: page size requested :rtype: dict :return: dictionary containing search hits as well as metadata for the search example: { u'hits': [ { u'matched_terms': [], u'repository': { u'approved': u'no', u'description': u'Convert export file to fastq', u'full_last_updated': u'2015-01-18 09:48 AM', u'homepage_url': None, u'id': u'bdfa208f0cf6504e', u'last_updated': u'less than a year', u'long_description': u'This is a simple too to convert Solexas Export files to FASTQ files. The tool installation needs to add a new Export file type, the new class is included in the README file as a patch.', u'name': u'export_to_fastq', u'remote_repository_url': None, u'repo_owner_username': u'louise', u'times_downloaded': 164 }, u'score': 4.92 }, { u'matched_terms': [], u'repository': { u'approved': u'no', u'description': u'Convert BAM file to fastq', u'full_last_updated': u'2015-04-07 11:57 AM', u'homepage_url': None, u'id': u'175812cd7caaf439', u'last_updated': u'less than a month', u'long_description': u'Use Picards SamToFastq to convert a BAM file to fastq. 
Useful for storing reads as BAM in Galaxy and converting to fastq when needed for analysis.', u'name': u'bam_to_fastq', u'remote_repository_url': None, u'repo_owner_username': u'brad-chapman', u'times_downloaded': 138 }, u'score': 4.14 } ], u'hostname': u'https://testtoolshed.g2.bx.psu.edu/', u'page': u'1', u'page_size': u'2', u'total_results': u'64' } """ params = dict(q=q, page=page, page_size=page_size) return Client._get(self, params=params) def show_repository(self, toolShed_id): """ Display information of a repository from Tool Shed :type toolShed_id: str :param toolShed_id: Encoded toolShed ID :rtype: dictionary :return: Information about the tool For example:: {{u'times_downloaded': 0, u'user_id': u'5cefd48bc04af6d4', u'description': u'Order Contigs', u'deleted': False, u'deprecated': False, u'private': False, u'url': u'/api/repositories/287bd69f724b99ce', u'owner': u'billybob', u'id': u'287bd69f724b99ce', u'name': u'best_tool_ever'} .. versionchanged:: 0.4.1 Changed method name from ``show_tool`` to ``show_repository`` to better align with the Tool Shed concepts """ return Client._get(self, id=toolShed_id) def get_ordered_installable_revisions(self, name, owner): """ Returns the ordered list of changeset revision hash strings that are associated with installable revisions. As in the changelog, the list is ordered oldest to newest. :type name: str :param name: the name of the repository :type owner: str :param owner: the owner of the repository :rtype: list :return: List of changeset revision hash string from oldest to newest """ url = self.url + '/get_ordered_installable_revisions' params = {} params['name'] = name params['owner'] = owner r = Client._get(self, url=url, params=params) return r def get_repository_revision_install_info(self, name, owner, changeset_revision): """ Return a list of dictionaries of metadata about a certain changeset revision for a single tool. 
:type name: str :param name: the name of the repository :type owner: str :param owner: the owner of the repository :type changeset_revision: str :param changeset_revision: the changset_revision of the RepositoryMetadata object associated with the repository :rtype: List of dictionaries :return: Returns a list of the following dictionaries: - a dictionary defining the repository - a dictionary defining the repository revision (RepositoryMetadata) - a dictionary including the additional information required to install the repository For example:: [{u'times_downloaded': 269, u'user_id': u'1de29d50c3c44272', u'description': u'Galaxy Freebayes Bayesian genetic variant detector tool', u'deleted': False, u'deprecated': False, u'private': False, u'long_description': u'Galaxy Freebayes Bayesian genetic variant detector tool originally included in the Galaxy code distribution but migrated to the tool shed.', u'url': u'/api/repositories/491b7a3fddf9366f', u'owner': u'devteam', u'id': u'491b7a3fddf9366f', u'name': u'freebayes'}, {u'repository_id': u'491b7a3fddf9366f', u'has_repository_dependencies': False, u'includes_tools_for_display_in_tool_panel': True, u'url': u'/api/repository_revisions/504be8aaa652c154', u'malicious': False, u'includes_workflows': False, u'downloadable': True, u'includes_tools': True, u'changeset_revision': u'd291dc763c4c', u'id': u'504be8aaa652c154', u'includes_tool_dependencies': True, u'includes_datatypes': False}, {u'freebayes': [u'Galaxy Freebayes Bayesian genetic variant detector tool', u'http://takadonet@toolshed.g2.bx.psu.edu/repos/devteam/freebayes', u'd291dc763c4c', u'9', u'devteam', {}, {u'freebayes/0.9.6_9608597d12e127c847ae03aa03440ab63992fedf': {u'repository_name': u'freebayes', u'name': u'freebayes', u'readme': u'FreeBayes requires g++ and the standard C and C++ development libraries. Additionally, cmake is required for building the BamTools API.', u'version': u'0.9.6_9608597d12e127c847ae03aa03440ab63992fedf', u'repository_owner': u'devteam', u'changeset_revision': u'd291dc763c4c', u'type': u'package'}, u'samtools/0.1.18': {u'repository_name': u'freebayes', u'name': u'samtools', u'readme': u'Compiling SAMtools requires the ncurses and zlib development libraries.', u'version': u'0.1.18', u'repository_owner': u'devteam', u'changeset_revision': u'd291dc763c4c', u'type': u'package'}}]}] """ url = self.url + '/get_repository_revision_install_info' params = {} params['name'] = name params['owner'] = owner params['changeset_revision'] = changeset_revision return Client._get(self, url=url, params=params) def repository_revisions( self, downloadable=None, malicious=None, tools_functionally_correct=None, missing_test_components=None, do_not_test=None, includes_tools=None, test_install_error=None, skip_tool_test=None): """ Returns a (possibly filtered) list of dictionaries that include information about all repository revisions. The following parameters can be used to filter the list. 
:type downloadable: Boolean :param downloadable: Can the tool be downloaded :type malicious: Boolean :param malicious: :type tools_functionally_correct: Boolean :param tools_functionally_correct: :type missing_test_components: Boolean :param missing_test_components: :type do_not_test: Boolean :param do_not_test: :type includes_tools: Boolean :param includes_tools: :type test_install_error: Boolean :param test_install_error: :type skip_tool_test: Boolean :param skip_tool_test: :rtype: List of dictionaries :return: Returns a (possibly filtered) list of dictionaries that include information about all repository revisions. For example:: [{u'repository_id': u'78f2604ff5e65707', u'has_repository_dependencies': False, u'includes_tools_for_display_in_tool_panel': True, u'url': u'/api/repository_revisions/92250afff777a169', u'malicious': False, u'includes_workflows': False, u'downloadable': True, u'includes_tools': True, u'changeset_revision': u'6e26c5a48e9a', u'id': u'92250afff777a169', u'includes_tool_dependencies': False, u'includes_datatypes': False}, {u'repository_id': u'f9662009da7bfce0', u'has_repository_dependencies': False, u'includes_tools_for_display_in_tool_panel': True, u'url': u'/api/repository_revisions/d3823c748ae2205d', u'malicious': False, u'includes_workflows': False, u'downloadable': True, u'includes_tools': True, u'changeset_revision': u'15a54fa11ad7', u'id': u'd3823c748ae2205d', u'includes_tool_dependencies': False, u'includes_datatypes': False}] """ # Not using '_make_url' or '_get' to create url since the module id used # to create url is not the same as needed for this method url = self.gi.url + '/repository_revisions' params = {} # nice and long... my god! if downloadable: params['downloadable'] = 'True' if malicious: params['malicious'] = 'True' if tools_functionally_correct: params['tools_functionally_correct'] = 'True' if missing_test_components: params['missing_test_components'] = 'True' if do_not_test: params['do_not_test'] = 'True' if includes_tools: params['includes_tools'] = 'True' if test_install_error: params['test_install_error'] = 'True' if skip_tool_test: params['skip_tool_test'] = 'True' return Client._get(self, url=url, params=params) def show_repository_revision(self, metadata_id): ''' Returns a dictionary that includes information about a specified repository revision. :type metadata_id: str :param metadata_id: Encoded repository metadata ID :rtype: dictionary :return: Returns a dictionary that includes information about a specified repository revision. For example:: {u'repository_id': u'491b7a3fddf9366f', u'has_repository_dependencies': False, u'includes_tools_for_display_in_tool_panel': True, u'test_install_error': False, u'url': u'/api/repository_revisions/504be8aaa652c154', u'malicious': False, u'includes_workflows': False, u'id': u'504be8aaa652c154', u'do_not_test': False, u'downloadable': True, u'includes_tools': True, u'tool_test_results': {u'missing_test_components': [],, u'includes_datatypes': False} ''' # Not using '_make_url' or '_get' to create url since the module id used # to create url is not the same as needed for this method # since metadata_id has to be defined, easy to create the url here url = self.gi.url + '/repository_revisions/' + metadata_id return Client._get(self, url=url) def get_categories(self): """ Returns a list of dictionaries that contain descriptions of the repository categories found on the given Tool Shed instance. 
:rtype: list :return: A list of dictionaries containing information about repository categories present in the Tool Shed. For example:: [{u'deleted': False, u'description': u'Tools for manipulating data', u'id': u'175812cd7caaf439', u'model_class': u'Category', u'name': u'Text Manipulation', u'url': u'/api/categories/175812cd7caaf439'},] .. versionadded:: 0.5.2 """ url = urljoin(self.url, 'categories') return Client._get(self, url=url) def update_repository(self, id, tar_ball_path, commit_message=None): """ Update the contents of a tool shed repository with specified tar ball. :type id: str :param id: Encoded repository ID :type tar_ball_path: str :param tar_ball_path: Path to file containing tar ball to upload. :type commit_message: str :param commit_message: Commit message used for underlying mercurial repository backing tool shed repository. :rtype: dict :return: Returns a dictionary that includes repository content warnings. Most valid uploads will result in no such warning and an exception will be raised generally if there are problems. For example a successful upload will look like:: {u'content_alert': u'', u'message': u''} .. versionadded:: 0.5.2 """ url = self.gi._make_url(self, id) + '/changeset_revision' payload = {} if commit_message is not None: payload['commit_message'] = commit_message payload["file"] = attach_file(tar_ball_path) try: return Client._post(self, id=id, payload=payload, files_attached=True, url=url) finally: payload["file"].close() def create_repository(self, name, synopsis, description=None, type="unrestricted", remote_repository_url=None, homepage_url=None, category_ids=None): """ Create a new repository in a Tool Shed :type name: str :param name: Name of the repository :type synopsis: str :param synopsis: Synopsis of the repository :type description: str :param description: Optional description of the repository :type type: str :param type: type of the repository. One of "unrestricted", "repository_suite_definition", or "tool_dependency_definition" :type remote_repository_url: str :param remote_repository_url: Remote URL (e.g. github/bitbucket repository) :type homepage_url: str :param homepage_url: Upstream's homepage for the project. :type category_ids: list :param category_ids: List of encoded category IDs :rtype: dict :return: a dictionary containing information about the new repository. 
For example:: { "deleted": false, "deprecated": false, "description": "new_synopsis", "homepage_url": "https://github.com/galaxyproject/", "id": "8cf91205f2f737f4", "long_description": "this is some repository", "model_class": "Repository", "name": "new_repo_17", "owner": "qqqqqq", "private": false, "remote_repository_url": "https://github.com/galaxyproject/tools-devteam", "times_downloaded": 0, "type": "unrestricted", "user_id": "adb5f5c93f827949" } """ payload = { 'name': name, 'synopsis': synopsis, } if description is not None: payload['description'] = description if description is not None: payload['description'] = description if type is not None: payload['type'] = type if remote_repository_url is not None: payload['remote_repository_url'] = remote_repository_url if homepage_url is not None: payload['homepage_url'] = homepage_url if category_ids is not None: payload['category_ids[]'] = category_ids return Client._post(self, payload) bioblend-0.7.0/bioblend/toolshed/tools/000077500000000000000000000000001261571066300200425ustar00rootroot00000000000000bioblend-0.7.0/bioblend/toolshed/tools/__init__.py000066400000000000000000000043711261571066300221600ustar00rootroot00000000000000""" Interaction with a Tool Shed instance tools """ from bioblend.galaxy.client import Client class ToolShedClient(Client): def __init__(self, toolshed_instance): self.module = 'tools' super(ToolShedClient, self).__init__(toolshed_instance) def search_tools(self, q, page=1, page_size=10): """ Search for tools in a Galaxy Tool Shed :type q: str :param q: query string for searching purposes :type page: int :param page: page requested :type page_size: int :param page_size: page size requested :rtype: dict :return: dictionary containing search hits as well as metadata for the search example: { u'hits': [ { u'matched_terms': [], u'score': 3.0, u'tool': { u'description': u'convert between various FASTQ quality formats', u'id': u'69819b84d55f521efda001e0926e7233', u'name': u'FASTQ Groomer', u'repo_name': None, u'repo_owner_username': u'devteam' } }, { u'matched_terms': [], u'score': 3.0, u'tool': { u'description': u'converts a bam file to fastq files.', u'id': u'521e282770fd94537daff87adad2551b', u'name': u'Defuse BamFastq', u'repo_name': None, u'repo_owner_username': u'jjohnson' } } ], u'hostname': u'https://testtoolshed.g2.bx.psu.edu/', u'page': u'1', u'page_size': u'2', u'total_results': u'118' } """ params = dict(q=q, page=page, page_size=page_size) return Client._get(self, params=params) bioblend-0.7.0/bioblend/util/000077500000000000000000000000001261571066300160365ustar00rootroot00000000000000bioblend-0.7.0/bioblend/util/__init__.py000066400000000000000000000025551261571066300201560ustar00rootroot00000000000000import os from collections import namedtuple class Bunch(object): """ A convenience class to allow dict keys to be represented as object fields. The end result is that this allows a dict to be to be represented the same as a database class, thus the two become interchangeable as a data source. """ def __init__(self, **kwargs): self.__dict__.update(kwargs) def __repr__(self): """ Return the contents of the dict in a printable representation """ return str(self.__dict__) def _file_stream_close(self): """ Close the open file descriptor associated with the FileStream object. """ self[1].close() FileStream = namedtuple("FileStream", ["name", "fd"]) FileStream.close = _file_stream_close def attach_file(path, name=None): """ Attach a path to a request payload object. 
:type path: str :param path: Path to file to attach to payload. :type name: str :param name: Name to give file, if different than actual pathname. :rtype: object :return: Returns an object compatible with requests post operation and capable of being closed with a ``close()`` method. """ if name is None: name = os.path.basename(path) attachment = FileStream(name, open(path, "rb")) return attachment __all__ = [ 'Bunch', 'attach_file', ] bioblend-0.7.0/docs/000077500000000000000000000000001261571066300142335ustar00rootroot00000000000000bioblend-0.7.0/docs/Makefile000066400000000000000000000126701261571066300157010ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . # the i18n builder cannot share the environment and doctrees with the others I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest gettext help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " text to make text files" @echo " man to make manual pages" @echo " texinfo to make Texinfo files" @echo " info to make Texinfo files and run them through makeinfo" @echo " gettext to make PO message catalogs" @echo " changes to make an overview of all changed/added/deprecated items" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: -rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." 
qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Blend.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Blend.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/Blend" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Blend" @echo "# devhelp" epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." $(MAKE) -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." texinfo: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." @echo "Run \`make' in that directory to run these through makeinfo" \ "(use \`make info' here to do that automatically)." info: $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo @echo "Running Texinfo files through makeinfo..." make -C $(BUILDDIR)/texinfo info @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." gettext: $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale @echo @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." bioblend-0.7.0/docs/_static/000077500000000000000000000000001261571066300156615ustar00rootroot00000000000000bioblend-0.7.0/docs/_static/.empty000066400000000000000000000000001261571066300170060ustar00rootroot00000000000000bioblend-0.7.0/docs/api_docs/000077500000000000000000000000001261571066300160145ustar00rootroot00000000000000bioblend-0.7.0/docs/api_docs/cloudman/000077500000000000000000000000001261571066300176165ustar00rootroot00000000000000bioblend-0.7.0/docs/api_docs/cloudman/all.rst000066400000000000000000000006531261571066300211240ustar00rootroot00000000000000.. _cloudman-api-docs: =============================================== API documentation for interacting with CloudMan =============================================== CloudManLauncher ---------------- .. autoclass:: bioblend.cloudman.launch.CloudManLauncher :members: :undoc-members: .. 
_cloudman-instance-api: CloudManInstance ---------------- .. automodule:: bioblend.cloudman :members: :undoc-members: bioblend-0.7.0/docs/api_docs/cloudman/docs.rst000066400000000000000000000121151261571066300213000ustar00rootroot00000000000000=================== Usage documentation =================== This page describes some sample use cases for CloudMan API and provides examples for these API calls. In addition to this page, there are functional examples of complete scripts in ``docs/examples`` directory of the BioBlend source code repository. Setting up custom cloud properties ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CloudMan supports Amazon, OpenStack, OpenNebula, and Eucalyptus based clouds and BioBlend can be used to programatically manipulate CloudMan on any of those clouds. Once launched, the API calls to CloudMan are the same irrespective of the cloud. In order to launch an instance on a given cloud, cloud properties need to be provided to ``CloudManLauncher``. If cloud properties are not specified, ``CloudManLauncher`` will default to Amazon cloud properties. If we want to use a different cloud provider, we need to specify additional cloud properties when creating an instance of the ``CloudManLauncher`` class. For example, if we wanted to create a connection to `NeCTAR`_, Australia's national research cloud, we would use the following properties:: from bioblend.util import Bunch nectar = Bunch( name='NeCTAR', cloud_type='openstack', bucket_default='cloudman-os', region_name='NeCTAR', region_endpoint='nova.rc.nectar.org.au', ec2_port=8773, ec2_conn_path='/services/Cloud', cidr_range='115.146.92.0/22', is_secure=True, s3_host='swift.rc.nectar.org.au', s3_port=8888, s3_conn_path='/') .. Note:: These properties are cloud-specific and need to be obtained from a given cloud provider. Launching a new cluster instance ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ In order to launch a CloudMan cluster on a chosen cloud, we do the following (continuing from the previous example):: from bioblend.cloudman import CloudManConfig from bioblend.cloudman import CloudManInstance cmc = CloudManConfig('', 'm1.medium', 'choose_a_password_here', nectar) cmi = CloudManInstance.launch_instance(cmc) .. Note:: If you already have an existing instance of CloudMan, just create an instance of the ``CloudManInstance`` object directly by calling its constructor and connecting to it (the password you provide must match the password you provided as part of user data when launching this instance). For example:: cmi = CloudManInstance('http://115.146.92.174', 'your_UD_password') We now have a ``CloudManInstance`` object that allows us to manage created CloudMan instance via the API. Once launched, it will take a few minutes for the instance to boot and CloudMan start. To check on the status of the machine, (repeatedly) run the following command:: >>> cmi.get_machine_status() {'error': '', 'instance_state': u'pending', 'placement': '', 'public_ip': ''} >>> cmi.get_machine_status() {'error': '', 'instance_state': u'running', 'placement': u'melbourne-qh2', 'public_ip': u'115.146.86.29'} Once the instance is ready, although it may still take a few moments for CloudMan to start, it is possible to start interacting with the application. .. Note:: The ``CloudManInstance`` object (e.g., ``cmi``) is a local representation of the actual CloudMan instance. As a result, the local object can get out of sync with the remote instance. 
To update the state of the local object, call the ``update`` method on the ``cmi`` object:: >>> cmi.update() Manipulating an existing cluster ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Having a reference to a ``CloudManInstance`` object, we can manage it via the available :ref:`cloudman-instance-api` API:: >>> cmi.initialized False >>> cmi.initialize('SGE') >>> cmi.get_status() {u'all_fs': [], u'app_status': u'yellow', u'autoscaling': {u'as_max': u'N/A', u'as_min': u'N/A', u'use_autoscaling': False}, u'cluster_status': u'STARTING', u'data_status': u'green', u'disk_usage': {u'pct': u'0%', u'total': u'0', u'used': u'0'}, u'dns': u'#', u'instance_status': {u'available': u'0', u'idle': u'0', u'requested': u'0'}, u'snapshot': {u'progress': u'None', u'status': u'None'}} >>> cmi.get_cluster_size() 1 >>> cmi.get_nodes() [{u'id': u'i-00006016', u'instance_type': u'm1.medium', u'ld': u'0.0 0.025 0.065', u'public_ip': u'115.146.86.29', u'time_in_state': u'2268'}] >>> cmi.add_nodes(2) {u'all_fs': [], u'app_status': u'green', u'autoscaling': {u'as_max': u'N/A', u'as_min': u'N/A', u'use_autoscaling': False}, u'cluster_status': u'READY', u'data_status': u'green', u'disk_usage': {u'pct': u'0%', u'total': u'0', u'used': u'0'}, u'dns': u'#', u'instance_status': {u'available': u'0', u'idle': u'0', u'requested': u'2'}, u'snapshot': {u'progress': u'None', u'status': u'None'}} >>> cmi.get_cluster_size() 3 .. _NeCTAR: http://www.nectar.org.au/research-cloud bioblend-0.7.0/docs/api_docs/galaxy/000077500000000000000000000000001261571066300173015ustar00rootroot00000000000000bioblend-0.7.0/docs/api_docs/galaxy/all.rst000066400000000000000000000035071261571066300206100ustar00rootroot00000000000000============================================= API documentation for interacting with Galaxy ============================================= GalaxyInstance -------------- .. autoclass:: bioblend.galaxy.GalaxyInstance .. automethod:: bioblend.galaxy.GalaxyInstance.__init__ ----- .. _libraries-api: Config -------- .. automodule:: bioblend.galaxy.config ----- Datasets -------- .. automodule:: bioblend.galaxy.datasets ----- Datatypes --------- .. automodule:: bioblend.galaxy.datatypes ----- Folders ------- .. automodule:: bioblend.galaxy.folders ----- Forms ----- .. automodule:: bioblend.galaxy.forms ----- FTP files --------- .. automodule:: bioblend.galaxy.ftpfiles ----- Genomes ------- .. automodule:: bioblend.galaxy.genomes Groups ------ .. automodule:: bioblend.galaxy.groups ----- Histories --------- .. automodule:: bioblend.galaxy.histories ----- Jobs ---- .. automodule:: bioblend.galaxy.jobs ----- Libraries --------- .. automodule:: bioblend.galaxy.libraries ----- Quotas ------ .. automodule:: bioblend.galaxy.quotas ----- Roles ----- .. automodule:: bioblend.galaxy.roles ----- Tools ----- .. automodule:: bioblend.galaxy.tools ----- Tool data tables ---------------- .. automodule:: bioblend.galaxy.tool_data ----- ToolShed -------- .. automodule:: bioblend.galaxy.toolshed ----- Users ----- .. automodule:: bioblend.galaxy.users ----- Visual -------- .. automodule:: bioblend.galaxy.visual ----- .. _workflows-api: Workflows --------- .. automodule:: bioblend.galaxy.workflows .. _objects-api: ========================== Object-oriented Galaxy API ========================== .. autoclass:: bioblend.galaxy.objects.galaxy_instance.GalaxyInstance Client ------ .. automodule:: bioblend.galaxy.objects.client Wrappers -------- .. 
automodule:: bioblend.galaxy.objects.wrappers bioblend-0.7.0/docs/api_docs/galaxy/docs.rst000066400000000000000000000430361261571066300207710ustar00rootroot00000000000000=================== Usage documentation =================== This page describes some sample use cases for the Galaxy API and provides examples for these API calls. In addition to this page, there are functional examples of complete scripts in the ``docs/examples`` directory of the BioBlend source code repository. Connect to a Galaxy server ~~~~~~~~~~~~~~~~~~~~~~~~~~ To connect to a running Galaxy server, you will need an account on that Galaxy instance and an API key for the account. Instructions on getting an API key can be found at http://wiki.galaxyproject.org/Learn/API . To open a connection call:: from bioblend.galaxy import GalaxyInstance gi = GalaxyInstance(url='http://example.galaxy.url', key='your-API-key') We now have a ``GalaxyInstance`` object which allows us to interact with the Galaxy server under our account, and access our data. If the account is a Galaxy admin account we also will be able to use this connection to carry out admin actions. .. _view-histories-and-datasets: View Histories and Datasets ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Methods for accessing histories and datasets are grouped under ``GalaxyInstance.histories.*`` and ``GalaxyInstance.datasets.*`` respectively. To get information on the Histories currently in your account, call:: >>> gi.histories.get_histories() [{u'id': u'f3c2b0f3ecac9f02', u'name': u'RNAseq_DGE_BASIC_Prep', u'url': u'/api/histories/f3c2b0f3ecac9f02'}, {u'id': u'8a91dcf1866a80c2', u'name': u'June demo', u'url': u'/api/histories/8a91dcf1866a80c2'}] This returns a list of dictionaries containing basic metadata, including the id and name of each History. In this case, we have two existing Histories in our account, 'RNAseq_DGE_BASIC_Prep' and 'June demo'. To get more detailed information about a History we can pass its id to the ``show_history`` method:: >>> gi.histories.show_history('f3c2b0f3ecac9f02', contents=False) {u'annotation': u'', u'contents_url': u'/api/histories/f3c2b0f3ecac9f02/contents', u'id': u'f3c2b0f3ecac9f02', u'name': u'RNAseq_DGE_BASIC_Prep', u'nice_size': u'93.5 MB', u'state': u'ok', u'state_details': {u'discarded': 0, u'empty': 0, u'error': 0, u'failed_metadata': 0, u'new': 0, u'ok': 7, u'paused': 0, u'queued': 0, u'running': 0, u'setting_metadata': 0, u'upload': 0 }, u'state_ids': {u'discarded': [], u'empty': [], u'error': [], u'failed_metadata': [], u'new': [], u'ok': [u'd6842fb08a76e351', u'10a4b652da44e82a', u'81c601a2549966a0', u'a154f05e3bcee26b', u'1352fe19ddce0400', u'06d549c52d753e53', u'9ec54455d6279cc7'], u'paused': [], u'queued': [], u'running': [], u'setting_metadata': [], u'upload': [] } } .. _example-dataset: This gives us a dictionary containing the History's metadata. With ``contents=False`` (the default), we only get a list of ids of the datasets contained within the History; with ``contents=True`` we would get metadata on each dataset. 
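For instance, to work with the dataset records themselves rather than just their ids, we could request the contents directly (a minimal sketch -- the exact fields present in each dataset dictionary, such as ``name``, may vary between Galaxy releases)::

    >>> datasets = gi.histories.show_history('f3c2b0f3ecac9f02', contents=True)
    >>> [dataset['name'] for dataset in datasets]
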
We can also directly access more detailed information on a particular dataset by passing its id to the ``show_dataset`` method:: >>> gi.datasets.show_dataset('10a4b652da44e82a') {u'data_type': u'fastqsanger', u'deleted': False, u'file_size': 16527060, u'genome_build': u'dm3', u'id': 17499, u'metadata_data_lines': None, u'metadata_dbkey': u'dm3', u'metadata_sequences': None, u'misc_blurb': u'15.8 MB', u'misc_info': u'Noneuploaded fastqsanger file', u'model_class': u'HistoryDatasetAssociation', u'name': u'C1_R2_1.chr4.fq', u'purged': False, u'state': u'ok', u'visible': True} Uploading Datasets to a History ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ To upload a local file to a Galaxy server, you can run the ``upload_file`` method, supplying the path to a local file:: >>> gi.tools.upload_file('test.txt', 'f3c2b0f3ecac9f02') {u'implicit_collections': [], u'jobs': [{u'create_time': u'2015-07-28T17:52:39.756488', u'exit_code': None, u'id': u'9752b387803d3e1e', u'model_class': u'Job', u'state': u'new', u'tool_id': u'upload1', u'update_time': u'2015-07-28T17:52:39.987509'}], u'output_collections': [], u'outputs': [{u'create_time': u'2015-07-28T17:52:39.331176', u'data_type': u'galaxy.datatypes.data.Text', u'deleted': False, u'file_ext': u'auto', u'file_size': 0, u'genome_build': u'?', u'hda_ldda': u'hda', u'hid': 16, u'history_content_type': u'dataset', u'history_id': u'f3c2b0f3ecac9f02', u'id': u'59c76a119581e190', u'metadata_data_lines': None, u'metadata_dbkey': u'?', u'misc_blurb': None, u'misc_info': None, u'model_class': u'HistoryDatasetAssociation', u'name': u'test.txt', u'output_name': u'output0', u'peek': u'
', u'purged': False, u'state': u'queued', u'tags': [], u'update_time': u'2015-07-28T17:52:39.611887', u'uuid': u'ff0ee99b-7542-4125-802d-7a193f388e7e', u'visible': True}]} If files are greater than 2GB in size, they will need to be uploaded via FTP. Importing files from the user's FTP folder can be done via running the upload tool again:: >>> gi.tools.upload_from_ftp('test.txt', 'f3c2b0f3ecac9f02') {u'implicit_collections': [], u'jobs': [{u'create_time': u'2015-07-28T17:57:43.704394', u'exit_code': None, u'id': u'82b264d8c3d11790', u'model_class': u'Job', u'state': u'new', u'tool_id': u'upload1', u'update_time': u'2015-07-28T17:57:43.910958'}], u'output_collections': [], u'outputs': [{u'create_time': u'2015-07-28T17:57:43.209041', u'data_type': u'galaxy.datatypes.data.Text', u'deleted': False, u'file_ext': u'auto', u'file_size': 0, u'genome_build': u'?', u'hda_ldda': u'hda', u'hid': 17, u'history_content_type': u'dataset', u'history_id': u'f3c2b0f3ecac9f02', u'id': u'a676e8f07209a3be', u'metadata_data_lines': None, u'metadata_dbkey': u'?', u'misc_blurb': None, u'misc_info': None, u'model_class': u'HistoryDatasetAssociation', u'name': u'test.txt', u'output_name': u'output0', u'peek': u'
', u'purged': False, u'state': u'queued', u'tags': [], u'update_time': u'2015-07-28T17:57:43.544407', u'uuid': u'2cbe8f0a-4019-47c4-87e2-005ce35b8449', u'visible': True}]} View Data Libraries ~~~~~~~~~~~~~~~~~~~ Methods for accessing Data Libraries are grouped under ``GalaxyInstance.libraries.*``. Most Data Library methods are available to all users, but as only administrators can create new Data Libraries within Galaxy, the ``create_folder`` and ``create_library`` methods can only be called using an API key belonging to an admin account. We can view the Data Libraries available to our account using:: >>> gi.libraries.get_libraries() [{u'id': u'8e6f930d00d123ea', u'name': u'RNA-seq workshop data', u'url': u'/api/libraries/8e6f930d00d123ea'}, {u'id': u'f740ab636b360a70', u'name': u'1000 genomes', u'url': u'/api/libraries/f740ab636b360a70'}] This gives a list of metadata dictionaries with basic information on each library. We can get more information on a particular Data Library by passing its id to the ``show_library`` method:: >>> gi.libraries.show_library('8e6f930d00d123ea') {u'contents_url': u'/api/libraries/8e6f930d00d123ea/contents', u'description': u'RNA-Seq workshop data', u'name': u'RNA-Seq', u'synopsis': u'Data for the RNA-Seq tutorial'} Upload files to a Data Library ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ We can get files into Data Libraries in several ways: by uploading from our local machine, by retrieving from a URL, by passing the new file content directly into the method, or by importing a file from the filesystem on the Galaxy server. For instance, to upload a file from our machine we might call: >>> gi.libraries.upload_file_from_local_path('8e6f930d00d123ea', '/local/path/to/mydata.fastq', file_type='fastqsanger') Note that we have provided the id of the destination Data Library, and in this case we have specified the type that Galaxy should assign to the new dataset. The default value for ``file_type`` is 'auto', in which case Galaxy will attempt to guess the dataset type. View Workflows ~~~~~~~~~~~~~~ Methods for accessing workflows are grouped under ``GalaxyInstance.workflows.*``. To get information on the Workflows currently in your account, use:: >>> gi.workflows.get_workflows() [{u'id': u'e8b85ad72aefca86', u'name': u"TopHat + cufflinks part 1", u'url': u'/api/workflows/e8b85ad72aefca86'}, {u'id': u'b0631c44aa74526d', u'name': u'CuffDiff', u'url': u'/api/workflows/b0631c44aa74526d'}] This returns a list of metadata dictionaries. We can get the details of a particular Workflow, including its steps, by passing its id to the ``show_workflow`` method:: >>> gi.workflows.show_workflow('e8b85ad72aefca86') {u'id': u'e8b85ad72aefca86', u'inputs': {u'252': {u'label': u'Input RNA-seq fastq', u'value': u'' } }, u'name': u"TopHat + cufflinks part 1", u'steps': {u'250': {u'id': 250, u'input_steps': {u'input1': {u'source_step': 252, u'step_output': u'output' } }, u'tool_id': u'tophat', u'type': u'tool' }, u'251': {u'id': 251, u'input_steps': {u'input': {u'source_step': 250, u'step_output': u'accepted_hits' } }, u'tool_id': u'cufflinks', u'type': u'tool' }, u'252': {u'id': 252, u'input_steps': {}, u'tool_id': None, u'type': u'data_input' } }, u'url': u'/api/workflows/e8b85ad72aefca86' } Export or import a Workflow ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Workflows can be exported from or imported into Galaxy as JSON. This makes it possible to archive Workflows, or to move them between Galaxy instances. 
To export a workflow, we can call:: >>> workflow_string = gi.workflows.export_workflow_json('e8b85ad72aefca86') This gives us a (rather long) string with a JSON-encoded representation of the Workflow. We can import this string as a new Workflow with:: >>> gi.workflows.import_workflow_json(workflow_string) {u'id': u'c0bacafdfe211f9a', u'name': u'TopHat + cufflinks part 1 (imported from API)', u'url': u'/api/workflows/c0bacafdfe211f9a'} This call returns a dictionary containing basic metadata on the new Workflow object. Since in this case we have imported the JSON string into the original Galaxy instance, we now have a duplicate of the original Workflow in our account: >>> gi.workflows.get_workflows() [{u'id': u'c0bacafdfe211f9a', u'name': u'TopHat + cufflinks part 1 (imported from API)', u'url': u'/api/workflows/c0bacafdfe211f9a'}, {u'id': u'e8b85ad72aefca86', u'name': u"TopHat + cufflinks part 1", u'url': u'/api/workflows/e8b85ad72aefca86'}, {u'id': u'b0631c44aa74526d', u'name': u'CuffDiff', u'url': u'/api/workflows/b0631c44aa74526d'}] Instead of using JSON strings directly, Workflows can be exported to or imported from files on the local disk using the ``export_workflow_to_local_path`` and ``import_workflow_from_local_path`` methods. See the :ref:`API reference ` for details. .. Note:: If we export a Workflow from one Galaxy instance and import it into another, Galaxy will only run it without modification if it has the same versions of the tool wrappers installed. This is to ensure reproducibility. Otherwise, we will need to manually update the Workflow to use the new tool versions. Run a Workflow ~~~~~~~~~~~~~~ To run a Workflow, we need to tell Galaxy which datasets to use for which workflow inputs. We can use datasets from Histories or Data Libraries. Examine the Workflow above. We can see that it takes only one input file. That is: >>> wf = gi.workflows.show_workflow('e8b85ad72aefca86') >>> wf['inputs'] {u'252': {u'label': u'Input RNA-seq fastq', u'value': u'' } } There is one input, labelled 'Input RNA-seq fastq'. This input is passed to the Tophat tool and should be a fastq file. We will use the dataset we examined above, under :ref:`view-histories-and-datasets`, which had name 'C1_R2_1.chr4.fq' and id '10a4b652da44e82a'. To specify the inputs, we build a data map and pass this to the ``run_workflow`` method. This data map is a nested dictionary object which maps inputs to datasets. We call:: >>> datamap = dict() >>> datamap['252'] = { 'src':'hda', 'id':'10a4b652da44e82a' } >>> gi.workflows.run_workflow('e8b85ad72aefca86', datamap, history_name='New output history') {u'history': u'0a7b7992a7cabaec', u'outputs': [u'33be8ad9917d9207', u'fbee1c2dc793c114', u'85866441984f9e28', u'1c51aa78d3742386', u'a68e8770e52d03b4', u'c54baf809e3036ac', u'ba0db8ce6cd1fe8f', u'c019e4cf08b2ac94' ] } In this case the only input id is '252' and the corresponding dataset id is '10a4b652da44e82a'. We have specified the dataset source to be 'hda' (HistoryDatasetAssociation) since the dataset is stored in a History. See the :ref:`API reference ` for allowed dataset specifications. We have also requested that a new History be created and used to store the results of the run, by setting ``history_name='New output history'``. The ``run_workflow`` call submits all the jobs which need to be run to the Galaxy workflow engine, with the appropriate dependencies so that they will run in order. The call returns immediately, so we can continue to submit new jobs while waiting for this workflow to execute. 
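If instead we want to block until the run has finished, a minimal polling sketch (an assumption-laden example: it only checks the overall state of the output History, using the ``history`` id returned by ``run_workflow`` as shown above) could look like this::

    >>> import time
    >>> run = gi.workflows.run_workflow('e8b85ad72aefca86', datamap,
    ...                                 history_name='New output history')
    >>> # Poll the output History until all jobs have either completed or failed
    >>> while gi.histories.show_history(run['history'])['state'] not in ('ok', 'error'):
    ...     time.sleep(10)
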
``run_workflow`` returns the id of the output History and of the datasets that will be created as a result of this run. Note that these dataset ids are valid immediately, so we can specify these datasets as inputs to new jobs even before the files have been created, and the new jobs will be added to the queue with the appropriate dependencies. If we view the output History immediately after calling ``run_workflow``, we will see something like:: >>> gi.histories.show_history('0a7b7992a7cabaec') {u'annotation': u'', u'contents_url': u'/api/histories/0a7b7992a7cabaec/contents', u'id': u'0a7b7992a7cabaec', u'name': u'New output history', u'nice_size': u'0 bytes', u'state': u'queued', u'state_details': {u'discarded': 0, u'empty': 0, u'error': 0, u'failed_metadata': 0, u'new': 0, u'ok': 0, u'paused': 0, u'queued': 8, u'running': 0, u'setting_metadata': 0, u'upload': 0}, u'state_ids': {u'discarded': [], u'empty': [], u'error': [], u'failed_metadata': [], u'new': [], u'ok': [], u'paused': [], u'queued': [u'33be8ad9917d9207', u'fbee1c2dc793c114', u'85866441984f9e28', u'1c51aa78d3742386', u'a68e8770e52d03b4', u'c54baf809e3036ac', u'ba0db8ce6cd1fe8f', u'c019e4cf08b2ac94'], u'running': [], u'setting_metadata': [], u'upload': [] } } In this case, because the submitted jobs have not had time to run, the output History contains 8 datasets in the 'queued' state and has a total size of 0 bytes. If we make this call again later we should instead see completed output files. View Users ~~~~~~~~~~ Methods for managing users are grouped under ``GalaxyInstance.users.*``. User management is only available to Galaxy administrators, that is, the API key used to connect to Galaxy must be that of an admin account. To get a list of users, call: >>> gi.users.get_users() [{u'email': u'userA@unimelb.edu.au', u'id': u'975a9ce09b49502a', u'quota_percent': None, u'url': u'/api/users/975a9ce09b49502a'}, {u'email': u'userB@student.unimelb.edu.au', u'id': u'0193a95acf427d2c', u'quota_percent': None, u'url': u'/api/users/0193a95acf427d2c'}] bioblend-0.7.0/docs/api_docs/lib_config.rst000066400000000000000000000003741261571066300206450ustar00rootroot00000000000000==================================== Configuration documents for BioBlend ==================================== BioBlend -------- .. automodule:: bioblend :members: Config ------ .. automodule:: bioblend.config :members: :undoc-members: bioblend-0.7.0/docs/api_docs/toolshed/000077500000000000000000000000001261571066300176355ustar00rootroot00000000000000bioblend-0.7.0/docs/api_docs/toolshed/all.rst000066400000000000000000000006251261571066300211420ustar00rootroot00000000000000========================================================== API documentation for interacting with the Galaxy Toolshed ========================================================== ToolShedInstance ---------------- .. autoclass:: bioblend.toolshed.ToolShedInstance .. automethod:: bioblend.toolshed.ToolShedInstance.__init__ Repositories ------------ .. automodule:: bioblend.toolshed.repositories bioblend-0.7.0/docs/conf.py000066400000000000000000000201201261571066300155250ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # BioBlend documentation build configuration file, created by # sphinx-quickstart on Wed Jun 6 11:51:19 2012. # # This file is execfile()d with the current directory set to its containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. 
# # All configuration values have a default; values that are commented out # serve to show the default. import os import sys sys.path.append('../') # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. sys.path.insert(0, os.path.abspath(os.path.join(os.getcwd(), '..'))) # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.viewcode' ] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix of source filenames. source_suffix = '.rst' # The encoding of source files. # source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' # General information about the project. project = 'BioBlend' copyright = '2012-2015, Enis Afgan' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. from bioblend import get_version # The short X.Y version. version = get_version() # The full version, including alpha/beta/rc tags. release = get_version() # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: # today = '' # Else, today_fmt is used as the format for a strftime call. # today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = ['_build'] # The reST default role (used for this markup: `text`) to use for all documents. # default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. # add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). # add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. # show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] # List of autodoc directive flags that should be automatically applied to all # autodoc directives autodoc_default_flags = ['members', 'undoc-members'] # Include the __init__ method's doc string in addition to the class doc string # in the documentation. autoclass_content = 'both' # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = 'default' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. 
# html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". # html_title = None # A shorter title for the navigation bar. Default is the same as html_title. # html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. # html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. # html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. # html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. # html_use_smartypants = True # Custom sidebar templates, maps document names to template names. # html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. # html_additional_pages = {} # If false, no module index is generated. # html_domain_indices = True # If false, no index is generated. # html_use_index = True # If true, the index is split into individual pages for each letter. # html_split_index = False # If true, links to the reST sources are added to the pages. # html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. # html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. # html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. # html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). # html_file_suffix = None # Output file base name for HTML help builder. htmlhelp_basename = 'BioBlenddoc' # -- Options for LaTeX output -------------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # 'preamble': '', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'BioBlend.tex', 'BioBlend Documentation', 'Enis Afgan', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. # latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. # latex_use_parts = False # If true, show page references after internal links. # latex_show_pagerefs = False # If true, show URL addresses after external links. # latex_show_urls = False # Documents to append as an appendix to all manuals. # latex_appendices = [] # If false, no module index is generated. # latex_domain_indices = True # -- Options for manual page output -------------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). 
man_pages = [ ('index', 'bioblend', 'BioBlend Documentation', ['Enis Afgan'], 1) ] # If true, show URL addresses after external links. # man_show_urls = False # -- Options for Texinfo output ------------------------------------------------ # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ ('index', 'BioBlend', 'BioBlend Documentation', 'Enis Afgan', 'BioBlend', 'One line description of project.', 'Miscellaneous'), ] # Documents to append as an appendix to all manuals. # texinfo_appendices = [] # If false, no module index is generated. # texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. # texinfo_show_urls = 'footnote' bioblend-0.7.0/docs/examples/000077500000000000000000000000001261571066300160515ustar00rootroot00000000000000bioblend-0.7.0/docs/examples/cloudman_basic_usage_scenario.py000066400000000000000000000021541261571066300244370ustar00rootroot00000000000000from bioblend import cloudman url = "http://127.0.0.1:42284" password = "password" cm = cloudman.CloudManInstance(url, password) # Expects CloudMan to be running locally # Set cluster type and storage size cm.initialize(type="SGE") # Get cluster status status = cm.get_status() # Get nodes nodes = cm.get_nodes() # There should be a master node # Add node num_nodes = 1 status = cm.add_nodes(num_nodes) # Remove nodes status = cm.remove_nodes(num_nodes, force=True) instance_id = "abcdef" cm.remove_node(instance_id, force=True) # Reboot instance cm.reboot_node(instance_id) # Autoscaling: # enable cm.disable_autoscaling() cm.enable_autoscaling(minimum_nodes=0, maximum_nodes=19) # autoscaling should be enabled now is_enabled = cm.autoscaling_enabled() min_autoscaling = cm.get_status()['autoscaling']['as_min'] max_autoscaling = cm.get_status()['autoscaling']['as_max'] # adjust cm.adjust_autoscaling(minimum_nodes=5, maximum_nodes=10) # disable cm.disable_autoscaling() # autoscaling should be disabled cm.autoscaling_enabled() # Get Galaxy DNS/Host galaxy_state = cm.get_galaxy_state() # RUNNING, STARTING..... bioblend-0.7.0/docs/examples/create_user_get_api_key.py000066400000000000000000000015531261571066300232700ustar00rootroot00000000000000""" This example creates a new user and prints her API key. It is also used to initialize a Galaxy server in Continuous Integration testing of BioBlend. Usage: python create_user_get_api_key.py """ from __future__ import print_function import sys import bioblend.galaxy if len(sys.argv) != 6: print("Usage: python create_user_get_api_key.py ") sys.exit(1) galaxy_url = sys.argv[1] galaxy_api_key = sys.argv[2] # Initiating Galaxy connection gi = bioblend.galaxy.GalaxyInstance(galaxy_url, galaxy_api_key) # Create a new user and get a new API key for her new_user = gi.users.create_local_user(sys.argv[3], sys.argv[4], sys.argv[5]) new_api_key = gi.users.create_user_apikey(new_user['id']) print(new_api_key) bioblend-0.7.0/docs/examples/list_data_libraries.py000066400000000000000000000014751261571066300224320ustar00rootroot00000000000000""" This example retrieves details of all the Data Libraries available to us and lists information on them. 
Usage: python list_data_libraries.py """ from __future__ import print_function import sys from bioblend.galaxy import GalaxyInstance if len(sys.argv) != 3: print("Usage: python list_data_libraries.py ") sys.exit(1) galaxy_url = sys.argv[1] galaxy_key = sys.argv[2] print("Initiating Galaxy connection") gi = GalaxyInstance(url=galaxy_url, key=galaxy_key) print("Retrieving Data Library list") libraries = gi.libraries.get_libraries() if len(libraries) == 0: print("There are no Data Libraries available.") else: print("\nData Libraries:") for lib_dict in libraries: print("{0} : {1}".format(lib_dict['name'], lib_dict['id'])) bioblend-0.7.0/docs/examples/list_histories.py000066400000000000000000000017501261571066300214720ustar00rootroot00000000000000""" This example retrieves details of all the Histories in our Galaxy account and lists information on them. Usage: python list_histories.py """ from __future__ import print_function import sys from bioblend.galaxy import GalaxyInstance if len(sys.argv) != 3: print("Usage: python list_histories.py ") sys.exit(1) galaxy_url = sys.argv[1] galaxy_key = sys.argv[2] print("Initiating Galaxy connection") gi = GalaxyInstance(url=galaxy_url, key=galaxy_key) print("Retrieving History list") histories = gi.histories.get_histories() if len(histories) == 0: print("There are no Histories in your account.") else: print("\nHistories:") for hist_dict in histories: # As an example, we retrieve a piece of metadata (the size) using show_history hist_details = gi.histories.show_history(hist_dict['id']) print("{0} ({1}) : {2}".format(hist_dict['name'], hist_details['nice_size'], hist_dict['id'])) bioblend-0.7.0/docs/examples/list_workflows.py000066400000000000000000000014521261571066300215150ustar00rootroot00000000000000""" This example retrieves details of all the Workflows in our Galaxy account and lists information on them. Usage: python list_workflows.py """ from __future__ import print_function import sys from bioblend.galaxy import GalaxyInstance if len(sys.argv) != 3: print("Usage: python list_workflows.py ") sys.exit(1) galaxy_url = sys.argv[1] galaxy_key = sys.argv[2] print("Initiating Galaxy connection") gi = GalaxyInstance(url=galaxy_url, key=galaxy_key) print("Retrieving Workflows list") workflows = gi.workflows.get_workflows() if len(workflows) == 0: print("There are no Workflows in your account.") else: print("\nWorkflows:") for wf_dict in workflows: print("{0} : {1}".format(wf_dict['name'], wf_dict['id'])) bioblend-0.7.0/docs/examples/objects/000077500000000000000000000000001261571066300175025ustar00rootroot00000000000000bioblend-0.7.0/docs/examples/objects/README.txt000066400000000000000000000035261261571066300212060ustar00rootroot00000000000000BioBlend.objects Examples ========================= Microbiology ------------ This directory contains three examples of interaction with real-world microbiology workflows hosted by CRS4's Orione Galaxy server: * bacterial re-sequencing (w2_bacterial_reseq.py); * bacterial de novo assembly (w3_bacterial_denovo.py); * metagenomics (w5_metagenomics.py). All examples use workflows and datasets publicly available on Orione. Before you can run them, you have to register and obtain an API key: * go to https://orione.crs4.it and register -- or log in, if you are already registered -- through the "User" menu at the top of the page; * open "User" -> "API Keys"; * generate an API key if you don't have one. 
In the example file, replace YOUR_API_KEY with your API key (or assign its value to the GALAXY_API_KEY environment variable), then run it: export GALAXY_API_KEY=000this_should_be_your_api_key00 python w2_bacterial_reseq.py The job can take a long time to complete: before exiting, the script runs the workflow asynchronously, then displays the name and id of the output history on standard output. In the Galaxy web UI, click the gear icon at the top right corner of the History panel, select "Saved Histories" and look for the name of the output history in the center frame; finally, choose "switch" from the history's drop-down menu to make it the current one and follow the job as it evolves on Galaxy. Toy Example ----------- The small.py file contains a "toy" example that should run much faster (once the cluster's resource manager allows it to run) than the above ones. In this case, the script waits for the job to complete and downloads its results to a local file. See Also -------- Cuccuru et al., "Orione, a web-based framework for NGS analysis in microbiology". Bioinformatics (2014). http://dx.doi.org/10.1093/bioinformatics/btu135 bioblend-0.7.0/docs/examples/objects/common.py000066400000000000000000000001331261571066300213410ustar00rootroot00000000000000def get_one(iterable): seq = list(iterable) assert len(seq) == 1 return seq[0] bioblend-0.7.0/docs/examples/objects/list_data_libraries.py000066400000000000000000000014471261571066300240620ustar00rootroot00000000000000""" This example retrieves details of all the Data Libraries available to us and lists information on them. Usage: python list_data_libraries.py """ from __future__ import print_function import sys from bioblend.galaxy.objects import GalaxyInstance if len(sys.argv) != 3: print("Usage: python list_data_libraries.py ") sys.exit(1) galaxy_url = sys.argv[1] galaxy_key = sys.argv[2] print("Initiating Galaxy connection") gi = GalaxyInstance(galaxy_url, galaxy_key) print("Retrieving Data Library list") libraries = gi.libraries.get_previews() if len(libraries) == 0: print("There are no Data Libraries available.") else: print("\nData Libraries:") for lib in libraries: print("{0} : {1}".format(lib.name, lib.id)) bioblend-0.7.0/docs/examples/objects/list_histories.py000066400000000000000000000021511261571066300231170ustar00rootroot00000000000000""" This example retrieves details of all the Histories in our Galaxy account and lists information on them. 
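This is the object-oriented (bioblend.galaxy.objects) variant: each History object keeps the full dictionary returned by Galaxy in its 'wrapped' attribute, which is used below to report the history size.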
Usage: python list_histories.py """ from __future__ import print_function import sys from bioblend.galaxy.objects import GalaxyInstance if len(sys.argv) != 3: print("Usage: python list_histories.py ") sys.exit(1) galaxy_url = sys.argv[1] galaxy_key = sys.argv[2] print("Initiating Galaxy connection") gi = GalaxyInstance(galaxy_url, galaxy_key) print("Retrieving History list") # histories.get_previews() returns a list of HistoryPreview objects, which contain only basic information # histories.list() method returns a list of History objects, which contain more extended information # As an example, we will use a piece of metadata (the size) from the 'wrapped' data attribute of History histories = gi.histories.list() if len(histories) == 0: print("There are no Histories in your account.") else: print("\nHistories:") for hist in histories: print("{0} ({1}) : {2}".format(hist.name, hist.wrapped['nice_size'], hist.id)) bioblend-0.7.0/docs/examples/objects/list_workflows.py000066400000000000000000000014241261571066300231450ustar00rootroot00000000000000""" This example retrieves details of all the Workflows in our Galaxy account and lists information on them. Usage: python list_workflows.py """ from __future__ import print_function import sys from bioblend.galaxy.objects import GalaxyInstance if len(sys.argv) != 3: print("Usage: python list_workflows.py ") sys.exit(1) galaxy_url = sys.argv[1] galaxy_key = sys.argv[2] print("Initiating Galaxy connection") gi = GalaxyInstance(galaxy_url, galaxy_key) print("Retrieving Workflows list") workflows = gi.workflows.get_previews() if len(workflows) == 0: print("There are no Workflows in your account.") else: print("\nWorkflows:") for wf in workflows: print("{0} : {1}".format(wf.name, wf.id)) bioblend-0.7.0/docs/examples/objects/small.ga000066400000000000000000000057151261571066300211330ustar00rootroot00000000000000{ "a_galaxy_workflow": "true", "annotation": "", "format-version": "0.1", "name": "get_col", "steps": { "0": { "annotation": "", "id": 0, "input_connections": {}, "inputs": [ { "description": "", "name": "input_tsv" } ], "name": "Input dataset", "outputs": [], "position": { "left": 200, "top": 200 }, "tool_errors": null, "tool_id": null, "tool_state": "{\"name\": \"input_tsv\"}", "tool_version": null, "type": "data_input", "user_outputs": [] }, "1": { "annotation": "", "id": 1, "input_connections": { "input": { "id": 0, "output_name": "output" } }, "inputs": [], "name": "Remove beginning", "outputs": [ { "name": "out_file1", "type": "input" } ], "position": { "left": 420, "top": 200 }, "post_job_actions": { "HideDatasetActionout_file1": { "action_arguments": {}, "action_type": "HideDatasetAction", "output_name": "out_file1" } }, "tool_errors": null, "tool_id": "Remove beginning1", "tool_state": "{\"input\": \"null\", \"__rerun_remap_job_id__\": null, \"chromInfo\": \"\\\"/SHARE/USERFS/els7/users/biobank/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\", \"num_lines\": \"\\\"2\\\"\", \"__page__\": 0}", "tool_version": "1.0.0", "type": "tool", "user_outputs": [] }, "2": { "annotation": "", "id": 2, "input_connections": { "input": { "id": 1, "output_name": "out_file1" } }, "inputs": [], "name": "Cut", "outputs": [ { "name": "out_file1", "type": "tabular" } ], "position": { "left": 640, "top": 200 }, "post_job_actions": {}, "tool_errors": null, "tool_id": "Cut1", "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"delimiter\": \"\\\"T\\\"\", \"columnList\": \"\\\"c1\\\"\", \"input\": \"null\", \"chromInfo\": 
\"\\\"/SHARE/USERFS/els7/users/biobank/galaxy/tool-data/shared/ucsc/chrom/?.len\\\"\"}", "tool_version": "1.0.2", "type": "tool", "user_outputs": [] } } } bioblend-0.7.0/docs/examples/objects/small.py000066400000000000000000000035251261571066300211710ustar00rootroot00000000000000from __future__ import print_function import os import sys import tempfile from bioblend.galaxy.objects import GalaxyInstance from common import get_one # This is a "toy" example that should run much faster # (once the cluster's resource manager allows it to run) than the # real-world ones. The workflow, which is imported from local disk, # removes two header lines from a tabular file, then extracts one of # the columns. The input dataset is publicly available on CRS4's # Orione Galaxy server. URL = 'https://orione.crs4.it' API_KEY = os.getenv('GALAXY_API_KEY', 'YOUR_API_KEY') if API_KEY == 'YOUR_API_KEY': sys.exit('API_KEY not set, see the README.txt file') gi = GalaxyInstance(URL, API_KEY) # import the workflow from the JSON dump with open("small.ga") as f: wf = gi.workflows.import_new(f.read()) # Select the "Orione SupMat" library library_name = 'Orione SupMat' l = get_one(gi.libraries.list(name=library_name)) # Select the input dataset ds_name = '/RNA-Seq - Listeria monocytogenes/Listeria_monocytogenes_EGD_e_uid61583/NC_003210.rnt' ld = get_one(l.get_datasets(name=ds_name)) input_map = {'input_tsv': ld} # Run the workflow on a new history with the selected dataset as # input, overriding the index of the column to remove; wait until the # computation is complete. history_name = 'get_col output' params = {'Cut1': {'columnList': 'c2'}} print('Running workflow: %s [%s]' % (wf.name, wf.id)) outputs, out_hist = wf.run(input_map, history_name, params=params, wait=True) print('Job has finished') assert out_hist.name == history_name print('Output history: %s [%s]' % (out_hist.name, out_hist.id)) # Save results to local disk out_ds = get_one([_ for _ in outputs if _.name == 'Cut on data 1']) with tempfile.NamedTemporaryFile(prefix='bioblend_', delete=False) as f: out_ds.download(f) print('Output downloaded to "%s"' % f.name) bioblend-0.7.0/docs/examples/objects/w2_bacterial_reseq.py000066400000000000000000000035271261571066300236200ustar00rootroot00000000000000from __future__ import print_function import os import sys from bioblend.galaxy.objects import GalaxyInstance from common import get_one URL = 'https://orione.crs4.it' API_KEY = os.getenv('GALAXY_API_KEY', 'YOUR_API_KEY') if API_KEY == 'YOUR_API_KEY': sys.exit('API_KEY not set, see the README.txt file') gi = GalaxyInstance(URL, API_KEY) # Select "W2 - Bacterial re-sequencing | Paired-end" from published workflows workflow_name = 'W2 - Bacterial re-sequencing | Paired-end' previews = gi.workflows.get_previews(name=workflow_name, published=True) p = get_one(_ for _ in previews if _.published) # Import the workflow to user space iw = gi.workflows.import_shared(p.id) # Create a new history history_name = '%s output' % workflow_name h = gi.histories.create(history_name) # Select the "Orione SupMat" library library_name = 'Orione SupMat' l = get_one(gi.libraries.list(name=library_name)) # Select the datasets ds_names = [ '/Whole genome - Escherichia coli/E coli DH10B MiSeq R1.fastq', '/Whole genome - Escherichia coli/E coli DH10B MiSeq R2.fastq', '/Whole genome - Escherichia coli/E coli DH10B - Reference', ] input_labels = [ 'Forward Reads', 'Reverse Reads', 'Reference Genome', ] input_map = dict((label, h.import_dataset(get_one(l.get_datasets(name=name)))) for name, 
label in zip(ds_names, input_labels)) # Set custom parameters for the "check_contigs" and "sspace" tools params = { 'check_contigs': {'genomesize': 5.0}, # affects both occurrences 'sspace': {'insert': 300, 'error': 0.5, 'minoverlap': 35}, } # Run the workflow on a new history with the selected datasets as inputs outputs, out_hist = iw.run(input_map, h, params=params) assert out_hist.name == history_name print('Running workflow: %s [%s]' % (iw.name, iw.id)) print('Output history: %s [%s]' % (out_hist.name, out_hist.id)) bioblend-0.7.0/docs/examples/objects/w3_bacterial_denovo.py000066400000000000000000000045531261571066300237740ustar00rootroot00000000000000from __future__ import print_function import os import sys from bioblend.galaxy.objects import GalaxyInstance from common import get_one URL = 'https://orione.crs4.it' API_KEY = os.getenv('GALAXY_API_KEY', 'YOUR_API_KEY') if API_KEY == 'YOUR_API_KEY': sys.exit('API_KEY not set, see the README.txt file') gi = GalaxyInstance(URL, API_KEY) # Select "W3 - Bacterial de novo assembly | Paired-end" from published workflows workflow_name = 'W3 - Bacterial de novo assembly | Paired-end' previews = gi.workflows.get_previews(name=workflow_name, published=True) p = get_one(_ for _ in previews if _.published) # Import the workflow to user space iw = gi.workflows.import_shared(p.id) # Create a new history history_name = '%s output' % workflow_name h = gi.histories.create(history_name) # Select the "Orione SupMat" library library_name = 'Orione SupMat' l = get_one(gi.libraries.list(name=library_name)) # Select the datasets ds_names = [ '/Whole genome - Escherichia coli/E coli DH10B MiSeq R1.fastq', '/Whole genome - Escherichia coli/E coli DH10B MiSeq R2.fastq', ] input_labels = [ 'Left/Forward FASTQ Reads', 'Right/Reverse FASTQ Reads', ] input_map = dict((label, h.import_dataset(get_one(l.get_datasets(name=name)))) for name, label in zip(ds_names, input_labels)) # Set the "hash_length" parameter to different values for the 3 "velveth" steps lengths = set(['19', '23', '29']) ws_ids = iw.tool_labels_to_ids['velveth'] assert len(ws_ids) == len(lengths) params = dict((id_, {'hash_length': v}) for id_, v in zip(ws_ids, lengths)) # Set the "ins_length" runtime parameter to the same value for the 3 # "velvetg" steps tool_id = 'velvetg' ws_ids = iw.tool_labels_to_ids[tool_id] step = iw.steps[next(iter(ws_ids))] # arbitrarily pick one params[tool_id] = {'reads': step.tool_inputs['reads'].copy()} params[tool_id]['reads']['ins_length'] = -1 # Set more custom parameters params['cisarunner'] = {'genomesize': 5000000} params['check_contigs'] = {'genomesize': 5.0} params[ 'toolshed.g2.bx.psu.edu/repos/edward-kirton/abyss_toolsuite/abyss/1.0.0' ] = {'k': 41} # Run the workflow on a new history with the selected datasets as inputs outputs, out_hist = iw.run(input_map, h, params=params) assert out_hist.name == history_name print('Running workflow: %s [%s]' % (iw.name, iw.id)) print('Output history: %s [%s]' % (out_hist.name, out_hist.id)) bioblend-0.7.0/docs/examples/objects/w5_galaxy_api.py000066400000000000000000000055301261571066300226100ustar00rootroot00000000000000from __future__ import print_function import json import os import sys from six.moves.urllib.parse import urljoin # This example, provided for comparison with w5_metagenomics.py, # contains the code required to run the metagenomics workflow # *without* BioBlend. 
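# It therefore needs the 'common' helper module from a local Galaxy checkout's # scripts/api directory, which is added to sys.path via GALAXY_PATH below.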
URL = os.getenv('GALAXY_URL', 'https://orione.crs4.it') API_URL = urljoin(URL, 'api') API_KEY = os.getenv('GALAXY_API_KEY', 'YOUR_API_KEY') if API_KEY == 'YOUR_API_KEY': sys.exit('API_KEY not set, see the README.txt file') # Clone the galaxy-dist mercurial repository and replace # YOUR_GALAXY_PATH with the clone's local path in the following code, e.g.: # cd /tmp # hg clone https://bitbucket.org/galaxy/galaxy-dist # GALAXY_PATH = '/tmp/galaxy-dist' GALAXY_PATH = 'YOUR_GALAXY_PATH' sys.path.insert(1, os.path.join(GALAXY_PATH, 'scripts/api')) import common # Select "W5 - Metagenomics" from published workflows workflow_name = 'W5 - Metagenomics' workflows = common.get(API_KEY, '%s/workflows?show_published=True' % API_URL) w = [_ for _ in workflows if _['published'] and _['name'] == workflow_name] assert len(w) == 1 w = w[0] # Import the workflow to user space data = {'workflow_id': w['id']} iw = common.post(API_KEY, '%s/workflows/import' % API_URL, data) iw_details = common.get(API_KEY, '%s/workflows/%s' % (API_URL, iw['id'])) # Select the "Orione SupMat" library library_name = 'Orione SupMat' libraries = common.get(API_KEY, '%s/libraries' % API_URL) l = [_ for _ in libraries if _['name'] == library_name] assert len(l) == 1 l = l[0] # Select the "/Metagenomics/MetagenomicsDataset.fq" dataset ds_name = '/Metagenomics/MetagenomicsDataset.fq' contents = common.get(API_KEY, '%s/libraries/%s/contents' % (API_URL, l['id'])) ld = [_ for _ in contents if _['type'] == 'file' and _['name'] == ds_name] assert len(ld) == 1 ld = ld[0] # Select the blastn step ws = [_ for _ in iw_details['steps'].itervalues() if _['tool_id'] and 'blastn' in _['tool_id']] assert len(ws) == 1 ws = ws[0] tool_id = ws['tool_id'] # Get (a copy of) the parameters dict for the selected step ws_parameters = ws['tool_inputs'].copy() for k, v in ws_parameters.iteritems(): ws_parameters[k] = json.loads(v) # Run the workflow on a new history with the selected dataset # as input, setting the BLAST db to "16SMicrobial-20131106" history_name = '%s output' % workflow_name ws_parameters['db_opts']['database'] = '16SMicrobial-20131106' data = { 'workflow_id': iw['id'], 'parameters': {tool_id: {'db_opts': ws_parameters['db_opts']}}, } assert len(iw_details['inputs']) == 1 input_step_id = iw_details['inputs'].keys()[0] data['ds_map'] = {input_step_id: {'src': 'ld', 'id': ld['id']}} data['history'] = history_name r_dict = common.post(API_KEY, '%s/workflows' % API_URL, data) print('Running workflow: %s [%s]' % (iw['name'], iw['id'])) print('Output history: %s [%s]' % (history_name, r_dict['history'])) bioblend-0.7.0/docs/examples/objects/w5_metagenomics.py000066400000000000000000000034211261571066300231420ustar00rootroot00000000000000from __future__ import print_function import os import sys from bioblend.galaxy.objects import GalaxyInstance from common import get_one URL = 'https://orione.crs4.it' API_KEY = os.getenv('GALAXY_API_KEY', 'YOUR_API_KEY') if API_KEY == 'YOUR_API_KEY': sys.exit('API_KEY not set, see the README.txt file') gi = GalaxyInstance(URL, API_KEY) # Select "W5 - Metagenomics" from published workflows workflow_name = 'W5 - Metagenomics' previews = gi.workflows.get_previews(name=workflow_name, published=True) p = get_one(_ for _ in previews if _.published) # Import the workflow to user space iw = gi.workflows.import_shared(p.id) # Create a new history history_name = '%s output' % workflow_name h = gi.histories.create(history_name) # Select the "Orione SupMat" library library_name = 'Orione SupMat' l = 
get_one(gi.libraries.list(name=library_name)) # Select the "/Metagenomics/MetagenomicsDataset.fq" dataset ds_name = '/Metagenomics/MetagenomicsDataset.fq' input_map = {'Input Dataset': h.import_dataset(get_one(l.get_datasets(name=ds_name)))} # Select the blastn step tool_id = 'toolshed.g2.bx.psu.edu/repos/devteam/ncbi_blast_plus/ncbi_blastn_wrapper/0.1.00' step_id = get_one(iw.tool_labels_to_ids[tool_id]) ws = iw.steps[step_id] # Get (a copy of) the parameters dict for the selected step ws_parameters = ws.tool_inputs.copy() # Run the workflow on a new history with the selected dataset # as input, setting the BLAST db to "16SMicrobial-20131106" params = {tool_id: {'db_opts': ws_parameters['db_opts']}} params[tool_id]['db_opts']['database'] = '16SMicrobial-20131106' outputs, out_hist = iw.run(input_map, h, params=params) assert out_hist.name == history_name print('Running workflow: %s [%s]' % (iw.name, iw.id)) print('Output history: %s [%s]' % (out_hist.name, out_hist.id)) bioblend-0.7.0/docs/examples/run_imported_workflow.py000066400000000000000000000071431261571066300230710ustar00rootroot00000000000000""" This example demonstrates running a tophat+cufflinks workflow over paired-end data. This is a task we could not do using Galaxy's GUI batch mode, because the inputs need to be paired. The workflow is imported from a json file (previously exported from Galaxy), and the input data files from URLs. This example creates a new Data Library, so you must be a Galaxy Admin on the instance you run the script against. Also note that a Galaxy Workflow will only run without modification if it finds the expected versions of tool wrappers installed on the Galaxy instance. This is to ensure reproducibility. In this case we expect Tophat wrapper 1.5.0 and Cufflinks wrapper 0.0.5. Usage: python run_imported_workflow.py """ from __future__ import print_function import sys from bioblend import galaxy # Specify workflow and data to import into Galaxy workflow_file = 'tophat_cufflinks_pairedend_workflow.ga' import_file_pairs = [ ('https://bioblend.s3.amazonaws.com/C1_R1_1.chr4.fq', 'https://bioblend.s3.amazonaws.com/C1_R1_2.chr4.fq'), ('https://bioblend.s3.amazonaws.com/C1_R2_1.chr4.fq', 'https://bioblend.s3.amazonaws.com/C1_R2_2.chr4.fq'), ('https://bioblend.s3.amazonaws.com/C1_R3_1.chr4.fq', 'https://bioblend.s3.amazonaws.com/C1_R3_2.chr4.fq') ] # Specify names of Library and History that will be created in Galaxy # In this simple example, these will be created even if items with the same name already exist. library_name = 'Imported data for API demo' output_history_name = 'Output from API demo' if len(sys.argv) != 3: print("Usage: python run_imported_workflow.py ") sys.exit(1) galaxy_url = sys.argv[1] galaxy_key = sys.argv[2] print("Initiating Galaxy connection") gi = galaxy.GalaxyInstance(url=galaxy_url, key=galaxy_key) print("Importing workflow") wf_import_dict = gi.workflows.import_workflow_from_local_path(workflow_file) workflow = wf_import_dict['id'] print("Creating data library '%s'" % library_name) library_dict = gi.libraries.create_library(library_name) library = library_dict['id'] print("Importing data") # Import each pair of files, and track the resulting identifiers. 
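# dataset_ids collects one (forward_id, reverse_id) tuple per pair, while filenames # maps each library dataset id back to its source URL for the progress messages below.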
dataset_ids = [] filenames = dict() for (file1, file2) in import_file_pairs: dataset1 = gi.libraries.upload_file_from_url(library, file1, file_type='fastqsanger') dataset2 = gi.libraries.upload_file_from_url(library, file2, file_type='fastqsanger') id1, id2 = dataset1[0]['id'], dataset2[0]['id'] filenames[id1] = file1 filenames[id2] = file2 dataset_ids.append((id1, id2)) print("Creating output history '%s'" % output_history_name) outputhist_dict = gi.histories.create_history(output_history_name) outputhist = outputhist_dict['id'] print("Will run workflow on %d pairs of files" % len(dataset_ids)) # Get the input step IDs from the workflow. # We use the BioBlend convenience function get_workflow_inputs to retrieve inputs by label. input1 = gi.workflows.get_workflow_inputs(workflow, label='Input fastq readpair-1')[0] input2 = gi.workflows.get_workflow_inputs(workflow, label='Input fastq readpair-2')[0] # For each pair of datasets we imported, run the imported workflow # For each input we need to build a datamap dict with 'src' set to 'ld', as we stored our data in a Galaxy Library for (data1, data2) in dataset_ids: print("Initiating workflow run on files %s, %s" % (filenames[data1], filenames[data2])) datamap = dict() datamap[input1] = {'src': 'ld', 'id': data1} datamap[input2] = {'src': 'ld', 'id': data2} result = gi.workflows.run_workflow(workflow, datamap, history_id=outputhist, import_inputs_to_history=True) bioblend-0.7.0/docs/examples/start_cloudman.py000066400000000000000000000047061261571066300214510ustar00rootroot00000000000000""" A quick way to start and initialize an instance of the CloudMan platform directly from the command line. Usage:: python start_cloudman.py ```` can be 'Galaxy', 'Data', or 'SGE' (see http://wiki.g2.bx.psu.edu/CloudMan, Step 3 for more details on types) Example: python start_cloudman.py "cluster bioblend" pwd SGE m1.small ami-00000032 """ from __future__ import print_function import sys from bioblend.util import Bunch from bioblend.cloudman import CloudManConfig from bioblend.cloudman import CloudManInstance def start_cloudman(name, pwd, cm_type, inst_type, ami, ak, sk): """ Start an instance of CloudMan with the provided arguments. Returns a tuple: an instance of ``CloudManConfig`` pointing to the settings used to launch this instance of CloudMan; and an instance of ``CloudManInstance`` pointing to the given instance of CloudMan. """ cloud = None # If left as None, BioBlend will default to Amazon # Define properties for the NeCTAR cloud cloud = Bunch(id='-1', name="NeCTAR", cloud_type='openstack', bucket_default='cloudman-os', region_name='melbourne', region_endpoint='nova.rc.nectar.org.au', ec2_port=8773, ec2_conn_path='/services/Cloud', cidr_range='115.146.92.0/22', is_secure=True, s3_host='swift.rc.nectar.org.au', s3_port=8888, s3_conn_path='/') # Create an instance of the CloudManConfig class and launch a CloudMan instance cmc = CloudManConfig( ak, sk, name, ami, inst_type, pwd, cloud_metadata=cloud, cloudman_type=cm_type, initial_storage_size=2, placement='melbourne-np') print("Configured an instance; waiting to launch and boot...") cmi = CloudManInstance.launch_instance(cmc) print("Done! 
CloudMan IP is {0}".format(cmi.cloudman_url)) return cmc, cmi if __name__ == "__main__": if len(sys.argv) != 8: print("\nUsage:\n" "python start_cloudman.py " " ") sys.exit(1) cml, cm = start_cloudman(sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4], sys.argv[5], sys.argv[6], sys.argv[7]) bioblend-0.7.0/docs/examples/tophat_cufflinks_pairedend_workflow.ga000066400000000000000000000124531261571066300256770ustar00rootroot00000000000000{ "a_galaxy_workflow": "true", "annotation": "", "format-version": "0.1", "name": "TopHat + cufflinks paired-end", "steps": { "0": { "annotation": "", "id": 0, "input_connections": {}, "inputs": [ { "description": "", "name": "Input fastq readpair-1" } ], "name": "Input dataset", "outputs": [], "position": { "left": 200, "top": 308 }, "tool_errors": null, "tool_id": null, "tool_state": "{\"name\": \"Input fastq readpair-1\"}", "tool_version": null, "type": "data_input", "user_outputs": [] }, "1": { "annotation": "", "id": 1, "input_connections": {}, "inputs": [ { "description": "", "name": "Input fastq readpair-2" } ], "name": "Input dataset", "outputs": [], "position": { "left": 177.7833251953125, "top": 395.26666259765625 }, "tool_errors": null, "tool_id": null, "tool_state": "{\"name\": \"Input fastq readpair-2\"}", "tool_version": null, "type": "data_input", "user_outputs": [] }, "2": { "annotation": "", "id": 2, "input_connections": { "input1": { "id": 0, "output_name": "output" }, "singlePaired|input2": { "id": 1, "output_name": "output" } }, "inputs": [], "name": "Tophat for Illumina", "outputs": [ { "name": "insertions", "type": "bed" }, { "name": "deletions", "type": "bed" }, { "name": "junctions", "type": "bed" }, { "name": "accepted_hits", "type": "bam" } ], "position": { "left": 436, "top": 280 }, "post_job_actions": { "HideDatasetActiondeletions": { "action_arguments": {}, "action_type": "HideDatasetAction", "output_name": "deletions" }, "HideDatasetActioninsertions": { "action_arguments": {}, "action_type": "HideDatasetAction", "output_name": "insertions" } }, "tool_errors": null, "tool_id": "tophat", "tool_state": "{\"__page__\": 0, \"input1\": \"null\", \"refGenomeSource\": \"{\\\"genomeSource\\\": \\\"indexed\\\", \\\"index\\\": \\\"dm3\\\", \\\"__current_case__\\\": 0}\", \"singlePaired\": \"{\\\"input2\\\": null, \\\"sPaired\\\": \\\"paired\\\", \\\"pParams\\\": {\\\"pSettingsType\\\": \\\"preSet\\\", \\\"__current_case__\\\": 0}, \\\"__current_case__\\\": 1, \\\"mate_inner_distance\\\": \\\"20\\\"}\"}", "tool_version": "1.5.0", "type": "tool", "user_outputs": [] }, "3": { "annotation": "", "id": 3, "input_connections": { "input": { "id": 2, "output_name": "accepted_hits" } }, "inputs": [], "name": "Cufflinks", "outputs": [ { "name": "genes_expression", "type": "tabular" }, { "name": "transcripts_expression", "type": "tabular" }, { "name": "assembled_isoforms", "type": "gtf" }, { "name": "total_map_mass", "type": "txt" } ], "position": { "left": 679, "top": 342 }, "post_job_actions": {}, "tool_errors": null, "tool_id": "cufflinks", "tool_state": "{\"min_isoform_fraction\": \"\\\"0.1\\\"\", \"multiread_correct\": \"\\\"Yes\\\"\", \"singlePaired\": \"{\\\"sPaired\\\": \\\"No\\\", \\\"__current_case__\\\": 0}\", \"__page__\": 0, \"pre_mrna_fraction\": \"\\\"0.15\\\"\", \"bias_correction\": \"{\\\"do_bias_correction\\\": \\\"No\\\", \\\"__current_case__\\\": 1}\", \"max_intron_len\": \"\\\"300000\\\"\", \"reference_annotation\": \"{\\\"use_ref\\\": \\\"No\\\", \\\"__current_case__\\\": 0}\", \"global_model\": \"null\", \"do_normalization\": 
\"\\\"No\\\"\", \"input\": \"null\"}", "tool_version": "0.0.5", "type": "tool", "user_outputs": [] } } } bioblend-0.7.0/docs/index.rst000066400000000000000000000117331261571066300161010ustar00rootroot00000000000000======== BioBlend ======== About ===== .. include:: ../ABOUT.rst Installation ============ Stable releases of BioBlend are best installed via ``pip`` or ``easy_install`` from PyPI using something like:: $ pip install bioblend Alternatively, you may install the most current source code from our `Git repository`_, or fork the project on Github. To install from source, do the following:: # Clone the repository to a local directory $ git clone https://github.com/galaxyproject/bioblend.git # Install the library $ cd bioblend $ python setup.py install After installing the library, you will be able to simply import it into your Python environment with ``import bioblend``. For details on the available functionality, see the `API documentation`_. BioBlend requires a number of Python libraries. These libraries are installed automatically when BioBlend itself is installed, regardless whether it is installed via PyPi_ or by running ``python setup.py install`` command. The current list of required libraries is always available from `setup.py`_ in the source code repository. If you also want to run tests locally, some extra libraries are required. To install them, run:: $ python setup.py test Usage ===== To get started using BioBlend, install the library as described above. Once the library becomes available on the given system, it can be developed against. The developed scripts do not need to reside in any particular location on the system. It is probably best to take a look at the example scripts in ``docs/examples`` source directory and browse the `API documentation`_. Beyond that, it's up to your creativity :). Development =========== Anyone interested in contributing or tweaking the library is more then welcome to do so. To start, simply fork the `Git repository`_ on Github and start playing with it. Then, issue pull requests. API Documentation ================= BioBlend's API focuses around and matches the services it wraps. Thus, there are two top-level sets of APIs, each corresponding to a separate service and a corresponding step in the automation process. *Note* that each of the service APIs can be used completely independently of one another. Effort has been made to keep the structure and naming of those API's consistent across the library but because they do bridge different services, some discrepancies may exist. Feel free to point those out and/or provide fixes. For Galaxy, an alternative :ref:`object-oriented API ` is also available. This API provides an explicit modeling of server-side Galaxy instances and their relationships, providing higher-level methods to perform operations such as retrieving all datasets for a given history, etc. Note that, at the moment, the oo API is still incomplete, providing access to a more restricted set of Galaxy modules with respect to the standard one. CloudMan API ~~~~~~~~~~~~ API used to manipulate the instantiated infrastructure. For example, scale the size of the compute cluster, get infrastructure status, get service status. .. toctree:: :maxdepth: 2 :glob: api_docs/cloudman/* Galaxy API ~~~~~~~~~~ API used to manipulate genomic analyses within Galaxy, including data management and workflow execution. .. 
toctree:: :maxdepth: 3 :glob: api_docs/galaxy/* Toolshed API ~~~~~~~~~~~~ API used to interact with the Galaxy Toolshed, including repository management. .. toctree:: :maxdepth: 3 :glob: api_docs/toolshed/* Configuration ============= BioBlend allows library-wide configuration to be set in external files. These configuration files can be used to specify access keys, for example. .. toctree:: :maxdepth: 1 :glob: api_docs/lib_config Testing ======= If you'd like to do more than just a mock test, you'll want to point BioBlend to an instance of Galaxy. Do so by exporting the following two variables:: $ export BIOBLEND_GALAXY_URL=http://127.0.0.1:8080 $ export BIOBLEND_GALAXY_API_KEY= The unit tests, stored in the ``tests`` folder, can be run using `nose `_. From the project root:: $ nosetests Getting help ============ If you've run into issues, found a bug, or can't seem to find an answer to your question regarding the use and functionality of BioBlend, please use `Github Issues `_ page to ask your question. Related documentation ===================== Links to other documentation and libraries relevant to this library: * `Galaxy API documentation `_ * `Blend4j `_: Galaxy API wrapper for Java * `clj-blend `_: Galaxy API wrapper for Clojure Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` .. References/hyperlinks used above .. _PyPi: https://pypi.python.org/pypi/bioblend/ .. _setup.py: https://github.com/galaxyproject/bioblend/blob/master/setup.py bioblend-0.7.0/my_unittest2.py000066400000000000000000000005011261571066300163170ustar00rootroot00000000000000import os import sys from unittest2.loader import defaultTestLoader def collector(): # import __main__ triggers code re-execution __main__ = sys.modules['__main__'] setupDir = os.path.abspath(os.path.dirname(__main__.__file__)) return defaultTestLoader.discover(setupDir + '/tests', pattern='Test*.py') bioblend-0.7.0/run_bioblend_tests.sh000077500000000000000000000103751261571066300175340ustar00rootroot00000000000000#!/bin/sh show_help () { echo "Usage: $0 -g GALAXY_DIR [-p PORT] [-e TOX_ENV] [-t BIOBLEND_TESTS] [-r GALAXY_REV] [-c] Run tests for BioBlend. Useful for Continuous Integration testing. *Please note* that this script overwrites the main.pid file and appends to the main.log file inside the specified Galaxy directory (-g). Options: -g GALAXY_DIR Path of the local Galaxy git repository. -p PORT Port to use for the Galaxy server. Defaults to 8080. -e TOX_ENV Work against specified tox environments. Defaults to py27. -t BIOBLEND_TESTS Subset of tests to run, e.g. 'tests/TestGalaxyObjects.py:TestHistory'. See 'man nosetests' for more information. Defaults to all tests. -r GALAXY_REV Branch or commit of the local Galaxy git repository to checkout. Defaults to the dev branch. -c Force removal of the temporary directory created for Galaxy, even if some test failed." } get_abs_dirname () { # $1 : relative dirname echo $(cd "$1" && pwd) } e_val=py27 p_val=8080 r_val=dev while getopts 'hcg:e:p:t:r:' option do case $option in h) show_help exit;; c) c_val=1;; g) g_val=$(get_abs_dirname $OPTARG);; e) e_val=$OPTARG;; p) p_val=$OPTARG;; t) t_val=$OPTARG;; r) r_val=$OPTARG;; esac done if [ -z "$g_val" ]; then echo "Error: missing -g value." show_help exit 1 fi # Install BioBlend BIOBLEND_DIR=$(get_abs_dirname $(dirname $0)) cd ${BIOBLEND_DIR} if [ ! -d .venv ]; then virtualenv .venv fi . 
.venv/bin/activate python setup.py install || exit 1 pip install --upgrade "tox>=1.8.0" # Setup Galaxy cd ${g_val} # Update repository (may change the sample files or the list of eggs) git fetch git checkout ${r_val} if git show-ref -q --verify "refs/heads/${r_val}" 2>/dev/null; then # ${r_val} is a branch export GALAXY_VERSION=${r_val} git pull --ff-only fi # Setup Galaxy master API key and admin user if [ -f universe_wsgi.ini.sample ]; then GALAXY_SAMPLE_CONFIG_FILE=universe_wsgi.ini.sample else GALAXY_SAMPLE_CONFIG_FILE=config/galaxy.ini.sample fi TEMP_DIR=`mktemp -d 2>/dev/null || mktemp -d -t 'mytmpdir'` echo "Created temporary directory $TEMP_DIR" export GALAXY_CONFIG_FILE=$TEMP_DIR/galaxy.ini GALAXY_MASTER_API_KEY=`date --rfc-3339=ns | md5sum | cut -f 1 -d ' '` GALAXY_USER_EMAIL=${USER}@localhost.localdomain sed -e "s/^#master_api_key.*/master_api_key = $GALAXY_MASTER_API_KEY/" -e "s/^#admin_users.*/admin_users = $GALAXY_USER_EMAIL/" $GALAXY_SAMPLE_CONFIG_FILE > $GALAXY_CONFIG_FILE sed -i -e "s|^#database_connection.*|database_connection = sqlite:///$TEMP_DIR/universe.sqlite?isolation_level=IMMEDIATE|" -e "s|^#file_path.*|file_path = $TEMP_DIR/files|" -e "s|^#new_file_path.*|new_file_path = $TEMP_DIR/tmp|" -e "s|#job_working_directory.*|job_working_directory = $TEMP_DIR/job_working_directory|" $GALAXY_CONFIG_FILE # Change configuration needed by many tests sed -i -e 's/^#allow_user_dataset_purge.*/allow_user_dataset_purge = True/' $GALAXY_CONFIG_FILE # Change configuration needed by some library tests sed -i -e 's/^#allow_library_path_paste.*/allow_library_path_paste = True/' $GALAXY_CONFIG_FILE if [ -n "${p_val}" ]; then # Change only the first occurence of port number sed -i -e "0,/^#port/ s/^#port.*/port = $p_val/" $GALAXY_CONFIG_FILE fi # Start Galaxy and wait for successful server start GALAXY_RUN_ALL=1 ${BIOBLEND_DIR}/run_galaxy.sh --daemon --wait || exit 1 # Use the master API key to create the admin user and get its API key export BIOBLEND_GALAXY_URL=http://localhost:${p_val} GALAXY_USER=$USER GALAXY_USER_PASSWD=`date --rfc-3339=ns | md5sum | cut -f 1 -d ' '` export BIOBLEND_GALAXY_API_KEY=`python ${BIOBLEND_DIR}/docs/examples/create_user_get_api_key.py $BIOBLEND_GALAXY_URL $GALAXY_MASTER_API_KEY $GALAXY_USER $GALAXY_USER_EMAIL $GALAXY_USER_PASSWD` echo "Created new Galaxy user $GALAXY_USER with email $GALAXY_USER_EMAIL , password $GALAXY_USER_PASSWD and API key $BIOBLEND_GALAXY_API_KEY" # Run the tests cd ${BIOBLEND_DIR} if [ -n "${t_val}" ]; then tox -e ${e_val} -- --tests ${t_val} else tox -e ${e_val} fi exit_code=$? deactivate # Stop Galaxy cd ${g_val} GALAXY_RUN_ALL=1 ./run.sh --daemon stop # Remove temporary directory if -c is specified or if all tests passed if [ -n "${c_val}" ] || [ $exit_code -eq 0 ]; then rm -rf $TEMP_DIR fi bioblend-0.7.0/run_galaxy.sh000077500000000000000000000117541261571066300160230ustar00rootroot00000000000000#!/bin/sh #This script should be run from inside the Galaxy base directory #cd `dirname $0` # If there is a file that defines a shell environment specific to this # instance of Galaxy, source the file. if [ -z "$GALAXY_LOCAL_ENV_FILE" ]; then GALAXY_LOCAL_ENV_FILE='./config/local_env.sh' fi if [ -f $GALAXY_LOCAL_ENV_FILE ]; then . 
$GALAXY_LOCAL_ENV_FILE fi if [ -f scripts/common_startup.sh ]; then ./scripts/common_startup.sh || exit 1 else if [ -f scripts/copy_sample_files.sh ]; then ./scripts/copy_sample_files.sh else SAMPLES=" community_wsgi.ini.sample datatypes_conf.xml.sample external_service_types_conf.xml.sample migrated_tools_conf.xml.sample reports_wsgi.ini.sample shed_tool_conf.xml.sample tool_conf.xml.sample shed_tool_data_table_conf.xml.sample tool_data_table_conf.xml.sample tool_sheds_conf.xml.sample data_manager_conf.xml.sample shed_data_manager_conf.xml.sample openid_conf.xml.sample universe_wsgi.ini.sample tool-data/shared/ncbi/builds.txt.sample tool-data/shared/ensembl/builds.txt.sample tool-data/shared/ucsc/builds.txt.sample tool-data/shared/ucsc/publicbuilds.txt.sample tool-data/shared/ucsc/ucsc_build_sites.txt.sample tool-data/shared/igv/igv_build_sites.txt.sample tool-data/shared/rviewer/rviewer_build_sites.txt.sample tool-data/*.sample static/welcome.html.sample " # Create any missing config/location files for sample in $SAMPLES; do file=`echo $sample | sed -e 's/\.sample$//'` if [ ! -f "$file" -a -f "$sample" ]; then echo "Initializing $file from `basename $sample`" cp $sample $file fi done fi # explicitly attempt to fetch eggs before running FETCH_EGGS=1 for arg in "$@"; do [ "$arg" = "--stop-daemon" ] && FETCH_EGGS=0; break done if [ $FETCH_EGGS -eq 1 ]; then python ./scripts/check_eggs.py -q if [ $? -ne 0 ]; then echo "Some eggs are out of date, attempting to fetch..." python ./scripts/fetch_eggs.py if [ $? -eq 0 ]; then echo "Fetch successful." else echo "Fetch failed." exit 1 fi fi fi fi # If there is a .venv/ directory, assume it contains a virtualenv that we # should run this instance in. if [ -d .venv ]; then printf "Activating virtualenv at %s/.venv\n" $(pwd) . .venv/bin/activate fi python ./scripts/check_python.py || exit 1 if [ -n "$GALAXY_UNIVERSE_CONFIG_DIR" ]; then python ./scripts/build_universe_config.py "$GALAXY_UNIVERSE_CONFIG_DIR" fi if [ -z "$GALAXY_CONFIG_FILE" ]; then if [ -f universe_wsgi.ini ]; then GALAXY_CONFIG_FILE=universe_wsgi.ini elif [ -f config/galaxy.ini ]; then GALAXY_CONFIG_FILE=config/galaxy.ini else GALAXY_CONFIG_FILE=config/galaxy.ini.sample fi export GALAXY_CONFIG_FILE fi if [ -n "$GALAXY_RUN_ALL" ]; then servers=`sed -n 's/^\[server:\(.*\)\]/\1/ p' $GALAXY_CONFIG_FILE | xargs echo` echo "$@" | grep -q 'daemon\|restart' if [ $? -ne 0 ]; then echo 'ERROR: $GALAXY_RUN_ALL cannot be used without the `--daemon`, `--stop-daemon` or `restart` arguments to run.sh' exit 1 fi (echo "$@" | grep -q -e '--daemon\|restart') && (echo "$@" | grep -q -e '--wait') WAIT=$? ARGS=`echo "$@" | sed 's/--wait//'` for server in $servers; do if [ $WAIT -eq 0 ]; then python ./scripts/paster.py serve $GALAXY_CONFIG_FILE --server-name=$server --pid-file=$server.pid --log-file=$server.log $ARGS while true; do sleep 1 printf "." # Grab the current pid from the pid file if ! current_pid_in_file=$(cat $server.pid); then echo "A Galaxy process died, interrupting" >&2 exit 1 fi # Search for all pids in the logs and tail for the last one latest_pid=`egrep '^Starting server in PID [0-9]+\.$' $server.log -o | sed 's/Starting server in PID //g;s/\.$//g' | tail -n 1` # If they're equivalent, then the current pid file agrees with our logs # and we've succesfully started [ -n "$latest_pid" ] && [ $latest_pid -eq $current_pid_in_file ] && break done echo else echo "Handling $server with log file $server.log..." 
python ./scripts/paster.py serve $GALAXY_CONFIG_FILE --server-name=$server --pid-file=$server.pid --log-file=$server.log $@ fi done else # Handle only 1 server, whose name can be specified with --server-name parameter (defaults to "main") python ./scripts/paster.py serve $GALAXY_CONFIG_FILE $@ fi bioblend-0.7.0/setup.cfg000066400000000000000000000001151261571066300151210ustar00rootroot00000000000000[flake8] ignore = E501 [nosetests] verbosity=2 [bdist_wheel] universal = 1 bioblend-0.7.0/setup.py000066400000000000000000000034711261571066300150220ustar00rootroot00000000000000# http://bugs.python.org/issue15881#msg170215 import multiprocessing # noqa import ast import os.path import re import sys from setuptools import setup, find_packages # Cannot use "from bioblend import get_version" because that would try to import # the six package which may not be installed yet. reg = re.compile(r'__version__\s*=\s*(.+)') with open(os.path.join('bioblend', '__init__.py')) as f: for line in f: m = reg.match(line) if m: version = ast.literal_eval(m.group(1)) break tests_require = ['nose>=1.3.1'] if sys.version_info < (2, 7): tests_require.extend(['mock>=0.7.0,<=1.0.1', 'unittest2>=0.5.1']) elif sys.version_info < (3, 3): tests_require.append('mock>=0.7.0') setup(name="bioblend", version=version, description="CloudMan and Galaxy API library", author="Enis Afgan", author_email="afgane@gmail.com", url="http://bioblend.readthedocs.org/", install_requires=['requests>=2.4.3', 'requests-toolbelt', 'boto>=2.9.7', 'pyyaml', 'six'], tests_require=tests_require, packages=find_packages(), license='MIT', platforms="Posix; MacOS X; Windows", classifiers=["Development Status :: 4 - Beta", "Intended Audience :: Developers", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Topic :: Scientific/Engineering", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4"], test_suite='nose.collector') bioblend-0.7.0/teamcity_run_tests.sh000077500000000000000000000001161261571066300175650ustar00rootroot00000000000000#!/bin/bash virtualenv . source bin/activate python setup.py install nosetestsbioblend-0.7.0/tests/000077500000000000000000000000001261571066300144455ustar00rootroot00000000000000bioblend-0.7.0/tests/CloudmanTestBase.py000066400000000000000000000046521261571066300202230ustar00rootroot00000000000000""" Tests the functionality of the Blend CloudMan API. These tests require working credentials to supported cloud infrastructure. Use ``nose`` to run these unit tests. 
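They read the BIOBLEND_ACCESS_KEY, BIOBLEND_SECRET_KEY, BIOBLEND_CLOUD_TYPE and BIOBLEND_AMI_ID environment variables (see tests/README.TXT).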
""" from test_util import unittest import os from bioblend.cloudman.launch import Bunch import test_util class CloudmanTestBase(unittest.TestCase): @classmethod @test_util.skip_unless_cloudman() def setUpClass(cls): if os.environ.get('BIOBLEND_CLOUD_TYPE') == 'EC2': cls.access_key = os.environ['BIOBLEND_ACCESS_KEY'] cls.secret_key = os.environ['BIOBLEND_SECRET_KEY'] cls.cluster_name = 'Blend CloudMan' cls.ami_id = os.environ['BIOBLEND_AMI_ID'] cls.instance_type = 'm1.small' cls.password = 'password' cls.cloud_metadata = Bunch( id='1', # for compatibility w/ DB representation name="Amazon", cloud_type="ec2", bucket_default="cloudman", region_name="us-east-1", region_endpoint="ec2.amazonaws.com", ec2_port="", ec2_conn_path="/", cidr_range="", is_secure=True, s3_host="s3.amazonaws.com", s3_port="", s3_conn_path='/') else: # Assume OpenStack/NeCTAR cls.access_key = os.environ['BIOBLEND_ACCESS_KEY'] cls.secret_key = os.environ['BIOBLEND_SECRET_KEY'] cls.cloud_metadata = Bunch( id='-1', name="NeCTAR", cloud_type='openstack', bucket_default='cloudman-os', region_name='melbourne', region_endpoint='nova.rc.nectar.org.au', ec2_port=8773, ec2_conn_path='/services/Cloud', cidr_range='115.146.92.0/22', is_secure=True, s3_host='swift.rc.nectar.org.au', s3_port=8888, s3_conn_path='/') cls.cluster_name = 'Blend CloudMan' cls.ami_id = os.environ['BIOBLEND_AMI_ID'] cls.instance_type = 'm1.small' cls.password = 'password' @classmethod @test_util.skip_unless_cloudman() def tearDownClass(cls): try: # TODO: cloudman's terminate method has a bug. Needs fix cls.cmi.terminate(delete_cluster=True) except: pass bioblend-0.7.0/tests/GalaxyTestBase.py000066400000000000000000000027661261571066300177120ustar00rootroot00000000000000import os import time import tempfile from six.moves import range from bioblend import galaxy from test_util import unittest class GalaxyTestBase(unittest.TestCase): def setUp(self): galaxy_key = os.environ['BIOBLEND_GALAXY_API_KEY'] galaxy_url = os.environ['BIOBLEND_GALAXY_URL'] self.gi = galaxy.GalaxyInstance(url=galaxy_url, key=galaxy_key) def _test_dataset(self, history_id, contents="1\t2\t3", **kwds): tool_output = self.gi.tools.paste_content(contents, history_id, **kwds) return tool_output["outputs"][0]["id"] def _wait_for_history(self, history_id, timeout_seconds=15): def _state_ready(state_str): if state_str == 'ok': return True elif state_str == 'error': raise Exception('History in error state') return False for _ in range(timeout_seconds): state = self.gi.histories.show_history(history_id)['state'] if _state_ready(state): return time.sleep(1) raise Exception('Timeout expired while waiting for history') def _wait_and_verify_dataset(self, history_id, dataset_id, expected_contents): self._wait_for_history(history_id) with tempfile.NamedTemporaryFile(prefix='bioblend_test_') as f: self.gi.histories.download_dataset(history_id, dataset_id, file_path=f.name, use_default_filename=False) f.flush() self.assertEqual(f.read(), expected_contents) bioblend-0.7.0/tests/README.TXT000066400000000000000000000014531261571066300160060ustar00rootroot00000000000000To run the cloud tests, the following environment variables must be set: BIOBLEND_ACCESS_KEY = BIOBLEND_SECRET_KEY = BIOBLEND_CLOUD_TYPE = BIOBLEND_AMI_ID = To run Galaxy tests, the following environment variables must be set: BIOBLEND_GALAXY_API_KEY = BIOBLEND_GALAXY_URL = If you wish to run the entire suite, set all of the above. The integration tests can subsequently be run by invoking `nosetests` from the command line. 
nosetests should be invoked from the project root folder, and not the tests child folder, since the test data is resolved relative to the bioblend folder. bioblend-0.7.0/tests/TestCloudmanLaunch.py000066400000000000000000000041541261571066300205600ustar00rootroot00000000000000""" Tests the functionality of the Blend CloudMan API. These tests require working credentials to supported cloud infrastructure. Use ``nose`` to run these unit tests. """ from bioblend.cloudman import CloudManConfig from bioblend.cloudman import CloudManInstance import CloudmanTestBase import test_util @test_util.skip_unless_cloudman() class TestCloudmanLaunch(CloudmanTestBase.CloudmanTestBase): def setUp(self): super(TestCloudmanLaunch, self).setUp() def test_validate_valid_config(self): """ Tests whether a valid config is validated properly. """ # cfg = CloudManConfig(self.access_key, self.secret_key, self.cluster_name, self.ami_id, self.instance_type, self.password, cloud_metadata=self.cloud_metadata) cls = TestCloudmanLaunch cfg = CloudManConfig(cls.access_key, cls.secret_key, cls.cluster_name, cls.ami_id, cls.instance_type, cls.password, cloud_metadata=cls.cloud_metadata) result = cfg.validate() self.assertIsNone(result, "Validation did not return null to indicate success!") def test_validate_invalid_config(self): """ Tests whether an invalid config is validated properly. """ cfg = CloudManConfig() result = cfg.validate() self.assertIsNotNone(result, "Validation should have returned a value since the configuration was invalid!") def test_launch_and_terminate(self): cls = TestCloudmanLaunch cfg = CloudManConfig(cls.access_key, cls.secret_key, cls.cluster_name, cls.ami_id, cls.instance_type, cls.password, cloud_metadata=cls.cloud_metadata) cmi = CloudManInstance.launch_instance(cfg) status = cmi.get_status() self.assertNotEqual(status['cluster_status'], 'ERROR', "instance.get_status() returned ERROR. Should return a successful status!") try: # TODO: The terminate method is unpredictable! Needs fix. result = cmi.terminate(delete_cluster=True) self.assertEqual(result['cluster_status'], 'SHUTDOWN', "Cluster should be in status SHUTDOWN after call to terminate!") except: pass bioblend-0.7.0/tests/TestCloudmanMock.py000066400000000000000000000136051261571066300202400ustar00rootroot00000000000000""" Tests the functionality of the BioBlend CloudMan API, without actually making calls to a remote CloudMan instance/server. These don't actually ensure that BioBlend is integrated with CloudMan correctly. They only ensure that if you refactor the BioBlend CloudMan API code, that it will maintain its current behaviour. Use ``nose`` to run these unit tests. 
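Each test replaces CloudManInstance._make_get_request with a MagicMock, so no network access or cloud credentials are required.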
""" try: from unittest.mock import MagicMock except ImportError: # Python < 3.3 from mock import MagicMock from bioblend import cloudman from test_util import unittest class TestCloudmanMock(unittest.TestCase): def setUp(self): url = "http://127.0.0.1:42284" password = "password" self.cm = cloudman.CloudManInstance(url, password) # def test_initialize(self): # self.cm._make_get_request = MagicMock(return_value="{}") # # ## Set cluster type # self.cm.initialize(type="Galaxy") # # params = {'startup_opt': 'Galaxy'} # self.cm._make_get_request.assert_called_with("initialize_cluster", parameters=params) def test_get_status(self): # Set return value of call self.cm._make_get_request = MagicMock(return_value={}) status = self.cm.get_status() self.assertNotEqual(status, None) self.assertEqual(status, {}) # Check that the correct URL was called self.cm._make_get_request.assert_called_with("instance_state_json") def test_get_nodes(self): # Set return value of call self.cm._make_get_request = MagicMock(return_value={'instances': []}) nodes = self.cm.get_nodes() self.assertIsNotNone(nodes) self.assertEqual(len(nodes), 0) # Check that the correct URL was called self.cm._make_get_request.assert_called_with("instance_feed_json") def test_add_nodes(self): self.cm._make_get_request = MagicMock(return_value="{}") num_nodes = 10 status = self.cm.add_nodes(num_nodes) self.assertIsNotNone(status) # Check that the correct URL was called params = {'number_nodes': 10, 'instance_type': '', 'spot_price': ''} self.cm._make_get_request.assert_called_with("add_instances", parameters=params) def test_remove_nodes(self): self.cm._make_get_request = MagicMock(return_value="{}") num_nodes = 10 status = self.cm.remove_nodes(num_nodes, force=True) self.assertIsNotNone(status) # Check that the correct URL was called params = {'number_nodes': 10, 'force_termination': True} self.cm._make_get_request.assert_called_with("remove_instances", parameters=params) def test_remove_node(self): self.cm._make_get_request = MagicMock(return_value="{}") instance_id = "abcdef" self.cm.remove_node(instance_id, force=True) # Check that the correct URL was called params = {'instance_id': "abcdef"} self.cm._make_get_request.assert_called_with("remove_instance", parameters=params) def test_reboot_node(self): self.cm._make_get_request = MagicMock(return_value="{}") instance_id = "abcdef" self.cm.reboot_node(instance_id) # Check that the correct URL was called params = {'instance_id': "abcdef"} self.cm._make_get_request.assert_called_with("reboot_instance", parameters=params) def test_autoscaling_enabled_true(self): return_json_string = {"autoscaling": {"use_autoscaling": True, "as_max": "3", "as_min": "1"}} self.cm._make_get_request = MagicMock(return_value=return_json_string) self.assertTrue(self.cm.autoscaling_enabled()) def test_autoscaling_enabled_false(self): return_json_string = {"autoscaling": {"use_autoscaling": False, "as_max": "3", "as_min": "1"}} self.cm._make_get_request = MagicMock(return_value=return_json_string) self.assertFalse(self.cm.autoscaling_enabled()) def test_enable_autoscaling(self): return_json_string = {"autoscaling": {"use_autoscaling": False, "as_max": "N/A", "as_min": "N/A"}} self.cm._make_get_request = MagicMock(return_value=return_json_string) self.assertFalse(self.cm.autoscaling_enabled()) self.cm.enable_autoscaling(minimum_nodes=0, maximum_nodes=19) # Check that the correct URL was called params = {'as_min': 0, 'as_max': 19} self.cm._make_get_request.assert_called_with("toggle_autoscaling", parameters=params) 
return_json_string = {"autoscaling": {"use_autoscaling": True, "as_max": "19", "as_min": "0"}} self.cm.enable_autoscaling(minimum_nodes=0, maximum_nodes=19) # Check that the correct URL was called params = {'as_min': 0, 'as_max': 19} self.cm._make_get_request.assert_called_with("toggle_autoscaling", parameters=params) def test_disable_autoscaling(self): return_json_string = {"autoscaling": {"use_autoscaling": True, "as_max": "3", "as_min": "1"}} self.cm._make_get_request = MagicMock(return_value=return_json_string) self.cm.disable_autoscaling() self.cm._make_get_request.assert_called_with("toggle_autoscaling") def test_adjust_autoscaling(self): return_json_string = {"autoscaling": {"use_autoscaling": True, "as_max": "3", "as_min": "1"}} self.cm._make_get_request = MagicMock(return_value=return_json_string) self.cm.adjust_autoscaling(minimum_nodes=3, maximum_nodes=4) params = {'as_min_adj': 3, 'as_max_adj': 4} self.cm._make_get_request.assert_called_with("adjust_autoscaling", parameters=params) def test_get_galaxy_state_stopped(self): return_json = {"status": "'Galaxy' is not running", "srvc": "Galaxy"} self.cm._make_get_request = MagicMock(return_value=return_json) self.assertEquals(self.cm.get_galaxy_state()['status'], "'Galaxy' is not running") params = {'srvc': "Galaxy"} self.cm._make_get_request.assert_called_with("get_srvc_status", parameters=params) bioblend-0.7.0/tests/TestCloudmanServices.py000066400000000000000000000036771261571066300211420ustar00rootroot00000000000000""" Tests the functionality of the Blend CloudMan API. These tests require working credentials to supported cloud infrastructure. Use ``nose`` to run these unit tests. """ from bioblend.cloudman import CloudManConfig from bioblend.cloudman import CloudManInstance import CloudmanTestBase import test_util @test_util.skip_unless_cloudman() class TestCloudmanServices(CloudmanTestBase.CloudmanTestBase): @classmethod def setUpClass(cls): super(TestCloudmanServices, cls).setUpClass() cls.cfg = CloudManConfig(cls.access_key, cls.secret_key, cls.cluster_name, cls.ami_id, cls.instance_type, cls.password, cloud_metadata=cls.cloud_metadata) cls.cmi = CloudManInstance.launch_instance(cls.cfg) def setUp(self): self.cmi = self.__class__.cmi def test_get_status(self): status = self.cmi.get_status() self.assertIsNotNone(status) def test_get_nodes(self): nodes = self.cmi.get_nodes() self.assertIsNotNone(nodes) def test_add_nodes(self): num_nodes = 1 status = self.cmi.add_nodes(num_nodes) self.assertIsNotNone(status) def test_reboot_node(self): instance_id = self.cmi.instance_id self.cmi.reboot_node(instance_id) def test_remove_node(self): instance_id = self.cmi.instance_id self.cmi.remove_node(instance_id, force=True) def test_enable_autoscaling(self): self.assertFalse(self.cmi.autoscaling_enabled()) self.cmi.enable_autoscaling(minimum_nodes=0, maximum_nodes=19) self.assertTrue(self.cmi.autoscaling_enabled()) def test_disable_autoscaling(self): self.cmi.disable_autoscaling() self.assertFalse(self.cmi.autoscaling_enabled()) def test_adjust_autoscaling(self): self.cmi.adjust_autoscaling(minimum_nodes=3, maximum_nodes=4) # def test_get_galaxy_state_stopped(self): # self.assertEquals(self.cmi.get_galaxy_state(), "'Galaxy' is not running") bioblend-0.7.0/tests/TestGalaxyDatasetCollections.py000066400000000000000000000160761261571066300226230ustar00rootroot00000000000000from bioblend.galaxy import dataset_collections as collections import GalaxyTestBase import test_util @test_util.skip_unless_galaxy('release_14.06') class 
TestGalaxyDatasetCollections(GalaxyTestBase.GalaxyTestBase): def test_create_list_in_history(self): history_id = self.gi.histories.create_history(name="TestDSListCreate")["id"] dataset1_id = self._test_dataset(history_id) dataset2_id = self._test_dataset(history_id) dataset3_id = self._test_dataset(history_id) collection_response = self.gi.histories.create_dataset_collection( history_id=history_id, collection_description=collections.CollectionDescription( name="MyDatasetList", elements=[ collections.HistoryDatasetElement(name="sample1", id=dataset1_id), collections.HistoryDatasetElement(name="sample2", id=dataset2_id), collections.HistoryDatasetElement(name="sample3", id=dataset3_id), ] ) ) self.assertEqual(collection_response["name"], "MyDatasetList") self.assertEqual(collection_response["collection_type"], "list") elements = collection_response["elements"] self.assertEqual(len(elements), 3) self.assertEqual(elements[0]["element_index"], 0) self.assertEqual(elements[0]["object"]["id"], dataset1_id) self.assertEqual(elements[1]["object"]["id"], dataset2_id) self.assertEqual(elements[2]["object"]["id"], dataset3_id) self.assertEqual(elements[2]["element_identifier"], "sample3") def test_create_list_of_paired_datasets_in_history(self): history_id = self.gi.histories.create_history(name="TestDSListCreate")["id"] dataset1_id = self._test_dataset(history_id) dataset2_id = self._test_dataset(history_id) dataset3_id = self._test_dataset(history_id) dataset4_id = self._test_dataset(history_id) collection_response = self.gi.histories.create_dataset_collection( history_id=history_id, collection_description=collections.CollectionDescription( name="MyListOfPairedDatasets", type="list:paired", elements=[ collections.CollectionElement( name="sample1", type="paired", elements=[ collections.HistoryDatasetElement(name="forward", id=dataset1_id), collections.HistoryDatasetElement(name="reverse", id=dataset2_id), ] ), collections.CollectionElement( name="sample2", type="paired", elements=[ collections.HistoryDatasetElement(name="forward", id=dataset3_id), collections.HistoryDatasetElement(name="reverse", id=dataset4_id), ] ), ] ) ) self.assertEqual(collection_response["name"], "MyListOfPairedDatasets") self.assertEqual(collection_response["collection_type"], "list:paired") elements = collection_response["elements"] self.assertEqual(len(elements), 2) self.assertEqual(elements[0]["element_index"], 0) created_pair1 = elements[0]["object"] self.assertEqual(created_pair1["collection_type"], "paired") self.assertEqual(len(created_pair1["elements"]), 2) forward_element1 = created_pair1["elements"][0] self.assertEqual(forward_element1["element_identifier"], "forward") self.assertEqual(forward_element1["element_index"], 0) forward_dataset1 = forward_element1["object"] self.assertEqual(forward_dataset1["id"], dataset1_id) self.assertEqual(elements[1]["element_index"], 1) created_pair2 = elements[1]["object"] self.assertEqual(created_pair2["collection_type"], "paired") self.assertEqual(len(created_pair2["elements"]), 2) reverse_element2 = created_pair2["elements"][1] reverse_dataset2 = reverse_element2["object"] self.assertEqual(reverse_element2["element_identifier"], "reverse") self.assertEqual(reverse_element2["element_index"], 1) self.assertEqual(reverse_dataset2["id"], dataset4_id) def test_collections_in_history_index(self): history_id = self.gi.histories.create_history(name="TestHistoryDSIndex")["id"] history_dataset_collection = self._create_pair_in_history(history_id) contents = 
self.gi.histories.show_history(history_id, contents=True) self.assertEqual(len(contents), 3) self.assertEqual(contents[2]["id"], history_dataset_collection["id"]) self.assertEqual(contents[2]["name"], "MyTestPair") self.assertEqual(contents[2]["collection_type"], "paired") def test_show_history_dataset_collection(self): history_id = self.gi.histories.create_history(name="TestHistoryDSIndexShow")["id"] history_dataset_collection = self._create_pair_in_history(history_id) show_response = self.gi.histories.show_dataset_collection(history_id, history_dataset_collection["id"]) for key in ["collection_type", "elements", "name", "deleted", "visible"]: self.assertIn(key, show_response) self.assertFalse(show_response["deleted"]) self.assertTrue(show_response["visible"]) def test_delete_history_dataset_collection(self): history_id = self.gi.histories.create_history(name="TestHistoryDSDelete")["id"] history_dataset_collection = self._create_pair_in_history(history_id) self.gi.histories.delete_dataset_collection(history_id, history_dataset_collection["id"]) show_response = self.gi.histories.show_dataset_collection(history_id, history_dataset_collection["id"]) self.assertTrue(show_response["deleted"]) def test_update_history_dataset_collection(self): history_id = self.gi.histories.create_history(name="TestHistoryDSDelete")["id"] history_dataset_collection = self._create_pair_in_history(history_id) self.gi.histories.update_dataset_collection(history_id, history_dataset_collection["id"], visible=False) show_response = self.gi.histories.show_dataset_collection(history_id, history_dataset_collection["id"]) self.assertFalse(show_response["visible"]) def _create_pair_in_history(self, history_id): dataset1_id = self._test_dataset(history_id) dataset2_id = self._test_dataset(history_id) collection_response = self.gi.histories.create_dataset_collection( history_id=history_id, collection_description=collections.CollectionDescription( name="MyTestPair", type="paired", elements=[ collections.HistoryDatasetElement(name="forward", id=dataset1_id), collections.HistoryDatasetElement(name="reverse", id=dataset2_id), ] ) ) return collection_response bioblend-0.7.0/tests/TestGalaxyDatasets.py000066400000000000000000000025601261571066300206000ustar00rootroot00000000000000""" Use ``nose`` to run these unit tests. 
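# Illustrative sketch (not part of the original test suite): the dataset calls
# exercised by the tests below, shown against a plain GalaxyInstance. The URL,
# API key, dataset id and output path are placeholders.
from bioblend.galaxy import GalaxyInstance

gi = GalaxyInstance("http://localhost:8080", key="<api-key>")
dataset = gi.datasets.show_dataset("<dataset-id>")             # dict of dataset metadata
gi.datasets.download_dataset("<dataset-id>",
                             file_path="/tmp/out.tabular",     # write here instead of the default name
                             use_default_filename=False)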
""" import tempfile import GalaxyTestBase import test_util @test_util.skip_unless_galaxy() class TestGalaxyDatasets(GalaxyTestBase.GalaxyTestBase): def setUp(self): super(TestGalaxyDatasets, self).setUp() self.history_id = self.gi.histories.create_history(name='TestShowDataset')['id'] self.dataset_id = self._test_dataset(self.history_id) def tearDown(self): self.gi.histories.delete_history(self.history_id, purge=True) def test_show_dataset(self): with self.assertRaises(Exception): self.gi.datasets.show_dataset(None) self.gi.datasets.show_dataset(self.dataset_id) def test_download_dataset(self): with self.assertRaises(Exception): self.gi.datasets.download_dataset(None) self._wait_for_history(self.history_id) with tempfile.NamedTemporaryFile(prefix='bioblend_test_') as f: self.gi.datasets.download_dataset(self.dataset_id, file_path=f.name, use_default_filename=False) f.flush() self.assertEqual(f.read(), b"1\t2\t3\n") def test_show_stderr(self): stderr = self.gi.datasets.show_stderr(self.dataset_id) self.assertIsNotNone(stderr) def test_show_stdout(self): stdout = self.gi.datasets.show_stdout(self.dataset_id) self.assertIsNotNone(stdout) bioblend-0.7.0/tests/TestGalaxyGroups.py000066400000000000000000000051671261571066300203150ustar00rootroot00000000000000""" WARNING: only admins can operate on groups! """ import uuid import GalaxyTestBase import test_util @test_util.skip_unless_galaxy() class TestGalaxyGroups(GalaxyTestBase.GalaxyTestBase): def setUp(self): super(TestGalaxyGroups, self).setUp() self.name = 'test_%s' % uuid.uuid4().hex self.group = self.gi.groups.create_group(self.name)[0] def tearDown(self): # As of 2015/04/13, deleting a group is not possible through the API pass def test_create_group(self): self.assertEqual(self.group['name'], self.name) self.assertIsNotNone(self.group['id']) def test_get_groups(self): groups = self.gi.groups.get_groups() for group in groups: self.assertIsNotNone(group['id']) self.assertIsNotNone(group['name']) def test_show_group(self): group_data = self.gi.groups.show_group(self.group['id']) self.assertEqual(self.group['id'], group_data['id']) self.assertEqual(self.group['name'], group_data['name']) def test_get_group_users(self): group_users = self.gi.groups.get_group_users(self.group['id']) self.assertEqual(group_users, []) def test_get_group_roles(self): group_roles = self.gi.groups.get_group_roles(self.group['id']) self.assertEqual(group_roles, []) def test_update_group(self): new_name = 'test_%s' % uuid.uuid4().hex new_users = [self.gi.users.get_current_user()['id']] self.gi.groups.update_group(self.group['id'], new_name, user_ids=new_users) updated_group = self.gi.groups.show_group(self.group['id']) self.assertEqual(self.group['id'], updated_group['id']) self.assertEqual(updated_group['name'], new_name) updated_group_users = [_['id'] for _ in self.gi.groups.get_group_users(self.group['id'])] self.assertEqual(set(updated_group_users), set(new_users)) updated_group_roles = [_['id'] for _ in self.gi.groups.get_group_roles(self.group['id'])] self.assertEqual(set(updated_group_roles), set()) def test_add_delete_group_user(self): new_user = self.gi.users.get_current_user()['id'] ret = self.gi.groups.add_group_user(self.group['id'], new_user) self.assertEqual(ret['id'], new_user) updated_group_users = [_['id'] for _ in self.gi.groups.get_group_users(self.group['id'])] self.assertIn(new_user, updated_group_users) self.gi.groups.delete_group_user(self.group['id'], new_user) updated_group_users = [_['id'] for _ in 
self.gi.groups.get_group_users(self.group['id'])] self.assertNotIn(new_user, updated_group_users) bioblend-0.7.0/tests/TestGalaxyHistories.py000066400000000000000000000160311261571066300207770ustar00rootroot00000000000000""" """ import os import shutil import tarfile import tempfile import GalaxyTestBase import test_util @test_util.skip_unless_galaxy() class TestGalaxyHistories(GalaxyTestBase.GalaxyTestBase): def setUp(self): super(TestGalaxyHistories, self).setUp() self.default_history_name = "buildbot - automated test" self.history = self.gi.histories.create_history(name=self.default_history_name) def test_create_history(self): history_name = "another buildbot - automated test" new_history = self.gi.histories.create_history(name=history_name) self.assertIsNotNone(new_history['id']) self.assertEqual(new_history['name'], history_name) self.assertIsNotNone(new_history['url']) def test_update_history(self): new_name = 'buildbot - automated test renamed' new_annotation = 'Annotation for %s' % new_name new_tags = ['tag1', 'tag2'] self.gi.histories.update_history(self.history['id'], name=new_name, annotation=new_annotation, tags=new_tags) updated_hist = self.gi.histories.show_history(self.history['id']) self.assertEqual(self.history['id'], updated_hist['id']) self.assertEqual(updated_hist['name'], new_name) self.assertEqual(updated_hist['annotation'], new_annotation) self.assertEqual(updated_hist['tags'], new_tags) def test_get_histories(self): # Make sure there's at least one value - the one we created full_history = self.gi.histories.get_histories() self.assertIsNotNone(full_history) # Check whether name is correct, when searched by id new_history = self.gi.histories.get_histories(history_id=self.history['id']) self.assertTrue(any(d['name'] == self.default_history_name for d in new_history)) # Check whether id is present, when searched by name new_history = self.gi.histories.get_histories(name=self.default_history_name) self.assertTrue(any(d['id'] == self.history['id'] for d in new_history)) # TODO: check whether deleted history is returned correctly # At the moment, get_histories() returns only not-deleted histories # and get_histories(deleted=True) returns only deleted histories, # so they are not comparable. # In the future, according to https://trello.com/c/MoilsmVv/1673-api-incoherent-and-buggy-indexing-of-deleted-entities , # get_histories() will return both not-deleted and deleted histories # and we can uncomment the following test. 
# deleted_history = self.gi.histories.get_histories(deleted=True) # self.assertGreaterEqual(len(full_history), len(deleted_history)) def test_show_history(self): history_data = self.gi.histories.show_history(self.history['id']) self.assertEqual(self.history['id'], history_data['id']) self.assertEqual(self.history['name'], history_data['name']) self.assertEqual('new', history_data['state']) @test_util.skip_unless_galaxy('release_14.04') def test_create_history_tag(self): new_tag = 'tag1' self.gi.histories.create_history_tag(self.history['id'], new_tag) updated_hist = self.gi.histories.show_history(self.history['id']) self.assertEqual(self.history['id'], updated_hist['id']) self.assertIn(new_tag, updated_hist['tags']) def test_show_dataset(self): history_id = self.history["id"] dataset1_id = self._test_dataset(history_id) dataset = self.gi.histories.show_dataset(history_id, dataset1_id) for key in ["name", "hid", "id", "deleted", "history_id", "visible"]: self.assertIn(key, dataset) self.assertEqual(dataset["history_id"], history_id) self.assertEqual(dataset["hid"], 1) self.assertEqual(dataset["id"], dataset1_id) self.assertEqual(dataset["deleted"], False) self.assertEqual(dataset["visible"], True) def test_show_dataset_provenance(self): history_id = self.history["id"] dataset1_id = self._test_dataset(history_id) prov = self.gi.histories.show_dataset_provenance(history_id, dataset1_id) # 'job_id' key was added in Galaxy release_14.06 for key in ["id", "stdout", "stderr", "parameters", "tool_id"]: self.assertIn(key, prov) def test_delete_dataset(self): history_id = self.history["id"] dataset1_id = self._test_dataset(history_id) self.gi.histories.delete_dataset(history_id, dataset1_id) dataset = self.gi.histories.show_dataset(history_id, dataset1_id) self.assertTrue(dataset["deleted"]) def test_update_dataset(self): history_id = self.history["id"] dataset1_id = self._test_dataset(history_id) self.gi.histories.update_dataset(history_id, dataset1_id, visible=False) dataset = self.gi.histories.show_dataset(history_id, dataset1_id) self.assertFalse(dataset["visible"]) def test_upload_dataset_from_library(self): pass def test_download_dataset(self): history_id = self.history["id"] dataset1_id = self._test_dataset(history_id) self._wait_and_verify_dataset(history_id, dataset1_id, b"1\t2\t3\n") def test_delete_history(self): result = self.gi.histories.delete_history(self.history['id']) self.assertTrue(result['deleted']) full_history = self.gi.histories.get_histories() self.assertTrue(not any(d['id'] == self.history['id'] for d in full_history)) def test_undelete_history(self): self.gi.histories.delete_history(self.history['id']) self.gi.histories.undelete_history(self.history['id']) full_history = self.gi.histories.get_histories() self.assertTrue(any(d['id'] == self.history['id'] for d in full_history)) def test_get_status(self): state = self.gi.histories.get_status(self.history['id']) self.assertEqual('new', state['state']) def test_get_most_recently_used_history(self): most_recently_used_history = self.gi.histories.get_most_recently_used_history() # if the user has been created via the API, it does not have # a session, therefore no history if most_recently_used_history is not None: self.assertIsNotNone(most_recently_used_history['id']) self.assertIsNotNone(most_recently_used_history['name']) self.assertIsNotNone(most_recently_used_history['state']) def test_download_history(self): jeha_id = self.gi.histories.export_history( self.history['id'], wait=True ) self.assertTrue(jeha_id) tempdir = 
tempfile.mkdtemp(prefix='bioblend_test_') temp_fn = os.path.join(tempdir, 'export.tar.gz') try: with open(temp_fn, 'wb') as fo: self.gi.histories.download_history(self.history['id'], jeha_id, fo) self.assertTrue(tarfile.is_tarfile(temp_fn)) finally: shutil.rmtree(tempdir) def tearDown(self): self.gi.histories.delete_history(self.history['id'], purge=True) bioblend-0.7.0/tests/TestGalaxyInstance.py000066400000000000000000000025021261571066300205700ustar00rootroot00000000000000""" Tests on the GalaxyInstance object itself. Use ``nose`` to run these unit tests. """ from test_util import unittest from bioblend.galaxy import GalaxyInstance from bioblend.galaxy.client import Client, ConnectionError class TestGalaxyInstance(unittest.TestCase): def setUp(self): # "connect" to a galaxy instance that doesn't exist self.gi = GalaxyInstance("http://localhost:56789", key="whatever") def test_set_max_get_retries(self): self.gi.max_get_attempts = 3 self.assertEqual(3, Client.max_get_retries()) def test_set_retry_delay(self): self.gi.get_retry_delay = 5 self.assertEqual(5, Client.get_retry_delay()) def test_get_retry(self): # We set the client to try twice, with a delay of 5 seconds between # attempts. So, we expect the call to take at least 5 seconds before # failing. self.gi.max_get_attempts = 2 self.gi.get_retry_delay = 5 import time start = time.time() try: self.gi.libraries.get_libraries() self.fail("Call to show_libraries should have raised a ConnectionError") except ConnectionError: end = time.time() duration = end - start self.assertGreater(duration, self.gi.get_retry_delay, "Didn't seem to retry long enough") bioblend-0.7.0/tests/TestGalaxyLibraries.py000066400000000000000000000066701261571066300207520ustar00rootroot00000000000000""" Use ``nose`` to run these unit tests. 
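# Illustrative sketch (not part of the original test suite): the data-library
# calls exercised by the tests below, against placeholder URL, API key and data.
from bioblend.galaxy import GalaxyInstance

gi = GalaxyInstance("http://localhost:8080", key="<api-key>")
# Create a library, push some content into it, then remove it again.
library = gi.libraries.create_library("example library", description="demo", synopsis="demo synopsis")
gi.libraries.upload_file_contents(library['id'], "foo\nbar\n")
gi.libraries.upload_file_from_local_path(library['id'], "/tmp/example.txt")
gi.libraries.delete_library(library['id'])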
""" import os import shutil import tempfile import GalaxyTestBase import test_util FOO_DATA = 'foo\nbar\n' @test_util.skip_unless_galaxy() class TestGalaxyLibraries(GalaxyTestBase.GalaxyTestBase): def setUp(self): super(TestGalaxyLibraries, self).setUp() self.name = 'automated test library' self.library = self.gi.libraries.create_library(self.name, description='automated test', synopsis='automated test synopsis') def tearDown(self): self.gi.libraries.delete_library(self.library['id']) def test_create_library(self): self.assertEqual(self.library['name'], self.name) self.assertIsNotNone(self.library['id']) def test_create_folder(self): pass def test_get_libraries(self): # Make sure there's at least one value - the one we created all_libraries = self.gi.libraries.get_libraries() self.assertGreaterEqual(len(all_libraries), 1) def test_show_library(self): library_data = self.gi.libraries.show_library(self.library['id']) self.assertEqual(self.library['id'], library_data['id']) self.assertEqual(self.library['name'], library_data['name']) def test_upload_file_from_url(self): pass def test_upload_file_contents(self): self.gi.libraries.upload_file_contents(self.library['id'], FOO_DATA) def test_upload_file_from_local_path(self): with tempfile.NamedTemporaryFile(mode='w', prefix='bioblend_test_') as f: f.write(FOO_DATA) f.flush() self.gi.libraries.upload_file_from_local_path(self.library['id'], f.name) def test_upload_file_from_server(self): pass def test_upload_from_galaxy_filesystem(self): bnames = ['f%d.txt' % i for i in range(2)] tempdir = tempfile.mkdtemp(prefix='bioblend_test_') try: fnames = [os.path.join(tempdir, _) for _ in bnames] for fn in fnames: with open(fn, 'w') as f: f.write(FOO_DATA) filesystem_paths = '\n'.join(fnames) self.gi.libraries.upload_from_galaxy_filesystem(self.library['id'], filesystem_paths) self.gi.libraries.upload_from_galaxy_filesystem(self.library['id'], filesystem_paths, link_data_only='link_to_files') finally: shutil.rmtree(tempdir) def test_copy_from_dataset(self): history = self.gi.histories.create_history() dataset_id = self._test_dataset(history['id']) self.gi.libraries.copy_from_dataset(self.library['id'], dataset_id, message='Copied from dataset') @test_util.skip_unless_galaxy('release_14.10') def test_library_permissions(self): current_user = self.gi.users.get_current_user() user_id_list_new = [current_user['id']] self.gi.libraries.set_library_permissions(self.library['id'], access_in=user_id_list_new, modify_in=user_id_list_new, add_in=user_id_list_new, manage_in=user_id_list_new) ret = self.gi.libraries.get_library_permissions(self.library['id']) self.assertEqual(set(_[1] for _ in ret['access_library_role_list']), set(user_id_list_new)) self.assertEqual(set(_[1] for _ in ret['modify_library_role_list']), set(user_id_list_new)) self.assertEqual(set(_[1] for _ in ret['add_library_item_role_list']), set(user_id_list_new)) self.assertEqual(set(_[1] for _ in ret['manage_library_role_list']), set(user_id_list_new)) bioblend-0.7.0/tests/TestGalaxyObjects.py000066400000000000000000000640361261571066300204270ustar00rootroot00000000000000# pylint: disable=C0103,E1101 import json import os import shutil import socket import sys import tarfile import tempfile import uuid from six.moves.urllib.request import urlopen from six.moves.urllib.error import URLError import six import bioblend bioblend.set_stream_logger('test', level='INFO') import bioblend.galaxy.objects.wrappers as wrappers import bioblend.galaxy.objects.galaxy_instance as galaxy_instance from 
bioblend.galaxy.client import ConnectionError from test_util import unittest import test_util socket.setdefaulttimeout(10.0) THIS_DIR = os.path.dirname(os.path.abspath(__file__)) SAMPLE_FN = os.path.join(THIS_DIR, 'data', 'paste_columns.ga') SAMPLE_WF_COLL_FN = os.path.join(THIS_DIR, 'data', 'paste_columns_collections.ga') FOO_DATA = 'foo\nbar\n' FOO_DATA_2 = 'foo2\nbar2\n' SAMPLE_WF_DICT = { 'deleted': False, 'id': '9005c5112febe774', 'inputs': { '571': {'label': 'Input Dataset', 'value': ''}, '572': {'label': 'Input Dataset', 'value': ''}, }, 'model_class': 'StoredWorkflow', 'name': 'paste_columns', 'published': False, 'steps': { '571': { 'id': 571, 'input_steps': {}, 'tool_id': None, 'tool_inputs': {'name': 'Input Dataset'}, 'tool_version': None, 'type': 'data_input', }, '572': { 'id': 572, 'input_steps': {}, 'tool_id': None, 'tool_inputs': {'name': 'Input Dataset'}, 'tool_version': None, 'type': 'data_input', }, '573': { 'id': 573, 'input_steps': { 'input1': {'source_step': 571, 'step_output': 'output'}, 'input2': {'source_step': 572, 'step_output': 'output'}, }, 'tool_id': 'Paste1', 'tool_inputs': { 'delimiter': '"T"', 'input1': 'null', 'input2': 'null', }, 'tool_version': '1.0.0', 'type': 'tool', } }, 'tags': [], 'url': '/api/workflows/9005c5112febe774', } def is_reachable(url): res = None try: res = urlopen(url, timeout=1) except (URLError, socket.timeout): return False if res is not None: res.close() return True def upload_from_fs(lib, bnames, **kwargs): tempdir = tempfile.mkdtemp(prefix='bioblend_test_') try: fnames = [os.path.join(tempdir, _) for _ in bnames] for fn in fnames: with open(fn, 'w') as f: f.write(FOO_DATA) dss = lib.upload_from_galaxy_fs(fnames, **kwargs) finally: shutil.rmtree(tempdir) return dss, fnames class MockWrapper(wrappers.Wrapper): BASE_ATTRS = frozenset(['a', 'b']) def __init__(self, *args, **kwargs): super(MockWrapper, self).__init__(*args, **kwargs) @property def gi_module(self): return super(MockWrapper, self).gi_module() class TestWrapper(unittest.TestCase): def setUp(self): self.d = {'a': 1, 'b': [2, 3], 'c': {'x': 4}} self.assertRaises(TypeError, wrappers.Wrapper, self.d) self.w = MockWrapper(self.d) def test_initialize(self): for k in MockWrapper.BASE_ATTRS: self.assertEqual(getattr(self.w, k), self.d[k]) self.w.a = 222 self.w.b[0] = 222 self.assertEqual(self.w.a, 222) self.assertEqual(self.w.b[0], 222) self.assertEqual(self.d['a'], 1) self.assertEqual(self.d['b'][0], 2) self.assertRaises(AttributeError, getattr, self.w, 'foo') self.assertRaises(AttributeError, setattr, self.w, 'foo', 0) def test_taint(self): self.assertFalse(self.w.is_modified) self.w.a = 111 # pylint: disable=W0201 self.assertTrue(self.w.is_modified) def test_serialize(self): w = MockWrapper.from_json(self.w.to_json()) self.assertEqual(w.wrapped, self.w.wrapped) def test_clone(self): w = self.w.clone() self.assertEqual(w.wrapped, self.w.wrapped) w.b[0] = 111 self.assertEqual(self.w.b[0], 2) def test_kwargs(self): parent = MockWrapper({'a': 10}) w = MockWrapper(self.d, parent=parent) self.assertIs(w.parent, parent) self.assertRaises(AttributeError, setattr, w, 'parent', 0) class TestWorkflow(unittest.TestCase): def setUp(self): self.wf = wrappers.Workflow(SAMPLE_WF_DICT) def test_initialize(self): self.assertEqual(self.wf.id, '9005c5112febe774') self.assertEqual(self.wf.name, 'paste_columns') self.assertEqual(self.wf.deleted, False) self.assertEqual(self.wf.published, False) self.assertEqual(self.wf.tags, []) self.assertEqual( self.wf.input_labels_to_ids, {'Input Dataset': 
set(['571', '572'])}) self.assertEqual(self.wf.tool_labels_to_ids, {'Paste1': set(['573'])}) self.assertEqual(self.wf.data_input_ids, set(['571', '572'])) self.assertEqual(self.wf.source_ids, set(['571', '572'])) self.assertEqual(self.wf.sink_ids, set(['573'])) def test_dag(self): inv_dag = {} for h, tails in six.iteritems(self.wf.dag): for t in tails: inv_dag.setdefault(str(t), set()).add(h) self.assertEqual(self.wf.inv_dag, inv_dag) heads = set(self.wf.dag) self.assertEqual(heads, set.union(*self.wf.inv_dag.values())) tails = set(self.wf.inv_dag) self.assertEqual(tails, set.union(*self.wf.dag.values())) ids = self.wf.sorted_step_ids() self.assertEqual(set(ids), heads | tails) for h, tails in six.iteritems(self.wf.dag): for t in tails: self.assertLess(ids.index(h), ids.index(t)) def test_steps(self): steps = SAMPLE_WF_DICT['steps'] for sid, s in six.iteritems(self.wf.steps): self.assertIsInstance(s, wrappers.Step) self.assertEqual(s.id, sid) self.assertIn(sid, steps) self.assertIs(s.parent, self.wf) self.assertEqual(self.wf.data_input_ids, set(['571', '572'])) self.assertEqual(self.wf.tool_ids, set(['573'])) def test_taint(self): self.assertFalse(self.wf.is_modified) self.wf.steps['571'].tool_id = 'foo' self.assertTrue(self.wf.is_modified) def test_input_map(self): class DummyLD(object): SRC = 'ld' def __init__(self, id_): self.id = id_ label = 'Input Dataset' self.assertEqual(self.wf.input_labels, set([label])) input_map = self.wf.convert_input_map( {label: [DummyLD('a'), DummyLD('b')]}) # {'571': {'id': 'a', 'src': 'ld'}, '572': {'id': 'b', 'src': 'ld'}} # OR # {'571': {'id': 'b', 'src': 'ld'}, '572': {'id': 'a', 'src': 'ld'}} self.assertEqual(set(input_map), set(['571', '572'])) for d in six.itervalues(input_map): self.assertEqual(set(d), set(['id', 'src'])) self.assertEqual(d['src'], 'ld') self.assertIn(d['id'], 'ab') class GalaxyObjectsTestBase(unittest.TestCase): def setUp(self): galaxy_key = os.environ['BIOBLEND_GALAXY_API_KEY'] galaxy_url = os.environ['BIOBLEND_GALAXY_URL'] self.gi = galaxy_instance.GalaxyInstance(galaxy_url, galaxy_key) @test_util.skip_unless_galaxy() class TestGalaxyInstance(GalaxyObjectsTestBase): def test_library(self): name = 'test_%s' % uuid.uuid4().hex description, synopsis = 'D', 'S' lib = self.gi.libraries.create( name, description=description, synopsis=synopsis) self.assertEqual(lib.name, name) self.assertEqual(lib.description, description) self.assertEqual(lib.synopsis, synopsis) self.assertEqual(len(lib.content_infos), 1) # root folder self.assertEqual(len(lib.folder_ids), 1) self.assertEqual(len(lib.dataset_ids), 0) self.assertIn(lib.id, [_.id for _ in self.gi.libraries.list()]) lib.delete() self.assertFalse(lib.is_mapped) def test_history(self): name = 'test_%s' % uuid.uuid4().hex hist = self.gi.histories.create(name) self.assertEqual(hist.name, name) self.assertIn(hist.id, [_.id for _ in self.gi.histories.list()]) hist.delete(purge=True) self.assertFalse(hist.is_mapped) def test_workflow_from_str(self): with open(SAMPLE_FN) as f: wf = self.gi.workflows.import_new(f.read()) self.__check_and_del_workflow(wf) @test_util.skip_unless_galaxy('release_14.06') def test_workflow_collections_from_str(self): with open(SAMPLE_WF_COLL_FN) as f: wf = self.gi.workflows.import_new(f.read()) self.__check_and_del_workflow(wf) def test_workflow_from_dict(self): with open(SAMPLE_FN) as f: wf = self.gi.workflows.import_new(json.load(f)) self.__check_and_del_workflow(wf) def test_workflow_missing_tools(self): with open(SAMPLE_FN) as f: wf_dump = json.load(f) wf_info = 
self.gi.gi.workflows.import_workflow_json(wf_dump) wf_dict = self.gi.gi.workflows.show_workflow(wf_info['id']) for id_, step in six.iteritems(wf_dict['steps']): if step['type'] == 'tool': for k in 'tool_inputs', 'tool_version': wf_dict['steps'][id_][k] = None wf = wrappers.Workflow(wf_dict, gi=self.gi) self.assertFalse(wf.is_runnable) self.assertRaises(RuntimeError, wf.run) wf.delete() def test_export(self): with open(SAMPLE_FN) as f: wf1 = self.gi.workflows.import_new(f.read()) wf2 = self.gi.workflows.import_new(wf1.export()) self.assertNotEqual(wf1.id, wf2.id) for wf in wf1, wf2: self.__check_and_del_workflow(wf) def __check_and_del_workflow(self, wf): # Galaxy appends additional text to imported workflow names self.assertTrue(wf.name.startswith('paste_columns')) self.assertEqual(len(wf.steps), 3) wf_ids = set(_.id for _ in self.gi.workflows.list()) self.assertIn(wf.id, wf_ids) wf.delete() # not very accurate: # * we can't publish a wf from the API # * we can't directly get another user's wf def test_workflow_from_shared(self): all_prevs = dict( (_.id, _) for _ in self.gi.workflows.get_previews(published=True) ) pub_only_ids = set(all_prevs).difference( _.id for _ in self.gi.workflows.get_previews()) if pub_only_ids: wf_id = pub_only_ids.pop() imported = self.gi.workflows.import_shared(wf_id) self.assertIsInstance(imported, wrappers.Workflow) imported.delete() else: self.skipTest('no published workflows, manually publish a workflow to run this test') def test_get_libraries(self): self.__test_multi_get('library') def test_get_histories(self): self.__test_multi_get('history') def test_get_workflows(self): self.__test_multi_get('workflow') def __normalized_functions(self, obj_type): if obj_type == 'library': create = self.gi.libraries.create get_objs = self.gi.libraries.list get_prevs = self.gi.libraries.get_previews del_kwargs = {} elif obj_type == 'history': create = self.gi.histories.create get_objs = self.gi.histories.list get_prevs = self.gi.histories.get_previews del_kwargs = {'purge': True} elif obj_type == 'workflow': def create(name): with open(SAMPLE_FN) as f: d = json.load(f) d['name'] = name return self.gi.workflows.import_new(d) get_objs = self.gi.workflows.list get_prevs = self.gi.workflows.get_previews del_kwargs = {} return create, get_objs, get_prevs, del_kwargs def __test_multi_get(self, obj_type): create, get_objs, get_prevs, del_kwargs = self.__normalized_functions( obj_type) ids = lambda seq: set(_.id for _ in seq) names = ['test_%s' % uuid.uuid4().hex for _ in range(2)] objs = [] try: objs = [create(_) for _ in names] self.assertLessEqual(ids(objs), ids(get_objs())) if obj_type != 'workflow': filtered = get_objs(name=names[0]) self.assertEqual(len(filtered), 1) self.assertEqual(filtered[0].id, objs[0].id) del_id = objs[-1].id objs.pop().delete(**del_kwargs) self.assertIn(del_id, ids(get_prevs(deleted=True))) else: # Galaxy appends info strings to imported workflow names prev = get_prevs()[0] filtered = get_objs(name=prev.name) self.assertEqual(len(filtered), 1) self.assertEqual(filtered[0].id, prev.id) finally: for o in objs: o.delete(**del_kwargs) def test_delete_libraries_by_name(self): self.__test_delete_by_name('library') def test_delete_histories_by_name(self): self.__test_delete_by_name('history') def test_delete_workflows_by_name(self): self.__test_delete_by_name('workflow') def __test_delete_by_name(self, obj_type): create, _, get_prevs, del_kwargs = self.__normalized_functions( obj_type) name = 'test_%s' % uuid.uuid4().hex objs = [create(name) for _ in 
range(2)] # noqa final_name = objs[0].name prevs = [_ for _ in get_prevs(name=final_name) if not _.deleted] self.assertEqual(len(prevs), len(objs)) del_kwargs['name'] = final_name objs[0].gi_module.delete(**del_kwargs) prevs = [_ for _ in get_prevs(name=final_name) if not _.deleted] self.assertEqual(len(prevs), 0) @test_util.skip_unless_galaxy() class TestLibrary(GalaxyObjectsTestBase): # just something that can be expected to be always up DS_URL = 'http://tools.ietf.org/rfc/rfc1866.txt' def setUp(self): super(TestLibrary, self).setUp() self.lib = self.gi.libraries.create('test_%s' % uuid.uuid4().hex) def tearDown(self): self.lib.delete() def test_root_folder(self): r = self.lib.root_folder self.assertIsNone(r.parent) def test_folder(self): name, desc = 'test_%s' % uuid.uuid4().hex, 'D' folder = self.lib.create_folder(name, description=desc) self.assertEqual(folder.name, name) self.assertEqual(folder.description, desc) self.assertIs(folder.container, self.lib) self.assertEqual(folder.parent.id, self.lib.root_folder.id) self.assertEqual(len(self.lib.content_infos), 2) self.assertEqual(len(self.lib.folder_ids), 2) self.assertIn(folder.id, self.lib.folder_ids) retrieved = self.lib.get_folder(folder.id) self.assertEqual(folder.id, retrieved.id) def __check_datasets(self, dss): self.assertEqual(len(dss), len(self.lib.dataset_ids)) self.assertEqual(set(_.id for _ in dss), set(self.lib.dataset_ids)) for ds in dss: self.assertIs(ds.container, self.lib) def test_dataset(self): folder = self.lib.create_folder('test_%s' % uuid.uuid4().hex) ds = self.lib.upload_data(FOO_DATA, folder=folder) self.assertEqual(len(self.lib.content_infos), 3) self.assertEqual(len(self.lib.folder_ids), 2) self.__check_datasets([ds]) def test_dataset_from_url(self): if is_reachable(self.DS_URL): ds = self.lib.upload_from_url(self.DS_URL) self.__check_datasets([ds]) else: self.skipTest('%s not reachable' % self.DS_URL) def test_dataset_from_local(self): with tempfile.NamedTemporaryFile(mode='w', prefix='bioblend_test_') as f: f.write(FOO_DATA) f.flush() ds = self.lib.upload_from_local(f.name) self.__check_datasets([ds]) def test_datasets_from_fs(self): bnames = ['f%d.txt' % i for i in range(2)] dss, fnames = upload_from_fs(self.lib, bnames) self.__check_datasets(dss) dss, fnames = upload_from_fs( self.lib, bnames, link_data_only='link_to_files') for ds, fn in zip(dss, fnames): self.assertEqual(ds.file_name, fn) def test_copy_from_dataset(self): hist = self.gi.histories.create('test_%s' % uuid.uuid4().hex) try: hda = hist.paste_content(FOO_DATA) ds = self.lib.copy_from_dataset(hda) finally: hist.delete(purge=True) self.__check_datasets([ds]) def test_get_dataset(self): ds = self.lib.upload_data(FOO_DATA) retrieved = self.lib.get_dataset(ds.id) self.assertEqual(ds.id, retrieved.id) def test_get_datasets(self): bnames = ['f%d.txt' % _ for _ in range(2)] dss, _ = upload_from_fs(self.lib, bnames) retrieved = self.lib.get_datasets() self.assertEqual(len(dss), len(retrieved)) self.assertEqual(set(_.id for _ in dss), set(_.id for _ in retrieved)) name = '/%s' % bnames[0] selected = self.lib.get_datasets(name=name) self.assertEqual(len(selected), 1) self.assertEqual(selected[0].name, bnames[0]) @test_util.skip_unless_galaxy() class TestLDContents(GalaxyObjectsTestBase): def setUp(self): super(TestLDContents, self).setUp() self.lib = self.gi.libraries.create('test_%s' % uuid.uuid4().hex) self.ds = self.lib.upload_data(FOO_DATA) self.ds.wait() def tearDown(self): self.lib.delete() @test_util.skip_unless_galaxy('release_14.08') def 
test_dataset_get_stream(self): for idx, c in enumerate(self.ds.get_stream(chunk_size=1)): self.assertEqual(six.b(FOO_DATA[idx]), c) @test_util.skip_unless_galaxy('release_14.08') def test_dataset_peek(self): fetched_data = self.ds.peek(chunk_size=4) self.assertEqual(six.b(FOO_DATA[0:4]), fetched_data) @test_util.skip_unless_galaxy('release_14.08') def test_dataset_download(self): with tempfile.TemporaryFile() as f: self.ds.download(f) f.seek(0) self.assertEqual(six.b(FOO_DATA), f.read()) @test_util.skip_unless_galaxy('release_14.08') def test_dataset_get_contents(self): self.assertEqual(six.b(FOO_DATA), self.ds.get_contents()) def test_dataset_delete(self): self.ds.delete() # Cannot test this yet because the 'deleted' attribute is not exported # by the API at the moment # self.assertTrue(self.ds.deleted) @test_util.skip_unless_galaxy() class TestHistory(GalaxyObjectsTestBase): def setUp(self): super(TestHistory, self).setUp() self.hist = self.gi.histories.create('test_%s' % uuid.uuid4().hex) def tearDown(self): self.hist.delete(purge=True) def test_delete(self): hist = self.gi.histories.create('test_%s' % uuid.uuid4().hex) hist_id = hist.id hist.delete(purge=True) self.assertFalse(hist.is_mapped) try: h = self.gi.histories.get(hist_id) self.assertTrue(h.deleted) except ConnectionError: # Galaxy up to release_2015.01.13 gives a ConnectionError pass def __check_dataset(self, hda): self.assertIsInstance(hda, wrappers.HistoryDatasetAssociation) self.assertIs(hda.container, self.hist) self.assertEqual(len(self.hist.dataset_ids), 1) self.assertEqual(self.hist.dataset_ids[0], hda.id) def test_import_dataset(self): lib = self.gi.libraries.create('test_%s' % uuid.uuid4().hex) lds = lib.upload_data(FOO_DATA) self.assertEqual(len(self.hist.dataset_ids), 0) hda = self.hist.import_dataset(lds) lib.delete() self.__check_dataset(hda) def test_upload_file(self): with tempfile.NamedTemporaryFile(mode='w', prefix='bioblend_test_') as f: f.write(FOO_DATA) f.flush() hda = self.hist.upload_file(f.name) self.__check_dataset(hda) def test_paste_content(self): hda = self.hist.paste_content(FOO_DATA) self.__check_dataset(hda) def test_get_dataset(self): hda = self.hist.paste_content(FOO_DATA) retrieved = self.hist.get_dataset(hda.id) self.assertEqual(hda.id, retrieved.id) def test_get_datasets(self): bnames = ['f%d.txt' % _ for _ in range(2)] lib = self.gi.libraries.create('test_%s' % uuid.uuid4().hex) lds = upload_from_fs(lib, bnames)[0] hdas = [self.hist.import_dataset(_) for _ in lds] lib.delete() retrieved = self.hist.get_datasets() self.assertEqual(len(hdas), len(retrieved)) self.assertEqual(set(_.id for _ in hdas), set(_.id for _ in retrieved)) selected = self.hist.get_datasets(name=bnames[0]) self.assertEqual(len(selected), 1) self.assertEqual(selected[0].name, bnames[0]) def test_export_and_download(self): jeha_id = self.hist.export(wait=True) self.assertTrue(jeha_id) tempdir = tempfile.mkdtemp(prefix='bioblend_test_') temp_fn = os.path.join(tempdir, 'export.tar.gz') try: with open(temp_fn, 'wb') as fo: self.hist.download(jeha_id, fo) self.assertTrue(tarfile.is_tarfile(temp_fn)) finally: shutil.rmtree(tempdir) def test_update(self): new_name = 'test_%s' % uuid.uuid4().hex new_annotation = 'Annotation for %s' % new_name new_tags = ['tag1', 'tag2'] updated_hist = self.hist.update(name=new_name, annotation=new_annotation, tags=new_tags) self.assertEqual(self.hist.id, updated_hist.id) self.assertEqual(self.hist.name, new_name) self.assertEqual(self.hist.annotation, new_annotation) 
self.assertEqual(self.hist.tags, new_tags) @test_util.skip_unless_galaxy() class TestHDAContents(GalaxyObjectsTestBase): def setUp(self): super(TestHDAContents, self).setUp() self.hist = self.gi.histories.create('test_%s' % uuid.uuid4().hex) self.ds = self.hist.paste_content(FOO_DATA) self.ds.wait() def tearDown(self): self.hist.delete(purge=True) def test_dataset_get_stream(self): for idx, c in enumerate(self.ds.get_stream(chunk_size=1)): self.assertEqual(six.b(FOO_DATA[idx]), c) def test_dataset_peek(self): fetched_data = self.ds.peek(chunk_size=4) self.assertEqual(six.b(FOO_DATA[0:4]), fetched_data) def test_dataset_download(self): with tempfile.TemporaryFile() as f: self.ds.download(f) f.seek(0) data = f.read() self.assertEqual(six.b(FOO_DATA), data) def test_dataset_get_contents(self): self.assertEqual(six.b(FOO_DATA), self.ds.get_contents()) def test_dataset_delete(self): self.ds.delete() self.assertTrue(self.ds.deleted) @test_util.skip_unless_galaxy() class TestRunWorkflow(GalaxyObjectsTestBase): def setUp(self): super(TestRunWorkflow, self).setUp() self.lib = self.gi.libraries.create('test_%s' % uuid.uuid4().hex) with open(SAMPLE_FN) as f: self.wf = self.gi.workflows.import_new(f.read()) self.contents = ['one\ntwo\n', '1\n2\n'] self.inputs = [self.lib.upload_data(_) for _ in self.contents] self.hist_name = 'test_%s' % uuid.uuid4().hex def tearDown(self): self.wf.delete() self.lib.delete() def __test(self, existing_hist=False, params=False): if existing_hist: hist = self.gi.histories.create(self.hist_name) else: hist = self.hist_name if params: params = {'Paste1': {'delimiter': 'U'}} sep = '_' # 'U' maps to '_' in the paste tool else: params = None sep = '\t' # default input_map = {'Input 1': self.inputs[0], 'Input 2': self.inputs[1]} sys.stderr.write(os.linesep) outputs, out_hist = self.wf.run( input_map, hist, params=params, wait=True, polling_interval=1) self.assertEqual(len(outputs), 1) out_ds = outputs[0] self.assertIn(out_ds.id, out_hist.dataset_ids) res = out_ds.get_contents() exp_rows = zip(*(_.splitlines() for _ in self.contents)) exp_res = six.b("\n".join(sep.join(t) for t in exp_rows) + "\n") self.assertEqual(res, exp_res) if existing_hist: self.assertEqual(out_hist.id, hist.id) out_hist.delete(purge=True) def test_existing_history(self): self.__test(existing_hist=True) def test_new_history(self): self.__test(existing_hist=False) def test_params(self): self.__test(params=True) @test_util.skip_unless_galaxy() class TestJob(GalaxyObjectsTestBase): def setUp(self): super(TestJob, self).setUp() def test_get(self): job_prevs = self.gi.jobs.get_previews() if len(job_prevs) > 0: job_prev = job_prevs[0] self.assertIsInstance(job_prev, wrappers.JobPreview) job = self.gi.jobs.get(job_prev.id) self.assertIsInstance(job, wrappers.Job) self.assertEqual(job.id, job_prev.id) for job in self.gi.jobs.list(): self.assertIsInstance(job, wrappers.Job) # XXX: don't use TestLoader.loadTests* until support for Python 2.6 is dropped def suite(): loader = unittest.TestLoader() s = unittest.TestSuite() s.addTests([loader.loadTestsFromTestCase(c) for c in ( TestWrapper, TestWorkflow, TestGalaxyInstance, TestLibrary, TestLDContents, TestHistory, TestHDAContents, TestRunWorkflow, )]) return s if __name__ == '__main__': # By default, run all tests. To run specific tests, do the following: # python -m unittest .. 
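# Illustrative sketch (not part of the original test suite): the object-oriented
# API exercised by the tests above, with placeholder URL and API key.
from bioblend.galaxy.objects import galaxy_instance

obj_gi = galaxy_instance.GalaxyInstance("http://localhost:8080", "<api-key>")
hist = obj_gi.histories.create("example history")
hda = hist.paste_content("foo\nbar\n")   # upload pasted text as a new dataset
hda.wait()                               # block until Galaxy finishes processing it
print(hda.get_contents())
hist.delete(purge=True)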
tests = suite() RUNNER = unittest.TextTestRunner(verbosity=2) RUNNER.run(tests) bioblend-0.7.0/tests/TestGalaxyRoles.py000066400000000000000000000007541261571066300201170ustar00rootroot00000000000000""" Tests the functionality of the Blend CloudMan API. These tests require working credentials to supported cloud infrastructure. Use ``nose`` to run these unit tests. """ import GalaxyTestBase import test_util @test_util.skip_unless_galaxy() class TestGalaxyRoles(GalaxyTestBase.GalaxyTestBase): def test_get_roles(self): roles = self.gi.roles.get_roles() for role in roles: self.assertIsNotNone(role['id']) self.assertIsNotNone(role['name']) bioblend-0.7.0/tests/TestGalaxyToolData.py000066400000000000000000000014151261571066300205350ustar00rootroot00000000000000""" Tests the functionality of the Blend CloudMan API. These tests require working credentials to supported cloud infrastructure. Use ``nose`` to run these unit tests. """ import GalaxyTestBase import test_util @test_util.skip_unless_galaxy('release_15.01') class TestGalaxyToolData(GalaxyTestBase.GalaxyTestBase): def test_get_data_tables(self): tables = self.gi.tool_data.get_data_tables() for table in tables: self.assertIsNotNone(table['name']) def test_show_data_table(self): tables = self.gi.tool_data.get_data_tables() table = self.gi.tool_data.show_data_table(tables[0]['name']) self.assertIsNotNone(table['columns']) self.assertIsNotNone(table['fields']) self.assertIsNotNone(table['name']) bioblend-0.7.0/tests/TestGalaxyToolInputs.py000066400000000000000000000024771261571066300211570ustar00rootroot00000000000000from bioblend.galaxy.tools.inputs import ( inputs, conditional, repeat, dataset ) def test_conditional(): # Build up example inputs for random_lines1 as_dict = inputs().set( "num_lines", 5 ).set( "input", dataset("encoded1") ).set( "seed_source", conditional().set( "seed_source_selector", "set_seed" ).set( "seed", "asdf" ) ).to_dict() assert as_dict["num_lines"] == 5 assert as_dict["input"]["src"] == "hda" assert as_dict["input"]["id"] == "encoded1" assert as_dict["seed_source|seed_source_selector"] == "set_seed" assert as_dict["seed_source|seed"] == "asdf" def test_repeat(): # Build up inputs for cat1 as_dict = inputs().set( "input1", dataset("encoded1") ).set( "queries", repeat().instance( inputs().set_dataset_param("input2", "encoded2") ).instance( inputs().set_dataset_param("input2", "encoded3") ) ).to_dict() assert as_dict["input1"]["src"] == "hda" assert as_dict["input1"]["id"] == "encoded1" assert as_dict["queries_0|input2"]["src"] == "hda" assert as_dict["queries_0|input2"]["id"] == "encoded2" assert as_dict["queries_1|input2"]["src"] == "hda" assert as_dict["queries_1|input2"]["id"] == "encoded3" bioblend-0.7.0/tests/TestGalaxyTools.py000066400000000000000000000115321261571066300201270ustar00rootroot00000000000000""" """ import os import six from bioblend.galaxy.tools.inputs import ( inputs, dataset, repeat, conditional, ) import GalaxyTestBase import test_util def get_abspath(path): return os.path.join(os.path.dirname(__file__), path) @test_util.skip_unless_galaxy() class TestGalaxyTools(GalaxyTestBase.GalaxyTestBase): def setUp(self): super(TestGalaxyTools, self).setUp() def test_get_tools(self): # Test requires target Galaxy is configured with at least one tool. tools = self.gi.tools.get_tools() self.assertGreater(len(tools), 0) self.assertTrue(all(map(self._assert_is_tool_rep, tools))) def test_get_tool_panel(self): # Test requires target Galaxy is configured with at least one tool # section. 
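# Illustrative aside (not part of the original test suite): how the tests in this
# file build nested tool parameters with the inputs() helpers. The dataset id is a
# placeholder; the resulting object is passed to gi.tools.run_tool as tool_inputs.
from bioblend.galaxy.tools.inputs import inputs, dataset, conditional

tool_inputs = inputs().set(
    "num_lines", "1"
).set(
    "input", dataset("<dataset-id>")
).set(
    "seed_source", conditional().set(
        "seed_source_selector", "set_seed"
    ).set(
        "seed", "asdf"
    )
)
as_dict = tool_inputs.to_dict()  # flattened form, e.g. as_dict["seed_source|seed"] == "asdf"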
tool_panel = self.gi.tools.get_tool_panel() sections = [s for s in tool_panel if "elems" in s] self.assertGreater(len(sections), 0) self.assertTrue(all(map(self._assert_is_tool_rep, sections[0]["elems"]))) def _assert_is_tool_rep(self, data): self.assertTrue(data["model_class"].endswith("Tool")) # Special tools like SetMetadataTool may have different model_class # than Tool - but they all seem to end in tool. for key in ["name", "id", "version"]: self.assertIn(key, data) return True def test_paste_content(self): history = self.gi.histories.create_history(name="test_paste_data history") paste_text = 'test contents' tool_output = self.gi.tools.paste_content(paste_text, history["id"]) self.assertEqual(len(tool_output["outputs"]), 1) self._wait_and_verify_dataset(history['id'], tool_output['outputs'][0]['id'], six.b(paste_text.rstrip('\r\n') + "\n")) # Same with space_to_tab=True tool_output = self.gi.tools.paste_content(paste_text, history["id"], space_to_tab=True) self.assertEqual(len(tool_output["outputs"]), 1) self._wait_and_verify_dataset(history['id'], tool_output['outputs'][0]['id'], six.b("\t".join(paste_text.rstrip('\r\n').split()) + "\n")) def test_upload_file(self): history = self.gi.histories.create_history(name="test_upload_file history") fn = get_abspath(os.path.join(os.pardir, "setup.py")) file_name = "test1" tool_output = self.gi.tools.upload_file( fn, # First param could be a regular path also of course... history_id=history["id"], file_name=file_name, dbkey="?", file_type="txt", ) self.assertEqual(len(tool_output["outputs"]), 1) output = tool_output['outputs'][0] self.assertEqual(output['name'], file_name) expected_contents = open(fn, "rb").read() self._wait_and_verify_dataset(history["id"], output["id"], expected_contents) @test_util.skip_unless_tool("random_lines1") def test_run_random_lines(self): # Run second test case from randomlines.xml history_id = self.gi.histories.create_history(name="test_run_random_lines history")["id"] with open(get_abspath(os.path.join("data", "1.bed"))) as f: contents = f.read() dataset_id = self._test_dataset(history_id, contents=contents) tool_inputs = inputs().set( "num_lines", "1" ).set( "input", dataset(dataset_id) ).set( "seed_source", conditional().set( "seed_source_selector", "set_seed" ).set( "seed", "asdf" ) ) tool_output = self.gi.tools.run_tool( history_id=history_id, tool_id="random_lines1", tool_inputs=tool_inputs ) self.assertEqual(len(tool_output["outputs"]), 1) # TODO: Wait for results and verify has 1 line and is # chr5 131424298 131424460 CCDS4149.1_cds_0_0_chr5_131424299_f 0 + @test_util.skip_unless_tool("cat1") def test_run_cat1(self): history_id = self.gi.histories.create_history(name="test_run_cat1 history")["id"] dataset1_id = self._test_dataset(history_id, contents="1 2 3") dataset2_id = self._test_dataset(history_id, contents="4 5 6") dataset3_id = self._test_dataset(history_id, contents="7 8 9") tool_inputs = inputs().set( "input1", dataset(dataset1_id) ).set( "queries", repeat().instance( inputs().set("input2", dataset(dataset2_id)) ).instance( inputs().set("input2", dataset(dataset3_id)) ) ) tool_output = self.gi.tools.run_tool( history_id=history_id, tool_id="cat1", tool_inputs=tool_inputs ) self.assertEqual(len(tool_output["outputs"]), 1) # TODO: Wait for results and verify it has 3 lines - 1 2 3, 4 5 6, # and 7 8 9. bioblend-0.7.0/tests/TestGalaxyUsers.py000066400000000000000000000045601261571066300201330ustar00rootroot00000000000000""" Tests the functionality of the Blend CloudMan API. 
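# Illustrative sketch (not part of the original test suite): the user calls
# exercised by the tests below, against a placeholder admin GalaxyInstance.
from bioblend.galaxy import GalaxyInstance

gi = GalaxyInstance("http://localhost:8080", key="<admin-api-key>")
current = gi.users.get_current_user()
user = gi.users.show_user(current['id'])
# Creating users requires admin rights and cannot be undone (see the warnings below).
new_user = gi.users.create_local_user("newuser", "newuser@example.com", "secret")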
These tests require working credentials to supported cloud infrastructure. Use ``nose`` to run these unit tests. """ import GalaxyTestBase import test_util @test_util.skip_unless_galaxy() class TestGalaxyUsers(GalaxyTestBase.GalaxyTestBase): def test_get_users(self): users = self.gi.users.get_users() for user in users: self.assertIsNotNone(user['id']) self.assertIsNotNone(user['email']) def test_show_user(self): current_user = self.gi.users.get_current_user() user = self.gi.users.show_user(current_user['id']) self.assertEqual(user['id'], current_user['id']) self.assertEqual(user['username'], current_user['username']) self.assertEqual(user['email'], current_user['email']) # The 2 following tests randomly fail # self.assertEqual(user['nice_total_disk_usage'], current_user['nice_total_disk_usage']) # self.assertEqual(user['total_disk_usage'], current_user['total_disk_usage']) @test_util.skip_unless_galaxy('release_14.06') def test_create_remote_user(self): # WARNING: only admins can create users! # WARNING: Users cannot be deleted in Galaxy, so execute this test only # on a disposable Galaxy instance! if self.gi.config.get_config()['use_remote_user']: user = self.gi.users.create_remote_user('newuser@example.com') self.assertEqual(user['email'], 'newuser@example.com') @test_util.skip_unless_galaxy('release_14.06') def test_create_local_user(self): # WARNING: only admins can create users! # WARNING: Users cannot be deleted in Galaxy, so execute this test only # on a disposable Galaxy instance! if not self.gi.config.get_config()['use_remote_user']: user = self.gi.users.create_local_user('newuser', 'newuser@example.com', 'secret') self.assertEqual(user['username'], 'newuser') self.assertEqual(user['email'], 'newuser@example.com') def test_get_current_user(self): user = self.gi.users.get_current_user() self.assertIsNotNone(user['id']) self.assertIsNotNone(user['username']) self.assertIsNotNone(user['email']) self.assertIsNotNone(user['nice_total_disk_usage']) self.assertIsNotNone(user['total_disk_usage']) bioblend-0.7.0/tests/TestGalaxyWorkflows.py000066400000000000000000000155621261571066300210330ustar00rootroot00000000000000""" Use ``nose`` to run these unit tests. 
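# Illustrative sketch (not part of the original test suite): the workflow-invocation
# calls the tests below follow, with placeholder URL, API key, path and dataset id.
from bioblend.galaxy import GalaxyInstance

gi = GalaxyInstance("http://localhost:8080", key="<api-key>")
workflow = gi.workflows.import_workflow_from_local_path("/path/to/workflow.ga")
invocation = gi.workflows.invoke_workflow(
    workflow["id"],
    inputs={"0": {"src": "hda", "id": "<dataset-id>"}},  # map step 0 to an existing history dataset
)
state = gi.workflows.show_invocation(workflow["id"], invocation["id"])["state"]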
""" import os import json import tempfile import time import shutil from six.moves import range import GalaxyTestBase import test_util def get_abspath(path): return os.path.join(os.path.dirname(__file__), path) @test_util.skip_unless_galaxy() class TestGalaxyWorkflows(GalaxyTestBase.GalaxyTestBase): @test_util.skip_unless_galaxy('release_15.03') @test_util.skip_unless_tool("cat1") @test_util.skip_unless_tool("cat") def test_workflow_scheduling(self): path = get_abspath(os.path.join('data', 'test_workflow_pause.ga')) workflow = self.gi.workflows.import_workflow_from_local_path(path) workflow_id = workflow["id"] history_id = self.gi.histories.create_history(name="TestWorkflowState")["id"] dataset1_id = self._test_dataset(history_id) invocations = self.gi.workflows.get_invocations(workflow_id) assert len(invocations) == 0 invocation = self.gi.workflows.invoke_workflow( workflow["id"], inputs={"0": {"src": "hda", "id": dataset1_id}}, ) invocation_id = invocation["id"] invocations = self.gi.workflows.get_invocations(workflow_id) assert len(invocations) == 1 assert invocations[0]["id"] == invocation_id def invocation_steps_by_order_index(): invocation = self.gi.workflows.show_invocation(workflow_id, invocation_id) return dict([(s["order_index"], s) for s in invocation["steps"]]) for i in range(20): if 2 in invocation_steps_by_order_index(): break time.sleep(.5) invocation = self.gi.workflows.show_invocation(workflow_id, invocation_id) assert invocation['state'] == "ready" steps = invocation_steps_by_order_index() pause_step = steps[2] assert self.gi.workflows.show_invocation_step(workflow_id, invocation_id, pause_step["id"])["action"] is None self.gi.workflows.run_invocation_step_action(workflow_id, invocation_id, pause_step["id"], action=True) assert self.gi.workflows.show_invocation_step(workflow_id, invocation_id, pause_step["id"])["action"] is True for i in range(20): invocation = self.gi.workflows.show_invocation(workflow_id, invocation_id) if invocation["state"] == "scheduled": break time.sleep(.5) invocation = self.gi.workflows.show_invocation(workflow_id, invocation_id) assert invocation["state"] == "scheduled" @test_util.skip_unless_galaxy('release_15.03') @test_util.skip_unless_tool("cat1") @test_util.skip_unless_tool("cat") def test_cancelling_workflow_scheduling(self): path = get_abspath(os.path.join('data', 'test_workflow_pause.ga')) workflow = self.gi.workflows.import_workflow_from_local_path(path) workflow_id = workflow["id"] history_id = self.gi.histories.create_history(name="TestWorkflowState")["id"] dataset1_id = self._test_dataset(history_id) invocations = self.gi.workflows.get_invocations(workflow_id) assert len(invocations) == 0 invocation = self.gi.workflows.invoke_workflow( workflow["id"], inputs={"0": {"src": "hda", "id": dataset1_id}}, ) invocation_id = invocation["id"] invocations = self.gi.workflows.get_invocations(workflow_id) assert len(invocations) == 1 assert invocations[0]["id"] == invocation_id invocation = self.gi.workflows.show_invocation(workflow_id, invocation_id) assert invocation['state'] in ['new', 'ready'] self.gi.workflows.cancel_invocation(workflow_id, invocation_id) invocation = self.gi.workflows.show_invocation(workflow_id, invocation_id) assert invocation['state'] == 'cancelled' def test_import_workflow_from_local_path(self): with self.assertRaises(Exception): self.gi.workflows.import_workflow_from_local_path(None) path = get_abspath(os.path.join('data', 'paste_columns.ga')) wk = self.gi.workflows.import_workflow_from_local_path(path) 
self.assertIsNotNone(wk['id']) def test_export_workflow_to_local_path(self): export_dir = tempfile.mkdtemp(prefix='bioblend_test_') with self.assertRaises(Exception): self.gi.workflows.export_workflow_to_local_path(None, None, None) path = get_abspath(os.path.join('data', 'paste_columns.ga')) wk = self.gi.workflows.import_workflow_from_local_path(path) self.gi.workflows.export_workflow_to_local_path(wk['id'], export_dir) dir_contents = os.listdir(export_dir) self.assertEqual(len(dir_contents), 1) export_path = os.path.join(export_dir, dir_contents[0]) with open(export_path, 'r') as f: workflow_json = json.load(f) self.assertIsInstance(workflow_json, dict) shutil.rmtree(export_dir) def test_get_workflows(self): wk = self.gi.workflows.get_workflows()[0] self.assertIsNotNone(wk['id']) self.assertIsNotNone(wk['name']) self.assertIsNotNone(wk['url']) def test_show_workflow(self): wk = self.gi.workflows.get_workflows()[0] # TODO: This test is problematic, because it relies on the get_workflow method! # This test is not self-contained. wk = self.gi.workflows.show_workflow(wk['id']) self.assertIsNotNone(wk['id']) self.assertIsNotNone(wk['name']) self.assertIsNotNone(wk['inputs']) self.assertIsNotNone(wk['url']) def test_run_workflow(self): wk = self.gi.workflows.get_workflows()[0] # Try invalid run of workflow with self.assertRaises(Exception): self.gi.workflows.run_workflow(wk['id'], None) # TODO: Hard coded workflow ID. We need to either import, or have a fixed workflow id for testing # workflowID = wk['id'] # sourcehist = '177346507b04acbf' # # # Do a run of a workflow over fastq files from a history # print "Finding workflow" # wk = self.gi.workflows.show_workflow(workflowID) # print wk # input = wk['inputs'].keys()[0] # # print "Finding fastqsanger input files" # sourcecontents = self.gi.histories.show_history(sourcehist, contents=True) # sourcedata = [self.gi.histories.show_dataset(sourcehist, content['id']) for content in sourcecontents] # # fastqdata = [data['id'] for data in sourcedata if data['data_type']=='fastqsanger'] # # fastqID = fastqdata[0] # datamap = dict() # datamap[input] = dict() # datamap[input]['src'] = 'hda' # datamap[input]['id'] = fastqID # data_name = self.gi.histories.show_dataset(sourcehist, fastqID)['name'] # print "Running workflow on "+data_name # self.gi.workflows.run_workflow(workflowID, datamap, history_name="automated_test", import_inputs_to_history=True) bioblend-0.7.0/tests/data/000077500000000000000000000000001261571066300153565ustar00rootroot00000000000000bioblend-0.7.0/tests/data/1.bed000066400000000000000000000101521261571066300161710ustar00rootroot00000000000000chr1 147962192 147962580 CCDS989.1_cds_0_0_chr1_147962193_r 0 - chr1 147984545 147984630 CCDS990.1_cds_0_0_chr1_147984546_f 0 + chr1 148078400 148078582 CCDS993.1_cds_0_0_chr1_148078401_r 0 - chr1 148185136 148185276 CCDS996.1_cds_0_0_chr1_148185137_f 0 + chr10 55251623 55253124 CCDS7248.1_cds_0_0_chr10_55251624_r 0 - chr11 116124407 116124501 CCDS8374.1_cds_0_0_chr11_116124408_r 0 - chr11 116206508 116206563 CCDS8377.1_cds_0_0_chr11_116206509_f 0 + chr11 116211733 116212337 CCDS8378.1_cds_0_0_chr11_116211734_r 0 - chr11 1812377 1812407 CCDS7726.1_cds_0_0_chr11_1812378_f 0 + chr12 38440094 38440321 CCDS8736.1_cds_0_0_chr12_38440095_r 0 - chr13 112381694 112381953 CCDS9526.1_cds_0_0_chr13_112381695_f 0 + chr14 98710240 98712285 CCDS9949.1_cds_0_0_chr14_98710241_r 0 - chr15 41486872 41487060 CCDS10096.1_cds_0_0_chr15_41486873_r 0 - chr15 41673708 41673857 CCDS10097.1_cds_0_0_chr15_41673709_f 0 + 
chr15 41679161 41679250 CCDS10098.1_cds_0_0_chr15_41679162_r 0 -
chr15 41826029 41826196 CCDS10101.1_cds_0_0_chr15_41826030_f 0 +
chr16 142908 143003 CCDS10397.1_cds_0_0_chr16_142909_f 0 +
chr16 179963 180135 CCDS10401.1_cds_0_0_chr16_179964_r 0 -
chr16 244413 244681 CCDS10402.1_cds_0_0_chr16_244414_f 0 +
chr16 259268 259383 CCDS10403.1_cds_0_0_chr16_259269_r 0 -
chr18 23786114 23786321 CCDS11891.1_cds_0_0_chr18_23786115_r 0 -
chr18 59406881 59407046 CCDS11985.1_cds_0_0_chr18_59406882_f 0 +
chr18 59455932 59456337 CCDS11986.1_cds_0_0_chr18_59455933_r 0 -
chr18 59600586 59600754 CCDS11988.1_cds_0_0_chr18_59600587_f 0 +
chr19 59068595 59069564 CCDS12866.1_cds_0_0_chr19_59068596_f 0 +
chr19 59236026 59236146 CCDS12872.1_cds_0_0_chr19_59236027_r 0 -
chr19 59297998 59298008 CCDS12877.1_cds_0_0_chr19_59297999_f 0 +
chr19 59302168 59302288 CCDS12878.1_cds_0_0_chr19_59302169_r 0 -
chr2 118288583 118288668 CCDS2120.1_cds_0_0_chr2_118288584_f 0 +
chr2 118394148 118394202 CCDS2121.1_cds_0_0_chr2_118394149_r 0 -
chr2 220190202 220190242 CCDS2441.1_cds_0_0_chr2_220190203_f 0 +
chr2 220229609 220230869 CCDS2443.1_cds_0_0_chr2_220229610_r 0 -
chr20 33330413 33330423 CCDS13249.1_cds_0_0_chr20_33330414_r 0 -
chr20 33513606 33513792 CCDS13255.1_cds_0_0_chr20_33513607_f 0 +
chr20 33579500 33579527 CCDS13256.1_cds_0_0_chr20_33579501_r 0 -
chr20 33593260 33593348 CCDS13257.1_cds_0_0_chr20_33593261_f 0 +
chr21 32707032 32707192 CCDS13614.1_cds_0_0_chr21_32707033_f 0 +
chr21 32869641 32870022 CCDS13615.1_cds_0_0_chr21_32869642_r 0 -
chr21 33321040 33322012 CCDS13620.1_cds_0_0_chr21_33321041_f 0 +
chr21 33744994 33745040 CCDS13625.1_cds_0_0_chr21_33744995_r 0 -
chr22 30120223 30120265 CCDS13897.1_cds_0_0_chr22_30120224_f 0 +
chr22 30160419 30160661 CCDS13898.1_cds_0_0_chr22_30160420_r 0 -
chr22 30665273 30665360 CCDS13901.1_cds_0_0_chr22_30665274_f 0 +
chr22 30939054 30939266 CCDS13903.1_cds_0_0_chr22_30939055_r 0 -
chr5 131424298 131424460 CCDS4149.1_cds_0_0_chr5_131424299_f 0 +
chr5 131556601 131556672 CCDS4151.1_cds_0_0_chr5_131556602_r 0 -
chr5 131621326 131621419 CCDS4152.1_cds_0_0_chr5_131621327_f 0 +
chr5 131847541 131847666 CCDS4155.1_cds_0_0_chr5_131847542_r 0 -
chr6 108299600 108299744 CCDS5061.1_cds_0_0_chr6_108299601_r 0 -
chr6 108594662 108594687 CCDS5063.1_cds_0_0_chr6_108594663_f 0 +
chr6 108640045 108640151 CCDS5064.1_cds_0_0_chr6_108640046_r 0 -
chr6 108722976 108723115 CCDS5067.1_cds_0_0_chr6_108722977_f 0 +
chr7 113660517 113660685 CCDS5760.1_cds_0_0_chr7_113660518_f 0 +
chr7 116512159 116512389 CCDS5771.1_cds_0_0_chr7_116512160_r 0 -
chr7 116714099 116714152 CCDS5773.1_cds_0_0_chr7_116714100_f 0 +
chr7 116945541 116945787 CCDS5774.1_cds_0_0_chr7_116945542_r 0 -
chr8 118881131 118881317 CCDS6324.1_cds_0_0_chr8_118881132_r 0 -
chr9 128764156 128764189 CCDS6914.1_cds_0_0_chr9_128764157_f 0 +
chr9 128787519 128789136 CCDS6915.1_cds_0_0_chr9_128787520_r 0 -
chr9 128882427 128882523 CCDS6917.1_cds_0_0_chr9_128882428_f 0 +
chr9 128937229 128937445 CCDS6919.1_cds_0_0_chr9_128937230_r 0 -
chrX 122745047 122745924 CCDS14606.1_cds_0_0_chrX_122745048_f 0 +
chrX 152648964 152649196 CCDS14733.1_cds_0_0_chrX_152648965_r 0 -
chrX 152691446 152691471 CCDS14735.1_cds_0_0_chrX_152691447_f 0 +
chrX 152694029 152694263 CCDS14736.1_cds_0_0_chrX_152694030_r 0 -
bioblend-0.7.0/tests/data/paste_columns.ga000066400000000000000000000047411261571066300205500ustar00rootroot00000000000000
{
  "a_galaxy_workflow": "true",
  "annotation": "",
  "format-version": "0.1",
  "name": "paste_columns",
  "steps": {
    "0": {
      "annotation": "",
"id": 0, "input_connections": {}, "inputs": [ { "description": "", "name": "Input 1" } ], "name": "Input dataset", "outputs": [], "position": { "left": 10, "top": 10 }, "tool_errors": null, "tool_id": null, "tool_state": "{\"name\": \"Input 1\"}", "tool_version": null, "type": "data_input", "user_outputs": [] }, "1": { "annotation": "", "id": 1, "input_connections": {}, "inputs": [ { "description": "", "name": "Input 2" } ], "name": "Input dataset", "outputs": [], "position": { "left": 10, "top": 130 }, "tool_errors": null, "tool_id": null, "tool_state": "{\"name\": \"Input 2\"}", "tool_version": null, "type": "data_input", "user_outputs": [] }, "2": { "annotation": "", "id": 2, "input_connections": { "input1": { "id": 0, "output_name": "output" }, "input2": { "id": 1, "output_name": "output" } }, "inputs": [], "name": "Paste", "outputs": [ { "name": "out_file1", "type": "input" } ], "position": { "left": 230, "top": 10 }, "post_job_actions": {}, "tool_errors": null, "tool_id": "Paste1", "tool_state": "{\"input2\": \"null\", \"__page__\": 0, \"input1\": \"null\", \"__rerun_remap_job_id__\": null, \"delimiter\": \"\\\"T\\\"\", \"chromInfo\": \"\\\"/home/simleo/hg/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", "tool_version": "1.0.0", "type": "tool", "user_outputs": [] } } } bioblend-0.7.0/tests/data/paste_columns_collections.ga000066400000000000000000000056031261571066300231450ustar00rootroot00000000000000{ "a_galaxy_workflow": "true", "annotation": "", "format-version": "0.1", "name": "paste_columns_collections", "steps": { "0": { "annotation": "", "id": 0, "input_connections": {}, "inputs": [ { "description": "", "name": "Input Dataset Collection" } ], "label": null, "name": "Input dataset collection", "outputs": [], "position": { "left": 119.5, "top": 200 }, "tool_errors": null, "tool_id": null, "tool_state": "{\"collection_type\": \"list\", \"name\": \"Input Dataset Collection\"}", "tool_version": null, "type": "data_collection_input", "user_outputs": [], "uuid": "88591325-c867-407a-a8df-df01430f2196" }, "1": { "annotation": "", "id": 1, "input_connections": {}, "inputs": [ { "description": "", "name": "Input 2" } ], "label": null, "name": "Input dataset", "outputs": [], "position": { "left": 200, "top": 434 }, "tool_errors": null, "tool_id": null, "tool_state": "{\"name\": \"Input 2\"}", "tool_version": null, "type": "data_input", "user_outputs": [], "uuid": "64008e61-3304-4452-96ce-9564ec55cf9f" }, "2": { "annotation": "", "id": 2, "input_connections": { "input1": { "id": 0, "output_name": "output" }, "input2": { "id": 1, "output_name": "output" } }, "inputs": [], "label": null, "name": "Paste", "outputs": [ { "name": "out_file1", "type": "input" } ], "position": { "left": 420, "top": 314 }, "post_job_actions": {}, "tool_errors": null, "tool_id": "Paste1", "tool_state": "{\"input2\": \"null\", \"__page__\": 0, \"input1\": \"null\", \"__rerun_remap_job_id__\": null, \"delimiter\": \"\\\"T\\\"\", \"chromInfo\": \"\\\"/home/simleo/hg/galaxy-dist/tool-data/shared/ucsc/chrom/?.len\\\"\"}", "tool_version": "1.0.0", "type": "tool", "user_outputs": [], "uuid": "b89ede53-9967-4138-8b1a-59799f8f5cb5" } }, "uuid": "4b38804c-064d-4e84-aa02-ca1e0fe7cf8d" } bioblend-0.7.0/tests/data/test_workflow_pause.ga000066400000000000000000000071131261571066300217770ustar00rootroot00000000000000{ "a_galaxy_workflow": "true", "annotation": "", "format-version": "0.1", "name": "test_workflow_pause", "steps": { "0": { "annotation": "", "id": 0, "input_connections": {}, "inputs": [ { "description": "", 
"name": "Input Dataset" } ], "name": "Input dataset", "outputs": [], "position": { "left": 199.9201512336731, "top": 251.4826512336731 }, "tool_errors": null, "tool_id": null, "tool_state": "{\"name\": \"Input Dataset\"}", "tool_version": null, "type": "data_input", "user_outputs": [] }, "1": { "annotation": "", "id": 1, "input_connections": { "input1": { "id": 0, "output_name": "output" } }, "inputs": [], "name": "Concatenate datasets (for test workflows)", "outputs": [ { "name": "out_file1", "type": "input" } ], "position": { "left": 516.7257237434387, "top": 187.28126573562622 }, "post_job_actions": {}, "tool_errors": null, "tool_id": "cat", "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input1\": \"null\", \"queries\": \"[]\"}", "tool_version": "1.0.0", "type": "tool", "user_outputs": [] }, "2": { "annotation": "", "id": 2, "input_connections": { "input": { "id": 1, "output_name": "out_file1" } }, "inputs": [ { "description": "", "name": "Pause for Dataset Review" } ], "name": "Pause for dataset review", "outputs": [], "position": { "left": 862.715301990509, "top": 197.28126573562622 }, "tool_errors": null, "tool_id": null, "tool_state": "{\"name\": \"Pause for Dataset Review\"}", "tool_version": null, "type": "pause", "user_outputs": [] }, "3": { "annotation": "", "id": 3, "input_connections": { "input1": { "id": 2, "output_name": "output" } }, "inputs": [], "name": "Concatenate datasets (for test workflows)", "outputs": [ { "name": "out_file1", "type": "input" } ], "position": { "left": 1181.9722595214844, "top": 181.52084350585938 }, "post_job_actions": {}, "tool_errors": null, "tool_id": "cat1", "tool_state": "{\"__page__\": 0, \"__rerun_remap_job_id__\": null, \"input1\": \"null\", \"queries\": \"[]\"}", "tool_version": "1.0.0", "type": "tool", "user_outputs": [] } }, "uuid": "9058956e-76b6-4909-bab3-c12b2cc394c7" }bioblend-0.7.0/tests/test_util.py000066400000000000000000000043011261571066300170310ustar00rootroot00000000000000""" General support infrastructure not tied to any particular test. """ import os import unittest if not hasattr(unittest, 'skip'): # Python < 2.7 import unittest2 as unittest NO_CLOUDMAN_MESSAGE = "CloudMan required and no CloudMan AMI configured." NO_GALAXY_MESSAGE = "Externally configured Galaxy required, but not found. Set BIOBLEND_GALAXY_URL and BIOBLEND_GALAXY_API_KEY to run this test." OLD_GALAXY_RELEASE = "Testing on Galaxy %s, but need %s to run this test." MISSING_TOOL_MESSAGE = "Externally configured Galaxy instance requires tool %s to run test." def skip_unless_cloudman(): """ Decorate tests with this to skip the test if CloudMan is not configured. """ test = lambda f: f if 'BIOBLEND_AMI_ID' not in os.environ: test = unittest.skip(NO_CLOUDMAN_MESSAGE) return test def skip_unless_galaxy(min_release=None): """ Decorate tests with this to skip the test if Galaxy is not configured. """ test = lambda f: f for prop in ['BIOBLEND_GALAXY_URL', 'BIOBLEND_GALAXY_API_KEY']: if prop not in os.environ: test = unittest.skip(NO_GALAXY_MESSAGE) break if min_release is not None: galaxy_release = os.environ.get('GALAXY_VERSION', None) if galaxy_release is not None and galaxy_release.startswith('release_') and galaxy_release < min_release: test = unittest.skip(OLD_GALAXY_RELEASE % (galaxy_release, min_release)) return test def skip_unless_tool(tool_id): """ Decorate a Galaxy test method as requiring a specific tool, skip the test case if the tool is unavailable. 
""" def method_wrapper(method): def wrapped_method(has_gi, *args, **kwargs): tools = has_gi.gi.tools.get_tools() # In panels by default, so flatten out sections... tool_ids = [_['id'] for _ in tools] if tool_id not in tool_ids: raise unittest.SkipTest(MISSING_TOOL_MESSAGE % tool_id) return method(has_gi, *args, **kwargs) # Must preserve method name so nose can detect and report tests by # name. wrapped_method.__name__ = method.__name__ return wrapped_method return method_wrapper bioblend-0.7.0/tox.ini000066400000000000000000000007711261571066300146230ustar00rootroot00000000000000# Tox (http://tox.testrun.org/) is a tool for running tests # in multiple virtualenvs. This configuration file will run the # test suite on all supported python versions. To use it, "pip install tox" # and then run "tox" from this directory. [tox] envlist = py26, py27, py33, py34 [testenv] commands = flake8 -v --exclude=.git,.venv . {envpython} setup.py nosetests [] deps = flake8 nose>=1.3.1 py26: unittest2>=0.5.1 passenv = BIOBLEND_GALAXY_URL BIOBLEND_GALAXY_API_KEY GALAXY_VERSION