pax_global_header00006660000000000000000000000064130360146050014510gustar00rootroot0000000000000052 comment=edc15158f0ff2fb227ffccdbd2588accb3560510 pyflow-1.1.14/000077500000000000000000000000001303601460500131145ustar00rootroot00000000000000pyflow-1.1.14/.appveyor.yml000066400000000000000000000004471303601460500155670ustar00rootroot00000000000000 install: # Check the python version: - "python.exe --version" build: false # Not a C# project test_script: # Build the compiled extension and run the project tests - "python.exe scratch/test/test_pyflow.py" notifications: - provider: Email to: - csaunders@illumina.com pyflow-1.1.14/.gitattributes000066400000000000000000000000371303601460500160070ustar00rootroot00000000000000pyflow/README.txt export-subst pyflow-1.1.14/.gitignore000066400000000000000000000000251303601460500151010ustar00rootroot00000000000000*.pyc *~ pyflow.data pyflow-1.1.14/.travis.yml000066400000000000000000000022011303601460500152200ustar00rootroot00000000000000language: python # Note PYVER drives hack to use python 2.4, this is # actually pretty ugly on travis -- process is: # 1) install # python2.4 from deadsnakes ppa # 2) shove 2.4 in /usr/bin/python # 3) set PATH back to /usr/bin # # This removes the system python link which is probably not # smart, but the test works so leaving it for now. # matrix: include: - os: linux sudo: required python: "2.7" - os: linux sudo: required python: "2.7" env: PYVER="2.4" before_install: - date -u - uname -a - lsb_release -a - if [ "$PYVER" == "2.4" ]; then sudo add-apt-repository -y ppa:fkrull/deadsnakes && sudo apt-get update -qq; fi install: - if [ "$PYVER" == "2.4" ]; then sudo apt-get install python2.4 -y && python2.4 -V; fi - if [ "$PYVER" == "2.4" ]; then sudo rm -f /usr/bin/python && sudo ln -s /usr/bin/python2.4 /usr/bin/python; fi - if [ "$PYVER" == "2.4" ]; then export PATH=/usr/bin:$PATH; fi - python -V script: - cd scratch/test && bash ./test_release_tarball.bash -nosge branches: only: - master notifications: email: recipients: - csaunders@illumina.com pyflow-1.1.14/README.md000066400000000000000000000032601303601460500143740ustar00rootroot00000000000000pyFlow - a lightweight parallel task engine =========================================== [![Build Status] [tcistatus]] [tcihome] [![Build status] [acistatus]] [acihome] pyFlow is a tool to manage tasks in the context of a task dependency graph. It has some similarities to make. pyFlow is not a program – it is a python module, and workflows are defined using pyFlow by writing regular python code with the pyFlow API For more information, please see the [pyFlow website] [site]. [site]:http://illumina.github.io/pyflow/ [tcistatus]:https://travis-ci.org/Illumina/pyflow.svg?branch=master [tcihome]:https://travis-ci.org/Illumina/pyflow [acistatus]:https://ci.appveyor.com/api/projects/status/fkovw5ife59ae48t/branch/master?svg=true [acihome]:https://ci.appveyor.com/project/ctsa/pyflow/branch/master License ------- pyFlow source code is provided under the [BSD 2-Clause License] (pyflow/COPYRIGHT.txt). Releases -------- Recent release tarballs can be found on the github release list here: https://github.com/Illumina/pyflow/releases To create a release tarball corresponding to any other version, run: git clone git://github.com/Illumina/pyflow.git pyflow cd pyflow git checkout ${VERSION} ./scratch/make_release_tarball.bash # tarball is "./pyflow-${VERSION}.tar.gz" Note this README is at the root of the pyflow development repository and is not part of the python source release. 
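Quick example
-------------

As a minimal illustration of the API mentioned above, a workflow is written by
subclassing WorkflowRunner and overriding its workflow() method. The sketch
below is adapted from the helloWorld demo included in this repository
(pyflow/demo/helloWorld/); the sys.path line is only needed if the pyflow
module is not installed:

    import sys
    sys.path.append("/path/to/pyflow/src")

    from pyflow import WorkflowRunner

    class HelloWorkflow(WorkflowRunner) :
        def workflow(self) :
            self.addTask("easy_task1", "echo 'Hello World!' > helloWorld.out.txt")

    wflow = HelloWorkflow()
    sys.exit(wflow.run())
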
Contents -------- For the development repository (this directory), the sub-directories are: pyflow/ Contains all pyflow code intended for distribution, plus demo code and documentation. scratch/ This directory contains support scripts for tests/cleanup/release tarballing.. etc. pyflow-1.1.14/pyflow/000077500000000000000000000000001303601460500144345ustar00rootroot00000000000000pyflow-1.1.14/pyflow/COPYRIGHT.txt000066400000000000000000000025111303601460500165440ustar00rootroot00000000000000pyFlow - a lightweight parallel task engine Copyright (c) 2012-2015 Illumina, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pyflow-1.1.14/pyflow/README.md000066400000000000000000000156601303601460500157230ustar00rootroot00000000000000 pyFlow - a lightweight parallel task engine =========================================== Chris Saunders (csaunders@illumina.com) Version: ${VERSION} SUMMARY: -------- pyFlow manages running tasks in the context of a task dependency graph. It has some similarities to make. pyFlow is not a program -- it is a python module, and workflows are defined using pyFlow by writing regular python code with the pyFlow API. FEATURES: --------- - Define workflows as python code - Run workflows on localhost or sge - Continue workflows which have partially completed - Task resource management: Specify number of threads and memory required for each task. - Recursive workflow specification: take any existing pyFlow object and use it as a task in another pyFlow. - Dynamic workflow specification: define a wait on task specification rather than just tasks, so that tasks can be defined based on the results of upstream tasks (note: recursive workflows are an even better way to do this) - Detects and reports all failed tasks with consistent workflow-level logging. - Task-level logging: All task stderr is logged and decorated, eg. [time][host][workflow_run][taskid] - Task timing: Task wrapper function provides wall time for every task - Task priority: Tasks which are simultanously eligible to run can be assigned relative priorities to be run or queued first. 
- Task mutex sets: define sets of tasks which access an exclusive resource
- Email notification on job completion/error/exception
- Provide ongoing task summary report at specified intervals
- Output task graph in dot format

LICENSE:
--------

pyFlow source code is provided under the [BSD 2-Clause License](COPYRIGHT.txt).

INSTALL:
--------

pyFlow can be installed and used on python versions in the 2.4 to 2.7 series.

The pyflow module can be installed using standard python distutils
installation. To do so unpack the tarball and use the setup script as follows:

```
tar -xzf pyflow-X.Y.Z.tar.gz
cd pyflow-X.Y.Z
python setup.py build install
```

If installation is not convenient, you can simply add the pyflow src/
directory to the system search path. For instance:

usepyflow.py:
```
import sys
sys.path.append("/path/to/pyflow/src")

from pyflow import WorkflowRunner
```

WRITING WORKFLOWS:
------------------

Briefly, pyFlow workflows are written by creating a new class which inherits
from pyflow.WorkflowRunner. This class then defines its workflow by
overloading the WorkflowRunner.workflow() method. Workflows are run by
instantiating a workflow class and calling the WorkflowRunner.run() method.

A very simple demonstration of the minimal workflow setup and run described
above is available in the directory:

`${pyflowDir}/demo/helloWorld/`

Several other demonstration workflows are available:

`${pyflowDir}/demo/simpleDemo` – a basic feature sandbox

`${pyflowDir}/demo/subWorkflow` – shows how recursive workflow invocation works

The developer documentation for the pyflow API can be generated by running
`${pyflowDir}/doc/getApiDoc.py` or `python ${pyflowDir}/src/pydoc.py`

An advanced proof-of-concept demonstration of bclToBam conversion is also
available in `${pyflowDir}/demo/bclToBwaBam`

USING WORKFLOWS:
----------------

When running a pyFlow workflow, all logs and state information are written
into a single "pyflow.data" directory. The root of this directory is
specified in the workflow.run() call.

### Logging:

pyFlow creates a primary workflow-level log, and 2 log files to capture all
task stdout and stderr, respectively.

Workflow-level log information is copied to both stderr and
pyflow.data/logs/pyflow_log.txt. All workflow log messages are prefixed with
"[time] [hostname] [workflow_run] [component] ". Where:

- 'time' is UTC in ISO 8601 format.
- 'workflow_run' is an id that's weakly unique for each run of the workflow.
  It is composed of (1) the run() PID and (2) the number of times run() has
  been called on the workflow by the same process. These two values are
  joined by an underscore.
- 'component' - the name of the pyflow thread, the primary threads are
  'WorkflowManager' which runs the workflow() method, and 'TaskManager'
  which polls the task graph and launches jobs.

In the task logs, only the stderr stream is decorated. The prefix in this
case is: "[time] [hostname] [workflow_run] [taskname] ". The 'taskname' is
usually the label provided for each task in its addTask() call. All tasks are
launched by a task wrapping function, and any messages from the taskWrapper
(as opposed to the task command itself) will use an extended taskname:
"pyflowTaskWrapper:${tasklabel}". One example where the task wrapper writes
to the log is to report the total runtime for its task.

All logging is append only -- pyFlow does not overwrite logs even over
multiple runs. The workflow_run id can be used to select out the information
from a specific run if restarting/continuing a run multiple times.
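For a concrete sense of how task labels connect workflow code to the
decorated logs described above, here is a minimal sketch (not one of the
included demos; it assumes the pyflow module is importable). The "makeData"
and "compressData" labels given to addTask() are the 'taskname' values that
would appear in the task log prefixes:

```
import sys

from pyflow import WorkflowRunner

class ExampleWorkflow(WorkflowRunner) :

    def workflow(self) :
        # stderr from this task is decorated with the label "makeData"
        self.addTask("makeData", "echo 'some data' > data.txt")
        # "compressData" waits on "makeData" via the dependencies argument
        self.addTask("compressData", "gzip -f data.txt", dependencies="makeData")

wflow = ExampleWorkflow()
retval = wflow.run(mode="local", nCores=2)
sys.exit(retval)
```
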
### State:

pyFlow continues jobs by marking their status in a file, *not* by looking for
the presence of file targets. This is a major difference from make and must
be kept in mind when restarting interrupted workflows.

The runstate of each task is in
pyflow.data/state/pyflow_tasks_runstate.txt, the description of each task is
in pyflow.data/state/pyflow_tasks_info.txt. At the beginning of each run any
existing task files are backed up in pyflow.data/state/backup.

### Other:

#### Email notification:

When running a workflow with one or more email addresses given in the mailTo
argument, pyflow will attempt to send a notification describing the outcome
of the run under any circumstance short of host hardware failure. The email
should result from 1 of 3 outcomes: (1) successful run completion (2) the
first unrecoverable task failure, with a description of the error (3) an
unhandled software exception. Mail comes by default from
"pyflow-bot@YOURDOMAIN" (configurable). Note that (1) you may have to change
the email address from the automatically detected domain to receive emails,
and (2) you may need to check your junk-mail filter to receive
notifications. It is best to configure one of the demo scripts to email you
on a new machine to test out any issues before starting a production run.

#### Graph output:

pyFlow provides a script which can be used to produce a graph of the current
task dependencies, where each node is colored by the task status. The graph
generation script is automatically created for each run in the pyflow state
directory here:

pyflow.data/state/make_pyflow_task_graph.py

This script can be run without arguments to produce the current task graph
in dot format based on the data files in the pyflow.data/state/ directory.

#### Site configuration:

The file ${pyflowDir}/src/pyflowConfig.py contains any pyflow variables or
functions which would be likely to need configuration at a new site. This
currently includes:

- from: email address from pyflow
- default memory per task
- default memory available per thread in localhost mode
- qsub arguments given in response to a resource request.
pyflow-1.1.14/pyflow/demo/000077500000000000000000000000001303601460500153605ustar00rootroot00000000000000pyflow-1.1.14/pyflow/demo/README.txt000066400000000000000000000022521303601460500170570ustar00rootroot00000000000000
This directory contains small demonstration workflows for various pyflow
features. If you are new to pyflow, a recommended order to become familiar
with its features is:

1. helloWorld

This demonstrates a minimum single-task pyflow workflow.

2. simpleDemo

This workflow demonstrates a number of commonly used pyflow features by
setting up a number of tasks and showing different ways to specify task
resource requirements and dependencies.

3. subWorkflow

This workflow demonstrates the more advanced workflow recursion feature.

4. runOptionsDemo

This workflow demonstrates one possible way the pyflow API runtime options
could be translated to user command-line arguments if building a
command-line utility (a minimal sketch of the run() call itself appears at
the end of this file).

5. bclToBwaBam

This workflow demonstrates a much larger 'real-world' script which performs
bcl to fastq conversion from multiple flowcells, alignment with BWA and
translation of the BWA output to a single sorted and indexed BAM file. It
has numerous dependencies required to actually run -- its primary purpose
here is to provide an example of how a larger scale pyflow workflow might
look.

Most of the remaining workflows demonstrate/test the use of specific pyflow
features.
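The demos above all share the same basic invocation pattern: build a
WorkflowRunner subclass and hand runtime settings to its run() method. The
fragment below is only an illustrative sketch (argument values are
placeholders, not defaults), using run() options that appear in the demo
scripts in this directory:

    retval = wflow.run(mode="sge",                    # or "local"
                       nCores="unlimited",            # or an integer limit
                       dataDirRoot="/path/to/output",
                       mailTo=["you@example.com"],
                       isContinue="Auto",
                       isDryRun=False)
    sys.exit(retval)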
pyflow-1.1.14/pyflow/demo/bclToBwaBam/000077500000000000000000000000001303601460500174755ustar00rootroot00000000000000pyflow-1.1.14/pyflow/demo/bclToBwaBam/README.txt000066400000000000000000000022611303601460500211740ustar00rootroot00000000000000 This demo shows the use of pyflow on a production-scale problem. The "configBclToBwaBam.py" script here will take one or more bcl basecalls directories, run them through CASAVA 1.8 bcl conversion and align/sort/merge/markdup each sample into a single BAM file. A list of sample names may be given to restrict the analysis post bcl conversion. Help for the configuration script is available by typing "./configBclToBwaBam.py -h". To run, the script requires at minimum a bcl basecalls directory and a BWA index genome fasta file. This directory contains a configuration file "configBclToBwaBam.py.ini" which contains paths for bwa, samtools, Picard and CASAVA. You may need to change these to reflect the installed location at your site before running If on the sd-isilon, the file "example_configuration.bash" will call "configBclToBwaBam.py" with a pointer to a subsampled bcl directory to quickly demonstate the use of this script on real data. Note that once all arguments are provided and the configuration script completes, a run script will be generated in the output directory which can be used to actually execute the workflow, allowing for local/sge and total job limit specification. pyflow-1.1.14/pyflow/demo/bclToBwaBam/bwaworkflow.py000066400000000000000000000566211303601460500224250ustar00rootroot00000000000000# # pyFlow - a lightweight parallel task engine # # Copyright (c) 2012-2015 Illumina, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY # WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # """ bwaworkflow -- a pyflow demonstration module This is a quick-and-dirty BCL to BWA BAM workflow to demonstrate how pyflow could be used on a production-scale problem. 
__author__ = "Christopher Saunders" """ import os.path import sys # In production, pyflow can either be installed, or we can distribute # workflow to external users with pyflow in the same directory/fixed # relative directory or a configured directory macro-ed in by cmake, # etc # # For now we add the module path by hand: # scriptDir = os.path.abspath(os.path.dirname(__file__)) sys.path.append(scriptDir + "/../../src") from pyflow import WorkflowRunner # # utility methods: # def ensureDir(d): """ make directory if it doesn't already exist, raise exception is something else is in the way: """ if os.path.exists(d): if not os.path.isdir(d) : raise Exception("Can't create directory: %s" % (d)) else : os.makedirs(d) def skipJoin(sep, a, b) : if a == "" : return b elif b == "" : return a return a + sep + b def preJoin(a, b) : return skipJoin('_', a, b) # # All of these "flow" functions take a set of task dependencies as # input and report a set of tasks on output, and thus are designed to # be plugged together to create workflows which are initiated in # the WorkflowRunner.workflow() method. # # Note that this style is not a design constraint of pyflow, it was # just one natural way to write the bwa workflow, and demonstrates an # extensible model wherein various flow functions could be stored in # external modules and combined as required. # # Note that these flow functions are written to lookup dependencies # from the WorkflowRunner class, so they are really class # methods. Although they could also lookup data from the derived BWA # class, they don't -- this allows them to be reused by other # WorkflowRunner classes. # def casava18BclToFastqFlow(self, taskPrefix="", dependencies=set()) : """ CASAVA 1.8 bcl to fastq conversion This assumes the bclBasecallsDir is generated in a CASAVA 1.8 compatible format, and uses CASAVA 1.8 to convert to fastq This demonstrates pyflow's makefile handling option, where you specify a makefile directory instead of a regular command, and pyflow runs make/qmake according to the run mode. params: casavaDir bclBasecallsDir flowcellFastqDir bclTilePattern bclToFastqMaxCores """ # configure bcl2fastq makefile: configBclToFastqCmd = "perl %s/bin/configureBclToFastq.pl" % (self.params.casavaDir) configBclToFastqCmd += " --input-dir=%s" % self.params.bclBasecallsDir configBclToFastqCmd += " --output-dir=%s" % self.params.flowcellFastqDir configBclToFastqCmd += " --force" # always a good idea for CASAVA configBclToFastqCmd += " --ignore-missing-bcl" configBclToFastqCmd += " --ignore-missing-stats" configBclToFastqCmd += " --ignore-missing-control" if self.params.bclTilePattern != None : configBclToFastqCmd += " --tiles=%s" % (self.params.bclTilePattern) # run configuration: configLabel = self.addTask(preJoin(taskPrefix, "configBclToFastq"), configBclToFastqCmd, isForceLocal=True, dependencies=dependencies) # for the bcl to fastq step, we use another workflow manager, so # we just run it as one huge task and handle the mode ourselves: nCores = self.getNCores() mode = self.getRunMode() maxCores = self.params.bclToFastqMaxCores if (nCores == "unlimited") or (nCores > maxCores) : nCores = maxCores # run the fastq conversion: bclToFastqLabel = self.addTask(preJoin(taskPrefix, "bclToFastq"), self.params.flowcellFastqDir, nCores=nCores, dependencies=configLabel, isCommandMakePath=True) return set([bclToFastqLabel]) class FastqPairToBwaBamFlow(WorkflowRunner) : """ Given a read1 and read2 pair of fastq files, create an aligned and sorted bamFile. Don't delete input fastaq files. 
""" def __init__(self, params, suggestedAlignThreadCount=2) : """ suggestedAlignThreadCount -- Number of threads to use in bwa aln step. The workflow will lower this if it exceeds the total number of cores available in the run, or if it exceeds alnMaxCores params: fastq1File fastq2File bamFile alnMaxCores bwaBin genomeFasta samtoolsBin samtoolsSortMemPerCore isKeepFastq """ self.params = params self.suggestedAlignThreadCount = suggestedAlignThreadCount def workflow(self) : bamDir = os.path.dirname(self.params.bamFile) ensureDir(bamDir) (bamPrefix, bamExt) = os.path.splitext(self.params.bamFile) # must end in ".bam" for samtools if bamExt != ".bam" : raise Exception("bamFile argument must end in '.bam'. bamFile is: %s" % (bamFile)) if bamPrefix == "" : raise Exception("bamFile argument must have a prefix before the '.bam' extension.") # assuming many fastq pairs are running, good total throughput given cluster nodes with 2G of ram each # should be achieved by given the align processes 2 threads each: # grab total cores to make sure we don't exceed it: totalCores = self.getNCores() # # setup aln step: # # set alnCores alnCores = int(self.suggestedAlignThreadCount) if (totalCores != "unlimited") and (alnCores > totalCores) : alnCores = int(totalCores) if (alnCores > self.params.alnMaxCores) : alnCores = int(self.params.alnMaxCores) bwaBaseCmd = "%s aln -t %i %s" % (self.params.bwaBin, alnCores, self.params.genomeFasta) peDependencies = set() def getReadLabel(i) : return "Read%iBwaAlign" % (i) def getReadSaiFile(i) : return "%s.read%i.sai" % (self.params.bamFile, i) def getReadFastqFile(i) : return (self.params.fastq1File, self.params.fastq2File)[i - 1] for read in (1, 2) : readAlnCmd = "%s %s >| %s" % (bwaBaseCmd, getReadFastqFile(read), getReadSaiFile(read)) peDependencies.add(self.addTask(getReadLabel(read), readAlnCmd, nCores=alnCores)) # # setup sampe step: # # with all the pipes, the sampe step is probably a 2 core? this lets sort use more mem too: peCores = 2 if (totalCores != "unlimited") and (peCores > totalCores) : peCores = int(totalCores) peCmd = "%s sampe %s %s %s %s %s" % (self.params.bwaBin, self.params.genomeFasta, getReadSaiFile(1), getReadSaiFile(2), getReadFastqFile(1), getReadFastqFile(2)) peCmd += " | %s view -uS -" % (self.params.samtoolsBin) # For a real pipeline, we'd probably prefer Picard sort, but I don't want to add another # dependency to the trial workflow: # peCmd += " | %s sort -m %i - %s" % (self.params.samtoolsBin, self.params.samtoolsSortMemPerCore, # *peCores, need to leave memory for bwa... bamPrefix) peTaskLabel = self.addTask("BwaSamPESort", peCmd, nCores=peCores, dependencies=peDependencies) # delete sai files: rmCmd = "rm -f" for read in (1, 2) : rmCmd += " %s" % (getReadSaiFile(read)) self.addTask("RmSai", rmCmd, dependencies=peTaskLabel, isForceLocal=True) # optionally delete input fastqs: if not self.params.isKeepFastq : fastqRmCmd = "rm -f" for read in (1, 2) : fastqRmCmd += " %s" % (getReadFastqFile(read)) self.addTask("RmFastq", fastqRmCmd, dependencies=peTaskLabel, isForceLocal=True) class FileDigger(object) : """ Digs into a well-defined directory structure with prefixed folder names to extract all files associated with combinations of directory names. This is written primarily to go through the CASAVA 1.8 output structure. 
#casava 1.8 fastq example: fqDigger=FileDigger('.fastq.gz',['Project_','Sample_']) """ def __init__(self, targetExtension, prefixList) : self.targetExtension = targetExtension self.prefixList = prefixList def getNextFile(self, dir, depth=0, ans=tuple()) : """ generator of a tuple: (flowcell,project,sample,bamfile) given a multi-flowcell directory """ if depth < len(self.prefixList) : for d in os.listdir(dir) : nextDir = os.path.join(dir, d) if not os.path.isdir(nextDir) : continue if not d.startswith(self.prefixList[depth]) : continue value = d[len(self.prefixList[depth]):] for val in self.getNextFile(nextDir, depth + 1, ans + tuple([value])) : yield val else: for f in os.listdir(dir) : file = os.path.join(dir, f) if not os.path.isfile(file) : continue if not f.endswith(self.targetExtension) : continue yield ans + tuple([file]) def flowcellDirFastqToBwaBamFlow(self, taskPrefix="", dependencies=set()) : """ Takes as input 'flowcellFastqDir' pointing to the CASAVA 1.8 flowcell project/sample fastq directory structure. For each project/sample, the fastqs are aligned using BWA, sorted and merged into a single BAM file. The bam output is placed in a parallel project/sample directory structure below 'flowcellBamDir' params: samtoolsBin flowcellFastqDir flowcellBamDir calls: FastqPairToBwaBamFlow supplies: bamFile fastq1File fastq2File """ # # 1. separate fastqs into matching pairs: # fqs = {} fqDigger = FileDigger(".fastq.gz", ["Project_", "Sample_"]) for (project, sample, fqPath) in fqDigger.getNextFile(self.params.flowcellFastqDir) : if (self.params.sampleNameList != None) and \ (len(self.params.sampleNameList) != 0) and \ (sample not in self.params.sampleNameList) : continue fqFile = os.path.basename(fqPath) w = (fqFile.split(".")[0]).split("_") if len(w) != 5 : raise Exception("Unexpected fastq filename format: '%s'" % (fqPath)) (sample2, index, lane, read, num) = w if sample != sample2 : raise Exception("Fastq name sample disagrees with directory sample: '%s;" % (fqPath)) key = (project, sample, index, lane, num) if key not in fqs : fqs[key] = [None, None] readNo = int(read[1]) if fqs[key][readNo - 1] != None : raise Exceptoin("Unresolvable repeated fastq file pattern in sample: '%s'" % (fqPath)) fqs[key][readNo - 1] = fqPath ensureDir(self.params.flowcellBamDir) # # 2. run all fastq pairs through BWA: # nextWait = set() for key in fqs.keys() : (project, sample, index, lane, num) = key sampleBamDir = os.path.join(self.params.flowcellBamDir, "Project_" + project, "Sample_" + sample) ensureDir(sampleBamDir) keytag = "_".join(key) self.params.bamFile = os.path.join(sampleBamDir, keytag + ".bam") self.params.fastq1File = fqs[key][0] self.params.fastq2File = fqs[key][1] nextWait.add(self.addWorkflowTask(preJoin(taskPrefix, keytag), FastqPairToBwaBamFlow(self.params), dependencies=dependencies)) return nextWait class FlowcellDirFastqToBwaBamFlow(WorkflowRunner) : """ Takes as input 'flowcellFastqDir' pointing to the CASAVA 1.8 flowcell project/sample fastq directory structure. For each project/sample, the fastqs are aligned using BWA, sorted and merged into a single BAM file. 
The bam output is placed in a parallel project/sample directory structure below 'flowcellBamDir' params: flowcellFastqDir flowcellBamDir """ def __init__(self, params) : self.params = params def workflow(self) : flowcellDirFastqToBwaBamFlow(self) # use a really boring flowcell label everywhere right now: def getFlowcellLabel(self, i) : return "Flowcell_FC%i" % (i) def casava18BclToBamListFlow(self, taskPrefix="", dependencies=set()) : """ Runs bcl conversion and alignment on multiple flowcells for a subset of samples. Writes BAM files to parallel fastq Project/Sample directory structure. Does not merge individual BAMs. Deletes fastqs on alignment when option is set to do so. params: allFlowcellDir bclBasecallsDirList bclTilePatternList calls: casava18BclToFastqFlow supplies: bclBasecallsDir flowcellFastqDir FlowcellDirFastqToBwaBamFlow supplies: flowcellFastqDir flowcellBamDir """ ensureDir(self.params.allFlowcellDir) # first bcl->fastq->bwa bam for requested samples in all flowcells: nextWait = set() for i, self.params.bclBasecallsDir in enumerate(self.params.bclBasecallsDirList) : flowcellLabel = getFlowcellLabel(self, i) flowcellDir = os.path.join(self.params.allFlowcellDir, flowcellLabel) ensureDir(flowcellDir) self.params.flowcellFastqDir = os.path.join(flowcellDir, "fastq") self.params.flowcellBamDir = os.path.join(flowcellDir, "bam") if self.params.bclTilePatternList == None : self.params.bclTilePattern = None else : self.params.bclTilePattern = self.params.bclTilePatternList[i] fastqFinal = casava18BclToFastqFlow(self, taskPrefix=flowcellLabel) label = preJoin(taskPrefix, "_".join((flowcellLabel, "FastqToBwaBam"))) nextWait.add(self.addWorkflowTask(label, FlowcellDirFastqToBwaBamFlow(self.params), dependencies=fastqFinal)) return nextWait def mergeBamListFlow(self, taskPrefix="", dependencies=set()) : """ Take a list of sorted bam files from the same sample, merge them together, and delete input bams, final output to mergeBamName params: mergeBamList mergeBamName samtoolsBin """ for bamFile in self.params.mergeBamList : if not os.path.isfile(bamFile) : raise Exception("Can't find bam file: '%s'" % (bamFile)) mergeTasks = set() mergeLabel = preJoin(taskPrefix, "merge") if len(self.params.mergeBamList) > 1 : mergeCmd = "%s merge -f %s %s" % (self.params.samtoolsBin, self.params.mergeBamName, " ".join(self.params.mergeBamList)) mergeTasks.add(self.addTask(mergeLabel, mergeCmd, dependencies=dependencies, isTaskStable=False)) rmCmd = "rm -f" for bamFile in self.params.mergeBamList : rmCmd += " %s" % (bamFile) self.addTask(preJoin(taskPrefix, "rmBam"), rmCmd, dependencies=mergeLabel, isForceLocal=True) elif len(self.params.mergeBamList) == 1 : mvCmd = "mv %s %s" % (self.params.mergeBamList[0], self.params.mergeBamName) # *must* have same taskLabel as merge command for continuation # to work correctly because of the potential for partial # deletion of the input bam files: mergeTasks.add(self.addTask(mergeLabel, mvCmd, dependencies=dependencies, isForceLocal=True, isTaskStable=False)) return mergeTasks def flowcellBamListMergeFlow(self, taskPrefix="", dependencies=set()) : """ given a root flowcell directory and list of samples, merge sample bams across flowcells and dedup. ?? Will we be in a situation where sample has more than one library -- this affects the debup order & logic ?? 
params: allFlowcellDir mergedDir sampleNameList picardDir calls: mergeBamListFlow supplies: mergeBamList mergeBamName """ # # 1) get a list of bams associated with each project/sample combination: # # TODO: what if there's an NFS delay updating all the bams while # we're reading them out here? make this process more robust -- we # should know how many BAM's we're expecting, in a way that's # robust to interuption/restart # bams = {} bamDigger = FileDigger(".bam", ["Flowcell_", "bam", "Project_", "Sample_"]) for (flowcell, nothing, project, sample, bamFile) in bamDigger.getNextFile(self.params.allFlowcellDir) : if (self.params.sampleNameList != None) and \ (len(self.params.sampleNameList) != 0) and \ (sample not in self.params.sampleNameList) : continue key = (project, sample) if key not in bams : bams[key] = [] bams[key].append(bamFile) mergedBamExt = ".merged.bam" markDupBamExt = ".markdup.bam" # # 2) merge and delete smaller bams: # mergedBams = {} mergedBamDir = os.path.join(self.params.mergedDir, "bam") sampleTasks = {} if len(bams) : # skip this section if smaller bams have already been deleted ensureDir(mergedBamDir) for key in bams.keys() : (project, sample) = key mergedSampleDir = os.path.join(mergedBamDir, "Project_" + project, "Sample_" + sample) ensureDir(mergedSampleDir) self.params.mergeBamList = bams[key] self.params.mergeBamName = os.path.join(mergedSampleDir, sample + mergedBamExt) mergedBams[key] = self.params.mergeBamName outTaskPrefix = preJoin(taskPrefix, "_".join(key)) sampleTasks[key] = mergeBamListFlow(self, outTaskPrefix, dependencies) if not os.path.isdir(mergedBamDir) : return # # 3) mark dup: # # mergedBams contains all bams from the current run, we also add any from a # previous interupted run: mergedBamDigger = FileDigger(mergedBamExt, ["Project_", "Sample_"]) for (project, sample, bamFile) in mergedBamDigger.getNextFile(mergedBamDir) : key = (project, sample) if key in mergedBams : assert (mergedBams[key] == bamFile) else : mergedBams[key] = bamFile nextWait = set() totalCores = self.getNCores() for sampleKey in mergedBams.keys() : markDupDep = set() if sampleKey in sampleTasks : markDupDep = sampleTasks[sampleKey] fullName = "_".join(sampleKey) markDupBamFile = mergedBams[sampleKey][:-(len(mergedBamExt))] + markDupBamExt markDupMetricsFile = markDupBamFile[:-(len(".bam"))] + ".metrics.txt" markDupTmpDir = markDupBamFile + ".tmpdir" # for now, solve the memory problem with lots of threads: nCores = 4 if (totalCores != "unlimited") and (totalCores < nCores) : nCores = totalCores gigs = 2 * nCores javaOpts = "-Xmx%ig" % (gigs) markDupFiles = "INPUT=%s OUTPUT=%s METRICS_FILE=%s" % (mergedBams[sampleKey], markDupBamFile, markDupMetricsFile) markDupOpts = "REMOVE_DUPLICATES=false ASSUME_SORTED=true VALIDATION_STRINGENCY=SILENT CREATE_INDEX=true TMP_DIR=%s" % (markDupTmpDir) markDupJar = os.path.join(self.params.picardDir, "MarkDuplicates.jar") markDupCmd = "java %s -jar %s %s %s" % (javaOpts, markDupJar, markDupFiles, markDupOpts) markDupTask = self.addTask(preJoin(taskPrefix, fullName + "_dupmark"), markDupCmd, dependencies=markDupDep) # link index filename to something samtools can understand: # markDupPicardBaiFile = markDupBamFile[:-(len(".bam"))] + ".bai" markDupSamtoolsBaiFile = markDupBamFile + ".bai" indexLinkCmd = "ln %s %s" % (markDupPicardBaiFile, markDupSamtoolsBaiFile) indexLinkTask = self.addTask(preJoin(taskPrefix, fullName + "_indexLink"), indexLinkCmd, dependencies=markDupTask, isForceLocal=True) nextWait.add(indexLinkTask) # delete TmpDir: # 
rmMarkDupTmpCmd = "rm -rf %s" % (markDupTmpDir) self.addTask(preJoin(taskPrefix, fullName + "_rmMarkDupTmp"), rmMarkDupTmpCmd, dependencies=markDupTask, isForceLocal=True) # now remove the original file: # rmCmd = "rm -f %s" % (mergedBams[sampleKey]) self.addTask(preJoin(taskPrefix, fullName + "_rmMerge"), rmCmd, dependencies=markDupTask, isForceLocal=True) return nextWait class FlowcellBamListMergeFlow(WorkflowRunner) : def __init__(self, params) : self.params = params def workflow(self) : flowcellBamListMergeFlow(self) class BWAWorkflow(WorkflowRunner) : """ pyflow BCL to BAM BWA workflow """ def __init__(self, params) : self.params = params # make sure working directory is setup: self.params.outputDir = os.path.abspath(self.params.outputDir) ensureDir(self.params.outputDir) self.params.allFlowcellDir = os.path.join(self.params.outputDir, "flowcell_results") self.params.mergedDir = os.path.join(self.params.outputDir, "merged_results") # Verify/manipulate various input options: # # this is mostly repeated in the conflig script now... get this minimized with auto verification: # self.params.bclBasecallsDirList = map(os.path.abspath, self.params.bclBasecallsDirList) for dir in self.params.bclBasecallsDirList : if not os.path.isdir(dir) : raise Exception("Input BCL basecalls directory not found: '%s'" % (dir)) self.params.samtoolsSortMemPerCore = int(self.params.samtoolsSortMemPerCore) minSortMem = 1000000 if self.params.samtoolsSortMemPerCore < minSortMem : raise Exception("samtoolsSortMemPerCore must be an integer greater than minSortMem") if self.params.genomeFasta == None: raise Exception("No bwa genome file defined.") else: if not os.path.isfile(self.params.genomeFasta) : raise Exception("Can't find bwa genome file '%s'" % (self.params.genomeFasta)) def workflow(self) : alignTasks = casava18BclToBamListFlow(self) mergeTask = self.addWorkflowTask("mergeBams", FlowcellBamListMergeFlow(self.params), dependencies=alignTasks) pyflow-1.1.14/pyflow/demo/bclToBwaBam/configBclToBwaBam.py000077500000000000000000000364411303601460500233250ustar00rootroot00000000000000#!/usr/bin/env python # # pyFlow - a lightweight parallel task engine # # Copyright (c) 2012-2015 Illumina, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY # WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
# """ This demonstrates a run of a prototype BCL to BWA BAM workflow created as a production-scale proof of concept for pyflow. The bwa workflow is written into the BWAWorkflow object. See bwaworkflow.py for implementation details of this class. Finally, make sure configuration settings in BWAWorkflowConfig are appropriate before running. """ import os, sys scriptDir = os.path.abspath(os.path.dirname(__file__)) scriptName = os.path.basename(__file__) runScript1 = """#!/usr/bin/env python # BWAWorkflow run script auto-generated by command: %s import os.path, sys scriptDir=os.path.abspath(os.path.dirname(__file__)) sys.path.append('%s') from bwaworkflow import BWAWorkflow class WorkflowOptions(object) : """ % (" ".join(sys.argv), scriptDir) runScript2 = """ def get_run_options() : from optparse import OptionParser import textwrap epilog=\"""Note this script can be re-run to continue the workflow run in case of interuption. Also note that dryRun option has limited utility when task definition depends on upstream task results, in which case the dry run will not cover the full 'live' run task set)\""" # no epilog in py 2.4! hack-in the feature instead: class MyOptionParser(OptionParser) : def __init__(self, *args, **kwargs): self.myepilog = None try: self.myepilog = kwargs.pop('epilog') except KeyError: pass OptionParser.__init__(self,*args, **kwargs) def print_help(self,*args,**kwargs) : OptionParser.print_help(self,*args, **kwargs) if self.myepilog != None : sys.stdout.write("%s\\n" % (textwrap.fill(self.myepilog))) parser = MyOptionParser(epilog=epilog) parser.add_option("-m", "--mode", type="string",dest="mode", help="select run mode (local|sge)") parser.add_option("-j", "--jobs", type="string",dest="jobs", help="number of jobs (default: 1 for local mode, 'unlimited' for sge mode)") parser.add_option("-e","--mailTo", type="string",dest="mailTo",action="append", help="send email notification of job completion status to this address (may be provided multiple times for more than one email address)") parser.add_option("-d","--dryRun", dest="isDryRUn",action="store_true", help="dryRun workflow code without actually running command-tasks") (options,args) = parser.parse_args() if len(args) : parser.print_help() sys.exit(2) if options.mode == None : parser.print_help() sys.exit(2) elif options.mode not in ["local","sge"] : parser.error("Invalid mode. Available modes are: local, sge") if options.jobs == None : if options.mode == "sge" : options.jobs == "unlimited" else : options.jobs == "1" elif (options.jobs != "unlimited") and (int(options.jobs) <= 0) : parser.error("Jobs must be 'unlimited' or an integer greater than 1") return options runOptions=get_run_options() flowOptions=WorkflowOptions() flowOptions.outputDir=scriptDir wflow = BWAWorkflow(flowOptions) retval=wflow.run(mode=runOptions.mode, nCores=runOptions.jobs, dataDirRoot=scriptDir, mailTo=runOptions.mailTo, isContinue="Auto", isForceContinue=True, isDryRun=runOptions.isDryRUn) sys.exit(retval) """ def checkArg(x, label, checkfunc) : if x != None: x = os.path.abspath(x) if not checkfunc(x) : raise Exception("Can't find %s: '%s'" % (label, x)) return x def checkDirArg(dir, label) : return checkArg(dir, label, os.path.isdir) def checkFileArg(file, label) : return checkArg(file, label, os.path.isfile) def get_option_parser(defaults, configFileName, isAllHelp=False) : from optparse import OptionGroup, OptionParser, SUPPRESS_HELP import textwrap description = """This script configures a bcl to BWA alignmed BAM workflow. 
Given a bcl basecalls directory the workflow will create fastq's using CASAVA's bcl to fastq converter, then align each fastq using bwa, and finally consolidate the output into a single BAM file for for each Project/Sample combination. The configuration process will produce a workflow run script, which can be used to execute the workflow on a single node or through sge with a specific job limit. """ epilog = """Default parameters will always be read from the file '%s' if it exists. This file is searched for in the current working directory first -- if it is not found then the directory containing this script is searched as well. The current set of default parameters may be written to this file using the --writeConfig switch, which takes all current defaults and arguments, writes these to the configuration file and exits without setting up a workflow run script as usual. """ % (configFileName) # no epilog in py 2.4! hack-in the feature instead: class MyOptionParser(OptionParser) : def __init__(self, *args, **kwargs): self.myepilog = None try: self.myepilog = kwargs.pop('epilog') except KeyError: pass OptionParser.__init__(self, *args, **kwargs) def print_help(self, *args, **kwargs) : OptionParser.print_help(self, *args, **kwargs) if self.myepilog != None : sys.stdout.write("%s\n" % (textwrap.fill(self.myepilog))) parser = MyOptionParser(description=description, epilog=epilog) parser.set_defaults(**defaults) parser.add_option("--allHelp", action="store_true", dest="isAllHelp", help="show all extended/hidden options") group = OptionGroup(parser, "Workflow options") group.add_option("--bclBasecallsDir", type="string", dest="bclBasecallsDirList", metavar="DIR", action="append", help="BCL basecalls directory. Call this option multiple times to specify multiple bcl directories, samples with the same name will be combined over all flowcells after alignmnet. [required] (default: %default)") group.add_option("--bclTilePattern", type="string", dest="bclTilePatternList", metavar="PATTERN", action="append", help="BCL converter tiles expression used to select a subsset of tiles (eg. 's_1') call this option either once for each basecalls dir or not at all (default: %default)") group.add_option("--genomeFasta", type="string", dest="genomeFasta", help="Genome fasta file which includes BWA index in the same directory [required] (default: %default)") group.add_option("--outputDir", type="string", dest="outputDir", help="BCL basecalls directory [required] (default: %default)") group.add_option("--sampleName", type="string", dest="sampleNameList", metavar="sampleName", action="append", help="Restrict analysis to given sampleName. This option can be provided more than once for multiple sample names. 
If no names are provided all samples are analyzed (default: %default)") parser.add_option_group(group) secgroup = OptionGroup(parser, "Extended options", "These options are not likely to be reset after initial configuration in a new site, they will not be printed here if a default exists from the configuration file or otherwise, unless --allHelp is specified") # used to access isAnyHelp from the maybeHelp function class Hack : isAnyHelp = False def maybeDefHelp(key, msg) : if isAllHelp or (key not in defaults) : Hack.isAnyHelp = True return msg return SUPPRESS_HELP secgroup.add_option("--casavaDir", type="string", dest="casavaDir", help=maybeDefHelp("casavaDir", "casava 1.8.2+ installation directory [required] (default: %default)")) secgroup.add_option("--bwaBin", type="string", dest="bwaBin", help=maybeDefHelp("bwaBin", "bwa binary [required] (default: %default)")) secgroup.add_option("--samtoolsBin", type="string", dest="samtoolsBin", help=maybeDefHelp("samtoolsBin", "samtools binary [required] (default: %default)")) secgroup.add_option("--picardDir", type="string", dest="picardDir", help=maybeDefHelp("picardDir", "casava 1.8.2+ installation directory [required] (default: %default)")) if not Hack.isAnyHelp: secgroup.description = "hidden" parser.add_option_group(secgroup) def maybeHelp(key, msg) : if isAllHelp : return msg return SUPPRESS_HELP configgroup = OptionGroup(parser, "Config options") configgroup.add_option("--writeConfig", action="store_true", dest="isWriteConfig", help=maybeHelp("writeConfig", "Write new default configuration file based on current defaults and agruments. Defaults written to: '%s'" % (configFileName))) if not isAllHelp : configgroup.description = "hidden" parser.add_option_group(configgroup) return parser def get_run_options() : from ConfigParser import SafeConfigParser configFileName = scriptName + ".ini" if not os.path.isfile(configFileName) : configPath = os.path.join(scriptDir, configFileName) else : configPath = os.path.join('.', configFileName) configSectionName = scriptName config = SafeConfigParser() config.optionxform = str config.read(configPath) configOptions = {} if config.has_section(configSectionName) : for (k, v) in config.items(configSectionName) : if v == "" : continue configOptions[k] = v defaults = { 'outputDir' : './results', 'bclToFastqMaxCores' : 12, 'samtoolsSortMemPerCore' : 1000000000, # samtools sort uses about 2x what you tell it to... 'alnMaxCores' : 8, # presumably bwa aln will become increasingly inefficient per core, so we don't want to let this go forever... 
'isKeepFastq' : True, # important to keep these during testing, but not for production } defaults.update(configOptions) parser = get_option_parser(defaults, configFileName) (options, args) = parser.parse_args() if options.isAllHelp : parser = get_option_parser(defaults, configFileName, True) parser.print_help() sys.exit(2) if len(args) : # or (len(sys.argv) == 1): parser.print_help() sys.exit(2) # sanitize arguments before writing defaults, check for missing arguments after: # def checkListRepeats(list, itemLabel) : if list == None : return if len(set(list)) != len(list) : parser.error("Repeated %s entries" % (itemLabel)) if options.bclBasecallsDirList != None : for i, bclDir in enumerate(options.bclBasecallsDirList) : options.bclBasecallsDirList[i] = checkDirArg(bclDir, "bcl basecalls directory") # tmp for testing: # checkListRepeats(options.bclBasecallsDirList,"bcl basecalls directory") if (options.bclTilePatternList != None) and \ (len(options.bclBasecallsDirList) != len(options.bclTilePatternList)) : parser.error("Unexpected number of bclTilPattern entries") checkListRepeats(options.sampleNameList, "sample name") options.casavaDir = checkDirArg(options.casavaDir, "casava directory") options.genomeFasta = checkFileArg(options.genomeFasta, "genome fasta file") options.bwaBin = checkFileArg(options.bwaBin, "bwa binary") options.samtoolsBin = checkFileArg(options.samtoolsBin, "samtools binary") if options.isWriteConfig == True : if not config.has_section(configSectionName) : config.add_section(configSectionName) for k, v in vars(options).iteritems() : if k == "isWriteConfig" : continue if v == None : v = "" config.set(configSectionName, k, str(v)) configfp = open(configFileName, "w") config.write(configfp) configfp.close() sys.exit(0) def noArgOrError(msg) : if len(sys.argv) <= 1 : parser.print_help() sys.exit(2) else : parser.error(msg) def assertOption(arg, label) : if arg == None: noArgOrError("No %s specified" % (label)) def assertList(list, itemLabel) : if (list == None) or (len(list) == 0) : noArgOrError("List containing %s (s) is empty or missing" % (itemLabel)) else : for item in list : assertOption(item, itemLabel) assertList(options.bclBasecallsDirList, "bcl basecalls directory") assertList(options.sampleNameList, "sample name") assertOption(options.genomeFasta, "genome fasta file") assertOption(options.outputDir, "output directory") assertOption(options.casavaDir, "casava directory") assertOption(options.picardDir, "picard directory") assertOption(options.bwaBin, "bwa binary") assertOption(options.samtoolsBin, "samtools binary") return options from bwaworkflow import BWAWorkflow, ensureDir def main() : options = get_run_options() # instantiate workflow object to trigger parameter validation only # wflow = BWAWorkflow(options) # generate runscript: # scriptFile = os.path.join(options.outputDir, "runWorkflow.py") ensureDir(options.outputDir) sfp = open(scriptFile, "w") sfp.write(runScript1) # there must be a nicer way to reverse eval() an object -- maybe human readable pickle is what we want here? for k, v in vars(options).iteritems() : if isinstance(v, basestring) : sfp.write(" %s = '%s'\n" % (k, v)) else: sfp.write(" %s = %s\n" % (k, v)) sfp.write("\n") sfp.write(runScript2) sfp.close() os.chmod(scriptFile, 0755) notefp = sys.stdout notefp.write(""" Successfully created workflow run script. 
To execute the workflow, run the following script and set appropriate options: %s """ % (scriptFile)) if __name__ == "__main__" : main() pyflow-1.1.14/pyflow/demo/bclToBwaBam/configBclToBwaBam.py.ini000066400000000000000000000003651303601460500240740ustar00rootroot00000000000000[configBclToBwaBam.py] bwaBin = /home/csaunders/opt/x86_64-linux/bwa/bwa samtoolsBin = /illumina/thirdparty/samtools/samtools-0.1.14/samtools casavaDir = /illumina/software/casava/CASAVA-1.8.2 picardDir = /home/csaunders/opt/noarch/picard-tools pyflow-1.1.14/pyflow/demo/bclToBwaBam/example_configuration.bash000077500000000000000000000014041303601460500247200ustar00rootroot00000000000000#!/usr/bin/env bash set -o xtrace # # executes the configure script for a small bcl directory -- note that # the tile mask is required for this bcl directory because it has been # extensively subsampled for testing purposes # ./configBclToBwaBam.py \ --bclBasecallsDir /home/csaunders/proj/bwa_workflow_hashout/create_small_lane/small_lane/111119_SN192_0307_BD0FNCACXX_Genentech/Data/Intensities/BaseCalls \ --bclTilePattern "s_8_[02468][0-9][0-9]1" \ --bclBasecallsDir /home/csaunders/proj/bwa_workflow_hashout/create_small_lane/small_lane/111119_SN192_0307_BD0FNCACXX_Genentech/Data/Intensities/BaseCalls \ --bclTilePattern "s_8_[13579][0-9][0-9]1" \ --genomeFasta /illumina/scratch/iGenomes/Homo_sapiens/UCSC/hg19/Sequence/BWAIndex/genome.fa \ --sampleName "lane8" pyflow-1.1.14/pyflow/demo/cwdDemo/000077500000000000000000000000001303601460500167425ustar00rootroot00000000000000pyflow-1.1.14/pyflow/demo/cwdDemo/cwdDemo.py000077500000000000000000000050211303601460500206770ustar00rootroot00000000000000#!/usr/bin/env python # # pyFlow - a lightweight parallel task engine # # Copyright (c) 2012-2015 Illumina, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY # WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
# # # demonstrate/test addTask() cwd option # import os.path import sys # add module path by hand # scriptDir=os.path.abspath(os.path.dirname(__file__)) sys.path.append(scriptDir+"/../../src") from pyflow import WorkflowRunner # all pyflow workflows are written into classes derived from # pyflow.WorkflowRunner: # class CwdWorkflow(WorkflowRunner) : # a workflow is defined by overloading the # WorkflowRunner.workflow() method: # def workflow(self) : # get cwd and its parent for the addTask cwd test # cwd=os.getcwd() parentdir=os.path.abspath(os.path.join(cwd,"..")) self.flowLog("testing pyflow cwd: '%s' parentdir: '%s'" % (cwd,parentdir)) # task will fail unless pwd == parentdir: # # test both absolute and relative cwd arguments: # self.addTask("testAbsCwd","[ $(pwd) == '%s' ]; exit $?" % (parentdir),cwd=parentdir) self.addTask("testRelCwd","[ $(pwd) == '%s' ]; exit $?" % (parentdir),cwd="..") # Instantiate the workflow # wflow = CwdWorkflow() # Run the worklow: # retval=wflow.run(mode="local") sys.exit(retval) pyflow-1.1.14/pyflow/demo/envDemo/000077500000000000000000000000001303601460500167555ustar00rootroot00000000000000pyflow-1.1.14/pyflow/demo/envDemo/envDemo.py000077500000000000000000000057011303601460500207320ustar00rootroot00000000000000#!/usr/bin/env python # # pyFlow - a lightweight parallel task engine # # Copyright (c) 2012-2015 Illumina, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY # WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # # # demonstrate/test addTask() env option # import os.path import sys # add module path by hand # scriptDir = os.path.abspath(os.path.dirname(__file__)) sys.path.append(scriptDir + "/../../src") from pyflow import WorkflowRunner # all pyflow workflows are written into classes derived from # pyflow.WorkflowRunner: # class EnvWorkflow(WorkflowRunner) : # a workflow is defined by overloading the # WorkflowRunner.workflow() method: # def workflow(self) : # run a task with the parent env: # home = os.environ["HOME"] self.addTask("testDefEnv", "[ $HOME == '%s' ]; exit $?" % (home)) # create a minimal test environment # new_path = "/bin" min_env = { "PATH" : new_path } self.addTask("testMinEnv", "[ $PATH == '%s' ]; exit $?" 
% (new_path), env=min_env) # augment parent env with additional settings: # augmented_env = os.environ.copy() augmented_env["FOO"] = "BAZ" self.addTask("testAugmentedEnv", "[ $FOO == 'BAZ' ]; exit $?", env=augmented_env) # test funny characters that have shown to cause trouble on some sge installations funky_env = {} funky_env["PATH"] = "/bin" funky_env["_"] = "| %s %F \n" # in this case we just want the job to run at all: self.addTask("testFunkyEnv", "echo 'foo'; exit $?", env=funky_env) assert("FOO" not in os.environ) # Instantiate the workflow # wflow = EnvWorkflow() # Run the worklow: # retval = wflow.run(mode="local") sys.exit(retval) pyflow-1.1.14/pyflow/demo/helloWorld/000077500000000000000000000000001303601460500174735ustar00rootroot00000000000000pyflow-1.1.14/pyflow/demo/helloWorld/README.txt000066400000000000000000000002661303601460500211750ustar00rootroot00000000000000The following demo shows a very simple pyFlow composed of only a single task -- a command which echos a simple message. You can run this workflow by typing "python ./helloWorld.py" pyflow-1.1.14/pyflow/demo/helloWorld/helloWorld.py000077500000000000000000000045611303601460500221710ustar00rootroot00000000000000#!/usr/bin/env python # # pyFlow - a lightweight parallel task engine # # Copyright (c) 2012-2015 Illumina, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY # WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # # # This demo shows possibly the simplist possible pyflow we can create -- # a single 'hello world' task. After experimenting with this file # please see the 'simpleDemo' for coverage of a few more pyflow features # import os.path import sys # add module path # scriptDir=os.path.abspath(os.path.dirname(__file__)) sys.path.append(os.path.abspath(os.path.join(scriptDir,os.pardir,os.pardir,"src"))) from pyflow import WorkflowRunner # all pyflow workflows are written into classes derived from pyflow.WorkflowRunner: # class HelloWorkflow(WorkflowRunner) : # a workflow is defined by overloading the WorkflowRunner.workflow() method: # def workflow(self) : # # The output for this task will be written to the file helloWorld.out.txt # self.addTask("easy_task1", "echo 'Hello World!' > helloWorld.out.txt") # Instantiate the workflow # wflow = HelloWorkflow() # Run the worklow: # retval = wflow.run() # done! 
sys.exit(retval) pyflow-1.1.14/pyflow/demo/makeDemo/000077500000000000000000000000001303601460500171025ustar00rootroot00000000000000pyflow-1.1.14/pyflow/demo/makeDemo/.hidden000066400000000000000000000000611303601460500203330ustar00rootroot00000000000000 .PHONY: A B A: B @echo "Made it!" B: sleep 5 pyflow-1.1.14/pyflow/demo/makeDemo/makeDemo.py000077500000000000000000000055571303601460500212150ustar00rootroot00000000000000#!/usr/bin/env python # # pyFlow - a lightweight parallel task engine # # Copyright (c) 2012-2015 Illumina, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY # WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # import os.path import sys # add module path by hand # scriptDir = os.path.abspath(os.path.dirname(__file__)) sys.path.append(scriptDir + "/../../src") from pyflow import WorkflowRunner # all pyflow workflows are written into classes derived from # pyflow.WorkflowRunner: # class MakeWorkflow(WorkflowRunner) : # a workflow is defined by overloading the # WorkflowRunner.workflow() method: # def workflow(self) : # This command 'configures' a makefile # self.addTask("task1", "cd %s; cp .hidden Makefile" % scriptDir) # Sometimes you get to deal with make. The task below # demonstates a make command which starts when the above task # completes. Make tasks are specified as directories which # contain a makefile. This task points to the direcotry of # this demo script, which contains has a Makefile at the # completion of task1. # pyflow will switch the task command between make and qmake # depending on run type. # self.addTask("make_task", scriptDir, isCommandMakePath=True, nCores=2, dependencies="task1") # This command 'unconfigures' the makefile # self.addTask("task2", "rm -f %s/Makefile" % scriptDir, dependencies="make_task") # Instantiate the workflow # # parameters are passed into the workflow via its constructor: # wflow = MakeWorkflow() # Run the worklow: # retval = wflow.run(mode="local", nCores=8) sys.exit(retval) pyflow-1.1.14/pyflow/demo/memoryDemo/000077500000000000000000000000001303601460500174755ustar00rootroot00000000000000pyflow-1.1.14/pyflow/demo/memoryDemo/memoryDemo.py000077500000000000000000000051271303601460500221740ustar00rootroot00000000000000#!/usr/bin/env python # # pyFlow - a lightweight parallel task engine # # Copyright (c) 2012-2015 Illumina, Inc. 
# All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY # WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # # # This is a very simple demo/test of pyFlow's new (@ v0.4) memory # resource feature. # import os.path import sys # add module path by hand # sys.path.append(os.path.abspath(os.path.dirname(__file__)) + "/../../src") from pyflow import WorkflowRunner # all pyflow workflows are written into classes derived from pyflow.WorkflowRunner: # class MemTestWorkflow(WorkflowRunner) : # a workflow is defined by overloading the WorkflowRunner.workflow() method: # def workflow(self) : # Each task has a default memory request of 2048 megabytes # but this is site-configurable in pyflowConfig.py, so we # specify it for every task here # # This works correctly if task 4 is the only task run in # parallel with one of the other 3 tasks. # self.addTask("task1", "echo 'Hello World!'", memMb=2048) self.addTask("task2", "echo 'Hello World!'", memMb=2048) self.addTask("task3", "echo 'Hello World!'", memMb=2048) self.addTask("task4", "echo 'Hello World!'", memMb=1) # Instantiate the workflow # wflow = MemTestWorkflow() # Run the worklow: # retval = wflow.run(nCores=8, memMb=2049) # done! sys.exit(retval) pyflow-1.1.14/pyflow/demo/mutexDemo/000077500000000000000000000000001303601460500173275ustar00rootroot00000000000000pyflow-1.1.14/pyflow/demo/mutexDemo/mutexDemo.py000077500000000000000000000050621303601460500216560ustar00rootroot00000000000000#!/usr/bin/env python # # pyFlow - a lightweight parallel task engine # # Copyright (c) 2012-2015 Illumina, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY # WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # import os.path import sys # add module path by hand # scriptDir = os.path.abspath(os.path.dirname(__file__)) sys.path.append(scriptDir + "/../../src") from pyflow import WorkflowRunner # # very simple task scripts called by the demo: # testJobDir = os.path.join(scriptDir, "testtasks") sleepjob = os.path.join(testJobDir, "sleeper.bash") # sleeps # all pyflow workflows are written into classes derived from # pyflow.WorkflowRunner: # class MutexWorkflow(WorkflowRunner) : # a workflow is defined by overloading the # WorkflowRunner.workflow() method: # def workflow(self) : # create an array of mutex restricted tasks which can only run # once at a time: for i in range(8) : self.addTask("mutex_task_" + str(i), sleepjob + " 1", mutex="test") # and add an array of 'normal' tasks for comparison: for i in range(16) : self.addTask("normal_task_" + str(i), sleepjob + " 1") def main() : # Instantiate the workflow wflow = MutexWorkflow() # Run the worklow: retval = wflow.run(mode="local", nCores=6) sys.exit(retval) if __name__ == "__main__" : main() pyflow-1.1.14/pyflow/demo/mutexDemo/testtasks/000077500000000000000000000000001303601460500213545ustar00rootroot00000000000000pyflow-1.1.14/pyflow/demo/mutexDemo/testtasks/sleeper.bash000077500000000000000000000002711303601460500236550ustar00rootroot00000000000000#!/usr/bin/env bash if [ $# != 1 ]; then echo "usage $0 arg" exit 1 fi arg=$1 pid=$$ echo pid: $pid arg: $arg starting sleep sleep $arg echo pid: $pid arg: $arg ending sleep pyflow-1.1.14/pyflow/demo/retryDemo/000077500000000000000000000000001303601460500173325ustar00rootroot00000000000000pyflow-1.1.14/pyflow/demo/retryDemo/retryDemo.py000077500000000000000000000055731303601460500216730ustar00rootroot00000000000000#!/usr/bin/env python # # pyFlow - a lightweight parallel task engine # # Copyright (c) 2012-2015 Illumina, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY # WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # # # This is a very simple demo/test of pyFlow's new (@ v0.4) memory # resource feature. # import os.path import sys # add module path by hand # sys.path.append(os.path.abspath(os.path.dirname(__file__)) + "/../../src") from pyflow import WorkflowRunner # all pyflow workflows are written into classes derived from pyflow.WorkflowRunner: # class RetryWorkflow(WorkflowRunner) : # a workflow is defined by overloading the WorkflowRunner.workflow() method: # def workflow(self) : # this task behaves correctly it retries the job 4 times before failing, no automated way # to confirm success right now. # self.flowLog("****** NOTE: This demo is supposed to fail ******") self.addTask("retry_task_success", "exit 0", retryMax=8, retryWait=2, retryWindow=0, retryMode="all") self.addTask("retry_task_fail", "exit 1", retryMax=3, retryWait=2, retryWindow=0, retryMode="all") # Instantiate the workflow # wflow = RetryWorkflow() # Run the worklow: # retval = wflow.run() if retval == 0 : raise Exception("Example workflow is expected to fail, but did not.") else : sys.stderr.write("INFO: Demo workflow failed as expected.\n\n") # Run the workflow again to demonstrate that global settings are overridden by task retry settings: # retval = wflow.run(retryMax=0) if retval == 0 : raise Exception("Example workflow is expected to fail, but did not.") else : sys.stderr.write("INFO: Demo workflow failed as expected.\n\n") pyflow-1.1.14/pyflow/demo/runOptionsDemo/000077500000000000000000000000001303601460500203455ustar00rootroot00000000000000pyflow-1.1.14/pyflow/demo/runOptionsDemo/getDemoRunOptions.py000066400000000000000000000116561303601460500243550ustar00rootroot00000000000000#!/usr/bin/env python # # pyFlow - a lightweight parallel task engine # # Copyright (c) 2012-2015 Illumina, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY # WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # import os.path import sys pyflowDir = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../src")) sys.path.append(pyflowDir) from optparse import OptionParser, SUPPRESS_HELP from pyflow import WorkflowRunner from pyflow import isLocalSmtp localDefaultCores = WorkflowRunner.runModeDefaultCores('local') sgeDefaultCores = WorkflowRunner.runModeDefaultCores('sge') def getDemoRunOptions() : """ This routine is shared by a demo programs to demostrate how to pass pyflow's runtime options on to command-line options. It is not intended to be a demo program itself. """ parser = OptionParser() parser.add_option("-m", "--mode", type="string", dest="mode", help="Select run mode {local,sge} (required)") parser.add_option("-q", "--queue", type="string", dest="queue", help="Specify sge queue name. Argument ignored if mode is not sge") parser.add_option("-j", "--jobs", type="string", dest="jobs", help="Number of jobs, must be an integer or 'unlimited' (default: %s for local mode, %s for sge mode)" % (localDefaultCores, sgeDefaultCores)) parser.add_option("-g", "--memGb", type="string", dest="memGb", help="Gigabytes of memory available to run workflow -- only meaningful in local mode, must be an integer or 'unlimited' (default: 2*jobs for local mode, 'unlimited' for sge mode)") parser.add_option("-r", "--resume", dest="isResume", action="store_true", default=False, help="Resume a workflow from the point of interuption. This flag has no effect on a new workflow run.") isEmail = isLocalSmtp() emailHelp=SUPPRESS_HELP if isEmail: emailHelp="Send email notification of job completion status to this address (may be provided multiple times for more than one email address)" parser.add_option("-e", "--mailTo", type="string", dest="mailTo", action="append", help=emailHelp) (options, args) = parser.parse_args() if not isEmail : options.mailTo = None if len(args) : parser.print_help() sys.exit(2) if options.mode is None : parser.print_help() sys.stderr.write("\n\nERROR: must specify run mode\n\n") sys.exit(2) elif options.mode not in ["local", "sge"] : parser.error("Invalid mode. 
Available modes are: local, sge") if options.jobs is None : if options.mode == "sge" : options.jobs = sgeDefaultCores else : options.jobs = localDefaultCores if options.jobs != "unlimited" : options.jobs = int(options.jobs) if options.jobs <= 0 : parser.error("Jobs must be 'unlimited' or an integer greater than 1") # note that the user sees gigs, but we set megs if options.memGb is None : if options.mode == "sge" : options.memMb = "unlimited" else : if options.jobs == "unlimited" : options.memMb = "unlimited" else : options.memMb = 2 * 1024 * options.jobs elif options.memGb != "unlimited" : options.memGb = int(options.memGb) if options.memGb <= 0 : parser.error("memGb must be 'unlimited' or an integer greater than 1") options.memMb = 1024 * options.memGb else : options.memMb = options.memGb options.schedulerArgList = [] if options.queue is not None : options.schedulerArgList = ["-q", options.queue] return options if __name__ == "__main__" : help(getDemoRunOptions) pyflow-1.1.14/pyflow/demo/runOptionsDemo/runOptionsDemo.py000077500000000000000000000070031303601460500237070ustar00rootroot00000000000000#!/usr/bin/env python # # pyFlow - a lightweight parallel task engine # # Copyright (c) 2012-2015 Illumina, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY # WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # import os.path import sys # add module paths # filePath = os.path.dirname(__file__) pyflowPath = os.path.abspath(os.path.join(filePath, "../../src")) sys.path.append(pyflowPath) from pyflow import WorkflowRunner from getDemoRunOptions import getDemoRunOptions # # very simple task scripts called by the demo: # testJobDir = os.path.join(filePath, "testtasks") sleepjob = os.path.join(testJobDir, "sleeper.bash") # sleeps yelljob = os.path.join(testJobDir, "yeller.bash") # generates some i/o runjob = os.path.join(testJobDir, "runner.bash") # runs at 100% cpu # all pyflow workflows are written into classes derived from # pyflow.WorkflowRunner: # class TestWorkflow(WorkflowRunner) : # a workflow is defined by overloading the # WorkflowRunner.workflow() method: # def workflow(self) : # A simple command task with no dependencies, labeled 'task1'. # cmd = "%s 1" % (yelljob) self.addTask("task1", cmd) # Another task which runs the same command, this time the # command is provided as an argument list. 
An argument list # can be useful when a command has many arguments or # complicated quoting issues: # cmd = [yelljob, "1"] self.addTask("task2", cmd) # This task will always run on the local machine, no matter # what the run mode is. The force local option is useful for # non-cpu intensive jobs which are taking care of minor # workflow overhead (moving/touching files, etc) # self.addTask("task3a", sleepjob + " 10", isForceLocal=True) # get runtime options # runOptions = getDemoRunOptions() # Instantiate the workflow # wflow = TestWorkflow() # Run the worklow with runtime options specified on the command-line: # retval = wflow.run(mode=runOptions.mode, nCores=runOptions.jobs, memMb=runOptions.memMb, mailTo=runOptions.mailTo, isContinue=(runOptions.isResume and "Auto" or False), isForceContinue=True, schedulerArgList=runOptions.schedulerArgList) sys.exit(retval) pyflow-1.1.14/pyflow/demo/runOptionsDemo/testtasks/000077500000000000000000000000001303601460500223725ustar00rootroot00000000000000pyflow-1.1.14/pyflow/demo/runOptionsDemo/testtasks/sleeper.bash000077500000000000000000000002711303601460500246730ustar00rootroot00000000000000#!/usr/bin/env bash if [ $# != 1 ]; then echo "usage $0 arg" exit 1 fi arg=$1 pid=$$ echo pid: $pid arg: $arg starting sleep sleep $arg echo pid: $pid arg: $arg ending sleep pyflow-1.1.14/pyflow/demo/runOptionsDemo/testtasks/yeller.bash000077500000000000000000000004351303601460500245320ustar00rootroot00000000000000#!/usr/bin/env bash if [ $# != 1 ]; then echo "usage $0 arg" exit 1 fi arg=$1 pid=$$ echo pid: $pid arg: $arg starting yell for i in {1..100}; do echo "Yeller $pid yellin $i stdout" echo "Yeller $pid yellin $i stderr" 1>&2 done echo pid: $pid arg: $arg ending sleep pyflow-1.1.14/pyflow/demo/simpleDemo/000077500000000000000000000000001303601460500174565ustar00rootroot00000000000000pyflow-1.1.14/pyflow/demo/simpleDemo/simpleDemo.py000077500000000000000000000136161303601460500221400ustar00rootroot00000000000000#!/usr/bin/env python # # pyFlow - a lightweight parallel task engine # # Copyright (c) 2012-2015 Illumina, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY # WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
# import os.path import sys # add module path by hand # scriptDir=os.path.abspath(os.path.dirname(__file__)) sys.path.append(scriptDir+"/../../src") from pyflow import WorkflowRunner # # very simple task scripts called by the demo: # testJobDir=os.path.join(scriptDir,"testtasks") sleepjob=os.path.join(testJobDir,"sleeper.bash") # sleeps yelljob=os.path.join(testJobDir,"yeller.bash") # generates some i/o runjob=os.path.join(testJobDir,"runner.bash") # runs at 100% cpu # all pyflow workflows are written into classes derived from # pyflow.WorkflowRunner: # class SimpleWorkflow(WorkflowRunner) : # WorkflowRunner objects can create regular constructors to hold # run parameters or other state information: # def __init__(self,params) : self.params=params # a workflow is defined by overloading the # WorkflowRunner.workflow() method: # def workflow(self) : # A simple command task with no dependencies, labeled 'task1'. # cmd="%s 1" % (yelljob) self.addTask("task1",cmd) # Another task which runs the same command, this time the # command is provided as an argument list. An argument list # can be useful when a command has many arguments or # complicated quoting issues: # cmd=[yelljob,"1"] self.addTask("task2",cmd) # This task will always run on the local machine, no matter # what the run mode is. The force local option is useful for # non-cpu intensive jobs which are taking care of minor # workflow overhead (moving/touching files, etc) # self.addTask("task3a",sleepjob+" 10",isForceLocal=True) # This job is requesting 2 threads: # self.addTask("task3b",runjob+" 10",nCores=2) # This job is requesting 2 threads and 3 gigs of ram: # self.addTask("task3c",runjob+" 10",nCores=2,memMb=3*1024) # addTask and addWorkflowTask always return their task labels # as a simple convenience. taskName is set to "task4" now. # taskName=self.addTask("task4",sleepjob+" 1") # an example task dependency: # # pyflow stores dependencies in set() objects, but you can # provide a list,tuple,set or single string as the argument to # dependencies: # # all the task5* tasks below specify "task4" as their # dependency: # self.addTask("task5a",yelljob+" 2",dependencies=taskName) self.addTask("task5b",yelljob+" 2",dependencies="task4") self.addTask("task5c",yelljob+" 2",dependencies=["task4"]) self.addTask("task5d",yelljob+" 2",dependencies=[taskName]) # this time we launch a number of sleep tasks based on the # workflow parameters: # # we store all tasks in sleepTasks -- which we use to make # other tasks wait for this entire set of jobs to complete: # sleepTasks=set() for i in range(self.params["numSleepTasks"]) : taskName="sleep_task%i" % (i) sleepTasks.add(taskName) self.addTask(taskName,sleepjob+" 1",dependencies="task5a") ## note the three lines above could have been written in a ## more compact single-line format: ## #sleepTasks.add(self.addTask("sleep_task%i" % (i),sleepjob+" 1",dependencies="task5a")) # this job cannot start until all tasks in the above loop complete: self.addTask("task6",runjob+" 2",nCores=3,dependencies=sleepTasks) # This task is supposed to fail, uncomment to see error reporting: # #self.addTask("task7",sleepjob) # Note that no command is provided to this task. It will not # be distributed locally or to sge, but does provide a # convenient label for a set of tasks that other processes # depend on. 
There is no special "checkpoint-task" type in # pyflow -- but any task can function like one per this # example: # self.addTask("checkpoint_task",dependencies=["task1","task6","task5a"]) # The final task depends on the above checkpoint: # self.addTask("task8",yelljob+" 2",dependencies="checkpoint_task") # simulated workflow parameters # myRunParams={"numSleepTasks" : 15} # Instantiate the workflow # # parameters are passed into the workflow via its constructor: # wflow = SimpleWorkflow(myRunParams) # Run the worklow: # retval=wflow.run(mode="local",nCores=8) sys.exit(retval) pyflow-1.1.14/pyflow/demo/simpleDemo/testtasks/000077500000000000000000000000001303601460500215035ustar00rootroot00000000000000pyflow-1.1.14/pyflow/demo/simpleDemo/testtasks/runner.bash000077500000000000000000000006141303601460500236570ustar00rootroot00000000000000#!/usr/bin/env bash thisdir=$(dirname $0) cd $thisdir if ! [ -e ./runner ]; then # turning on -O2 is too variable accross different platforms, so leave off: # # the move and sleep steps here help to make sure that we don't get a "text file busy" # error on the ./runner call below: # gcc ./runner.c -lm -o runner.tmp && mv runner.tmp runner && sleep 1 fi ./runner $1 pyflow-1.1.14/pyflow/demo/simpleDemo/testtasks/runner.c000066400000000000000000000003521303601460500231600ustar00rootroot00000000000000#include "math.h" #include "assert.h" int main(int argc, char**argv) { assert(argc==2); int mult=atoi(argv[1]); int i,j; double a=0; long total=50000000; for(j=0;j&2 done echo pid: $pid arg: $arg ending sleep pyflow-1.1.14/pyflow/demo/subWorkflow/000077500000000000000000000000001303601460500177045ustar00rootroot00000000000000pyflow-1.1.14/pyflow/demo/subWorkflow/subWorkflow.py000077500000000000000000000072121303601460500226070ustar00rootroot00000000000000#!/usr/bin/env python # # pyFlow - a lightweight parallel task engine # # Copyright (c) 2012-2015 Illumina, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY # WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. 
# import os.path import sys # add module path by hand # scriptDir=os.path.abspath(os.path.dirname(__file__)) sys.path.append(scriptDir+"/../../src") from pyflow import WorkflowRunner # # very simple task scripts called by the demo: # testJobDir=os.path.join(scriptDir,"testtasks") sleepjob=os.path.join(testJobDir,"sleeper.bash") # sleeps yelljob=os.path.join(testJobDir,"yeller.bash") # generates some i/o runjob=os.path.join(testJobDir,"runner.bash") # runs at 100% cpu # all pyflow workflows are written into classes derived from pyflow.WorkflowRunner: # # this workflow is a simple example of a workflow we can either run directly, # or run as a task within another workflow: # class SubWorkflow(WorkflowRunner) : # a workflow is defined by overloading the WorkflowRunner.workflow() method: # def workflow(self) : # this workflow executes a simple dependency diamond: self.addTask("task1",yelljob+" 1") self.addTask("task2a",yelljob+" 1",dependencies="task1") self.addTask("task2b",yelljob+" 1",dependencies="task1") self.addTask("task3",yelljob+" 1",dependencies=("task2a","task2b")) # # This workflow will use SubWorkflow as a task: # class SimpleWorkflow(WorkflowRunner) : # a workflow is defined by overloading the WorkflowRunner.workflow() method: # def workflow(self) : # it's fine to repeat task names in two workflows, even if you're sub-tasking one from the other self.addTask("task1",yelljob+" 1") self.addTask("task2",runjob+" 3") # instantiate a new workflow and run it as soon as task1 and task2 complete wflow=SubWorkflow() self.addWorkflowTask("subwf_task3",wflow,dependencies=("task1","task2")) # this job will not run until the workflow-task completes. This means that all of the # tasks that SubWorkflow launches will need to complete successfully beforehand: # self.addTask("task4",sleepjob+" 1",dependencies="subwf_task3") # Instantiate our workflow # wflow = SimpleWorkflow() # Run the worklow: # retval=wflow.run(mode="local",nCores=8) # If we want to run the SubWorkflow as a regular workflow, that can be done as well: # #wflow2 = SubWorkflow() #retval2=wflow2.run() sys.exit(retval) pyflow-1.1.14/pyflow/demo/subWorkflow/testtasks/000077500000000000000000000000001303601460500217315ustar00rootroot00000000000000pyflow-1.1.14/pyflow/demo/subWorkflow/testtasks/runner.bash000077500000000000000000000006141303601460500241050ustar00rootroot00000000000000#!/usr/bin/env bash thisdir=$(dirname $0) cd $thisdir if ! [ -e ./runner ]; then # turning on -O2 is too variable accross different platforms, so leave off: # # the move and sleep steps here help to make sure that we don't get a "text file busy" # error on the ./runner call below: # gcc ./runner.c -lm -o runner.tmp && mv runner.tmp runner && sleep 1 fi ./runner $1 pyflow-1.1.14/pyflow/demo/subWorkflow/testtasks/runner.c000066400000000000000000000003521303601460500234060ustar00rootroot00000000000000#include "math.h" #include "assert.h" int main(int argc, char**argv) { assert(argc==2); int mult=atoi(argv[1]); int i,j; double a=0; long total=50000000; for(j=0;j&2 done echo pid: $pid arg: $arg ending sleep pyflow-1.1.14/pyflow/demo/successMsgDemo/000077500000000000000000000000001303601460500203045ustar00rootroot00000000000000pyflow-1.1.14/pyflow/demo/successMsgDemo/successMsgDemo.py000077500000000000000000000045511303601460500236120ustar00rootroot00000000000000#!/usr/bin/env python # # pyFlow - a lightweight parallel task engine # # Copyright (c) 2012-2015 Illumina, Inc. # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY # WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # import os.path import sys # add module path by hand # scriptDir=os.path.abspath(os.path.dirname(__file__)) sys.path.append(scriptDir+"/../../src") from pyflow import WorkflowRunner # # test and demostrate the use of a custom success message # at the end of a workflow # # all pyflow workflows are written into classes derived from # pyflow.WorkflowRunner: # class SuccessWorkflow(WorkflowRunner) : # a workflow is defined by overloading the # WorkflowRunner.workflow() method: # def workflow(self) : # provide a minimum task self.addTask("task1","touch success\! && exit 0") # Instantiate the workflow # wflow = SuccessWorkflow() # Run the worklow: # cwd=os.getcwd() successMsg = "SuccessWorkflow has successfully succeeded!\n" successMsg += "\tPlease find your token of successful succeeding here: '%s'\n" % (cwd) retval=wflow.run(mode="local",nCores=8,successMsg=successMsg,mailTo="csaunders@illumina.com") sys.exit(retval) pyflow-1.1.14/pyflow/doc/000077500000000000000000000000001303601460500152015ustar00rootroot00000000000000pyflow-1.1.14/pyflow/doc/ChangeLog.txt000066400000000000000000000214101303601460500175670ustar00rootroot00000000000000v1.1.14 20170112 * STREL-391 improve task throughput for nodes with 100's of cores v1.1.13 20160414 * fix rare issue with sets of dependent checkpoint tasks * fix for travis CI script from Dominic Jodoin v1.1.12 20151203 * lengthen signal file delay tolerance to 4 mins * [#14] Filter environment variables to remove bash functions. This eliminates complications between shellshock bash update and SGE. 
v1.1.11 20151125 * Improve SGE robustness v1.1.10 20150927 * Remove old custom cluster SGE logic and standardize on h_vmem v1.1.9 20150923 * Add windows CI script to run cross-platform tests on master * Add new cross-platform test script and adjust all unit tests to run on windows * Improve error reporting for missing signal file case v1.1.8 20150918 * Improve windows shell compatibility * [#10] If an error occurs creating the task visualization script, issue warning and allow workflow to continue v1.1.7 20150806 * [#9] improve robustness to filesystem delays for task wrapper parameter file * [#9] improve error log specificity when anomolous task wrapper output occurs in localhost run mode v1.1.6 20150713 * Fix multithread conflict introduced by [#5] fix v1.1.5 20150710 * Changed to BSD 2-Clause license * [#5] fix in-workflow check for nested workflows v1.1.4 20150527 * added check for local SMTP service before sending email notification * added extra check aginst workflow methods running outside of pyflow runtime v1.1.3 20141028 * fix master node memory spike for SGE jobs identified by Lilian Janin * added Windows OS patches form Tobias Mann v1.1.2 20131026 * added python distutils setup.py provided by Ryan Kelley v1.1.1 20130716 * fix issue with new startFromTasks feature when used with subWorkflows v1.1.0 20130715 * add new features to allow only part of workflow to be run and/or continued v1.0.1 20130710 * Fix O(exp) scaling problems in DAG handling methods v1.0.0 20130507 * relicenced to Illumina Open Source Software License v1 v0.6.26 20130304 * fix bug in forceContinue'd workflow runs v0.6.25 20130221 * Add optional warning and error log which contains all logged warning or error messages. * allow specification of a custom notification message on successful workflow completion * allow any task to specify its own retry parameters, overriding the run-level parameters of the same name * add retryMode to allow task retry to be applied to local mode v0.6.24 20121128 * accelerate localhost task sweep so that short-running task workflows can execute more quickly * create new mutex option to addTask, this allows a set of tasks to share a mutex id, causing no more than one in the group to be executed at the same time v0.6.23 20121018 * change cwd parameter to not require directory to exist * fix version number search so that an non-installed version does not require git to be installed v0.6.22 20121002 * fix custom environment option to be more robust in various sge contexts v0.6.21 20120925 * add option to supply custom environment variables to any task * fix error message when invalid runmode is given v0.6.20 20120920 * increase # of retries and timeout length for qstat call v0.6.19 20120914 * check for and allow OSError on fsync call. v0.6.18 201207 * ignore available memory limit in non-local run modes * detect if multiple active pyflow jobs are attempting to use the same data directory v0.6.17 20120622 * minor cleanups: add python version to reports and python 2.7.2 warning to logs v0.6.16 20120529 * issue error when task specifies itself as a dependency * fix issue which could cause pyflow to hang when using python 2.7.2 in sge mode, hang can still occur in local mode. python 2.7.2 should be avoided. 
v0.6.15 20120525 * Improved developer logging: dump stack for all threads in python 2.5+ during the update interval * Additional sge command error logging * automate pyflow version setting and add this to workflow logs * improved logging scalability for 10000+ task workflows * improved API documentation v0.6.14.1 20120518 * remove typo bug v0.6.14 20120507 * Add timeouts to sge qsub and qstat commands in case these hang (observed at low fequency on sd clusters) * Write SGE job_number to log for evey qsub-ed job * Write the name of the longest queued and longest running tasks in the status update report. * Add new demo demonstrating commandline settings for workflow run arguments v0.6.13 20120503 * Fix regression in sge make jobs introduced with queue option v0.6.12 20120429 * Add cwd argument to addTask to change wd before task execution v0.6.11 20120424 * Remove sub-workflows from status update report * Dump full qsub arg list for each sge job to temporary logs in case of sge anomoly * Log sge job number in case of anomolous state at end of sge job * taskWrapper logs hostname as soon as possible in case of error * More reliable (but slower) flush used for log writes * Add option to provide a list of arguments to qsub/qmake (to specify queue most likely) * Add option to turn off logging to stderr. v0.6.10 20120419 * Provide a 'heartbeat' task status update to the log at a specified interval. v0.6.9 * Improve robustness against NFS update delay for task wrapper file * Include more sge error details in Notification email v0.6.8 20120319 * Better handling on terminal hang-up: capture and ignore SIGHUP and handle failed writes to stderr. You should still use nohup where needed, but if you don't, then proper logging and notification will continue. * flush log writes v0.6.7 20120316 * add tail of task stderr stream to nofications when tasks fail * apply ctrl-C task shutdown to SIGTERM as well v0.6.6 20120315 * include configuration for uscp-prd cluster * Passive graph creation * Reduce thread stack size in later versions of python * More robust ctrl-C shutdown behavior (master workflow on own thread) v0.6.5 20120308 * bagPipes fix v0.6.4 20120307 * Consume filehandles more efficiently when running a very large number of local jobs. Reset SGE default max jobs to something reasonable (128). Allow logging to continue even after filehandles are exhausted in case it manages to happen. v0.6.3 20120305 * Switch sge mode from using qsub sync to a more scalable qsub-and-poll scheme This immediately removes the 99 job sge limit, and provides the infrastructure for queueing or running timeout on tasks. v0.6.2 * allow commands to be submitted as argument lists (in addition to shell strings, as before). Argument lists allow longer commands and obviate a variety of quoting issues. * Change site configuration to an object scheme which simplifies site customization. 
* change qmake tasks from parallel env to dynamic allocation mode * allow qmake jobs to retry without an expiration window * improved reporting of failed make jobs * fixed minor issue with make path cleanup v0.6.1 20120228 * primarily a bagPipes release * fixed isForceLocal task bug introduced in 0.6 * fixed make task bug introduced in 0.6 v0.6 20120227 * primarily a bagPipes release * added task priority option v0.5.5 20120224 * more robust hostname lookup combines ip query and alias list * fix runner.bash demo failures on some machines * fix pyflowTaskWrapper stderr caching v0.5.4 20120224 * fix configuration for non-sge hosts v0.5.2 20120222 * chuk pthread fix for pyflow tasks v0.5.1 20120221 * Added autmatic chuk sge configuration to allow bagPipes to complete in the uk. * Various fixes from uk testing: (1) full hostname is correctly found in the uk now (2) default configuration for email is it now comes form "pyflow-bot@"YOUR_DOMAIN_NAME now. This is required to correctly get mail sent from a uk box. v0.5 20120220 * Cutting version of pyFlow to sync with first bagPipes prototype * add max SGE jobs to configuration parameters -- default set to 96 * Fix sub-workflows to shutdown properly after task shutdown * More robust handling of commands with quoting and special characters * Non-breaking API change: isTaskComplete lets you query whether a task is in the workflow and completed -- useful for sane interupt/resume behavior * Non-breaking API change: limitNCores(n) and limitMemMb(n) can be used now to reduce your resource request to the maximum available for this run. v0.4 20120216 * Added memory to the task resource tracking * Created pyflowConfig file which contains site specific code -- moved resource to qsub argument translation functions into this config file * Non-breaking API change: Added isCmdStable option to addTask to specify that a command can change on workflow resume. * Non-breaking API change: all add*() methods return task label v0.3 20120213 * Full support for recursive WorkflowRunner task specification -- provide any other WorkflowRunner instance as a task in a workflow() definition. * Report Exceptions in TaskRunner objects as task errors * Report list of failed tasks even during a waitForTasks() holding loop. v0.2 20120207 First versioned released. Major addition is a complete command-line interface for the bcl converter demo pyflow-1.1.14/pyflow/doc/README.txt000066400000000000000000000003371303601460500167020ustar00rootroot00000000000000client_api/ -> contains documetation on the pyflow API which you can use to create your own workflow scripts developer/ -> contains documenation that's only useful if you'd like to change or add features to pyflow itself pyflow-1.1.14/pyflow/doc/client_api/000077500000000000000000000000001303601460500173105ustar00rootroot00000000000000pyflow-1.1.14/pyflow/doc/client_api/README000066400000000000000000000005331303601460500201710ustar00rootroot00000000000000Pre-generated client API documentation is here: WorkflowRunner_API_html_doc Full API documentation can be created as an html tree using the script: 'make_WorkflowRunner_API_html_doc.bash' Note this requires the program 'epydoc'. 
A simpler ascii documentation page can be generated in pydoc by running: 'make_WorkflowRunner_API_simple_doc.py' pyflow-1.1.14/pyflow/doc/client_api/make_WorkflowRunner_API_html_doc.bash000077500000000000000000000002241303601460500265130ustar00rootroot00000000000000#!/usr/bin/env bash thisdir=$(dirname $0) PYTHONPATH=$thisdir/../../src epydoc pyflow.WorkflowRunner --no-private -o WorkflowRunner_API_html_doc pyflow-1.1.14/pyflow/doc/client_api/make_WorkflowRunner_API_simple_doc.py000077500000000000000000000003531303601460500265560ustar00rootroot00000000000000#!/usr/bin/env python import os.path import sys sys.path.append(os.path.abspath(os.path.dirname(__file__)) + "/../../src") import pyflow # Document the public functions of pyflow's only public class: # help(pyflow.WorkflowRunner) pyflow-1.1.14/pyflow/doc/developer/000077500000000000000000000000001303601460500171665ustar00rootroot00000000000000pyflow-1.1.14/pyflow/doc/developer/README000066400000000000000000000003001303601460500200370ustar00rootroot00000000000000This documentation is intended for anyone interested in changing pyflow itself. For documentation on the API to *use* pyflow, please see pyflow/doc/client_api and demo programs in pyflow/demo pyflow-1.1.14/pyflow/doc/developer/make_pyflow_developer_html_doc.bash000077500000000000000000000001701303601460500262610ustar00rootroot00000000000000#!/usr/bin/env bash thisdir=$(dirname $0) epydoc $thisdir/../../src/*.py -o pyflow_developer_html_doc -v --graph all pyflow-1.1.14/pyflow/setup.py000066400000000000000000000004371303601460500161520ustar00rootroot00000000000000from distutils.core import setup setup( name='pyFlow', version='${VERSION}', description='A lightweight parallel task engine', author='Chris Saunders', author_email='csaunders@illumina.com', packages=['pyflow'], package_dir={'pyflow': 'src'} ) pyflow-1.1.14/pyflow/src/000077500000000000000000000000001303601460500152235ustar00rootroot00000000000000pyflow-1.1.14/pyflow/src/__init__.py000066400000000000000000000000251303601460500173310ustar00rootroot00000000000000from pyflow import * pyflow-1.1.14/pyflow/src/pyflow.py000066400000000000000000004562431303601460500171330ustar00rootroot00000000000000#!/usr/bin/env python # # pyFlow - a lightweight parallel task engine # # Copyright (c) 2012-2015 Illumina, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY # WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # """ pyflow -- a lightweight parallel task engine """ __author__ = 'Christopher Saunders' import copy import datetime import os import re import shutil import subprocess import sys import threading import time import traceback from pyflowConfig import siteConfig moduleDir = os.path.abspath(os.path.dirname(__file__)) # minimum python version # pyver = sys.version_info if pyver[0] != 2 or (pyver[0] == 2 and pyver[1] < 4) : raise Exception("pyflow module has only been tested for python versions [2.4,3.0)") # problem python versions: # # Internal interpreter deadlock issue in python 2.7.2: # http://bugs.python.org/issue13817 # ..is so bad that pyflow can partially, but not completely, work around it -- so issue a warning for this case. if pyver[0] == 2 and pyver[1] == 7 and pyver[2] == 2 : raise Exception("Python interpreter errors in python 2.7.2 may cause a pyflow workflow hang or crash. Please use a different python version.") # The line below is a workaround for a python 2.4/2.5 bug in # the subprocess module. # # Bug is described here: http://bugs.python.org/issue1731717 # Workaround is described here: http://bugs.python.org/issue1236 # subprocess._cleanup = lambda: None # In python 2.5 or greater, we can lower the per-thread stack size to # improve memory consumption when a very large number of jobs are # run. Below it is lowered to 256Kb (compare to linux default of # 8Mb). 
# try: threading.stack_size(min(256 * 1024, threading.stack_size)) except AttributeError: # Assuming this means python version < 2.5 pass class GlobalSync : """ Control total memory usage in non-local run modes by limiting the number of simultaneous subprocess calls Note that in practice this only controls the total number of qsub/qstat calls in SGE mode """ maxSubprocess = 2 subprocessControl = threading.Semaphore(maxSubprocess) def getPythonVersion() : python_version = sys.version_info return ".".join([str(i) for i in python_version]) pythonVersion = getPythonVersion() # Get pyflow version number # def getPyflowVersion() : # this will be automatically macro-ed in for pyflow releases: pyflowAutoVersion = None # Get version number in regular release code: if pyflowAutoVersion is not None : return pyflowAutoVersion # Get version number during dev: try : proc = subprocess.Popen(["git", "describe"], stdout=subprocess.PIPE, stderr=open(os.devnull, "w"), cwd=moduleDir, shell=False) (stdout, _stderr) = proc.communicate() retval = proc.wait() stdoutList = stdout.split("\n")[:-1] if (retval == 0) and (len(stdoutList) == 1) : return stdoutList[0] except OSError: # no git installed pass return "unknown" __version__ = getPyflowVersion() # portability functions: # def _isWindows() : import platform return (platform.system().find("Windows") > -1) class GlobalConstants : isWindows=_isWindows() def isWindows() : return GlobalConstants.isWindows def forceRename(src,dst) : """ dst is only overwritten in a single atomic operation on *nix on windows, we can't have atomic rename, but we can recreate the behavior otherwise """ if isWindows() : if os.path.exists(dst) : os.remove(dst) maxTrials=5 for trial in range(maxTrials) : try : os.rename(src,dst) return except OSError : if (trial+1) >= maxTrials : raise time.sleep(5) def cleanEnv() : """ clear bash functions out of the env without this change the shellshock security update causes pyflow SGE jobs to fail with the behavior of current (201512) versions of SGE qsub """ ekeys = os.environ.keys() for key in ekeys : if key.endswith("()") : del os.environ[key] # utility values and functions: # def ensureDir(d): """ make directory if it doesn't already exist, raise exception if something else is in the way: """ if os.path.exists(d): if not os.path.isdir(d) : raise Exception("Can't create directory: %s" % (d)) else : os.makedirs(d) # # time functions -- note there's an additional copy in the pyflow wrapper script: # # all times in pyflow are utc (never local) and printed to iso8601 # def timeStampToTimeStr(ts) : """ converts time.time() output to timenow() string """ return datetime.datetime.utcfromtimestamp(ts).isoformat() def timeStrNow(): return timeStampToTimeStr(time.time()) def timeStrToTimeStamp(ts): import calendar d = datetime.datetime(*map(int, re.split(r'[^\d]', ts)[:-1])) return calendar.timegm(d.timetuple()) def isInt(x) : return isinstance(x, (int, long)) def isString(x): return isinstance(x, basestring) def isIterable(x): return (getattr(x, '__iter__', False) != False) def lister(x): """ Convert input into a list, whether it's already iterable or not. 
Make an exception for individual strings to be returned as a list of one string, instead of being chopped into letters Also, convert None type to empty list: """ # special handling in case a single string is given: if x is None : return [] if (isString(x) or (not isIterable(x))) : return [x] return list(x) def setzer(x) : """ convert user input into a set, handling the pathological case that you have been handed a single string, and you don't want a set of letters: """ return set(lister(x)) class LogState : """ A simple logging enum """ INFO = 1 WARNING = 2 ERROR = 3 @classmethod def toString(cls,logState) : if logState == cls.INFO : return "INFO" if logState == cls.WARNING : return "WARNING" if logState == cls.ERROR : return "ERROR" raise Exception("Unknown log state: " + str(logState)) # allow fsync to be globally turned off class LogGlobals : isFsync = True def hardFlush(ofp): ofp.flush() if ofp.isatty() : return # fsync call has been reported to consistently fail in some contexts (rsh?) # so allow OSError if not LogGlobals.isFsync : return try : os.fsync(ofp.fileno()) except OSError: LogGlobals.isFsync = False def log(ofpList, msgList, linePrefix=None): """ General logging function. @param ofpList: A container of file objects to write to @param msgList: A container of (or a single) multi-line log message string. Final newlines are not required @param linePrefix: A prefix to add before every line. This will come *after* the log function's own '[time] [hostname]' prefix. @return: Returns a boolean tuple of size ofpList indicating the success of writing to each file object """ msgList = lister(msgList) ofpList = setzer(ofpList) retval = [True] * len(ofpList) for msg in msgList : # strip final trailing newline if it exists: if (len(msg) > 0) and (msg[-1] == "\n") : msg = msg[:-1] linePrefixOut = "[%s] [%s]" % (timeStrNow(), siteConfig.getHostName()) if linePrefix is not None : linePrefixOut += " " + linePrefix # split message into prefixable lines: for i, ofp in enumerate(ofpList): # skip io streams which have failed before: if not retval[i] : continue try : for line in msg.split("\n") : ofp.write("%s %s\n" % (linePrefixOut, line)) hardFlush(ofp) except IOError: retval[i] = False return retval def getThreadName(): return threading.currentThread().getName() def isMainThread() : return (getThreadName == "MainThread") class StrFileObject(object) : """ fakes a filehandle for library functions which write to a stream, and captures output in a string """ def __init__(self) : self.str = "" def write(self, string) : self.str += string def __str__(self) : return self.str def getTracebackStr() : return traceback.format_exc() def getExceptionMsg() : msg = ("Unhandled Exception in %s\n" % (getThreadName())) + getTracebackStr() if msg[-1] == "\n" : msg = msg[:-1] return msg.split("\n") def cmdline() : return " ".join(sys.argv) def msgListToMsg(msgList): """ convert string or list of strings into a single string message """ msg = "" isFirst=True for chunk in lister(msgList) : if isFirst : isFirst = False else : msg += "\n" if ((len(chunk)>0) and (chunk[-1] == '\n')) : chunk = chunk[:-1] msg += chunk return msg emailRegex = re.compile(r"(?:^|\s)[-a-z0-9_.]+@(?:[-a-z0-9]+\.)+[a-z]{2,6}(?:\s|$)", re.IGNORECASE) def verifyEmailAddy(x) : return (emailRegex.match(x) is not None) def isLocalSmtp() : """ return true if a local smtp server is available """ import smtplib try : s = smtplib.SMTP('localhost') except : return False return True def sendEmail(mailTo, mailFrom, subject, msgList) : import smtplib # this 
is the way to import MIMEText in py 2.4: from email.MIMEText import MIMEText # format message list into a single string: msg = msgListToMsg(msgList) mailTo = setzer(mailTo) msg = MIMEText(msg) msg["Subject"] = subject msg["From"] = mailFrom msg["To"] = ", ".join(mailTo) s = smtplib.SMTP('localhost') s.sendmail(mailFrom, list(mailTo), msg.as_string()) s.quit() def boolToStr(b) : return str(int(b)) def argToBool(x) : """ convert argument of unknown type to a bool: """ class FalseStrings : val = ("", "0", "false", "f", "no", "n", "off") if isinstance(x, basestring) : return (x.lower() not in FalseStrings.val) return bool(x) def hashObjectValue(obj) : """ This function hashes objects values -- the hash will be the same for two objects containing the same methods and data, so it corresponds to 'A==B' and *not* 'A is B'. """ import pickle import hashlib hashlib.md5(pickle.dumps(obj, protocol=pickle.HIGHEST_PROTOCOL)).hexdigest() namespaceSep = "+" def namespaceJoin(a, b) : """ join two strings with a separator only if a exists """ if a == "" : return b elif b == "" : return a return a + namespaceSep + b def namespaceLabel(namespace) : """ provide a consistent naming scheme to users for embedded workflows """ if namespace == "" : return "master workflow" else : return "sub-workflow '%s'" % (namespace) class ExpWaiter(object) : """ Convenience object to setup exponentially increasing wait/polling times """ def __init__(self, startSec, factor, maxSec, event = None) : """ optionally allow an event to interrupt wait cycle """ assert (startSec > 0.) assert (factor > 1.) assert (maxSec >= startSec) self.startSec = startSec self.factor = factor self.maxSec = maxSec self.event = event self.sec = self.startSec self.isMax = False def reset(self) : self.sec = self.startSec def wait(self) : if self.event is None : time.sleep(self.sec) else : self.event.wait(self.sec) if self.isMax : return self.sec = min(self.sec * self.factor, self.maxSec) self.isMax = (self.sec == self.maxSec) assert self.sec <= self.maxSec def lockMethod(f): """ method decorator acquires/releases object's lock """ def wrapped(self, *args, **kw): if not hasattr(self,"lock") : self.lock = threading.RLock() self.lock.acquire() try: return f(self, *args, **kw) finally: self.lock.release() return wrapped class Bunch: """ generic struct with named argument constructor """ def __init__(self, **kwds): self.__dict__.update(kwds) def stackDump(dumpfp): """ adapted from haridsv @ stackoverflow: """ athreads = threading.enumerate() tnames = [(th.getName()) for th in athreads] frames = None try: frames = sys._current_frames() except AttributeError: # python version < 2.5 pass id2name = {} try: id2name = dict([(th.ident, th.getName()) for th in athreads]) except AttributeError : # python version < 2.6 pass if (frames is None) or (len(tnames) > 50) : dumpfp.write("ActiveThreadCount: %i\n" % (len(tnames))) dumpfp.write("KnownActiveThreadNames:\n") for name in tnames : dumpfp.write(" %s\n" % (name)) dumpfp.write("\n") return dumpfp.write("ActiveThreadCount: %i\n" % (len(frames))) dumpfp.write("KnownActiveThreadNames:\n") for name in tnames : dumpfp.write(" %s\n" % (name)) dumpfp.write("\n") for tid, stack in frames.items(): dumpfp.write("Thread: %d %s\n" % (tid, id2name.get(tid, "NAME_UNKNOWN"))) for filename, lineno, name, line in traceback.extract_stack(stack): dumpfp.write('File: "%s", line %d, in %s\n' % (filename, lineno, name)) if line is not None: dumpfp.write(" %s\n" % (line.strip())) dumpfp.write("\n") dumpfp.write("\n") 
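# Illustrative usage sketch for the ExpWaiter helper defined above (comments
# only -- not part of the pyflow API; 'isJobDone' below is a hypothetical
# placeholder predicate):
#
#   waiter = ExpWaiter(startSec=1.0, factor=2.0, maxSec=60.0)
#   while not isJobDone() :
#       waiter.wait()
#
# Each wait() call sleeps, then multiplies the next delay by 'factor' until
# 'maxSec' is reached; reset() returns the delay to 'startSec'.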
####################################################################### # # these functions are written out to a utility script which allows users # to make a dot graph from their current state directory output. We # keep it in pyflow as working code so that pyflow can call sections of it. # def taskStateHeader() : return "#taskLabel\ttaskNamespace\trunState\terrorCode\trunStateUpdateTime\n" def taskStateParser(stateFile) : class Constants : nStateCols = 5 for line in open(stateFile) : if len(line) and line[0] == "#" : continue line = line.strip() w = line.split("\t") if len(w) != Constants.nStateCols : raise Exception("Unexpected format in taskStateFile: '%s' line: '%s'" % (stateFile, line)) yield [x.strip() for x in w] def taskInfoHeader() : return "#%s\n" % ("\t".join(("taskLabel", "taskNamespace", "taskType", "nCores", "memMb", "priority", "isForceLocal", "dependencies", "cwd", "command"))) def taskInfoParser(infoFile) : class Constants : nInfoCols = 10 for line in open(infoFile) : if len(line) and line[0] == "#" : continue line = line.lstrip() w = line.split("\t", (Constants.nInfoCols - 1)) if len(w) != Constants.nInfoCols : raise Exception("Unexpected format in taskInfoFile: '%s' line: '%s'" % (infoFile, line)) yield [x.strip() for x in w] def getTaskInfoDepSet(s) : # reconstruct dependencies allowing for extraneous whitespace in the file: s = s.strip() if s == "" : return [] return set([d.strip() for d in s.split(",")]) class TaskNodeConstants(object) : validRunstates = ("complete", "running", "queued", "waiting", "error") class DotConfig(object) : """ A static container of configuration data for dot graph output """ runstateDotColor = {"waiting" : "grey", "running" : "green", "queued" : "yellow", "error" : "red", "complete" : "blue" } runstateDotStyle = {"waiting" : "dashed", "running" : None, "queued" : None, "error" : "bold", "complete" : None } @staticmethod def getRunstateDotAttrib(runstate) : color = DotConfig.runstateDotColor[runstate] style = DotConfig.runstateDotStyle[runstate] attrib = "" if color is not None : attrib += " color=%s" % (color) if style is not None : attrib += " style=%s" % (style) return attrib @staticmethod def getTypeDotAttrib(nodeType) : attrib = "" if nodeType == "workflow" : attrib += " shape=rect style=rounded" return attrib @staticmethod def getDotLegend() : string = '{ rank = source; Legend [shape=none, margin=0, label=<\n' string += '\n' string += '\n' for state in TaskNodeConstants.validRunstates : color = DotConfig.runstateDotColor[state] string += '\n' % (state, color) string += '
</TABLE>
>];}\n' return string def writeDotGraph(taskInfoFile, taskStateFile, workflowClassName) : """ write out the current graph state in dot format """ addOrder = [] taskInfo = {} headNodes = set() tailNodes = set() # read info file: for (label, namespace, ptype, _nCores, _memMb, _priority, _isForceLocal, depStr, _cwdStr, _command) in taskInfoParser(taskInfoFile) : tid = (namespace, label) addOrder.append(tid) taskInfo[tid] = Bunch(ptype=ptype, parentLabels=getTaskInfoDepSet(depStr)) if len(taskInfo[tid].parentLabels) == 0 : headNodes.add(tid) tailNodes.add(tid) for plabel in taskInfo[tid].parentLabels : ptid = (namespace, plabel) if ptid in tailNodes : tailNodes.remove(ptid) for (label, namespace, runState, _errorCode, _time) in taskStateParser(taskStateFile) : tid = (namespace, label) taskInfo[tid].runState = runState dotFp = sys.stdout dotFp.write("// Task graph from pyflow object '%s'\n" % (workflowClassName)) dotFp.write("// Process command: '%s'\n" % (cmdline())) dotFp.write("// Process working dir: '%s'\n" % (os.getcwd())) dotFp.write("// Graph capture time: %s\n" % (timeStrNow())) dotFp.write("\n") dotFp.write("digraph %s {\n" % (workflowClassName + "Graph")) dotFp.write("\tcompound=true;\nrankdir=LR;\nnode[fontsize=10];\n") labelToSym = {} namespaceGraph = {} for (i, (namespace, label)) in enumerate(addOrder) : tid = (namespace, label) if namespace not in namespaceGraph : namespaceGraph[namespace] = "" sym = "n%i" % i labelToSym[tid] = sym attrib1 = DotConfig.getRunstateDotAttrib(taskInfo[tid].runState) attrib2 = DotConfig.getTypeDotAttrib(taskInfo[tid].ptype) namespaceGraph[namespace] += "\t\t%s [label=\"%s\"%s%s];\n" % (sym, label, attrib1, attrib2) for (namespace, label) in addOrder : tid = (namespace, label) sym = labelToSym[tid] for plabel in taskInfo[tid].parentLabels : ptid = (namespace, plabel) namespaceGraph[namespace] += ("\t\t%s -> %s;\n" % (labelToSym[ptid], sym)) for (i, ns) in enumerate(namespaceGraph.keys()) : isNs = ((ns is not None) and (ns != "")) dotFp.write("\tsubgraph cluster_sg%i {\n" % (i)) if isNs : dotFp.write("\t\tlabel = \"%s\";\n" % (ns)) else : dotFp.write("\t\tlabel = \"%s\";\n" % (workflowClassName)) dotFp.write(namespaceGraph[ns]) dotFp.write("\t\tbegin%i [label=\"begin\" shape=diamond];\n" % (i)) dotFp.write("\t\tend%i [label=\"end\" shape=diamond];\n" % (i)) for (namespace, label) in headNodes : if namespace != ns : continue sym = labelToSym[(namespace, label)] dotFp.write("\t\tbegin%i -> %s;\n" % (i, sym)) for (namespace, label) in tailNodes : if namespace != ns : continue sym = labelToSym[(namespace, label)] dotFp.write("\t\t%s -> end%i;\n" % (sym, i)) dotFp.write("\t}\n") if ns in labelToSym : dotFp.write("\t%s -> begin%i [style=dotted];\n" % (labelToSym[ns], i)) # in LR orientation this will make the graph look messy: # dotFp.write("\tend%i -> %s [style=invis];\n" % (i,labelToSym[ns])) dotFp.write(DotConfig.getDotLegend()) dotFp.write("}\n") hardFlush(dotFp) def writeDotScript(taskDotScriptFile, taskInfoFileName, taskStateFileName, workflowClassName) : """ write dot task graph creation script """ import inspect dsfp = os.fdopen(os.open(taskDotScriptFile, os.O_WRONLY | os.O_CREAT, 0755), 'w') dsfp.write("""#!/usr/bin/env python # # This is a script to create a dot graph from pyflow state files. # Usage: $script >| task_graph.dot # # Note that script assumes the default pyflow state files are in the script directory. 
# # This file was autogenerated by process: '%s' # ...from working directory: '%s' # import datetime,os,sys,time scriptDir=os.path.abspath(os.path.dirname(__file__)) """ % (os.getcwd(), cmdline())) for dobj in (timeStampToTimeStr, timeStrNow, cmdline, Bunch, LogGlobals, hardFlush, TaskNodeConstants, DotConfig, taskStateParser, taskInfoParser, getTaskInfoDepSet, writeDotGraph) : dsfp.write("\n\n") dsfp.write(inspect.getsource(dobj)) dsfp.write(""" if __name__ == '__main__' : writeDotGraph(os.path.join(scriptDir,'%s'),os.path.join(scriptDir,'%s'),'%s') """ % (taskInfoFileName, taskStateFileName, workflowClassName)) ################################################################ # # workflowRunner Helper Classes: # # class Command(object) : """ Commands can be presented as strings or argument lists (or none) """ def __init__(self, cmd, cwd, env=None) : # 1: sanitize/error-check cmd if ((cmd is None) or (cmd == "") or (isIterable(cmd) and len(cmd) == 0)) : self.cmd = None self.type = "none" elif isString(cmd) : self.cmd = Command.cleanStr(cmd) self.type = "str" elif isIterable(cmd) : self.cmd = [] for i, s in enumerate(cmd): if not (isString(s) or isInt(s)): raise Exception("Argument: '%s' from position %i in argument list command is not a string or integer. Full command: '%s'" % (str(s), (i + 1), " ".join([str(s) for s in cmd]))) self.cmd.append(Command.cleanStr(s)) self.type = "list" else : raise Exception("Invalid task command: '%s'" % (str(cmd))) # 2: sanitize cwd self.cwd = "" if cwd is not None and cwd != "" : self.cwd = os.path.abspath(cwd) if os.path.exists(self.cwd) and not os.path.isdir(self.cwd) : raise Exception("Cwd argument is not a directory: '%s', provided for command '%s'" % (cwd, str(cmd))) # copy env: self.env = env def __repr__(self) : if self.cmd is None : return "" if self.type == "str" : return self.cmd return " ".join(self.cmd) @staticmethod def cleanStr(s) : if isInt(s) : s = str(s) if "\n" in s : raise Exception("Task command/argument contains newline characters: '%s'" % (s)) return s.strip() class StoppableThread(threading.Thread): """ Thread class with a stop() method. The thread itself has to check regularly for the stopped() condition. Note that this is a very new thread base class for pyflow, and most threads do not (yet) check their stopped status. """ _stopAll = threading.Event() def __init__(self, *args, **kw): threading.Thread.__init__(self, *args, **kw) self._stop = threading.Event() def stop(self): "thread specific stop method, may be overridden to add async thread-specific kill behavior" self._stop.set() @staticmethod def stopAll(): "quick global stop signal for threads that happen to poll stopped() very soon after event" StoppableThread._stopAll.set() def stopped(self): return (StoppableThread._stopAll.isSet() or self._stop.isSet()) def getSGEJobsDefault() : if ((siteConfig.maxSGEJobs is not None) and (siteConfig.maxSGEJobs != "") and (siteConfig.maxSGEJobs != "unlimited")) : return int(siteConfig.maxSGEJobs) return "unlimited" class ModeInfo(object) : """ Stores default values associated with each runmode: local,sge,... 
""" def __init__(self, defaultCores, defaultMemMbPerCore, defaultIsRetry) : self.defaultCores = defaultCores self.defaultMemMbPerCore = defaultMemMbPerCore self.defaultIsRetry = defaultIsRetry class RunMode(object): data = { "local" : ModeInfo(defaultCores=1, defaultMemMbPerCore=siteConfig.defaultHostMemMbPerCore, defaultIsRetry=False), "sge" : ModeInfo(defaultCores=getSGEJobsDefault(), defaultMemMbPerCore="unlimited", defaultIsRetry=True) } class RetryParam(object) : """ parameters pertaining to task retry behavior """ allowed_modes = [ "nonlocal" , "all" ] def __init__(self, run_mode, retry_max, wait, window, retry_mode) : if retry_mode not in self.allowed_modes : raise Exception("Invalid retry mode parameter '%s'. Accepted retry modes are {%s}." \ % (retry_mode, ",".join(self.allowed_modes))) self._retry_max = retry_max self.wait = wait self.window = window self._retry_mode = retry_mode self._run_mode = run_mode self._finalize() self.validate() def _finalize(self) : """ decide whether to turn retry off based on retry and run modes: """ if (self._retry_mode == "nonlocal") and \ (not RunMode.data[self._run_mode].defaultIsRetry) : self.max = 0 else : self.max = int(self._retry_max) def validate(self): """ check that the public parameters are valid """ def nonNegParamCheck(val, valLabel) : if val < 0 : raise Exception("Parameter %s must be non-negative" % valLabel) nonNegParamCheck(self.max, "retryMax") nonNegParamCheck(self.wait, "retryWait") nonNegParamCheck(self.window, "retryWindow") def getTaskCopy(self,retry_max, wait, window, retry_mode): """ return a deepcopy of the class customized for each individual task for any retry parameters which are not None """ taskself = copy.deepcopy(self) if retry_max is not None: taskself._retry_max = retry_max if wait is not None: taskself.wait = wait if window is not None: taskself.window = window if retry_mode is not None : taskself._retry_mode = retry_mode taskself._finalize() taskself.validate() return taskself class RunningTaskStatus(object) : """ simple object allowing remote task threads to communicate their status back to the TaskManager """ def __init__(self,isFinishedEvent) : self.isFinishedEvent = isFinishedEvent self.isComplete = threading.Event() self.errorCode = 0 # errorMessage is filled in by sub-workflow # and command-line tasks. # # Sub-workflows use this to convey whether they have # failed (1) because of failures of their own tasks or (2) # because of an exception in the sub-workflow code, in which # case the exception message and stacktrace are provided. 
# # command tasks use this to report the stderr tail of a failing # task # self.errorMessage = "" # only used by sub-workflows to indicate that all tasks have been specified self.isSpecificationComplete = threading.Event() class BaseTaskRunner(StoppableThread) : """ Each individual command-task or sub workflow task is run on its own thread using a class inherited from BaseTaskRunner """ def __init__(self, runStatus, taskStr, sharedFlowLog, setRunstate) : StoppableThread.__init__(self) self.setDaemon(True) self.taskStr = taskStr self.setName("TaskRunner-Thread-%s" % (taskStr)) self.runStatus = runStatus self._sharedFlowLog = sharedFlowLog self.lock = threading.RLock() # allows taskRunner to update between queued and running status: self._setRunstate = setRunstate # this is moved into the ctor now, so that a race condition that would double-launch a task # is now not possible (however unlikely it was before): self.setInitialRunstate() def run(self) : """ BaseTaskRunner's run() method ensures that we can capture exceptions which might occur in this thread. Do not override this method -- instead define the core logic for the task run operation in '_run()' Note that for sub-workflow tasks we're interpreting raw client python code on this thread, so exceptions are *very likely* here -- this is not a corner case. """ retval = 1 retmsg = "" try: (retval, retmsg) = self._run() except WorkflowRunner._AbortWorkflowException : # This indicates an intended workflow interruption. # send a retval of 1 but not an error message pass except: retmsg = getExceptionMsg() self.runStatus.errorCode = retval self.runStatus.errorMessage = retmsg # this indicates that this specific task has finished: self.runStatus.isComplete.set() # this indicates that *any* task has just finished, so # taskmanager can stop polling and immediately sweep self.runStatus.isFinishedEvent.set() return retval def setRunstate(self, *args, **kw) : if self._setRunstate is None : return self._setRunstate(*args, **kw) def setInitialRunstate(self) : self.setRunstate("running") def flowLog(self, msg, logState) : linePrefixOut = "[TaskRunner:%s]" % (self.taskStr) self._sharedFlowLog(msg, linePrefix=linePrefixOut, logState=logState) def infoLog(self, msg) : self.flowLog(msg, logState=LogState.INFO) def warningLog(self, msg) : self.flowLog(msg, logState=LogState.WARNING) def errorLog(self, msg) : self.flowLog(msg, logState=LogState.ERROR) class WorkflowTaskRunner(BaseTaskRunner) : """ Manages a sub-workflow task """ def __init__(self, runStatus, taskStr, workflow, sharedFlowLog, setRunstate) : BaseTaskRunner.__init__(self, runStatus, taskStr, sharedFlowLog, setRunstate) self.workflow = workflow def _run(self) : namespace = self.workflow._getNamespace() nsLabel = namespaceLabel(namespace) self.infoLog("Starting task specification for %s" % (nsLabel)) self.workflow._setRunning(True) self.workflow.workflow() self.workflow._setRunning(False) self.runStatus.isSpecificationComplete.set() self.infoLog("Finished task specification for %s, waiting for task completion" % (nsLabel)) retval = self.workflow._waitForTasksCore(namespace, isVerbose=False) retmsg = "" return (retval, retmsg) class CommandTaskRunner(BaseTaskRunner) : """ Parent to local and SGE TaskRunner specializations for command tasks """ taskWrapper = os.path.join(moduleDir, "pyflowTaskWrapper.py") def __init__(self, runStatus, runid, taskStr, cmd, nCores, memMb, retry, isDryRun, outFile, errFile, tmpDir, schedulerArgList, sharedFlowLog, setRunstate) : """ @param outFile: stdout file @param 
errFile: stderr file @param tmpDir: location to write files containing output from the task wrapper script (and not the wrapped task) """ BaseTaskRunner.__init__(self, runStatus, taskStr, sharedFlowLog, setRunstate) self.cmd = cmd self.nCores = nCores self.memMb = memMb self.retry = retry self.isDryRun = isDryRun self.outFile = outFile self.errFile = errFile self.tmpDir = tmpDir self.schedulerArgList = schedulerArgList self.runid = runid self.taskStr = taskStr if not os.path.isfile(self.taskWrapper) : raise Exception("Can't find task wrapper script: %s" % self.taskWrapper) def initFileSystemItems(self): import pickle ensureDir(self.tmpDir) self.wrapFile = os.path.join(self.tmpDir, "pyflowTaskWrapper.signal.txt") # setup all the data to be passed to the taskWrapper and put this in argFile: taskInfo = { 'nCores' : self.nCores, 'outFile' : self.outFile, 'errFile' : self.errFile, 'cwd' : self.cmd.cwd, 'env' : self.cmd.env, 'cmd' : self.cmd.cmd, 'isShellCmd' : (self.cmd.type == "str") } argFile = os.path.join(self.tmpDir, "taskWrapperParameters.pickle") pickle.dump(taskInfo, open(argFile, "w")) self.wrapperCmd = [self.taskWrapper, self.runid, self.taskStr, argFile] def _run(self) : """ Outer loop of _run() handles task retry behavior: """ # these initialization steps only need to happen once: self.initFileSystemItems() startTime = time.time() retries = 0 retInfo = Bunch(retval=1, taskExitMsg="", isAllowRetry=False) while not self.stopped() : if retries : self.infoLog("Retrying task: '%s'. Total prior task failures: %i" % (self.taskStr, retries)) if self.isDryRun : self.infoLog("Dryrunning task: '%s' task arg list: [%s]" % (self.taskStr, ",".join(['"%s"' % (s) for s in self.getFullCmd()]))) retInfo.retval = 0 else : self.runOnce(retInfo) if retInfo.retval == 0 : break if retries >= self.retry.max : break elapsed = (time.time() - startTime) if (self.retry.window > 0) and \ (elapsed >= self.retry.window) : break if self.stopped() : break if not retInfo.isAllowRetry : break retries += 1 self.warningLog("Task: '%s' failed but qualifies for retry. Total task failures (including this one): %i. Task command: '%s'" % (self.taskStr, retries, str(self.cmd))) retInfo = Bunch(retval=1, taskExitMsg="", isAllowRetry=False) time.sleep(self.retry.wait) return (retInfo.retval, retInfo.taskExitMsg) def getExitMsg(self) : """ Attempt to extract exit message from a failed command task, do not complain in case of any errors in task signal file for this case. """ msgSize = None wrapFp = open(self.wrapFile) for line in wrapFp: w = line.strip().split() if (len(w) < 6) or (w[4] != "[wrapperSignal]") : break if w[5] == "taskStderrTail" : if (len(w) == 7) : msgSize = int(w[6]) break taskExitMsg = "" if msgSize is not None : i = 0 for line in wrapFp: if i >= msgSize : break taskExitMsg += line i += 1 wrapFp.close() return taskExitMsg def getWrapFileResult(self) : """ When the task is theoretically done, go and read the task wrapper to see the actual task exit code. This is required because: 1) On SGE or similar: We have no other way to get the exit code 2) On all systems, we can distinguish between a conventional task error and other problems, such as (a) linux OOM killer (b) exception in the task wrapper itself (c) filesystem failures. """ def checkWrapFileExit(result) : """ return isError=True on error in file format only, missing or incomplete file is not considered an error and the function should not return an error for this case. 
""" if not os.path.isfile(self.wrapFile) : return for line in open(self.wrapFile) : # an incomplete line indicates that the file is still being written: if len(line) == 0 or line[-1] != '\n' : return w = line.strip().split() if len(w) < 6 : result.isError = True return if (w[4] != "[wrapperSignal]") : result.isError = True return if w[5] == "taskExitCode" : if (len(w) == 7) : result.taskExitCode = int(w[6]) return retryCount = 8 retryDelaySec = 30 wrapResult = Bunch(taskExitCode=None, isError=False) totalDelaySec = 0 for trialIndex in range(retryCount) : # if the problem occurs at 0 seconds don't bother with a warning, but # if we've gone through a full retry cycle, then the filesystem delay is # getting unusual and should be a warning: if trialIndex > 1 : msg = "No complete signal file found after %i seconds, retrying after delay. Signal file path: '%s'" % (totalDelaySec,self.wrapFile) self.flowLog(msg, logState=LogState.WARNING) if trialIndex != 0 : time.sleep(retryDelaySec) totalDelaySec += retryDelaySec checkWrapFileExit(wrapResult) if wrapResult.isError : break if wrapResult.taskExitCode is not None : break return wrapResult def getWrapperErrorMsg(self) : if os.path.isfile(self.wrapFile) : stderrList = open(self.wrapFile).readlines() taskExitMsg = ["Anomalous task wrapper stderr output. Wrapper signal file: '%s'" % (self.wrapFile), "Logging %i line(s) of task wrapper log output below:" % (len(stderrList))] linePrefix = "[taskWrapper-stderr]" taskExitMsg.extend([linePrefix + " " + line for line in stderrList]) else : taskExitMsg = ["Anomalous task wrapper condition: Wrapper signal file is missing: '%s'" % (self.wrapFile)] return taskExitMsg class LocalTaskRunner(CommandTaskRunner) : def getFullCmd(self) : return [sys.executable] + self.wrapperCmd def runOnce(self, retInfo) : # sys.stderr.write("starting subprocess call. task '%s' cmd '%s'" % (self.taskStr,self.cmd)) # sys.stderr.write("full cmd: "+" ".join(self.getFullCmd()) + "\n") wrapFp = open(self.wrapFile, "w") proc = subprocess.Popen(self.getFullCmd(), stdout=wrapFp, stderr=subprocess.STDOUT, shell=False, bufsize=1) self.infoLog("Task initiated on local node") retInfo.retval = proc.wait() wrapFp.close() wrapResult = self.getWrapFileResult() if (wrapResult.taskExitCode is None) or (wrapResult.taskExitCode != retInfo.retval): retInfo.taskExitMsg = self.getWrapperErrorMsg() retInfo.retval = 1 return retInfo elif retInfo.retval != 0 : retInfo.taskExitMsg = self.getExitMsg() retInfo.isAllowRetry = True # success! (taskWrapper, but maybe not for the task...) return retInfo class QCaller(threading.Thread) : """ Calls to both qsub and qstat go through this run() method so that we can time them out: """ def __init__(self, cmd, infoLog) : threading.Thread.__init__(self) self.setDaemon(True) self.setName("QCaller-Timeout-Thread") self.lock = threading.RLock() self.cmd = cmd self.infoLog = infoLog self.results = Bunch(isComplete=False, retval=1, outList=[]) self.proc = None self.is_kill_attempt = False def run(self) : # Note: Moved Popen() call outside of the mutex and # stopped using proc.communicate() here after # observing python interpreter bug: # http://bugs.python.org/issue13817 # # The interpreter deadlock for this issue has been # observed to block the Popen() call below when using # python 2.7.2: # # Oct 2014 - also wrapped this call with a semaphore because # of the high memory usage associated with each qsub/qstat # subprocess. 
This was causing pyflow jobs to become unstable # as they would spontaneously exceed the maximum allowed master # process memory. # GlobalSync.subprocessControl.acquire() try : tmp_proc = subprocess.Popen(self.cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, shell=False) self.lock.acquire() try: self.proc = tmp_proc # handle the case where Popen was taking its good sweet time and a killProc() was sent in the meantime: if self.is_kill_attempt: self.killProc() finally: self.lock.release() if self.is_kill_attempt: return for line in self.proc.stdout : self.results.outList.append(line) self.results.retval = self.proc.wait() finally: GlobalSync.subprocessControl.release() self.results.isComplete = True @lockMethod def killProc(self) : import signal self.is_kill_attempt = True if self.proc is None : return try: os.kill(self.proc.pid , signal.SIGTERM) self.infoLog("Sent SIGTERM to sge command process id: %i" % (self.proc.pid)) except OSError : # process ended before we could kill it (hopefully rare, but possible race condition artifact) pass class SGETaskRunner(CommandTaskRunner) : def getFullCmd(self): # qsub options: # qsubCmd = ["qsub", "-V", # import environment variables from shell "-cwd", # use current working directory "-S", sys.executable, # The taskwrapper script is python "-o", self.wrapFile, "-e", self.wrapFile] qsubCmd.extend(self.schedulerArgList) qsubCmd.extend(siteConfig.qsubResourceArg(self.nCores, self.memMb)) qsubCmd.extend(self.wrapperCmd) return tuple(qsubCmd) def setInitialRunstate(self) : self.setRunstate("queued") @lockMethod def setNewJobId(self, jobId) : """ if stopped here, this is the case where a ctrl-c was entered while the qsub command was being submitted, so we must kill the job here: """ self.jobId = jobId if self.stopped(): self._killJob() def runOnce(self, retInfo) : def qcallWithTimeouts(cmd, maxQcallAttempt=1) : maxQcallWait = 180 qcall = None for i in range(maxQcallAttempt) : qcall = QCaller(cmd,self.infoLog) qcall.start() qcall.join(maxQcallWait) if not qcall.isAlive() : break self.infoLog("Trial %i of sge command has timed out. Killing process for cmd '%s'" % ((i + 1), cmd)) qcall.killProc() self.infoLog("Finished attempting to kill sge command") return qcall.results # 1) call qsub, check for errors and retrieve taskId: # if os.path.isfile(self.wrapFile): os.remove(self.wrapFile) # write extra info, just in case we need it for post-mortem debug: qsubFile = os.path.join(os.path.dirname(self.wrapFile), "qsub.args.txt") if os.path.isfile(qsubFile): os.remove(qsubFile) qsubfp = open(qsubFile, "w") for arg in self.getFullCmd() : qsubfp.write(arg + "\n") qsubfp.close() results = qcallWithTimeouts(self.getFullCmd()) isQsubError = False self.jobId = None if len(results.outList) != 1 : isQsubError = True else : w = results.outList[0].split() if (len(w) > 3) and (w[0] == "Your") and (w[1] == "job") : self.setNewJobId(int(w[2])) else : isQsubError = True if not results.isComplete : self._killJob() # just in case... retInfo.taskExitMsg = ["Job submission failure -- qsub command timed-out"] return retInfo if isQsubError or (self.jobId is None): retInfo.taskExitMsg = ["Unexpected qsub output. 
Logging %i line(s) of qsub output below:" % (len(results.outList)) ] retInfo.taskExitMsg.extend([ "[qsub-out] " + line for line in results.outList ]) return retInfo if results.retval != 0 : retInfo.retval = results.retval retInfo.taskExitMsg = ["Job submission failure -- qsub returned exit code: %i" % (retInfo.retval)] return retInfo # No qsub errors detected and an sge job_number is acquired -- success! self.infoLog("Task submitted to sge queue with job_number: %i" % (self.jobId)) # 2) poll jobId until sge indicates it's not running or queued: # queueStatus = Bunch(isQueued=True, runStartTimeStamp=None) def checkWrapFileRunStart(result) : """ check wrapper file for a line indicating that it has transitioned from queued to running state. Allow for NFS delay or incomplete file """ if not os.path.isfile(self.wrapFile) : return for line in open(self.wrapFile) : w = line.strip().split() if (len(w) < 6) or (w[4] != "[wrapperSignal]") : # this could be incomplete flush to the signal file, so # don't treat it as error: return if w[5] == "taskStart" : result.runStartTimeStamp = timeStrToTimeStamp(w[0].strip('[]')) result.isQueued = False return # exponential polling times -- make small jobs responsive but give sge a break on long runs... ewaiter = ExpWaiter(5, 1.7, 60) pollCmd = ("/bin/bash", "--noprofile", "-o", "pipefail", "-c", "qstat -j %i | awk '/^error reason/'" % (self.jobId)) while not self.stopped(): results = qcallWithTimeouts(pollCmd, 6) isQstatError = False if results.retval != 0: if ((len(results.outList) == 2) and (results.outList[0].strip() == "Following jobs do not exist:") and (int(results.outList[1]) == self.jobId)) : break else : isQstatError = True else : if (len(results.outList) != 0) : isQstatError = True if isQstatError : if not results.isComplete : retInfo.taskExitMsg = ["The qstat command for sge job_number %i has timed out for all attempted retries" % (self.jobId)] self._killJob() else : retInfo.taskExitMsg = ["Unexpected qstat output or task has entered sge error state. Sge job_number: %i" % (self.jobId)] retInfo.taskExitMsg.extend(["Logging %i line(s) of qstat output below:" % (len(results.outList)) ]) retInfo.taskExitMsg.extend([ "[qstat-out] " + line for line in results.outList ]) # self._killJob() # leave the job there so the user can better diagnose whetever unexpected pattern has occurred return retInfo # also check to see if job has transitioned from queued to running state: if queueStatus.isQueued : checkWrapFileRunStart(queueStatus) if not queueStatus.isQueued : self.setRunstate("running", queueStatus.runStartTimeStamp) ewaiter.wait() if self.stopped() : # self._killJob() # no need, job should already have been killed at the stop() call... return retInfo lastJobId = self.jobId # if we've correctly communicated with SGE, then its roll is done here # if a job kill is required for any of the error states above, it needs to be # added before this point: self.jobId = None wrapResult = self.getWrapFileResult() if wrapResult.taskExitCode is None : retInfo.taskExitMsg = ["Sge job_number: '%s'" % (lastJobId)] retInfo.taskExitMsg.extend(self.getWrapperErrorMsg()) retInfo.retval = 1 return retInfo elif wrapResult.taskExitCode != 0 : retInfo.taskExitMsg = self.getExitMsg() retInfo.retval = wrapResult.taskExitCode retInfo.isAllowRetry = True # success! (for sge & taskWrapper, but maybe not for the task...) 
return retInfo @lockMethod def _killJob(self) : """ (possibly) asynchronous job kill """ try : isKilled = self.isKilled except AttributeError: isKilled = False if isKilled: return try : jobId = self.jobId except AttributeError: jobId = None if jobId is None: return killCmd = ["qdel", "%i" % (int(jobId))] # don't wait for or check exit code of kill cmd... just give it one try # because we want cleanup to go as quickly as possible subprocess.Popen(killCmd, shell=False) self.isKilled = True @lockMethod def stop(self) : """ overload thead stop function to provide a qdel any running tasks. """ CommandTaskRunner.stop(self) self._killJob() class TaskFileWriter(StoppableThread) : """ This class runs on a separate thread and is responsible for updating the state and info task files """ def __init__(self, writeFunc) : StoppableThread.__init__(self) # parameter copy: self.writeFunc = writeFunc # thread settings: self.setDaemon(True) self.setName("TaskFileWriter-Thread") self.isWrite = threading.Event() def run(self) : while not self.stopped() : self._writeIfSet() time.sleep(5) self.isWrite.wait() def flush(self): self._writeIfSet() def _writeIfSet(self) : if self.isWrite.isSet() : self.isWrite.clear() self.writeFunc() class TaskManager(StoppableThread) : """ This class runs on a separate thread from workflowRunner, launching jobs based on the current state of the TaskDAG """ def __init__(self, cdata, tdag) : """ @param cdata: data from WorkflowRunner instance which will be constant during the lifetime of the TaskManager, should be safe to lookup w/o locking @param tdag: task graph """ StoppableThread.__init__(self) # parameter copy: self._cdata = cdata self.tdag = tdag # thread settings: self.setDaemon(True) self.setName("TaskManager-Thread") # lock is used for function (harvest), which is checked by # the WorkflowRunner under (literally) exceptional circumstances only self.lock = threading.RLock() # rm configuration: self.freeCores = self._cdata.param.nCores self.freeMemMb = self._cdata.param.memMb self.runningTasks = {} # This is used to track 'pyflow mutexes' -- for each key only a single # task can run at once. Key is set to True if mutex is occupied. 
self.taskMutexState = {} def run(self) : """ TaskManager runs so long as there are outstanding jobs """ try: cleanEnv() while not self._isTerm() : # update status of running jobs self.tdag.isFinishedEvent.clear() self.harvestTasks() # try to launch jobs: if self.stopped() : continue self._startTasks() self.tdag.isFinishedEvent.wait(5) except: msg = getExceptionMsg() self._flowLog(msg,logState=LogState.ERROR) self._cdata.emailNotification(msg, self._flowLog) self._cdata.setTaskManagerException() def _getCommandTaskRunner(self, task) : """ assist launch of a command-task """ # shortcuts: payload = task.payload param = self._cdata.param if payload.cmd.cmd is None : # Note these should have been marked off by the TaskManager already: raise Exception("Attempting to launch checkpoint task: %s" % (task.fullLabel())) isForcedLocal = ((param.mode != "local") and (payload.isForceLocal)) # mark task resources as occupied: if not isForcedLocal : if self.freeCores != "unlimited" : if (self.freeCores < payload.nCores) : raise Exception("Not enough free cores to launch task") self.freeCores -= payload.nCores if self.freeMemMb != "unlimited" : if (self.freeMemMb < payload.memMb) : raise Exception("Not enough free memory to launch task") self.freeMemMb -= payload.memMb if payload.mutex is not None : self.taskMutexState[payload.mutex] = True TaskRunner = None if param.mode == "local" or payload.isForceLocal or payload.isCmdMakePath : TaskRunner = LocalTaskRunner elif param.mode == "sge" : TaskRunner = SGETaskRunner else : raise Exception("Can't support mode: '%s'" % (param.mode)) # # TODO: find less hacky way to handle make tasks: # taskRetry = payload.retry if payload.isCmdMakePath : taskRetry = copy.deepcopy(payload.retry) taskRetry.window = 0 if param.mode == "local" or payload.isForceLocal : launchCmdList = ["make", "-j", str(payload.nCores)] elif param.mode == "sge" : launchCmdList = siteConfig.getSgeMakePrefix(payload.nCores, payload.memMb, param.schedulerArgList) else : raise Exception("Can't support mode: '%s'" % (param.mode)) launchCmdList.extend(["-C", payload.cmd.cmd]) payload.launchCmd = Command(launchCmdList, payload.cmd.cwd, payload.cmd.env) # # each commandTaskRunner requires a unique tmp dir to write # wrapper signals to. 
TaskRunner will create this directory -- it does not bother to destroy it right now: # # split the task id into two parts to keep from adding too many files to one directory: tmpDirId1 = "%03i" % ((int(task.id) / 1000)) tmpDirId2 = "%03i" % ((int(task.id) % 1000)) taskRunnerTmpDir = os.path.join(self._cdata.wrapperLogDir, tmpDirId1, tmpDirId2) return TaskRunner(task.runStatus, self._cdata.getRunid(), task.fullLabel(), payload.launchCmd, payload.nCores, payload.memMb, taskRetry, param.isDryRun, self._cdata.taskStdoutFile, self._cdata.taskStderrFile, taskRunnerTmpDir, param.schedulerArgList, self._cdata.flowLog, task.setRunstate) def _getWorkflowTaskRunner(self, task) : """ assist launch of a workflow-task """ return WorkflowTaskRunner(task.runStatus, task.fullLabel(), task.payload.workflow, self._cdata.flowLog, task.setRunstate) def _launchTask(self, task) : """ launch a specific task """ if task.payload.type() == "command" : trun = self._getCommandTaskRunner(task) elif task.payload.type() == "workflow" : trun = self._getWorkflowTaskRunner(task) else : assert 0 self._infoLog("Launching %s: '%s' from %s" % (task.payload.desc(), task.fullLabel(), namespaceLabel(task.namespace))) trun.start() self.runningTasks[task] = trun @lockMethod def _startTasks(self) : """ determine what tasks, if any, can be started Note that the lock is here to protect self.runningTasks """ # trace through DAG, completing any empty-command checkpoints # found with all dependencies completed: (ready, completed) = self.tdag.getReadyTasks() for node in completed: if self.stopped() : return self._infoLog("Completed %s: '%s' launched from %s" % (node.payload.desc(), node.fullLabel(), namespaceLabel(node.namespace))) # launch all workflows first, then command tasks as resources # allow: ready_workflows = [r for r in ready if r.payload.type() == "workflow"] for task in ready_workflows : if self.stopped() : return self._launchTask(task) # task submission could be shutdown, eg. 
in response to a task # error: if (not self._cdata.isTaskSubmissionActive()) : return isNonLocal = (self._cdata.param.mode != "local") # start command task launch: ready_commands = [r for r in ready if r.payload.type() == "command"] ready_commands.sort(key=lambda t: (t.payload.priority, t.payload.nCores), reverse=True) for task in ready_commands : if self.stopped() : return # In a non-local run mode, "isForceLocal" tasks are not subject to # global core and memory restrictions: isForcedLocal = (isNonLocal and task.payload.isForceLocal) if not isForcedLocal : if ((self.freeCores != "unlimited") and (task.payload.nCores > self.freeCores)) : continue if ((self.freeMemMb != "unlimited") and (task.payload.memMb > self.freeMemMb)) : continue # all command tasks must obey separate mutex restrictions: if ((task.payload.mutex is not None) and (task.payload.mutex in self.taskMutexState) and (self.taskMutexState[task.payload.mutex])) : continue self._launchTask(task) @lockMethod def harvestTasks(self) : """ Check the set of running tasks to see if they've completed and update Node status accordingly: """ notrunning = set() for task in self.runningTasks.keys() : if self.stopped() : break trun = self.runningTasks[task] if not task.runStatus.isComplete.isSet() : if trun.isAlive() : continue # if not complete and thread is dead then we don't know what happened, very bad!: task.errorstate = 1 task.errorMessage = "Thread: '%s', has stopped without a traceable cause" % (trun.getName()) else : task.errorstate = task.runStatus.errorCode task.errorMessage = task.runStatus.errorMessage if task.errorstate == 0 : task.setRunstate("complete") else: task.setRunstate("error") notrunning.add(task) if not task.isError() : self._infoLog("Completed %s: '%s' launched from %s" % (task.payload.desc(), task.fullLabel(), namespaceLabel(task.namespace))) else: msg = task.getTaskErrorMsg() if self._cdata.isTaskSubmissionActive() : # if this is the first error in the workflow, then # we elaborate a bit on the workflow's response to # the error. We also send any email-notifications # for the first error only: msg.extend(["Shutting down task submission. 
Waiting for remaining tasks to complete."]) self._errorLog(msg) if self._cdata.isTaskSubmissionActive() : self._cdata.emailNotification(msg, self._flowLog) # Be sure to send notifications *before* setting error # bits, because the WorkflowRunner may decide to # immediately shutdown all tasks and pyflow threads on # the first error: self._cdata.setTaskError(task) # shortcut: param = self._cdata.param # recover task resources: for task in notrunning : if task.payload.type() == "command" : isForcedLocal = ((param.mode != "local") and (task.payload.isForceLocal)) if not isForcedLocal : if self.freeCores != "unlimited" : self.freeCores += task.payload.nCores if self.freeMemMb != "unlimited" : self.freeMemMb += task.payload.memMb if task.payload.mutex is not None : self.taskMutexState[task.payload.mutex] = False for task in notrunning: del self.runningTasks[task] @lockMethod def stop(self) : StoppableThread.stop(self) for trun in self.runningTasks.values() : trun.stop() @lockMethod def _areTasksDead(self) : for trun in self.runningTasks.values() : if trun.isAlive(): return False return True def _isTerm(self) : # check for explicit thread stop request (presumably from the workflowManager): # if this happens we exit the polling loop # if self.stopped() : while True : if self._areTasksDead() : return True time.sleep(1) # check for "regular" termination conditions: if (not self._cdata.isTaskSubmissionActive()) : return (len(self.runningTasks) == 0) else : if self.tdag.isRunComplete() : if (len(self.runningTasks) != 0) : raise Exception("Inconsistent TaskManager state: workflow appears complete but there are still running tasks") return True elif self.tdag.isRunExhausted() : return True else : return False def _flowLog(self, msg, logState) : linePrefixOut = "[TaskManager]" # if linePrefix is not None : linePrefixOut+=" "+linePrefix self._cdata.flowLog(msg, linePrefix=linePrefixOut, logState=logState) def _infoLog(self, msg) : self._flowLog(msg, logState=LogState.INFO) def _errorLog(self, msg) : self._flowLog(msg, logState=LogState.ERROR) # payloads are used to manage the different # possible actions attributed to task nodes: # class CmdPayload(object) : def __init__(self, fullLabel, cmd, nCores, memMb, priority, isForceLocal, isCmdMakePath=False, isTaskStable=True, mutex=None, retry=None) : self.cmd = cmd self.nCores = nCores self.memMb = memMb self.priority = priority self.isForceLocal = isForceLocal self.isCmdMakePath = isCmdMakePath self.isTaskStable = isTaskStable self.mutex = mutex self.retry = retry # launch command includes make/qmake wrapper for Make path commands: self.launchCmd = cmd if (cmd.cmd is None) and ((nCores != 0) or (memMb != 0)) : raise Exception("Null tasks should not have resource requirements. 
task: '%s'" % (fullLabel)) def type(self) : return "command" def desc(self) : return "command task" class WorkflowPayload(object) : def __init__(self, workflow) : self.workflow = workflow self.isTaskStable = True def type(self) : return "workflow" def name(self) : if self.workflow is None : return "None" else : return self.workflow._whoami() def desc(self) : return "sub-workflow task" class TaskNode(object) : """ Represents an individual task in the task graph """ def __init__(self, lock, init_id, namespace, label, payload, isContinued, isFinishedEvent, isWriteTaskStatus) : self.lock = lock self.id = init_id self.namespace = namespace self.label = label self.payload = payload self.isContinued = isContinued self.isWriteTaskStatus = isWriteTaskStatus # if true, do not execute this task or honor it as a dependency for child tasks self.isIgnoreThis = False # if true, set the ignore state for all children of this task to true self.isIgnoreChildren = False # if true, this task and its dependents will be automatically marked as completed (until # a startFromTasks node is found) self.isAutoCompleted = False # task is reset to waiting runstate in a continued run self.isReset = False self.parents = set() self.children = set() self.runstateUpdateTimeStamp = time.time() if self.isContinued: self.runstate = "complete" else: self.runstate = "waiting" self.errorstate = 0 # errorMessage is used by sub-workflow tasks, but not by command taks: self.errorMessage = "" # This is a link to the live status object updated by TaskRunner: self.runStatus = RunningTaskStatus(isFinishedEvent) def __str__(self) : msg = "TASK id: %s state: %s error: %i" % (self.fullLabel(), self.runstate, self.errorstate) return msg def fullLabel(self) : return namespaceJoin(self.namespace, self.label) @lockMethod def isDone(self) : "task has gone as far as it can" return ((self.runstate == "error") or (self.runstate == "complete")) @lockMethod def isError(self) : "true if an error occurred in this node" return ((self.errorstate != 0) or (self.runstate == "error")) @lockMethod def isComplete(self) : "task completed without error" return ((self.errorstate == 0) and (self.runstate == "complete")) @lockMethod def isReady(self) : "task is ready to be run" retval = ((self.runstate == "waiting") and (self.errorstate == 0) and (not self.isIgnoreThis)) if retval : for p in self.parents : if p.isIgnoreThis : continue if not p.isComplete() : retval = False break return retval def _isDeadWalker(self, searched) : "recursive helper function for isDead()" # the fact that you're still searching means that it must have returned False last time: if self in searched : return False searched.add(self) if self.isError() : return True if self.isComplete() : return False for p in self.parents : if p._isDeadWalker(searched) : return True return False @lockMethod def isDead(self) : """ If true, there's no longer a point to waiting for this task, because it either has an error or there is an error in an upstream dependency """ # searched is used to restrict the complexity of this # operation on large graphs: searched = set() return self._isDeadWalker(searched) @lockMethod def setRunstate(self, runstate, updateTimeStamp=None) : """ updateTimeStamp is only supplied in the case where the state transition time is interestingly different than the function call time. This can happen with the state update comes from a polling function with a long poll interval. 
""" if runstate not in TaskNodeConstants.validRunstates : raise Exception("Can't set TaskNode runstate to %s" % (runstate)) if updateTimeStamp is None : self.runstateUpdateTimeStamp = time.time() else : self.runstateUpdateTimeStamp = updateTimeStamp self.runstate = runstate self.isWriteTaskStatus.set() #def getParents(self) : # return self.parents #def getChildren(self) : # return self.children @lockMethod def getTaskErrorMsg(self) : """ generate consistent task error message from task state """ if not self.isError() : return [] msg = "Failed to complete %s: '%s' launched from %s" % (self.payload.desc(), self.fullLabel(), namespaceLabel(self.namespace)) if self.payload.type() == "command" : msg += ", error code: %s, command: '%s'" % (str(self.errorstate), str(self.payload.launchCmd)) elif self.payload.type() == "workflow" : msg += ", failed sub-workflow classname: '%s'" % (self.payload.name()) else : assert 0 msg = lister(msg) if self.errorMessage != "" : msg2 = ["Error Message:"] msg2.extend(lister(self.errorMessage)) linePrefix = "[%s] " % (self.fullLabel()) for i in range(len(msg2)) : msg2[i] = linePrefix + msg2[i] msg.extend(msg2) return msg class TaskDAG(object) : """ Holds all tasks and their dependencies. Also responsible for task state persistence/continue across interrupted runs. Object is accessed by both the workflow and taskrunner threads, so it needs to be thread-safe. """ def __init__(self, isContinue, isForceContinue, isDryRun, taskInfoFile, taskStateFile, workflowClassName, startFromTasks, ignoreTasksAfter, resetTasks, flowLog) : """ No other object gets to access the taskStateFile, file locks are not required (but thread locks are) """ self.isContinue = isContinue self.isForceContinue = isForceContinue self.isDryRun = isDryRun self.taskInfoFile = taskInfoFile self.taskStateFile = taskStateFile self.workflowClassName = workflowClassName self.startFromTasks = startFromTasks self.ignoreTasksAfter = ignoreTasksAfter self.resetTasks = resetTasks self.flowLog = flowLog # unique id for each task in each run -- not persistent across continued runs: self.taskId = 0 # as tasks are added, occasionally spool task info to disk, and record the last # task index written + 1 self.lastTaskIdWritten = 0 # it will be easier for people to read the task status file if # the tasks are in approximately the same order as they were # added by the workflow: self.addOrder = [] self.labelMap = {} self.headNodes = set() self.tailNodes = set() self.lock = threading.RLock() # this event can be used to optionally accelerate the task cycle # when running in modes where task can set this event on completion # (ie. 
local mode but not sge), if this isn't set the normal polling # cycle applies self.isFinishedEvent = threading.Event() self.isWriteTaskInfo = None self.isWriteTaskStatus = None @lockMethod def isTaskPresent(self, namespace, label) : return ((namespace, label) in self.labelMap) @lockMethod def getTask(self, namespace, label) : if (namespace, label) in self.labelMap : return self.labelMap[(namespace, label)] return None @lockMethod def getHeadNodes(self) : "all tasks with no parents" return list(self.headNodes) @lockMethod def getTailNodes(self) : "all tasks with no (runnable) children" return list(self.tailNodes) @lockMethod def getAllNodes(self, namespace="") : "get all nodes in this namespace" retval = [] for (taskNamespace, taskLabel) in self.addOrder : if namespace != taskNamespace : continue node=self.labelMap[(taskNamespace, taskLabel)] if node.isIgnoreThis : continue retval.append(node) return retval def _isRunExhaustedNode(self, node, searched) : # the fact that you're still searching means that it must have returned true last time: if node in searched : return True searched.add(node) if not node.isIgnoreThis : if not node.isDone() : return False if node.isComplete() : for c in node.children : if not self._isRunExhaustedNode(c, searched) : return False return True @lockMethod def isRunExhausted(self) : """ Returns true if the run is as complete as possible due to errors """ # searched is used to restrict the complexity of this # operation on large graphs: searched = set() for node in self.getHeadNodes() : if not self._isRunExhaustedNode(node,searched) : return False return True @lockMethod def isRunComplete(self) : "returns true if run is complete and error free" for node in self.labelMap.values(): if node.isIgnoreThis : continue if not node.isComplete() : return False return True def _getReadyTasksFromNode(self, node, ready, searched) : "helper function for getReadyTasks" if node.isIgnoreThis : return if node in searched : return searched.add(node) if node.isReady() : ready.add(node) else: if not node.isComplete() : for c in node.parents : self._getReadyTasksFromNode(c, ready, searched) @lockMethod def getReadyTasks(self) : """ Go through DAG from the tail nodes and find all tasks which have all prerequisites completed: """ completed = self.markCheckPointsComplete() ready = set() # searched is used to restrict the complexity of this # operation on large graphs: searched = set() for node in self.getTailNodes() : self._getReadyTasksFromNode(node, ready, searched) return (list(ready), list(completed)) def _markCheckPointsCompleteFromNode(self, node, completed, searched) : "helper function for markCheckPointsComplete" if node.isIgnoreThis : return if node in searched : return searched.add(node) if node.isComplete() : return for c in node.parents : self._markCheckPointsCompleteFromNode(c, completed, searched) if (node.payload.type() == "command") and (node.payload.cmd.cmd is None) and (node.isReady()) : node.setRunstate("complete") completed.add(node) @lockMethod def markCheckPointsComplete(self) : """ traverse from tail nodes up, marking any checkpoint tasks (task.cmd=None) jobs that are ready as complete, return set of newly completed tasks: """ completed = set() # searched is used to restrict the complexity of this # operation on large graphs: searched = set() for node in self.getTailNodes() : self._markCheckPointsCompleteFromNode(node, completed, searched) return completed @lockMethod def addTask(self, namespace, label, payload, dependencies, isContinued=False) : """ add new task to 
the DAG isContinued indicates the task is being read from state history during a continuation run """ # internal data structures use these separately, but for logging we # create one string: fullLabel = namespaceJoin(namespace, label) # first check to see if task exists in DAG already, this is not allowed unless # we are continuing a previous run, in which case it's allowed once: if not isContinued and self.isTaskPresent(namespace, label): if self.isContinue and self.labelMap[(namespace, label)].isContinued: # confirm that task is a match, flip off the isContinued flag and return: task = self.labelMap[(namespace, label)] parentLabels = set([p.label for p in task.parents]) excPrefix = "Task: '%s' does not match previous definition defined in '%s'." % (fullLabel, self.taskInfoFile) if task.payload.type() != payload.type() : msg = excPrefix + " New/old payload type: '%s'/'%s'" % (payload.type(), task.payload.type()) raise Exception(msg) if payload.isTaskStable : if (payload.type() == "command") and (str(task.payload.cmd) != str(payload.cmd)) : msg = excPrefix + " New/old command: '%s'/'%s'" % (str(payload.cmd), str(task.payload.cmd)) if self.isForceContinue : self.flowLog(msg,logState=LogState.WARNING) else : raise Exception(msg) if (parentLabels != set(dependencies)) : msg = excPrefix + " New/old dependencies: '%s'/'%s'" % (",".join(dependencies), ",".join(parentLabels)) if self.isForceContinue : self.flowLog(msg,logState=LogState.WARNING) else : raise Exception(msg) if payload.type() == "command" : task.payload.cmd = payload.cmd task.payload.isCmdMakePath = payload.isCmdMakePath task.isContinued = False return else: raise Exception("Task: '%s' is already in TaskDAG" % (fullLabel)) task = TaskNode(self.lock, self.taskId, namespace, label, payload, isContinued, self.isFinishedEvent, self.isWriteTaskStatus) self.taskId += 1 self.addOrder.append((namespace, label)) self.labelMap[(namespace, label)] = task for d in dependencies : parent = self.getTask(namespace, d) if parent is task : raise Exception("Task: '%s' cannot specify its own task label as a dependency" % (fullLabel)) if parent is None : raise Exception("Dependency: '%s' for task: '%s' does not exist in TaskDAG" % (namespaceJoin(namespace, d), fullLabel)) task.parents.add(parent) parent.children.add(task) if isContinued : isReset=False if label in self.resetTasks : isReset=True else : for p in task.parents : if p.isReset : isReset = True break if isReset : task.setRunstate("waiting") task.isReset=True if not isContinued: self.isWriteTaskInfo.set() self.isWriteTaskStatus.set() # determine if this is an ignoreTasksAfter node if label in self.ignoreTasksAfter : task.isIgnoreChildren = True # determine if this is an ignoreTasksAfter descendent for p in task.parents : if p.isIgnoreChildren : task.isIgnoreThis = True task.isIgnoreChildren = True break # update headNodes if len(task.parents) == 0 : self.headNodes.add(task) # update isAutoCompleted: if (self.startFromTasks and (label not in self.startFromTasks)) : task.isAutoCompleted = True for p in task.parents : if not p.isAutoCompleted : task.isAutoCompleted = False break # in case of no-parents, also check sub-workflow node if task.isAutoCompleted and (len(task.parents) == 0) and (namespace != ""): wval=namespace.rsplit(namespaceSep,1) if len(wval) == 2 : (workflowNamespace,workflowLabel)=wval else : workflowNamespace="" workflowLabel=wval[0] workflowParent = self.labelMap[(workflowNamespace, workflowLabel)] if not workflowParent.isAutoCompleted : task.isAutoCompleted = False if 
task.isAutoCompleted : task.setRunstate("complete") # update tailNodes: if not task.isIgnoreThis : self.tailNodes.add(task) for p in task.parents : if p in self.tailNodes : self.tailNodes.remove(p) # check dependency runState consistency: if task.isDone() : for p in task.parents : if p.isIgnoreThis : continue if p.isComplete() : continue raise Exception("Task: '%s' has invalid continuation state. Task dependencies are incomplete") @lockMethod def writeTaskStatus(self) : """ (atomic on *nix) update of the runstate and errorstate for all tasks """ # don't write task status during dry runs: if self.isDryRun : return tmpFile = self.taskStateFile + ".update.incomplete" tmpFp = open(tmpFile, "w") tmpFp.write(taskStateHeader()) for (namespace, label) in self.addOrder : node = self.labelMap[(namespace, label)] runstateUpdateTimeStr = timeStampToTimeStr(node.runstateUpdateTimeStamp) tmpFp.write("%s\t%s\t%s\t%i\t%s\n" % (label, namespace, node.runstate, node.errorstate, runstateUpdateTimeStr)) tmpFp.close() forceRename(tmpFile, self.taskStateFile) @lockMethod def getTaskStatus(self) : """ Enumerate status of command tasks (but look at sub-workflows to determine if specification is complete) """ val = Bunch(waiting=0, queued=0, running=0, complete=0, error=0, isAllSpecComplete=True, longestQueueSec=0, longestRunSec=0, longestQueueName="", longestRunName="") currentSec = time.time() for (namespace, label) in self.addOrder : node = self.labelMap[(namespace, label)] # special check just for workflow tasks: if node.payload.type() == "workflow" : if not node.runStatus.isSpecificationComplete.isSet() : val.isAllSpecComplete = False # the rest of this enumeration is for command tasks only: continue taskTime = int(currentSec - node.runstateUpdateTimeStamp) if node.runstate == "waiting" : val.waiting += 1 elif node.runstate == "queued" : val.queued += 1 if val.longestQueueSec < taskTime : val.longestQueueSec = taskTime val.longestQueueName = node.fullLabel() elif node.runstate == "running" : val.running += 1 if val.longestRunSec < taskTime : val.longestRunSec = taskTime val.longestRunName = node.fullLabel() elif node.runstate == "complete" : val.complete += 1 elif node.runstate == "error" : val.error += 1 return val @lockMethod def writeTaskInfoOld(self, task) : """ appends a description of new tasks to the taskInfo file """ depstring = "" if len(task.parents) : depstring = ",".join([p.label for p in task.parents]) cmdstring = "" nCores = "0" memMb = "0" priority = "0" isForceLocal = "0" payload = task.payload cwdstring = "" if payload.type() == "command" : cmdstring = str(payload.cmd) nCores = str(payload.nCores) memMb = str(payload.memMb) priority = str(payload.priority) isForceLocal = boolToStr(payload.isForceLocal) cwdstring = payload.cmd.cwd elif payload.type() == "workflow" : cmdstring = payload.name() else : assert 0 taskline = "\t".join((task.label, task.namespace, payload.type(), nCores, memMb, priority, isForceLocal, depstring, cwdstring, cmdstring)) fp = open(self.taskInfoFile, "a") fp.write(taskline + "\n") fp.close() @lockMethod def writeTaskInfo(self) : """ appends a description of all new tasks to the taskInfo file """ def getTaskLineFromTask(task) : """ translate a task into its single-line summary format in the taskInfo file """ depstring = "" if len(task.parents) : depstring = ",".join([p.label for p in task.parents]) cmdstring = "" nCores = "0" memMb = "0" priority = "0" isForceLocal = "0" payload = task.payload cwdstring = "" if payload.type() == "command" : cmdstring = str(payload.cmd) 
nCores = str(payload.nCores) memMb = str(payload.memMb) priority = str(payload.priority) isForceLocal = boolToStr(payload.isForceLocal) cwdstring = payload.cmd.cwd elif payload.type() == "workflow" : cmdstring = payload.name() else : assert 0 return "\t".join((task.label, task.namespace, payload.type(), nCores, memMb, priority, isForceLocal, depstring, cwdstring, cmdstring)) assert (self.lastTaskIdWritten <= self.taskId) if self.lastTaskIdWritten == self.taskId : return newTaskLines = [] while self.lastTaskIdWritten < self.taskId : task = self.labelMap[self.addOrder[self.lastTaskIdWritten]] newTaskLines.append(getTaskLineFromTask(task)) self.lastTaskIdWritten += 1 fp = open(self.taskInfoFile, "a") for taskLine in newTaskLines : fp.write(taskLine + "\n") fp.close() # workflowRunner: # # special exception used for the case where pyflow data dir is already in use: # class DataDirException(Exception) : def __init__(self, msg) : Exception.__init__(self) self.msg = msg class WorkflowRunnerThreadSharedData(object) : """ All data used by the WorkflowRunner which will be constant over the lifetime of a TaskManager instance. All of the information in this class will be accessed by both threads without locking. """ def __init__(self) : self.lock = threading.RLock() self.pid = os.getpid() self.runcount = 0 self.cwd = os.path.abspath(os.getcwd()) self.markFile = None # we potentially have to log before the logfile is setup (eg # an exception is thrown reading run parameters), so provide # an explicit notification that there's no log file: self.flowLogFp = None self.warningLogFp = None self.errorLogFp = None self.resetRun() # two elements required to implement a nohup-like behavior: self.isHangUp = threading.Event() self._isStderrAlive = True @staticmethod def _validateFixParam(param): """ validate and refine raw run() parameters for use by workflow """ param.mailTo = setzer(param.mailTo) param.schedulerArgList = lister(param.schedulerArgList) if param.successMsg is not None : if not isString(param.successMsg) : raise Exception("successMsg argument to WorkflowRunner.run() is not a string") # create combined task retry settings manager: param.retry=RetryParam(param.mode, param.retryMax, param.retryWait, param.retryWindow, param.retryMode) # setup resource parameters if param.nCores is None : param.nCores = RunMode.data[param.mode].defaultCores # ignore total available memory settings in non-local modes: if param.mode != "local" : param.memMb = "unlimited" if param.mode == "sge" : if siteConfig.maxSGEJobs != "unlimited" : if ((param.nCores == "unlimited") or (int(param.nCores) > int(siteConfig.maxSGEJobs))) : param.nCores = int(siteConfig.maxSGEJobs) if param.nCores != "unlimited" : param.nCores = int(param.nCores) if param.nCores < 1 : raise Exception("Invalid run mode nCores argument: %s. Value must be 'unlimited' or an integer no less than 1" % (param.nCores)) if param.memMb is None : if param.nCores == "unlimited" : param.memMb = "unlimited" mpc = RunMode.data[param.mode].defaultMemMbPerCore if mpc == "unlimited" : param.memMb = "unlimited" else : param.memMb = mpc * param.nCores elif param.memMb != "unlimited" : param.memMb = int(param.memMb) if param.memMb < 1 : raise Exception("Invalid run mode memMb argument: %s. Value must be 'unlimited' or an integer no less than 1" % (param.memMb)) # verify/normalize input settings: if param.mode not in RunMode.data.keys() : raise Exception("Invalid mode argument '%s'. Accepted modes are {%s}." 
\ % (param.mode, ",".join(RunMode.data.keys()))) if param.mode == "sge" : # TODO not-portable to windows (but is this a moot point -- all of sge mode is non-portable, no?): def checkSgeProg(prog) : proc = subprocess.Popen(("which", prog), stdout=open(os.devnull, "w"), shell=False) retval = proc.wait() if retval != 0 : raise Exception("Run mode is sge, but no %s in path" % (prog)) checkSgeProg("qsub") checkSgeProg("qstat") stateDir = os.path.join(param.dataDir, "state") if param.isContinue == "Auto" : param.isContinue = os.path.exists(stateDir) if param.isContinue : if not os.path.exists(stateDir) : raise Exception("Cannot continue run without providing a pyflow dataDir containing previous state.: '%s'" % (stateDir)) for email in param.mailTo : if not verifyEmailAddy(email): raise Exception("Invalid email address: '%s'" % (email)) def _setCustomLogs(self) : if (self.warningLogFp is None) and (self.param.warningLogFile is not None) : self.warningLogFp = open(self.param.warningLogFile,"w") if (self.errorLogFp is None) and (self.param.errorLogFile is not None) : self.errorLogFp = open(self.param.errorLogFile,"w") def setupNewRun(self, param) : self.param = param # setup log file-handle first, then run the rest of parameter validation: # (hold this file open so that we can still log if pyflow runs out of filehandles) self.param.dataDir = os.path.abspath(self.param.dataDir) self.param.dataDir = os.path.join(self.param.dataDir, "pyflow.data") logDir = os.path.join(self.param.dataDir, "logs") ensureDir(logDir) self.flowLogFile = os.path.join(logDir, "pyflow_log.txt") self.flowLogFp = open(self.flowLogFile, "a") # run remaining validation self._validateFixParam(self.param) # initial per-run data self.taskErrors = set() # this set actually contains every task that failed -- tasks contain all of their own error info self.isTaskManagerException = False # create data directory if it does not exist ensureDir(self.param.dataDir) # check whether a process already exists: self.markFile = os.path.join(self.param.dataDir, "active_pyflow_process.txt") if os.path.exists(self.markFile) : # Non-conventional logging situation -- another pyflow process is possibly using this same data directory, so we want # to log to stderr (even if the user has set isQuiet) and not interfere with the other process's log self.flowLogFp = None self.param.isQuiet = False msg = [ "Can't initialize pyflow run because the data directory appears to be in use by another process.", "\tData directory: '%s'" % (self.param.dataDir), "\tIt is possible that a previous process was abruptly interrupted and did not clean up properly. To determine if this is", "\tthe case, please refer to the file '%s'" % (self.markFile), "\tIf this file refers to a non-running process, delete the file and relaunch pyflow,", "\totherwise, specify a new data directory. At the API-level this can be done with the dataDirRoot option." ] self.markFile = None # this keeps pyflow from deleting this file, as it normally would on exit raise DataDirException(msg) else : mfp = open(self.markFile, "w") msg = """ This file provides details of the pyflow instance currently using this data directory. During normal pyflow run termination (due to job completion, error, SIGINT, etc...), this file should be deleted. 
If this file is present it should mean either: (1) the data directory is still in use by a running workflow (2) a sudden job failure occurred that prevented normal run termination The associated pyflow job details are as follows: """ mfp.write(msg + "\n") for line in self.getInfoMsg() : mfp.write(line + "\n") mfp.write("\n") mfp.close() stateDir = os.path.join(self.param.dataDir, "state") ensureDir(stateDir) # setup other instance data: self.runcount += 1 # initialize directories self.wrapperLogDir = os.path.join(logDir, "tmp", "taskWrapperLogs") ensureDir(self.wrapperLogDir) stackDumpLogDir = os.path.join(logDir, "tmp", "stackDumpLog") ensureDir(stackDumpLogDir) # initialize filenames: taskStateFileName = "pyflow_tasks_runstate.txt" taskInfoFileName = "pyflow_tasks_info.txt" self.taskStdoutFile = os.path.join(logDir, "pyflow_tasks_stdout_log.txt") self.taskStderrFile = os.path.join(logDir, "pyflow_tasks_stderr_log.txt") self.taskStateFile = os.path.join(stateDir, taskStateFileName) self.taskInfoFile = os.path.join(stateDir, taskInfoFileName) self.taskDotScriptFile = os.path.join(stateDir, "make_pyflow_task_graph.py") self.stackDumpLogFile = os.path.join(stackDumpLogDir, "pyflow_stack_dump.txt") # empty file: if not self.param.isContinue: fp = open(self.taskInfoFile, "w") fp.write(taskInfoHeader()) fp.close() self._setCustomLogs() # finally write dot task graph creation script: # # this could fail because of script permission settings, buk it is not critical for # workflow completion so we get away with a warning try : writeDotScript(self.taskDotScriptFile, taskInfoFileName, taskStateFileName, self.param.workflowClassName) except OSError: msg = ["Failed to write task graph visualization script to %s" % (self.taskDotScriptFile)] self.flowLog(msg,logState=LogState.WARNING) def resetRun(self) : """ Anything that needs to be cleaned up at the end of a run Right now this just make sure we don't log to the previous run's log file """ self.flowLogFile = None self.param = None if self.flowLogFp is not None : self.flowLogFp.close() self.flowLogFp = None if self.warningLogFp is not None : self.warningLogFp.close() self.warningLogFp = None if self.errorLogFp is not None : self.errorLogFp.close() self.errorLogFp = None if self.markFile is not None : if os.path.exists(self.markFile) : os.unlink(self.markFile) self.markFile = None def getRunid(self) : return "%s_%s" % (self.pid, self.runcount) @lockMethod def setTaskError(self, task) : self.taskErrors.add(task) @lockMethod def isTaskError(self) : return (len(self.taskErrors) != 0) def isTaskSubmissionActive(self) : """ wait() pollers need to know if task submission has been shutdown to implement sane behavior. 
""" return (not self.isTaskError()) @lockMethod def setTaskManagerException(self) : self.isTaskManagerException = True @lockMethod def flowLog(self, msg, linePrefix=None, logState = LogState.INFO) : linePrefixOut = "[%s]" % (self.getRunid()) if linePrefix is not None : linePrefixOut += " " + linePrefix if (logState == LogState.ERROR) or (logState == LogState.WARNING) : linePrefixOut += " [" + LogState.toString(logState) + "]" ofpList = [] isAddStderr = (self._isStderrAlive and ((self.flowLogFp is None) or (self.param is None) or (not self.param.isQuiet))) if isAddStderr: ofpList.append(sys.stderr) if self.flowLogFp is not None : ofpList.append(self.flowLogFp) # make a last ditch effort to open the special error logs if these are not available already: try : self._setCustomLogs() except : pass if (self.warningLogFp is not None) and (logState == LogState.WARNING) : ofpList.append(self.warningLogFp) if (self.errorLogFp is not None) and (logState == LogState.ERROR) : ofpList.append(self.errorLogFp) if len(ofpList) == 0 : return retval = log(ofpList, msg, linePrefixOut) # check if stderr stream failed. If so, turn it off for the remainder of run (assume terminal hup): if isAddStderr and (not retval[0]) : if self.isHangUp.isSet() : self._isStderrAlive = False def getInfoMsg(self) : """ return a string array with general stats about this run """ msg = [ "%s\t%s" % ("pyFlowClientWorkflowClass:", self.param.workflowClassName), "%s\t%s" % ("pyFlowVersion:", __version__), "%s\t%s" % ("pythonVersion:", pythonVersion), "%s\t%s" % ("Runid:", self.getRunid()), "%s\t%s UTC" % ("RunStartTime:", self.param.logRunStartTime), "%s\t%s UTC" % ("NotificationTime:", timeStrNow()), "%s\t%s" % ("HostName:", siteConfig.getHostName()), "%s\t%s" % ("WorkingDir:", self.cwd), "%s\t%s" % ("DataDir:", self.param.dataDir), "%s\t'%s'" % ("ProcessCmdLine:", cmdline()) ] return msg def emailNotification(self, msgList, emailErrorLog=None) : # # email addy might not be setup yet: # # if errorLog is specified, then an email send exception will # be handled and logged, otherwise the exception will be re-raised # down to the caller. # if self.param is None : return if len(self.param.mailTo) == 0 : return if not isLocalSmtp() : if emailErrorLog : msg = ["email notification failed, no local smtp server"] emailErrorLog(msg,logState=LogState.WARNING) return mailTo = sorted(list(self.param.mailTo)) subject = "pyflow notification from %s run: %s" % (self.param.workflowClassName, self.getRunid()) msg = msgListToMsg(msgList) fullMsgList = ["Message:", '"""', msg, '"""'] fullMsgList.extend(self.getInfoMsg()) import smtplib try: sendEmail(mailTo, siteConfig.mailFrom, subject, fullMsgList) except smtplib.SMTPException : if emailErrorLog is None : raise msg = ["email notification failed"] eMsg = lister(getExceptionMsg()) msg.extend(eMsg) emailErrorLog(msg,logState=LogState.WARNING) class WorkflowRunner(object) : """ This object is designed to be inherited by a class in client code. This inheriting class can override the L{workflow()} method to define the tasks that need to be run and their dependencies. The inheriting class defining a workflow can be executed in client code by calling the WorkflowRunner.run() method. This method provides various run options such as whether to run locally or on sge. 
""" _maxWorkflowRecursion = 30 """ This limit protects against a runaway forkbomb in case a workflow task recursively adds itself w/o termination: """ def run(self, mode="local", dataDirRoot=".", isContinue=False, isForceContinue=False, nCores=None, memMb=None, isDryRun=False, retryMax=2, retryWait=90, retryWindow=360, retryMode="nonlocal", mailTo=None, updateInterval=60, schedulerArgList=None, isQuiet=False, warningLogFile=None, errorLogFile=None, successMsg=None, startFromTasks=None, ignoreTasksAfter=None, resetTasks=None) : """ Call this method to execute the workflow() method overridden in a child class and specify the resources available for the workflow to run. Task retry behavior: Retry attempts will be made per the arguments below for distributed workflow runs (eg. sge run mode). Note this means that retries will be attempted for tasks with an 'isForceLocal' setting during distributed runs. Task error behavior: When a task error occurs the task manager stops submitting new tasks and allows all currently running tasks to complete. Note that in this case 'task error' means that the task could not be completed after exhausting attempted retries. Workflow exception behavior: Any exceptions thrown from the python code of classes derived from WorkflowRunner will be logged and trigger notification (e.g. email). The exception will not come down to the client's stack. In sub-workflows the exception is handled exactly like a task error (ie. task submission is shut-down and remaining tasks are allowed to complete). An exception in the master workflow will lead to workflow termination without waiting for currently running tasks to finish. @return: 0 if all tasks completed successfully and 1 otherwise @param mode: Workflow run mode. Current options are (local|sge) @param dataDirRoot: All workflow data is written to {dataDirRoot}/pyflow.data/ These include workflow/task logs, persistent task state data, and summary run info. Two workflows cannot simultaneously use the same dataDir. @param isContinue: If True, continue workflow from a previous incomplete run based on the workflow data files. You must use the same dataDirRoot as a previous run for this to work. Set to 'Auto' to have the run continue only if the previous dataDir exists. (default: False) @param isForceContinue: Only used if isContinue is not False. Normally when isContinue is run, the commands of completed tasks are checked to ensure they match. When isForceContinue is true, failing this check is reduced from an error to a warning @param nCores: Total number of cores available, or 'unlimited', sge is currently configured for a maximum job count of %s, any value higher than this in sge mode will be reduced to the maximum. (default: 1 for local mode, %s for sge mode) @param memMb: Total memory available (in megabytes), or 'unlimited', Note that this value will be ignored in non-local modes (such as sge), because in this case total memory available is expected to be known by the scheduler for each node in its cluster. (default: %i*nCores for local mode, 'unlimited' for sge mode) @param isDryRun: List the commands to be executed without running them. Note that recursive and dynamic workflows will potentially have to account for the fact that expected files will be missing -- here 'recursive workflow' refers to any workflow which uses the addWorkflowTask() method, and 'dynamic workflow' refers to any workflow which uses the waitForTasks() method. These types of workflows can query this status with the isDryRun() to make accomadations. 
(default: False) @param retryMax: Maximum number of task retries @param retryWait: Delay (in seconds) before resubmitting task @param retryWindow: Maximum time (in seconds) after the first task submission in which retries are allowed. A value of zero or less puts no limit on the time when retries will be attempted. Retries are always allowed (up to retryMax times) for failed make jobs. @param retryMode: Modes are 'nonlocal' and 'all'. For 'nonlocal' retries are not attempted in local run mode. For 'all' retries are attempted for any run mode. The default mode is 'nonlocal'. @param mailTo: An email address or container of email addresses. Notification will be sent to each email address when either (1) the run successfully completes (2) the first task error occurs or (3) an unhandled exception is raised. The intention is to send one status message per run() indicating either success or the reason for failure. This should occur for all cases except a host hardware/power failure. Note that mail comes from '%s' (configurable), which may be classified as junk-mail by your system. @param updateInterval: How often (in minutes) pyflow should log a status update message summarizing the run status. Set this to zero or less to turn the update off. @param schedulerArgList: A list of arguments to be passed on to an external scheduler when non-local modes are used (e.g. in sge mode you could pass schedulerArgList=['-q','work.q'] to put the whole pyflow job into the sge work.q queue) @param isQuiet: Don't write any logging output to stderr (but still write log to pyflow_log.txt) @param warningLogFile: Replicate all warning messages to the specified file. Warning messages will still appear in the standard logs, this file will contain a subset of the log messages pertaining to warnings only. @param errorLogFile: Replicate all error messages to the specified file. Error messages will still appear in the standard logs, this file will contain a subset of the log messages pertaining to errors only. It should be empty for a successful run. @param successMsg: Provide a string containing a custom message which will be prepended to pyflow's standard success notification. This message will appear in the log and any configured notifications (e.g. email). The message may contain linebreaks. @param startFromTasks: A task label or container of task labels. Any tasks which are not in this set or descendants of this set will be marked as completed. @type startFromTasks: A single string, or set, tuple or list of strings @param ignoreTasksAfter: A task label or container of task labels. All descendants of these task labels will be ignored. @type ignoreTasksAfter: A single string, or set, tuple or list of strings @param resetTasks: A task label or container of task labels. These tasks and all of their descendants will be reset to the "waiting" state to be re-run. Note this option will only affect a workflow which has been continued from a previous run. This will not override any nodes altered by the startFromTasks setting in the case that both options are used together. @type resetTasks: A single string, or set, tuple or list of strings """ # Setup pyflow signal handlers: # inHandlers = Bunch(isSet=False) class SigTermException(Exception) : pass def sigtermHandler(_signum, _frame) : raise SigTermException def sighupHandler(_signum, _frame) : self._warningLog("pyflow received hangup signal.
pyflow will continue, but this signal may still interrupt running tasks.") # tell cdata to turn off any tty writes: self._cdata().isHangUp.set() def set_pyflow_sig_handlers() : import signal if not inHandlers.isSet : inHandlers.sigterm = signal.getsignal(signal.SIGTERM) if not isWindows() : inHandlers.sighup = signal.getsignal(signal.SIGHUP) inHandlers.isSet = True try: signal.signal(signal.SIGTERM, sigtermHandler) if not isWindows() : signal.signal(signal.SIGHUP, sighupHandler) except ValueError: if isMainThread() : raise else : self._warningLog("pyflow has not been initialized on main thread, all custom signal handling disabled") def unset_pyflow_sig_handlers() : import signal if not inHandlers.isSet : return try : signal.signal(signal.SIGTERM, inHandlers.sigterm) if not isWindows() : signal.signal(signal.SIGHUP, inHandlers.sighup) except ValueError: if isMainThread() : raise else: pass # if return value is somehow not set after this then something bad happened, so init to 1: retval = 1 try: set_pyflow_sig_handlers() def exceptionMessaging(prefixMsg=None) : msg = lister(prefixMsg) eMsg = lister(getExceptionMsg()) msg.extend(eMsg) self._notify(msg,logState=LogState.ERROR) try: self.runStartTimeStamp = time.time() self.updateInterval = int(updateInterval) # a container to haul all the run() options around in: param = Bunch(mode=mode, dataDir=dataDirRoot, isContinue=isContinue, isForceContinue=isForceContinue, nCores=nCores, memMb=memMb, isDryRun=isDryRun, retryMax=retryMax, retryWait=retryWait, retryWindow=retryWindow, retryMode=retryMode, mailTo=mailTo, logRunStartTime=timeStampToTimeStr(self.runStartTimeStamp), workflowClassName=self._whoami(), schedulerArgList=schedulerArgList, isQuiet=isQuiet, warningLogFile=warningLogFile, errorLogFile=errorLogFile, successMsg=successMsg, startFromTasks=setzer(startFromTasks), ignoreTasksAfter=setzer(ignoreTasksAfter), resetTasks=setzer(resetTasks)) retval = self._runWorkflow(param) except SigTermException: msg = "Received termination signal, shutting down running tasks..." self._killWorkflow(msg) except KeyboardInterrupt: msg = "Keyboard Interrupt, shutting down running tasks..." self._killWorkflow(msg) except DataDirException, e: self._notify(e.msg,logState=LogState.ERROR) except: exceptionMessaging() raise finally: # last set: disconnect the workflow log: self._cdata().resetRun() unset_pyflow_sig_handlers() return retval # configurable elements of docstring run.__doc__ = run.__doc__ % (siteConfig.maxSGEJobs, RunMode.data["sge"].defaultCores, siteConfig.defaultHostMemMbPerCore, siteConfig.mailFrom) # protected methods which can be called within the workflow method: def addTask(self, label, command=None, cwd=None, env=None, nCores=1, memMb=siteConfig.defaultTaskMemMb, dependencies=None, priority=0, isForceLocal=False, isCommandMakePath=False, isTaskStable=True, mutex=None, retryMax=None, retryWait=None, retryWindow=None, retryMode=None) : """ Add task to workflow, including resource requirements and specification of dependencies. Dependency tasks must already exist in the workflow. @return: The 'label' argument is returned without modification. @param label: A string used to identify each task. The label must be composed of only ascii letters, digits, underscores and dashes (ie. /[A-Za-z0-9_-]+/). The label must also be unique within the workflow, and non-empty. @param command: The task command. Commands can be: (1) a shell string (2) an iterable container of strings (argument list) (3) None. In all cases strings must not contain newline characters. 
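# Illustrative sketch (not part of the pyflow module): the three command forms described
# above, shown inside a hypothetical workflow() method with made-up labels, file names
# and commands.
from pyflow import WorkflowRunner

class CommandFormsWorkflow(WorkflowRunner) :

    def workflow(self) :
        # (1) a shell string -- handy when shell features such as pipes are needed:
        self.addTask("compressData", "cat data.txt | gzip -c > data.txt.gz")
        # (2) an argument list -- avoids shell quoting/escaping issues:
        self.addTask("copyData", ["cp", "data.txt.gz", "backup/data.txt.gz"], dependencies="compressData")
        # (3) None -- a 'checkpoint' task marking completion of a set of dependencies:
        self.addTask("dataReady", dependencies=["compressData", "copyData"])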
A single string is typically used for commands that require shell features (such as pipes), an argument list can be used for any other commands, this is often a useful way to simplify quoting issues or to submit extremely long commands. The default command (None), can be used to create a 'checkpoint', ie. a task which does not run anything, but provides a label associated with the completion of a set of dependencies. @param cwd: Specify current working directory to use for command execution. Note that if submitting the command as an argument list (as opposed to a shell string) the executable (arg[0]) is searched for before changing the working directory, so you cannot specify the executable relative to the cwd setting. If submitting a shell string command this restriction does not apply. @param env: A map of environment variables for this task, for example 'env={"PATH": "/usr/bin"}'. When env is set to None (the default) the environment of the pyflow client process is used. @param nCores: Number of cpu threads required @param memMb: Amount of memory required (in megabytes) @param dependencies: A task label or container of task labels specifying all dependent tasks. Dependent tasks must already exist in the workflow. @type dependencies: A single string, or set, tuple or list of strings @param priority: Among all tasks which are eligible to run at the same time, launch tasks with higher priority first. this value can be set from[-100,100]. Note that this will strongly control the order of task launch on a local run, but will only control task submission order to a secondary scheduler (like sge). All jobs with the same priority are already submitted in order from highest to lowest nCores requested, so there is no need to set priorities to replicate this behavior. The taskManager can start executing tasks as soon as each addTask() method is called, so lower-priority tasks may be launched first if they are specified first in the workflow. @param isForceLocal: Force this task to run locally when a distributed task mode is used. This can be used to launch very small jobs outside of the sge queue. Note that 'isForceLocal' jobs launched during a non-local task mode are not subject to resource management, so it is important that these represent small jobs. Tasks which delete, move or touch a small number of files are ideal for this setting. @param isCommandMakePath: If true, command is assumed to be a path containing a makefile. It will be run using make/qmake according to the run's mode and the task's isForceLocal setting @param isTaskStable: If false, indicates that the task command and/or dependencies may change if the run is interrupted and restarted. A command marked as unstable will not be checked to make sure it matches its previous definition during run continuation. Unstable examples: command contains a date/time, or lists a set of files which are deleted at some point in the workflow, etc. @param mutex: Provide an optional id associated with a pyflow task mutex. For all tasks with the same mutex id, no more than one will be run at once. Id name must follow task id restrictions. Mutex ids are global across all recursively invoked workflows. Example use case: This feature has been added as a simpler alternative to file locking, to ensure sequential, but not ordered, access to a file. @param retryMax: The number of times this task will be retried after failing. If defined, this overrides the workflow retryMax value. 
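# Illustrative sketch (not part of the pyflow module): hypothetical examples of the
# resource and scheduling options documented above (nCores, memMb, priority,
# isForceLocal and mutex); the commands and labels are placeholders.
from pyflow import WorkflowRunner

class ResourceDemoWorkflow(WorkflowRunner) :

    def workflow(self) :
        # an expensive task with explicit thread/memory requirements and a raised priority:
        self.addTask("bigAnalysis", "./runHeavyAnalysis.bash", nCores=4, memMb=8192, priority=10)
        # a trivial bookkeeping task forced to run on the local host even in sge mode:
        self.addTask("markDone", "touch analysis.done", dependencies="bigAnalysis", isForceLocal=True)
        # two tasks sharing a mutex id, so they never run at the same time:
        self.addTask("appendLogA", "echo A >> shared.log", mutex="sharedLogAccess", dependencies="bigAnalysis")
        self.addTask("appendLogB", "echo B >> shared.log", mutex="sharedLogAccess", dependencies="bigAnalysis")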
@param retryWait: The number of seconds to wait before relaunching a failed task. If defined, this overrides the workflow retryWait value. @param retryWindow: The number of seconds after job submission in which retries will be attempted for non-make jobs. A value of zero or less causes retries to be attempted anytime after job submission. If defined, this overrides the workflow retryWindow value. @param retryMode: Modes are 'nonlocal' and 'all'. For 'nonlocal' retries are not attempted in local run mode. For 'all' retries are attempted for any run mode. If defined, this overrides the workflow retryMode value. """ self._requireInWorkflow() #### Canceled plans to add deferred dependencies: # # deferredDependencies -- A container of labels specifying dependent # # tasks which have not yet been added to the # # workflow. In this case the added task will # # wait for the dependency to be defined *and* # # complete. Avoid these in favor or regular # # dependencies if possible. # sanitize bools: isForceLocal = argToBool(isForceLocal) isCommandMakePath = argToBool(isCommandMakePath) # sanitize ints: nCores = int(nCores) memMb = int(memMb) priority = int(priority) if (priority > 100) or (priority < -100) : raise Exception("priority must be an integer in the range [-100,100]") # sanity check label: WorkflowRunner._checkTaskLabel(label) fullLabel = namespaceJoin(self._getNamespace(), label) # verify/sanitize command: cmd = Command(command, cwd, env) # deal with command/resource relationship: if cmd.cmd is None : nCores = 0 memMb = 0 else: if nCores <= 0 : raise Exception("Unexpected core requirement for task: '%s' nCores: %i" % (fullLabel, nCores)) if memMb <= 0: raise Exception("Unexpected memory requirement for task: '%s' memory: %i (megabytes)" % (fullLabel, memMb)) if (self._cdata().param.nCores != "unlimited") and (nCores > self._cdata().param.nCores) : raise Exception("Task core requirement exceeds full available resources") if (self._cdata().param.memMb != "unlimited") and (memMb > self._cdata().param.memMb) : raise Exception("Task memory requirement exceeds full available resources") # check that make path commands point to a directory: # if isCommandMakePath : if cmd.type != "str" : raise Exception("isCommandMakePath is set, but no path is provided in task: '%s'" % (fullLabel)) cmd.cmd = os.path.abspath(cmd.cmd) # sanitize mutex option if mutex is not None : WorkflowRunner._checkTaskLabel(mutex) task_retry = self._cdata().param.retry.getTaskCopy(retryMax, retryWait, retryWindow, retryMode) # private _addTaskCore gets hijacked in recursive workflow submission: # payload = CmdPayload(fullLabel, cmd, nCores, memMb, priority, isForceLocal, isCommandMakePath, isTaskStable, mutex, task_retry) self._addTaskCore(self._getNamespace(), label, payload, dependencies) return label def addWorkflowTask(self, label, workflowRunnerInstance, dependencies=None) : """ Add another WorkflowRunner instance as a task to this workflow. The added Workflow's workflow() method will be called once the dependencies specified in this call have completed. Once started, all of the submitted workflow's method calls (like addTask) will be placed into the enclosing workflow instance and bound by the run parameters of the enclosing workflow. This task will be marked complete once the submitted workflow's workflow() method has finished, and any tasks it initiated have completed. Note that all workflow tasks will have their own tasks namespaced with the workflow task label. 
This namespace is recursive in the case that you add workflow tasks which add their own workflow tasks, etc. Note that the submitted workflow instance will be deep copied before being altered in any way. @return: The 'label' argument is returned without modification. @param label: A string used to identify each task. The label must be composed of only ascii letters, digits, underscores and dashes (ie. /[A-Za-z0-9_-]+/). The label must also be unique within the workflow, and non-empty. @param workflowRunnerInstance: A L{WorkflowRunner} instance. @param dependencies: A label string or container of labels specifying all dependent tasks. Dependent tasks must already exist in the workflow. @type dependencies: A single string, or set, tuple or list of strings """ self._requireInWorkflow() # sanity check label: WorkflowRunner._checkTaskLabel(label) import inspect # copy and 'hijack' the workflow: workflowCopy = copy.deepcopy(workflowRunnerInstance) # hijack! -- take all public methods at the WorkflowRunner level # (except workflow()), and insert the self copy: publicExclude = ["workflow", "addTask", "addWorkflowTask", "waitForTasks"] for (n, _v) in inspect.getmembers(WorkflowRunner, predicate=inspect.ismethod) : if n[0] == "_" : continue # skip private/special methods if n in publicExclude : continue setattr(workflowCopy, n, getattr(self, n)) privateInclude = ["_cdata", "_addTaskCore", "_waitForTasksCore", "_isTaskCompleteCore","_setRunning","_getRunning"] for n in privateInclude : setattr(workflowCopy, n, getattr(self, n)) # final step: disable the run() function to be extra safe... workflowCopy.run = None # set the task namespace: workflowCopy._appendNamespace(self._getNamespaceList()) workflowCopy._appendNamespace(label) # add workflow task to the task-dag, and launch a new taskrunner thread # if one isn't already running: payload = WorkflowPayload(workflowCopy) self._addTaskCore(self._getNamespace(), label, payload, dependencies) return label def waitForTasks(self, labels=None) : """ Wait for a list of tasks to complete. @return: In case of an error in a task being waited for, or in one of these task's dependencies, the function returns 1. Else return 0. @param labels: Container of task labels to wait for. If an empty container is given or no list is provided then wait for all outstanding tasks to complete. @type labels: A single string, or set, tuple or list of strings """ self._requireInWorkflow() return self._waitForTasksCore(self._getNamespace(), labels) def isTaskComplete(self, taskLabel) : """ Query if a specific task is in the workflow and completed without error. This can assist workflows with providing stable interrupt/resume behavior. @param taskLabel: A task string @return: Completion status of task """ return self._isTaskCompleteCore(self._getNamespace(), taskLabel) def getRunMode(self) : """ Get the current run mode This can be used to access the current run mode from within the workflow function. Although the runmode should be transparent to client code, this is occasionally needed to hack workarounds. @return: Current run mode """ self._requireInWorkflow() return self._cdata().param.mode def getNCores(self) : """ Get the current run core limit This function can be used to access the current run's core limit from within the workflow function. This can be useful to eg. limit the number of cores requested by a single task. 
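# Illustrative sketch (not part of the pyflow module): nesting one workflow inside
# another via addWorkflowTask(), per its description above. Both workflow classes,
# their task labels and commands are hypothetical.
from pyflow import WorkflowRunner

class PrepWorkflow(WorkflowRunner) :

    def workflow(self) :
        self.addTask("prepStep", "echo preparing inputs")

class TopWorkflow(WorkflowRunner) :

    def workflow(self) :
        # the whole PrepWorkflow runs as a single task of this workflow, and its
        # tasks are namespaced under the label 'prep':
        prepTask = self.addWorkflowTask("prep", PrepWorkflow())
        # this command task waits until every task created by PrepWorkflow has completed:
        self.addTask("summarize", "echo prep is done", dependencies=prepTask)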
@return: Total cores available to this workflow run @rtype: Integer value or 'unlimited' """ self._requireInWorkflow() return self._cdata().param.nCores def limitNCores(self, nCores) : """ Takes an task nCores argument and reduces it to the maximum value allowed for the current run. @param nCores: Proposed core requirement @return: Min(nCores,Total cores available to this workflow run) """ self._requireInWorkflow() nCores = int(nCores) runNCores = self._cdata().param.nCores if runNCores == "unlimited" : return nCores return min(nCores, runNCores) def getMemMb(self) : """ Get the current run's total memory limit (in megabytes) @return: Memory limit in megabytes @rtype: Integer value or 'unlimited' """ self._requireInWorkflow() return self._cdata().param.memMb def limitMemMb(self, memMb) : """ Takes a task memMb argument and reduces it to the maximum value allowed for the current run. @param memMb: Proposed task memory requirement in megabytes @return: Min(memMb,Total memory available to this workflow run) """ self._requireInWorkflow() memMb = int(memMb) runMemMb = self._cdata().param.memMb if runMemMb == "unlimited" : return memMb return min(memMb, runMemMb) def isDryRun(self) : """ Get isDryRun flag value. When the dryrun flag is set, no commands are actually run. Querying this flag allows dynamic workflows to correct for dry run behaviors, such as tasks which do no produce expected files. @return: DryRun status flag """ self._requireInWorkflow() return self._cdata().param.isDryRun @staticmethod def runModeDefaultCores(mode) : """ Get the default core limit for run mode (local,sge,..) @param mode: run mode, as specified in L{the run() method} @return: Default maximum number of cores for mode @rtype: Either 'unlimited', or a string representation of the integer limit """ return str(RunMode.data[mode].defaultCores) def flowLog(self, msg, logState = LogState.INFO) : """ Send a message to the WorkflowRunner's log. @param msg: Log message @type msg: A string or an array of strings. String arrays will be separated by newlines in the log. @param logState: Message severity, defaults to INFO. @type logState: A value in pyflow.LogState.{INFO,WARNING,ERROR} """ self._requireInWorkflow() linePrefixOut = "[%s]" % (self._cdata().param.workflowClassName) self._cdata().flowLog(msg, linePrefix=linePrefixOut, logState=logState) # Protected methods for client derived-class override: def workflow(self) : """ Workflow definition defined in child class This method should be overridden in the class derived from L{WorkflowRunner} to specify the actual workflow logic. Client code should not call this method directly. 
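# Illustrative sketch (not part of the pyflow module): a dynamic workflow combining
# several of the methods documented above -- waitForTasks(), isDryRun(), limitNCores()
# and flowLog(). The file names, chunk naming scheme and commands are hypothetical.
import os

from pyflow import WorkflowRunner

class DynamicWorkflow(WorkflowRunner) :

    def workflow(self) :
        self.addTask("split", "split -l 1000 input.txt chunk.")
        # pause the workflow() method until the split task has completed:
        self.waitForTasks("split")
        if self.isDryRun() :
            # the chunk files won't exist during a dry run, so substitute a fixed list:
            chunkFiles = ["chunk.aa", "chunk.ab"]
        else :
            chunkFiles = [f for f in os.listdir(".") if f.startswith("chunk.")]
        self.flowLog("found %i chunks to process" % (len(chunkFiles)))
        # never request more cores than the run makes available:
        taskCores = self.limitNCores(4)
        for (i, chunkFile) in enumerate(chunkFiles) :
            self.addTask("count%i" % (i), ["wc", "-l", chunkFile], nCores=taskCores, dependencies="split")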
""" pass # private methods: # special workflowRunner Exception used to terminate workflow() function # if a ctrl-c is issued class _AbortWorkflowException(Exception) : pass def _flowLog(self, msg, logState) : linePrefixOut = "[WorkflowRunner]" self._cdata().flowLog(msg, linePrefix=linePrefixOut, logState=logState) def _infoLog(self, msg) : self._flowLog(msg,logState=LogState.INFO) def _warningLog(self, msg) : self._flowLog(msg,logState=LogState.WARNING) def _errorLog(self, msg) : self._flowLog(msg,logState=LogState.ERROR) def _whoami(self) : # returns name of *derived* class return self.__class__.__name__ def _getNamespaceList(self) : try: return self._namespaceList except AttributeError: self._namespaceList = [] return self._namespaceList def _getNamespace(self) : return namespaceSep.join(self._getNamespaceList()) def _appendNamespace(self, names) : names = lister(names) for name in names : # check against runaway recursion: if len(self._getNamespaceList()) >= WorkflowRunner._maxWorkflowRecursion : raise Exception("Recursive workflow invocation depth exceeds maximum allowed depth of %i" % (WorkflowRunner._maxWorkflowRecursion)) WorkflowRunner._checkTaskLabel(name) self._getNamespaceList().append(name) # flag used to request the termination of all task submission: # _allStop = threading.Event() @staticmethod def _stopAllWorkflows() : # request all workflows stop task submission: WorkflowRunner._allStop.set() @staticmethod def _isWorkflowStopped() : # check whether a global signal has been give to stop all workflow submission # this should only be true when a ctrl-C or similar event has occurred. return WorkflowRunner._allStop.isSet() def _addTaskCore(self, namespace, label, payload, dependencies) : # private core taskAdd routine for hijacking # fromWorkflow is the workflow instance used to launch the task # # add workflow task to the task-dag, and launch a new taskrunner thread # if one isn't already running: if self._isWorkflowStopped() : raise WorkflowRunner._AbortWorkflowException self._infoLog("Adding %s '%s' to %s" % (payload.desc(), namespaceJoin(namespace, label), namespaceLabel(namespace))) # add task to the task-dag, and launch a new taskrunner thread # if one isn't already running: dependencies = setzer(dependencies) self._tdag.addTask(namespace, label, payload, dependencies) self._startTaskManager() def _getWaitStatus(self, namespace, labels, status) : # update and return two values: # (1) isAllTaskDone -- are all tasks done (ie. 
error or complete state # (2) retval -- this is set to one if any tasks have errors # def updateStatusFromTask(task, status) : if not task.isDone() : status.isAllTaskDone = False elif not task.isComplete() : status.retval = 1 if status.retval == 0 and (not self._cdata().isTaskSubmissionActive()) : status.retval = 1 if status.retval == 0 and task.isDead() : status.retval = 1 if len(labels) == 0 : if namespace == "" : if self._tdag.isRunExhausted() or (not self._tman.isAlive()) : if not self._tdag.isRunComplete() : status.retval = 1 else: status.isAllTaskDone = False else : for task in self._tdag.getAllNodes(namespace) : updateStatusFromTask(task, status) else : for l in labels : if not self._tdag.isTaskPresent(namespace, l) : raise Exception("Task: '%s' is not in taskDAG" % (namespaceJoin(namespace, l))) task = self._tdag.getTask(namespace, l) updateStatusFromTask(task, status) def _waitForTasksCore(self, namespace, labels=None, isVerbose=True) : labels = setzer(labels) if isVerbose : msg = "Pausing %s until completion of" % (namespaceLabel(namespace)) if len(labels) == 0 : self._infoLog(msg + " its current tasks") else: self._infoLog(msg + " task(s): %s" % (",".join([namespaceJoin(namespace, l) for l in labels]))) class WaitStatus: def __init__(self) : self.isAllTaskDone = True self.retval = 0 ewaiter = ExpWaiter(1, 1.7, 15) while True : if self._isWorkflowStopped() : raise WorkflowRunner._AbortWorkflowException status = WaitStatus() self._getWaitStatus(namespace, labels, status) if status.isAllTaskDone or (status.retval != 0) : break ewaiter.wait() if isVerbose : msg = "Resuming %s" % (namespaceLabel(namespace)) self._infoLog(msg) return status.retval def _isTaskCompleteCore(self, namespace, taskLabel) : if not self._tdag.isTaskPresent(namespace, taskLabel) : return False task = self._tdag.getTask(namespace, taskLabel) return task.isComplete() @staticmethod def _checkTaskLabel(label) : # sanity check label: if not isinstance(label, basestring) : raise Exception ("Task label is not a string") if label == "" : raise Exception ("Task label is empty") if not re.match("^[A-Za-z0-9_-]+$", label) : raise Exception ("Task label is invalid due to disallowed characters. Label: '%s'" % (label)) def _startTaskManager(self) : # start a new task manager if one isn't already running: # if (self._tman is not None) and (self._tman.isAlive()) : return if not self._cdata().isTaskManagerException : self._tman = TaskManager(self._cdata(), self._tdag) self._tman.start() def _notify(self, msg, logState) : # msg is printed to log AND sent to any email or other requested # notification systems: self._flowLog(msg,logState) self._cdata().emailNotification(msg, self._flowLog) def _killWorkflow(self, errorMsg) : self._notify(errorMsg,logState=LogState.ERROR) self._shutdownAll(timeoutSec=10) sys.exit(1) def _shutdownAll(self, timeoutSec) : # Try to shut down the task manager, all command-tasks, # and all sub-workflow tasks. # if (self._tman is None) or (not self._tman.isAlive()) : return StoppableThread.stopAll() self._stopAllWorkflows() self._tman.stop() for _ in range(timeoutSec) : time.sleep(1) if not self._tman.isAlive() : self._infoLog("Task shutdown complete") return self._infoLog("Task shutdown timed out") def _cdata(self) : # We're doing this convoluted setup only to avoid having a # ctor for ease of use by the client. See what pyFlow goes # through for you client code?? 
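# Illustrative sketch (not part of the pyflow module): the polling loops above use
# ExpWaiter(1, 1.7, 15) between status checks. A rough sketch of that style of
# exponential backoff is shown below, assuming the constructor arguments mean
# (initial delay in seconds, growth factor, maximum delay in seconds) -- that reading
# is an assumption about ExpWaiter, not taken from the pyflow source.
import time

def pollWithBackoff(isDone, startSec=1, growthFactor=1.7, maxSec=15) :
    """ call isDone() with exponentially increasing sleeps until it returns True """
    delay = startSec
    while not isDone() :
        time.sleep(delay)
        delay = min(delay * growthFactor, maxSec)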
# try: return self._constantData except AttributeError: self._constantData = WorkflowRunnerThreadSharedData() return self._constantData # TODO: Better definition of the status thread shutdown at the end of a pyflow run to # prevent race conditions -- ie. what happens if the status update is running while # pyflow is shutting down? Every method called by the status updater should be safety # checked wrt this issue. # def _runUpdate(self, runStatus) : while True : time.sleep(self.updateInterval * 60) status = self._tdag.getTaskStatus() isSpecComplete = (runStatus.isSpecificationComplete.isSet() and status.isAllSpecComplete) report = [] report.append("===== " + self._whoami() + " StatusUpdate =====") report.append("Workflow specification is complete?: %s" % (str(isSpecComplete))) report.append("Task status (waiting/queued/running/complete/error): %i/%i/%i/%i/%i" % (status.waiting, status.queued, status.running, status.complete, status.error)) report.append("Longest ongoing queued task time (hrs): %.4f" % (status.longestQueueSec / 3600.)) report.append("Longest ongoing queued task name: '%s'" % (status.longestQueueName)) report.append("Longest ongoing running task time (hrs): %.4f" % (status.longestRunSec / 3600.)) report.append("Longest ongoing running task name: '%s'" % (status.longestRunName)) report = [ "[StatusUpdate] " + line for line in report ] self._infoLog(report) # Update interval is also an appropriate interval to dump a stack-trace of all active # threads. This is a useful post-mortem in the event of a large class of hang/deadlock # errors: # stackDumpFp = open(self._cdata().stackDumpLogFile, "a") # create one fully decorated line in the stack dump file as a prefix to the report: linePrefixOut = "[%s] [StackDump]" % (self._cdata().getRunid()) ofpList = [stackDumpFp] log(ofpList, "Initiating stack dump for all threads", linePrefixOut) stackDump(stackDumpFp) hardFlush(stackDumpFp) stackDumpFp.close() def _runWorkflow(self, param) : # # Primary workflow logic when nothing goes wrong: # self._setupWorkflow(param) self._initMessage() runStatus = RunningTaskStatus(self._tdag.isFinishedEvent) # start status update reporter: # # TODO: stop this thread at end of run # if(self.updateInterval > 0) : hb = threading.Thread(target=WorkflowRunner._runUpdate, args=(self, runStatus)) hb.setDaemon(True) hb.setName("StatusUpdate-Thread") hb.start() # run workflow() function on a separate thread, using exactly # the same method we use for sub-workflows: # # TODO: move the master workflow further into the code path used by sub-workflows, # so that we aren't replicating polling and error handling code in this function: # trun = WorkflowTaskRunner(runStatus, "masterWorkflow", self, self._cdata().flowLog, None) trun.start() # can't join() because that blocks SIGINT ewaiter = ExpWaiter(1, 1.7, 15,runStatus.isComplete) while True : if not trun.isAlive() : break ewaiter.wait() if not runStatus.isComplete.isSet() : # if not complete then we don't know what happened, very bad!: runStatus.errorCode = 1 runStatus.errorMessage = "Thread: '%s', has stopped without a traceable cause" % (trun.getName()) self._taskInfoWriter.flush() self._taskStatusWriter.flush() return self._evalWorkflow(runStatus) def _setupWorkflow(self, param) : cdata = self._cdata() # setup instance user parameters: cdata.setupNewRun(param) # setup other instance data: self._tdag = TaskDAG(cdata.param.isContinue, cdata.param.isForceContinue, cdata.param.isDryRun, cdata.taskInfoFile, cdata.taskStateFile, cdata.param.workflowClassName, 
cdata.param.startFromTasks, cdata.param.ignoreTasksAfter, cdata.param.resetTasks, self._flowLog) self._tman = None def backupFile(inputFile) : """ backup old state files if they exist """ if not os.path.isfile(inputFile) : return fileDir = os.path.dirname(inputFile) fileName = os.path.basename(inputFile) backupDir = os.path.join(fileDir, "backup") ensureDir(backupDir) backupFileName = fileName + ".backup_before_starting_run_%s.txt" % (cdata.getRunid()) backupFile = os.path.join(backupDir, backupFileName) shutil.copyfile(inputFile, backupFile) backupFile(cdata.taskStateFile) backupFile(cdata.taskInfoFile) if cdata.param.isContinue : self._setupContinuedWorkflow() self._taskInfoWriter = TaskFileWriter(self._tdag.writeTaskInfo) self._taskStatusWriter = TaskFileWriter(self._tdag.writeTaskStatus) self._tdag.isWriteTaskInfo = self._taskInfoWriter.isWrite self._tdag.isWriteTaskStatus = self._taskStatusWriter.isWrite self._taskInfoWriter.start() self._taskStatusWriter.start() def _createContinuedStateFile(self) : # # create continued version of task state file # cdata = self._cdata() if not os.path.isfile(cdata.taskStateFile) : return set() tmpFile = cdata.taskStateFile + ".update.incomplete" tmpfp = open(tmpFile, "w") tmpfp.write(taskStateHeader()) complete = set() for words in taskStateParser(cdata.taskStateFile) : (runState, errorCode) = words[2:4] if (runState != "complete") or (int(errorCode) != 0) : continue tmpfp.write("\t".join(words) + "\n") (label, namespace) = words[0:2] complete.add(namespaceJoin(namespace, label)) tmpfp.close() forceRename(tmpFile, cdata.taskStateFile) return complete def _createContinuedInfoFile(self, complete) : # # create continued version of task info file # cdata = self._cdata() if not os.path.isfile(cdata.taskInfoFile) : return tmpFile = cdata.taskInfoFile + ".update.incomplete" tmpfp = open(tmpFile, "w") tmpfp.write(taskInfoHeader()) for words in taskInfoParser(cdata.taskInfoFile) : (label, namespace, ptype, nCores, memMb, priority, isForceLocal, depStr, cwdStr, command) = words fullLabel = namespaceJoin(namespace, label) if fullLabel not in complete : continue tmpfp.write("\t".join(words) + "\n") if ptype == "command" : if command == "" : command = None payload = CmdPayload(fullLabel, Command(command, cwdStr), int(nCores), int(memMb), int(priority), argToBool(isForceLocal)) elif ptype == "workflow" : payload = WorkflowPayload(None) else : assert 0 self._tdag.addTask(namespace, label, payload, getTaskInfoDepSet(depStr), isContinued=True) tmpfp.close() forceRename(tmpFile, cdata.taskInfoFile) def _setupContinuedWorkflow(self) : # reduce both state files to completed states only. 
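# Illustrative sketch (not part of the pyflow module): based on the writeTaskStatus()
# format defined earlier, a data row of pyflow_tasks_runstate.txt is tab-delimited as
# label / namespace / runstate / errorstate / last update time, so a completed
# sub-workflow task might look roughly like this (all values hypothetical):
#
#   alignChunk1 <TAB> align <TAB> complete <TAB> 0 <TAB> 2017-01-04T12:00:00
#
# _createContinuedStateFile() below keeps only rows whose runstate is 'complete' with
# errorstate 0, and _createContinuedInfoFile() then re-adds just those tasks to the new
# DAG as already-finished nodes, which is how isContinue runs skip previously completed
# work.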
complete = self._createContinuedStateFile() self._createContinuedInfoFile(complete) def _initMessage(self) : param = self._cdata().param # shortcut msg = ["Initiating pyFlow run"] msg.append("pyFlowClientWorkflowClass: %s" % (param.workflowClassName)) msg.append("pyFlowVersion: %s" % (__version__)) msg.append("pythonVersion: %s" % (pythonVersion)) msg.append("WorkingDir: '%s'" % (self._cdata().cwd)) msg.append("ProcessCmdLine: '%s'" % (cmdline())) parammsg = ["mode: %s" % (param.mode), "nCores: %s" % (str(param.nCores)), "memMb: %s" % (str(param.memMb)), "dataDir: %s" % (str(param.dataDir)), "isDryRun: %s" % (str(param.isDryRun)), "isContinue: %s" % (str(param.isContinue)), "isForceContinue: %s" % (str(param.isForceContinue)), "mailTo: '%s'" % (",".join(param.mailTo))] for i in range(len(parammsg)): parammsg[i] = "[RunParameters] " + parammsg[i] msg += parammsg self._infoLog(msg) def _getTaskErrorsSummaryMsg(self, isForceTaskHarvest=False) : # isForceHarvest means we try to force an update of the shared # taskError information in case this thread is ahead of the # task manager. if isForceTaskHarvest : if (self._tman is not None) and (self._tman.isAlive()) : self._tman.harvestTasks() if not self._cdata().isTaskError() : return [] # this case has already been emailed in the TaskManager @ first error occurrence: msg = ["Worklow terminated due to the following task errors:"] for task in self._cdata().taskErrors : msg.extend(task.getTaskErrorMsg()) return msg def _evalWorkflow(self, masterRunStatus) : isError = False if self._cdata().isTaskError() : msg = self._getTaskErrorsSummaryMsg() self._errorLog(msg) isError = True if masterRunStatus.errorCode != 0 : eMsg = lister(masterRunStatus.errorMessage) if (len(eMsg) > 1) or (len(eMsg) == 1 and eMsg[0] != "") : msg = ["Failed to complete master workflow, error code: %s" % (str(masterRunStatus.errorCode))] msg.append("errorMessage:") msg.extend(eMsg) self._notify(msg,logState=LogState.ERROR) isError = True if self._cdata().isTaskManagerException : # this case has already been emailed in the TaskManager: self._errorLog("Workflow terminated due to unhandled exception in TaskManager") isError = True if (not isError) and (not self._tdag.isRunComplete()) : msg = "Workflow terminated with unknown error condition" self._notify(msg,logState=LogState.ERROR) isError = True if isError: return 1 elapsed = int(time.time() - self.runStartTimeStamp) msg = [] if self._cdata().param.successMsg is not None : msg.extend([self._cdata().param.successMsg,""]) msg.extend(["Workflow successfully completed all tasks", "Elapsed time for full workflow: %s sec" % (elapsed)]) self._notify(msg,logState=LogState.INFO) return 0 def _requireInWorkflow(self) : """ check that the calling method is being called as part of a pyflow workflow() method only """ if not self._getRunning(): raise Exception("Method must be a (call stack) descendant of WorkflowRunner workflow() method (via run() method)") def _initRunning(self): try : assert(self._isRunning >= 0) except AttributeError : self._isRunning = 0 @lockMethod def _setRunning(self, isRunning) : self._initRunning() if isRunning : self._isRunning += 1 else : self._isRunning -= 1 @lockMethod def _getRunning(self) : self._initRunning() return (self._isRunning > 0) if __name__ == "__main__" : help(WorkflowRunner) pyflow-1.1.14/pyflow/src/pyflowConfig.py000066400000000000000000000136321303601460500202500ustar00rootroot00000000000000# # pyFlow - a lightweight parallel task engine # # Copyright (c) 2012-2015 Illumina, Inc. 
# All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY # WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # """ pyflowConfig This file defines a class instance 'siteConfig' containing pyflow components which are the most likely to need site-specific configuration. """ import os # this is the only object pyflow needs to import, it # is defined at the end of this module: # siteConfig = None # depending on network setup getfqdn() can be slow, so cache calls to this function here: # def _getHostName() : import socket return socket.getfqdn() cachedHostName = None def getHostName() : global cachedHostName if cachedHostName is None : cachedHostName = _getHostName() return cachedHostName def getDomainName() : hn = getHostName().split(".") if len(hn) > 1 : hn = hn[1:] return ".".join(hn) class DefaultSiteConfig(object) : """ Default configuration settings are designed to work with as many sites as technically feasible """ # All email notifications come from the following e-mail address # mailFrom = "pyflow-bot@" + getDomainName() # Default memory (in megabytes) requested by each command task: # defaultTaskMemMb = 2048 # In local run mode, this is the defalt memory per thread that we # assume is available: # defaultHostMemMbPerCore = 2048 # maximum number of jobs which can be submitted to sge at once: # # allowed values are "unlimited" or None for unlimited jobs, or # a positive integer describing the max number of jobs # maxSGEJobs = 128 # both getHostName and getDomainName are used in the # siteConfig factory, so these are not designed to be # overridden at present: getHostName = staticmethod(getHostName) getDomainName = staticmethod(getDomainName) @classmethod def qsubResourceArg(cls, nCores, memMb) : """ When a task is launched using qsub in sge mode, it will call this function to specify the requested number of threads and megabytes of memory. The returned argument list will be appended to the qsub arguments. nCores -- number of threads requested memMb -- memory requested (in megabytes) """ nCores = int(nCores) memMb = int(memMb) return cls._qsubResourceArgConfig(nCores, memMb) @classmethod def _qsubResourceArgConfig(cls, nCores, memMb) : """ The default function is designed for maximum portability -- it just provides more memory via more threads. 
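# Illustrative sketch (not part of the pyflow module): a worked example of the default
# "more memory via more threads" policy described above, using the 4096 MB-per-thread
# assumption from the Constants class below. A task asking for nCores=2, memMb=10240
# needs ceil(10240/4096) = 3 'memory cores', so max(2, 3) = 3 threads are requested:
#
#   DefaultSiteConfig.qsubResourceArg(2, 10240)  ->  ["-pe", "threaded", "3"]
#
# while a single-core, low-memory task yields an empty argument list. Sites where SGE
# exposes h_vmem can instead rely on the hvmemSGEConfig variant defined below, which
# requests memory directly via '-l h_vmem=...'.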
""" # this is the memory we assume is available per # thread on the cluster: # class Constants(object) : megsPerCore = 4096 memCores = 1 + ((memMb - 1) / Constants.megsPerCore) qsubCores = max(nCores, memCores) if qsubCores <= 1 : return [] return ["-pe", "threaded", str(qsubCores)] @classmethod def getSgeMakePrefix(cls, nCores, memMb, schedulerArgList) : """ This prefix will be added to ' -C directory', and run from a local process to handle sge make jobs. Note that memMb hasn't been well defined for make jobs yet, is it the per task memory limit? The first application to accually make use of this will have to setup the convention, it is ignored right now... """ nCores = int(nCores) memMb = int(memMb) retval = ["qmake", "-V", "-now", "n", "-cwd", "-N", "pyflowMakeTask"] # user arguments to run() (usually q specification: retval.extend(schedulerArgList) #### use qmake parallel environment: # retval.extend(["-pe","make",str(nCores),"--"]) #### ...OR use 'dynamic' sge make environment: retval.extend(["--", "-j", str(nCores)]) return retval def getEnvVar(key) : if key in os.environ : return os.environ[key] return None class hvmemSGEConfig(DefaultSiteConfig) : """ This config assumes 'h_vmem' is defined on the SGE instance """ @classmethod def _qsubResourceArgConfig(cls, nCores, memMb) : retval = [] # specify memory requirements memGb = 1 + ((memMb - 1) / 1024) reqArg = "h_vmem=%iG" % (memGb) retval.extend(["-l", reqArg]) if nCores > 1 : retval.extend(["-pe", "threaded", str(nCores)]) return retval # # final step is the selection of this run's siteConfig object: # def siteConfigFactory() : # TODO: add an import time test to determine if h_vmem is valid return hvmemSGEConfig siteConfig = siteConfigFactory() pyflow-1.1.14/pyflow/src/pyflowTaskWrapper.py000066400000000000000000000240021303601460500212770ustar00rootroot00000000000000# # pyFlow - a lightweight parallel task engine # # Copyright (c) 2012-2015 Illumina, Inc. # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS # FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE # COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, # INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, # BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; # LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER # CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT # LIABILITY, OR TORT INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY # WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE # POSSIBILITY OF SUCH DAMAGE. # """ This script wraps workflow tasks for execution on local or remote hosts. 
It is responsible for adding log decorations to task's stderr output (which is diverted to a file), and writing task state transition and error information to the wrapper's stderr, which becomes the task's 'signal' file from pyflow's perspective. The signal file is used to determine task exit status, total runtime, and queue->run state transition when pyflow is run in SGE mode. """ import datetime import os import subprocess import sys import time scriptName = "pyflowTaskWrapper.py" def getTracebackStr() : import traceback return traceback.format_exc() def getExceptionMsg() : return ("[ERROR] Unhandled Exception in pyflowTaskWrapper\n" + getTracebackStr()) def timeStampToTimeStr(ts) : """ converts timeStamp (time.time()) output to timeStr """ return datetime.datetime.utcfromtimestamp(ts).isoformat() def timeStrNow(): return timeStampToTimeStr(time.time()) def hardFlush(ofp): ofp.flush() if ofp.isatty() : return os.fsync(ofp.fileno()) def isWindows() : import platform return (platform.system().find("Windows") > -1) class SimpleFifo(object) : """ Store up to last N objects, not thread safe. Note extraction does not follow any traditional fifo interface """ def __init__(self, size) : self._size = int(size) assert (self._size > 0) self._data = [None] * self._size self._head = 0 self._occup = 0 self._counter = 0 def count(self) : """ Get the total number of adds for this fifo """ return self._counter def add(self, obj) : """ add obj to fifo, and return obj for convenience """ self._data[self._head] = obj self._counter += 1 if self._occup < self._size : self._occup += 1 self._head += 1 if self._head == self._size : self._head = 0 assert (self._head < self._size) return obj def get(self) : """ return an array of the fifo contents """ retval = [] current = (self._head + self._size) - self._occup for _ in range(self._occup) : while current >= self._size : current -= self._size retval.append(self._data[current]) current += 1 return retval class StringBling(object) : def __init__(self, runid, taskStr) : def getHostName() : import socket # return socket.gethostbyaddr(socket.gethostname())[0] return socket.getfqdn() self.runid = runid self.taskStr = taskStr self.hostname = getHostName() def _writeMsg(self, ofp, msg, taskStr, writeFilter=lambda x: x) : """ log a possibly multi-line message with decoration: """ prefix = "[%s] [%s] [%s] [%s] " % (timeStrNow(), self.hostname, self.runid, taskStr) if msg[-1] == "\n" : msg = msg[:-1] for line in msg.split("\n") : ofp.write(writeFilter(prefix + line + "\n")) hardFlush(ofp) def transfer(self, inos, outos, writeFilter=lambda x: x): """ This function is used to decorate the stderr stream from the launched task itself """ # # write line-read loop this way to workaround python bug: # http://bugs.python.org/issue3907 # while True: line = inos.readline() if not line: break self._writeMsg(outos, line, self.taskStr, writeFilter) def wrapperLog(self, log_os, msg) : """ Used by the wrapper to decorate each msg line with a prefix. The decoration is similar to that for the task's own stderr, but we prefix the task with 'pyflowTaskWrapper' to differentiate the source. 
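# Illustrative sketch (not part of the pyflow module): given the _writeMsg() prefix
# format above, a decorated line of task stderr looks roughly like the following
# (timestamp, hostname, runid and task label values are hypothetical):
#
#   [2017-01-04T12:00:00] [node01.example.com] [1234_1] [myTask] actual stderr text
#
# Lines written by the wrapper itself use the same format with the task field prefixed
# as 'pyflowTaskWrapper:myTask'; these carry the '[wrapperSignal] wrapperStart',
# '[wrapperSignal] taskStart' and '[wrapperSignal] taskExitCode' messages that pyflow
# parses from the signal file.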
""" self._writeMsg(log_os, msg, "pyflowTaskWrapper:" + self.taskStr) def getParams(paramsFile) : import pickle paramhash = pickle.load(open(paramsFile)) class Params : pass params = Params() for (k, v) in paramhash.items() : setattr(params, k, v) return params def main(): usage = """ Usage: %s runid taskid parameter_pickle_file The parameter pickle file contains all of the task parameters required by the wrapper """ % (scriptName) def badUsage(msg=None) : sys.stderr.write(usage) if msg is not None : sys.stderr.write(msg) exitval = 1 else: exitval = 2 hardFlush(sys.stderr) sys.exit(exitval) def checkExpectArgCount(expectArgCount) : if len(sys.argv) == expectArgCount : return badUsage("Incorrect argument count, expected: %i observed: %i\n" % (expectArgCount, len(sys.argv))) runid = "unknown" taskStr = "unknown" if len(sys.argv) > 2 : runid = sys.argv[1] taskStr = sys.argv[2] bling = StringBling(runid, taskStr) # send a signal for wrapper start as early as possible to help ensure hostname is logged pffp = sys.stderr bling.wrapperLog(pffp, "[wrapperSignal] wrapperStart") checkExpectArgCount(4) picklefile = sys.argv[3] # try multiple times to read the argument file in case of NFS delay: # retryDelaySec = 30 maxTrials = 3 for _ in range(maxTrials) : if os.path.exists(picklefile) : break time.sleep(retryDelaySec) if not os.path.exists(picklefile) : badUsage("First argument does not exist: " + picklefile) if not os.path.isfile(picklefile) : badUsage("First argument is not a file: " + picklefile) # add another multi-trial loop on the pickle load operation -- # on some filesystems the file can appear to exist but not # be fully instantiated yet: # for t in range(maxTrials) : try : params = getParams(picklefile) except : if (t+1) == maxTrials : raise time.sleep(retryDelaySec) continue break if params.cmd is None : badUsage("Invalid TaskWrapper input: task command set to NONE") if params.cwd == "" or params.cwd == "None" : params.cwd = None toutFp = open(params.outFile, "a") terrFp = open(params.errFile, "a") # always keep last N lines of task stderr: fifo = SimpleFifo(20) isWin=isWindows() # Present shell as arg list with Popen(shell=False), so that # we minimize quoting/escaping issues for 'cmd' itself: # fullcmd = [] if (not isWin) and params.isShellCmd : # TODO shell selection should be configurable somewhere: shell = ["/bin/bash", "--noprofile", "-o", "pipefail"] fullcmd = shell + ["-c", params.cmd] else : fullcmd = params.cmd retval = 1 isShell=isWin try: startTime = time.time() bling.wrapperLog(pffp, "[wrapperSignal] taskStart") # turn off buffering so that stderr is updated correctly and its timestamps # are more accurate: # TODO: is there a way to do this for stderr only? proc = subprocess.Popen(fullcmd, stdout=toutFp, stderr=subprocess.PIPE, shell=isShell, bufsize=1, cwd=params.cwd, env=params.env) bling.transfer(proc.stderr, terrFp, fifo.add) retval = proc.wait() elapsed = (time.time() - startTime) # communication back to pyflow: bling.wrapperLog(pffp, "[wrapperSignal] taskExitCode %i" % (retval)) # communication to human-readable log: msg = "Task: '%s' exit code: '%i'" % (taskStr, retval) bling.wrapperLog(terrFp, msg) if retval == 0 : # communication back to pyflow: bling.wrapperLog(pffp, "[wrapperSignal] taskElapsedSec %i" % (int(elapsed))) # communication to human-readable log: msg = "Task: '%s' complete." 
% (taskStr) msg += " elapsedSec: %i" % (int(elapsed)) msg += " elapsedCoreSec: %i" % (int(elapsed * params.nCores)) msg += "\n" bling.wrapperLog(terrFp, msg) else : # communication back to pyflow: tailMsg = fifo.get() bling.wrapperLog(pffp, "[wrapperSignal] taskStderrTail %i" % (1 + len(tailMsg))) pffp.write("Last %i stderr lines from task (of %i total lines):\n" % (len(tailMsg), fifo.count())) for line in tailMsg : pffp.write(line) hardFlush(pffp) except KeyboardInterrupt: msg = "[ERROR] Keyboard Interupt, shutting down task." bling.wrapperLog(terrFp, msg) sys.exit(1) except: msg = getExceptionMsg() bling.wrapperLog(terrFp, msg) raise sys.exit(retval) if __name__ == "__main__" : main() pyflow-1.1.14/scratch/000077500000000000000000000000001303601460500145435ustar00rootroot00000000000000pyflow-1.1.14/scratch/README.txt000066400000000000000000000001651303601460500162430ustar00rootroot00000000000000 notes/ random design/development notes pybox/ test code snippets test/ unit and integration test scripts pyflow-1.1.14/scratch/bench/000077500000000000000000000000001303601460500156225ustar00rootroot00000000000000pyflow-1.1.14/scratch/bench/README.md000066400000000000000000000002751303601460500171050ustar00rootroot00000000000000The manyThreads benchmark differentiates the optimizations introduced by STREL-391 to help improve total task throughput when a very high number of cores is available on a single machine. pyflow-1.1.14/scratch/bench/manyThreads.py000066400000000000000000000013151303601460500204530ustar00rootroot00000000000000#!/usr/bin/env python import os.path import sys # add module path by hand # scriptDir=os.path.abspath(os.path.dirname(__file__)) sys.path.append(scriptDir+"/../../pyflow/src") from pyflow import WorkflowRunner class SimpleWorkflow(WorkflowRunner) : """ A workflow designed to differentiate the runtime impact of STREL-391 """ def __init__(self) : pass def workflow(self) : for i in range(4000) : self.addTask("task%s" % (i),["sleep","0"]) # Instantiate the workflow # # parameters are passed into the workflow via its constructor: # wflow = SimpleWorkflow() # Run the worklow: # retval=wflow.run(mode="local",nCores=400,isQuiet=True) sys.exit(retval) pyflow-1.1.14/scratch/delete_trailing_wspace.bash000077500000000000000000000011071303601460500221010ustar00rootroot00000000000000#!/usr/bin/env bash # # clean trailing w/s from pyflow source # # pretty hacky script... probably best to avoid running this if you have a lot of uncommitted changes # set -o nounset scriptdir=$(cd $(dirname $0); pwd -P) get_source() { find $scriptdir/../pyflow -type f \ -name "*.bash" -or \ -name "*.py" } tempfile=$(mktemp) for f in $(get_source); do echo "checking: $f" cat $f |\ sed 's/[ ]*$//' >|\ $tempfile if ! 
diff $tempfile $f > /dev/null; then mv -f $tempfile $f else rm -f $tempfile fi done pyflow-1.1.14/scratch/make_release_tarball.bash000077500000000000000000000023241303601460500215240ustar00rootroot00000000000000#!/usr/bin/env bash # # this script makes the pyflow release tarball assuming it's being called in the git repo # already checked out to the targeted version # # the tarball is written to the callers cwd # set -o nounset set -o xtrace set -o errexit pname_root="" if [ $# -gt 1 ]; then echo "usage: $0 [rootname]" exit 2 elif [ $# == 1 ]; then pname_root=$1 fi get_abs_path() { (cd $1; pwd -P) } script_dir=$(get_abs_path $(dirname $0)) outdir=$(pwd) echo $outdir cd $script_dir echo $script_dir gitversion=$(git describe | sed s/^v//) if [ "$pname_root" == "" ]; then pname_root=pyflow-$gitversion fi pname=$outdir/$pname_root cd .. # use archive instead of copy so that we clean up any tmp files in the working directory: git archive --prefix=$pname_root/ HEAD:pyflow/ | tar -x -C $outdir # make version number substitutions: cat pyflow/src/pyflow.py |\ sed "s/pyflowAutoVersion = None/pyflowAutoVersion = \"$gitversion\"/" >|\ $pname/src/pyflow.py cat pyflow/README.md |\ sed "s/\${VERSION}/$gitversion/" >|\ $pname/README.md cat pyflow/setup.py |\ sed "s/\${VERSION}/$gitversion/" >|\ $pname/setup.py chmod +x $pname/src/pyflow.py cd $outdir tar -cz $pname_root -f $pname.tar.gz rm -rf $pname pyflow-1.1.14/scratch/notes/000077500000000000000000000000001303601460500156735ustar00rootroot00000000000000pyflow-1.1.14/scratch/notes/design.notes000066400000000000000000000045141303601460500202220ustar00rootroot000000000000001.Get example task file and launch command. launch cmd: """ /illumina/software/casava/CASAVA-1.8.2/bin/taskServer.pl --tasksFile=/illumina/builds/lox/Saturn/Saturn1_BB0065ACXX_builds/temp_build/tasks.21_09_49_26_01_12.txt --host=ukch-dev-lndt01 --mode=sge /illumina/software/casava/CASAVA-1.8.2/bin/taskServer.pl --tasksFile=/illumina/builds/lox/Saturn/Saturn1_BB0065ACXX_builds/temp_build/tasks.21_09_49_26_01_12.txt --host=localhost --jobsLimit=1 --mode=local """ new task specification file: xml contains tasks and edges no special checkpoints anymore, these are just tasks without commands a separate "status" file associates a state with each task OR: new task specification script: perl too much change at once dynamic_task_manager: w=WorkflowClass(config) w.run(filename) s.init(mode="local|sge", ncores=X|inf, workflow_file_prefix, is_continue=0|1) s.add_task(label,description,command); s.add_dependency(label,label2,is_optional); s.close() dynamic task manager: workflow_dir is used to write the stdout and stderr log, as well as the status file prefix/runid.stderr.log prefix/runid.stdout.log prefix/runid.taskstatus.txt prefix/taskstatus.txt prefix/workflow_run_history.txt s.add_task(label,command,n_cores,[task_dep_list]) s.add_task(label,command,n_cores,[task_dep_list]) Error policy: Stop launching new jobs. Record total number of errors and write this on final log line. 
write_to_file: dir/tasks.error.txt Logs (all append only): # all messages from the workflow engine itself: dir/logs/workflow_log.txt # all messages from task, including the task wrapper: dir/logs/tasks_stderr_log.txt dir/logs/tasks_stdout_log.txt persistence data: # record of all data supplied in each add-task call: dir/tasks.info.txt (unique append-only) dir/task.history.txt convenience: dir/tasks.corehourtime.txt (append-only) dir/tasks.dot (refresh at complete workflow specification) if isContinue: 1) read in state files and reconstruct data structures from these for complete tasks only, set a new isContinued bit, which persists until the new workflow confirms it with an addTask(). An isContinued task cannot be run, but this doesn't matter sense these are complete tasks only. Complete tasks must match their original descriptions, but all other tasks can change 2) use these to verify and reassign runstate for completed tasks only pyflow-1.1.14/scratch/notes/todo000066400000000000000000000051271303601460500165700ustar00rootroot00000000000000pyflow: switch to standard python logging module summarize total warnings and errors at end of run -- have a per run log folder to store this stuff abiltiy to change global defaults at the start of a script. add timeouts add a callable task, to allow delayed execution of pure python (how to prevent user from doing intense computation on this task?) allow file task dependencies specify maxcpuCores -- communicate back through an env variable better run_num bump allow tasks to specify a set of output files, to make nfs tracking better allow filenames as task ids bagPipes: check/warning for default sample sheet case Java mem grower? -- scan for "java.lang.OutOfMemoryError" and add more memory? ****Add logging for events expected to be rare: any qstat timeout/retry ****Good system to detect two pyflow jobs trying to use the same pyflow.data directory ****Get git describe into pyflow automatically ****version number management ****Pyflow use two layers of tmp folders ****Provide last ?10? lines of task stderr on notification when a task fails ****turn on thread mem reduction ****Don't write graph on update -- instead provide a script to generate this. ****setup taskWrapper->parent communication via stderr ****add something more 'psubmake-ish' for the makefile handling ****mode change should not interfere with continuation (make/qmake) ****send cmd as arg list in addition to string ****resource management for memory (on local at least) ****specify a mapping function between resources and SGE qsub phrases -- allow this to be overridden for different SGE configurations. ****add subworkflows as tasks (this goes in a subworkflow namespace) ****waitForTask() right now blocks the specification of all other non-dependent tasks. Think of a scheme to get around this -- threads in the worklow function? ****color graph ****write dot file (on command?, automatically at end of workflow specification?) 
****add run parameters to log ****add public log function ****add exception notification email ****make error notifacation email more robust ****email events: onComplete, onFirstTaskError ****create dryrun mode, include runMode() in interface ****start working on persistence ****start developing total task time methods ****add task retry ****rename file and git to 'pyflow' ****add forceLocal flag to enable qmake/make runs ****start working on SGE ****put task stdout/stderr somewhere ****write most workflow exceptions to workflow_log ****check task names for spaces ****should jobs be killed on ctrl-c? ****start developing error handle/report polices ****check that subprocess is correct for intense tasks ****fix task4 bug pyflow-1.1.14/scratch/pybox/000077500000000000000000000000001303601460500157045ustar00rootroot00000000000000pyflow-1.1.14/scratch/pybox/email_test.py000066400000000000000000000011631303601460500204050ustar00rootroot00000000000000 import smtplib from email.MIMEText import MIMEText def getHostName() : import socket #return socket.gethostbyaddr(socket.gethostname())[0] return socket.getfqdn() def getDomainName() : "maybe this isn't the technical term -- this is just the hostname - the host" hn=getHostName().split(".") if len(hn)>1 : hn=hn[1:] return ".".join(hn) me = "pyflow-bot@"+getDomainName() to = "csaunders@illumina.com" msg=MIMEText("foo foo") msg["Subject"] = "pyFlow: job: XXX complete" msg["From"] = me msg["To"] = to msg.as_string() s=smtplib.SMTP('localhost') s.sendmail(me,to,msg.as_string()) s.quit() pyflow-1.1.14/scratch/pybox/hijack.py000066400000000000000000000002731303601460500175110ustar00rootroot00000000000000 class A : def __init__(self) : self.x = 1 def inc(self) : self.x += 1 a = A() b = A() a.inc() b.inc() # hijack: b.inc = a.inc b.inc() print "a", a.x print "b", b.x pyflow-1.1.14/scratch/pybox/inspect.py000066400000000000000000000001071303601460500177210ustar00rootroot00000000000000 def f(x) : return x + 2 import inspect print inspect.getsource(f) pyflow-1.1.14/scratch/pybox/memTest.py000077500000000000000000000021161303601460500176770ustar00rootroot00000000000000#!/usr/bin/env python # # This demo shows possibly the simplist possible pyflow we can create -- # a single 'hello world' task. After experimenting with this file # please see the 'simpleDemo' for coverage of a few more pyflow features # import os.path import sys # add module path by hand # sys.path.append(os.path.abspath(os.path.dirname(__file__))+"/../pyflow/src") from pyflow import WorkflowRunner # all pyflow workflows are written into classes derived from pyflow.WorkflowRunner: # class MemTestWorkflow(WorkflowRunner) : # a workflow is defined by overloading the WorkflowRunner.workflow() method: # def workflow(self) : # The following is our first and only task for this workflow: self.addTask("easy_task1","echo 'Hello World!'") self.addTask("easy_task2","echo 'Hello World!'") self.addTask("easy_task3","echo 'Hello World!'") self.addTask("easy_task4","echo 'Hello World!'",memMb=1) # Instantiate the workflow # wflow = MemTestWorkflow() # Run the worklow: # retval=wflow.run(nCores=8,memMb=2049) # done! 
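# Note (added for clarity, not part of the original demo): run() returns 0 on
# success and nonzero on failure, so propagating it as the process exit code
# below lets callers such as test scripts detect a failed workflow.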
sys.exit(retval) pyflow-1.1.14/scratch/test/000077500000000000000000000000001303601460500155225ustar00rootroot00000000000000pyflow-1.1.14/scratch/test/README.md000066400000000000000000000013161303601460500170020ustar00rootroot00000000000000 ## pyflow test scripts ### Global test scripts The new global test script maintained for *nix and windows is: test_pyflow.py The previous global test script written for *nix only is: test_release_tarball.bash ...this currently contains more tests, and will still be the test target for travis until windows support is complete. ### Component test scripts * pyflow_unit_tests.py - all pyflow unit tests * pyflow_basic_feature_runner.py - runs a number of pyflow operations for local or sge modes * demos - Running through the various small demo scripts and making sure they complete without error is used to round out the full test process. Most demo scripts are linux-only at this point. pyflow-1.1.14/scratch/test/pyflow_basic_feature_runner.py000077500000000000000000000062301303601460500236650ustar00rootroot00000000000000#!/usr/bin/env python """ this is a script which runs a lot of features... it does not provide test coverage as to whether everything ran correctly... it will only pick up a basic crash or hang. """ import os.path import sys # bad example of how to add the path: scriptDir=os.path.abspath(os.path.dirname(__file__)) # setup PYTHONPATH instead... #sys.path.append(scriptDir+"/../pyflow/src") from pyflow import WorkflowRunner testJobDir=os.path.join(scriptDir,"testtasks") sleepjob=os.path.join(testJobDir,"sleeper.bash") yelljob=os.path.join(testJobDir,"yeller.bash") runjob=os.path.join(testJobDir,"runner.bash") class SubSubWorkflow(WorkflowRunner) : def workflow(self) : self.addTask("easy_task1",yelljob+" 1") self.addTask("easy_task2",runjob+" 2",nCores=3,dependencies=["easy_task1"]) self.waitForTasks("easy_task2") self.addTask("easy_task3",runjob+" 2",nCores=3,dependencies=["easy_task2"]) # intentional fail: #self.addTask("easy_task3b",runjob,dependencies=["easy_task2"]) class SubWorkflow(WorkflowRunner) : def workflow(self) : self.addTask("easy_task1",yelljob+" 1") self.addTask("easy_task2",runjob+" 2",nCores=3,dependencies=["easy_task1"]) self.addTask("easy_task3",runjob+" 2",nCores=3,dependencies=["easy_task2"]) wflow=SubSubWorkflow() self.addWorkflowTask("subsubwf_task1",wflow,dependencies="easy_task1") class TestWorkflow(WorkflowRunner) : def workflow(self) : job=sleepjob+" 1" self.addTask("easy_task1",yelljob+" 1") waitTask=self.addTask("easy_task3",runjob+" 10",nCores=2,memMb=1024,isForceLocal=True) self.flowLog("My message") swflow=SubWorkflow() self.addWorkflowTask("subwf_task1",swflow,dependencies=waitTask) self.addWorkflowTask("subwf_task2",swflow,dependencies=waitTask) self.addTask("easy_task4",runjob+" 2",nCores=3,dependencies=["subwf_task1","subwf_task2"]) self.addTask("easy_task5",job,nCores=1) # and stop here self.waitForTasks() self.flowLog("ITC1: "+str(self.isTaskComplete("easy_task1"))) self.flowLog("ITC6: "+str(self.isTaskComplete("easy_task6"))) self.addTask("easy_task6",job) #self.addTask("easy_task2",sleepjob) self.addTask("checkpoint_task",dependencies=["easy_task1","easy_task6","easy_task4"]) self.addTask("dep_task",sleepjob+" 4",dependencies=["checkpoint_task"]) def getRunOptions() : from optparse import OptionParser defaults = { "mode" : "local" } parser = OptionParser() parser.set_defaults(**defaults) parser.add_option("-m", "--mode", type="string", dest="mode", help="Select run mode {local,sge} (default: %default)") 
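    # Only 'local' and 'sge' are accepted below; any other mode, or any
    # positional argument, prints help and exits with status 2. Example
    # invocation, as used by test_release_tarball.bash:
    #   python pyflow_basic_feature_runner.py --mode local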
(options, args) = parser.parse_args() if len(args) : parser.print_help() sys.exit(2) if options.mode not in ["sge","local"] : parser.print_help() sys.exit(2) return options def main() : options = getRunOptions() wflow = TestWorkflow() retval=wflow.run(options.mode,nCores=8,memMb=8*1024,isContinue=False) sys.exit(retval) if __name__ == "__main__" : main() pyflow-1.1.14/scratch/test/pyflow_unit_tests.py000077500000000000000000000334001303601460500217000ustar00rootroot00000000000000#!/usr/bin/env python import unittest import os import sys scriptDir=os.path.abspath(os.path.dirname(__file__)) def pyflow_lib_dir() : return os.path.abspath(os.path.join(scriptDir,os.pardir,os.pardir,"pyflow","src")) try : # if pyflow is in PYTHONPATH already then use the specified copy: from pyflow import isWindows,WorkflowRunner except : # otherwise use the relative path within this repo: sys.path.append(pyflow_lib_dir()) from pyflow import isWindows,WorkflowRunner def getRmCmd() : if isWindows(): return ["del","/f"] else: return ["rm","-f"] def getSleepCmd() : if isWindows(): return ["timeout"] else: return ["sleep"] def getCatCmd() : if isWindows(): return ["type"] else: return ["cat"] def getCmdString(cmdList) : return " ".join(cmdList) class NullWorkflow(WorkflowRunner) : pass class TestWorkflowRunner(unittest.TestCase) : def __init__(self, *args, **kw) : unittest.TestCase.__init__(self, *args, **kw) self.testPath="testDataRoot" def setUp(self) : self.clearTestPath() def tearDown(self) : self.clearTestPath() def clearTestPath(self) : import shutil if os.path.isdir(self.testPath) : shutil.rmtree(self.testPath) def test_createDataDir(self) : w=NullWorkflow() w.run("local",self.testPath,isQuiet=True) self.assertTrue(os.path.isdir(self.testPath)) def test_badMode(self) : w=NullWorkflow() try: w.run("foomode",self.testPath,isQuiet=True) self.fail("Didn't raise Exception") except KeyError: self.assertTrue(sys.exc_info()[1].args[0].find("foomode") != -1) def test_errorLogPositive(self) : """ Test that errors are written to separate log when requested """ os.mkdir(self.testPath) logFile=os.path.join(self.testPath,"error.log") w=NullWorkflow() try: w.run("foomode",self.testPath,errorLogFile=logFile,isQuiet=True) self.fail("Didn't raise Exception") except KeyError: self.assertTrue(sys.exc_info()[1].args[0].find("foomode") != -1) self.assertTrue((os.path.getsize(logFile) > 0)) def test_errorLogNegative(self) : """ Test that no errors are written to separate error log when none occur """ os.mkdir(self.testPath) logFile=os.path.join(self.testPath,"error.log") w=NullWorkflow() w.run("local",self.testPath,errorLogFile=logFile,isQuiet=True) self.assertTrue((os.path.getsize(logFile) == 0)) def test_dataDirCollision(self) : """ Test that when two pyflow jobs are launched with the same dataDir, the second will fail. 
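        The expected behavior, asserted below, is that the second run() returns
        a nonzero value (1) while the first run completes with 0.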
""" import threading,time class StallWorkflow(WorkflowRunner) : def workflow(self2) : self2.addTask("sleeper",getSleepCmd()+["5"]) class runner(threading.Thread) : def __init__(self2) : threading.Thread.__init__(self2) self2.retval1=1 def run(self2) : w=StallWorkflow() self2.retval1=w.run("local",self.testPath,isQuiet=True) w2=StallWorkflow() r1=runner() r1.start() time.sleep(1) retval2=w2.run("local",self.testPath,isQuiet=True) self.assertTrue(retval2==1) r1.join() self.assertTrue(r1.retval1==0) def test_forceContinue(self) : class TestWorkflow(WorkflowRunner) : color="red" def setColor(self2,color) : self2.color=color def workflow(self2) : self2.addTask("A","echo "+self2.color) w=TestWorkflow() retval=w.run("local",self.testPath,isQuiet=True) self.assertTrue(retval==0) retval=w.run("local",self.testPath,isContinue=True,isQuiet=True) self.assertTrue(retval==0) w.setColor("green") retval=w.run("local",self.testPath,isContinue=True,isQuiet=True) self.assertTrue(retval==1) retval=w.run("local",self.testPath,isContinue=True,isForceContinue=True,isQuiet=True) self.assertTrue(retval==0) def test_badContinue(self) : w=NullWorkflow() try: w.run("local",self.testPath,isContinue=True,isQuiet=True) self.fail("Didn't raise Exception") except Exception: self.assertTrue(sys.exc_info()[1].args[0].find("Cannot continue run") != -1) def test_goodContinue(self) : w=NullWorkflow() retval1=w.run("local",self.testPath,isQuiet=True) retval2=w.run("local",self.testPath,isContinue=True,isQuiet=True) self.assertTrue((retval1==0) and (retval2==0)) def test_autoContinue(self) : w=NullWorkflow() retval1=w.run("local",self.testPath,isContinue="Auto",isQuiet=True) retval2=w.run("local",self.testPath,isContinue="Auto",isQuiet=True) self.assertTrue((retval1==0) and (retval2==0)) def test_simpleDependency(self) : "make sure B waits for A" class TestWorkflow(WorkflowRunner) : def workflow(self2) : filePath=os.path.join(self.testPath,"tmp.txt") self2.addTask("A","echo foo > " +filePath) self2.addTask("B",getCmdString(getCatCmd()) + " " + filePath + " && " + getCmdString(getRmCmd())+ " " + filePath,dependencies="A") w=TestWorkflow() self.assertTrue((0==w.run("local",self.testPath,isQuiet=True))) def test_waitDependency(self) : "make sure waitForTasks waits for A on the workflow thread" class TestWorkflow(WorkflowRunner) : def workflow(self2) : filePath=os.path.join(self.testPath,"tmp.txt") if os.path.isfile(filePath) : os.remove(filePath) self2.addTask("A",getCmdString(getSleepCmd()) + " 5 && echo foo > %s" % (filePath)) self2.waitForTasks("A") assert(os.path.isfile(filePath)) self2.addTask("B",getCmdString(getCatCmd()) + " " + filePath +" && " + getCmdString(getRmCmd())+ " " + filePath) w=TestWorkflow() self.assertTrue(0==w.run("local",self.testPath,isQuiet=True)) def test_flowLog(self) : "make sure flowLog doesn't throw -- but this does not check if the log is updated" class TestWorkflow(WorkflowRunner) : def workflow(self2) : self2.flowLog("My Message") w=TestWorkflow() self.assertTrue(0==w.run("local",self.testPath,isQuiet=True)) def test_deadSibling(self) : """ Tests that when a task error occurs in one sub-workflow, its sibling workflows exit correctly (instead of hanging forever). This test is an early library error case. 
""" class SubWorkflow1(WorkflowRunner) : "this one fails" def workflow(self2) : self2.addTask("A",getSleepCmd()+["5"]) self2.addTask("B","boogyman!",dependencies="A") class SubWorkflow2(WorkflowRunner) : "this one doesn't fail" def workflow(self2) : self2.addTask("A",getSleepCmd()+["5"]) self2.addTask("B",getSleepCmd()+["5"],dependencies="A") class MasterWorkflow(WorkflowRunner) : def workflow(self2) : wflow1=SubWorkflow1() wflow2=SubWorkflow2() self2.addWorkflowTask("wf1",wflow1) self2.addWorkflowTask("wf2",wflow2) w=MasterWorkflow() self.assertTrue(1==w.run("local",self.testPath,nCores=2,isQuiet=True)) def test_selfDependency1(self) : """ """ class SelfWorkflow(WorkflowRunner) : def workflow(self2) : self2.addTask("A",getSleepCmd()+["5"],dependencies="A") w=SelfWorkflow() self.assertTrue(1==w.run("local",self.testPath,isQuiet=True)) def test_expGraphScaling(self) : """ This tests that pyflow does not scale poorly with highly connected subgraphs. When the error occurs, it locks the primary thread, so we put the test workflow on its own thread so that we can time it and issue an error. Issue reported by R Kelley and A Halpern """ import threading class ScalingWorkflow(WorkflowRunner) : def workflow(self2) : tasks = set() for idx in xrange(60) : sidx = str(idx) tasks.add(self2.addTask("task_" + sidx, "echo " + sidx, dependencies = tasks)) self2.waitForTasks("task_50") tasks.add(self2.addTask("task_1000", "echo 1000", dependencies = tasks)) class runner(threading.Thread) : def __init__(self2) : threading.Thread.__init__(self2) self2.setDaemon(True) def run(self2) : w=ScalingWorkflow() w.run("local",self.testPath,isQuiet=True) r1=runner() r1.start() r1.join(30) self.assertTrue(not r1.isAlive()) def test_startFromTasks(self) : """ run() option to ignore all tasks before a specified task node """ filePath=os.path.join(self.testPath,"tmp.txt") class SelfWorkflow(WorkflowRunner) : def workflow(self2) : self2.addTask("A","echo foo > "+filePath) self2.addTask("B",getSleepCmd()+["1"],dependencies="A") self2.addTask("C",getSleepCmd()+["1"],dependencies=("A","B")) w=SelfWorkflow() self.assertTrue(0==w.run("local",self.testPath,isQuiet=True,startFromTasks="B")) self.assertTrue(not os.path.exists(filePath)) def test_startFromTasksSubWflow(self) : """ run() option to ignore all tasks before a specified task node """ filePath=os.path.join(self.testPath,"tmp.txt") class SubWorkflow(WorkflowRunner) : def workflow(self2) : self2.addTask("D","echo foo > "+filePath) class SelfWorkflow(WorkflowRunner) : def workflow(self2) : self2.addTask("A",getSleepCmd()+["1"]) self2.addWorkflowTask("B",SubWorkflow(),dependencies="A") self2.addTask("C",getSleepCmd()+["1"],dependencies=("A","B")) w=SelfWorkflow() self.assertTrue(0==w.run("local",self.testPath,isQuiet=True,startFromTasks="B")) self.assertTrue(os.path.exists(filePath)) def test_startFromTasksSubWflow2(self) : """ run() option to ignore all tasks before a specified task node """ filePath=os.path.join(self.testPath,"tmp.txt") class SubWorkflow(WorkflowRunner) : def workflow(self2) : self2.addTask("D","echo foo > "+filePath) class SelfWorkflow(WorkflowRunner) : def workflow(self2) : self2.addTask("A",getSleepCmd()+["1"]) self2.addWorkflowTask("B",SubWorkflow(),dependencies="A") self2.addTask("C",getSleepCmd()+["1"],dependencies=("A","B")) w=SelfWorkflow() self.assertTrue(0==w.run("local",self.testPath,isQuiet=True,startFromTasks="C")) self.assertTrue(not os.path.exists(filePath)) def test_ignoreTasksAfter(self) : """ run() option to ignore all tasks below a 
specified task node """ class SelfWorkflow(WorkflowRunner) : def workflow(self2) : self2.addTask("A",getSleepCmd()+["1"]) self2.addTask("B",getSleepCmd()+["1"],dependencies="A") self2.addTask("C",getSleepCmd()+["1"],dependencies=("A","B")) w=SelfWorkflow() self.assertTrue(0==w.run("local",self.testPath,isQuiet=True,ignoreTasksAfter="B")) self.assertTrue(not w.isTaskComplete("C")) def test_addTaskOutsideWorkflow(self) : """ test that calling addTask() outside of a workflow() method raises an exception """ class SelfWorkflow(WorkflowRunner) : def __init__(self2) : self2.addTask("A",getSleepCmd()+["1"]) try : w=SelfWorkflow() self.fail("Didn't raise Exception") except : pass def test_runModeInSubWorkflow(self) : """ test that calling getRunMode() in a sub-workflow() method does not raise an exception (github issue #5) """ class SubWorkflow(WorkflowRunner) : def workflow(self2) : if self2.getRunMode() == "local" : self2.addTask("D",getSleepCmd()+["1"]) class SelfWorkflow(WorkflowRunner) : def workflow(self2) : self2.addTask("A",getSleepCmd()+["1"]) self2.addWorkflowTask("B",SubWorkflow(),dependencies="A") self2.addTask("C",getSleepCmd()+["1"],dependencies=("A","B")) try : w=SelfWorkflow() self.assertTrue(0==w.run("local",self.testPath,isQuiet=True)) except : self.fail("Should not raise Exception") def test_CheckpointChain(self) : """ Test that checkout points are handled correctly even when multiple checkpoints have a parent-child relationship """ class SelfWorkflow(WorkflowRunner) : def workflow(self2) : self2.addTask("A") self2.addTask("B") self2.addTask("C",dependencies=["A","B"]) try : w=SelfWorkflow() self.assertTrue(0==w.run("local",self.testPath,isQuiet=True)) except : self.fail("Should not raise Exception") if __name__ == '__main__' : unittest.main() pyflow-1.1.14/scratch/test/test_pyflow.py000077500000000000000000000032071303601460500204600ustar00rootroot00000000000000#!/usr/bin/env python # """ automation friendly cross-platform tests for pyflow """ import os import sys scriptDir=os.path.abspath(os.path.dirname(__file__)) def getOptions() : from optparse import OptionParser usage = "usage: %prog [options]" parser = OptionParser(usage=usage,description="Run all pyflow tests") parser.add_option("--nosge",dest="isSkipSge", action="store_true", help="skip SGE testing") (options,args) = parser.parse_args() if len(args) != 0 : parser.print_help() sys.exit(2) return (options,args) def main() : import subprocess (options,args) = getOptions() pyflowRootDir=os.path.abspath(os.path.join(scriptDir,os.pardir,os.pardir)) pyflowDir=os.path.join(pyflowRootDir,"pyflow") utScriptPath=os.path.join(scriptDir,"pyflow_unit_tests.py") if True : # process-out to run the unit tests for now -- TODO: can we just import this instead? 
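        # One possible answer to the TODO above (an untested sketch, not part of
        # this release): run the tests in-process via the unittest API instead of
        # a child interpreter, e.g.
        #   import unittest
        #   sys.path.insert(0, scriptDir)
        #   import pyflow_unit_tests
        #   unittest.main(module=pyflow_unit_tests, argv=[utScriptPath], exit=False)
        # (exit=False requires Python 2.7+.) The subprocess route below is kept
        # because it isolates each test run in a clean interpreter.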
utCmd=[sys.executable,"-E",utScriptPath,"-v"] proc = subprocess.Popen(utCmd) proc.wait() if proc.returncode != 0 : raise Exception("Pyflow unit test run failed") # run through demos (only helloWorld is working on windows) if True : demoDir=os.path.join(pyflowDir,"demo") for demoName in ["helloWorld"] : demoScriptPath=os.path.join(demoDir,demoName,demoName+".py") demoCmd=[sys.executable,"-E",demoScriptPath] proc = subprocess.Popen(demoCmd) proc.wait() if proc.returncode != 0 : raise Exception("Pyflow demo failed: '%s'" % (demoScriptPath)) main() pyflow-1.1.14/scratch/test/test_release_tarball.bash000077500000000000000000000020271303601460500225450ustar00rootroot00000000000000#!/usr/bin/env bash # # this script brings everything together for an automated build/test system # set -o errexit set -o nounset set -o xtrace if [ $# -gt 1 ]; then echo "usage: $0 [ -nosge ]" 2>&1 exit 2 fi is_sge=1 if [ $# -ge 1 ] && [ "$1" == "-nosge" ]; then is_sge=0 fi thisdir=$(dirname $0) cd $thisdir/.. testname=TESTBALL bash ./make_release_tarball.bash $testname tar -xzf $testname.tar.gz testdir=$(pwd)/$testname # run through tests: PYTHONPATH=$testdir/src test/pyflow_unit_tests.py -v # run this a few times just in case we can russle out any subtle/rare race conditions: for f in $(seq 5); do PYTHONPATH=$testdir/src test/pyflow_basic_feature_runner.py --mode local done if [ $is_sge == 1 ]; then PYTHONPATH=$testdir/src test/pyflow_basic_feature_runner.py --mode sge fi # run through demos: for f in cwdDemo envDemo helloWorld makeDemo memoryDemo mutexDemo simpleDemo subWorkflow; do cd $testdir/demo/$f python $f.py python pyflow.data/state/make_pyflow_task_graph.py >| test.dot done pyflow-1.1.14/scratch/test/testtasks/000077500000000000000000000000001303601460500175475ustar00rootroot00000000000000pyflow-1.1.14/scratch/test/testtasks/runner.bash000077500000000000000000000003631303601460500217240ustar00rootroot00000000000000#!/usr/bin/env bash thisdir=$(dirname $0) cd $thisdir if ! [ -e ./runner ]; then # turning on -O2 is too variable accross different platforms, so leave off: gcc ./runner.c -lm -o runner.tmp && mv runner.tmp runner fi ./runner $1 pyflow-1.1.14/scratch/test/testtasks/runner.c000066400000000000000000000003521303601460500212240ustar00rootroot00000000000000#include "math.h" #include "assert.h" int main(int argc, char**argv) { assert(argc==2); int mult=atoi(argv[1]); int i,j; double a=0; long total=50000000; for(j=0;j&2 done echo pid: $pid arg: $arg ending sleep