diff --git a/Dockerfile b/Dockerfile
index 0930b64..4f8ca13 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,22 +1,26 @@
-FROM alpine:3.9
-MAINTAINER Bartosz Balis
-
-ENV HYPERFLOW_JOB_EXECUTOR_VERSION=v1.0.11
-
-RUN apk --update add openjdk7-jre \
-    && apk add curl bash npm \
-    && apk add --no-cache --repository http://dl-cdn.alpinelinux.org/alpine/v3.9/main/ nodejs=10.14.2-r0 \
-    && apk add python3 libpcap libpcap-dev util-linux
-
-RUN npm install -g https://github.com/hyperflow-wms/hyperflow-job-executor/archive/${HYPERFLOW_JOB_EXECUTOR_VERSION}.tar.gz
-
-WORKDIR /soykb
-COPY software/software.tar.gz .
-RUN tar zxvf software.tar.gz
-RUN chmod +x software/bwa-0.7.4/bwa
-COPY software/*-wrapper ./
-COPY software/libnethogs.so.0.8.5-63-g68033bf /usr/local/lib
-COPY software/nethogs-wrapper.py /usr/local/bin
-RUN chmod +x /usr/local/bin/nethogs-wrapper.py
-
-ENV PATH="/soykb:${PATH}"
+FROM archlinux
+MAINTAINER Mateusz Plinta
+
+ENV HYPERFLOW_JOB_EXECUTOR_VERSION=v1.0.13
+
+RUN pacman -Sy
+RUN pacman -S --needed --noconfirm git jre7-openjdk npm python3 libpcap util-linux base-devel libffi glibc lib32-glibc
+
+RUN pacman -S --needed --noconfirm sudo
+RUN useradd builduser -m
+RUN passwd -d builduser
+RUN printf 'builduser ALL=(ALL) ALL\n' | tee -a /etc/sudoers
+RUN sudo -u builduser bash -c 'cd ~ && git clone https://aur.archlinux.org/ncurses5-compat-libs.git && cd ncurses5-compat-libs && makepkg -si --skippgpcheck --noconfirm'
+RUN sudo -u builduser bash -c 'cd ~ && git clone https://aur.archlinux.org/libffi6.git && cd libffi6 && makepkg -si --noconfirm'
+
+RUN npm install -g https://github.com/hyperflow-wms/hyperflow-job-executor/archive/${HYPERFLOW_JOB_EXECUTOR_VERSION}.tar.gz
+
+WORKDIR /soykb
+COPY software/software.tar.gz .
+RUN tar zxvf software.tar.gz
+COPY software/*-wrapper ./
+COPY software/libnethogs.so.0.8.5-63-g68033bf /usr/local/lib
+COPY software/nethogs-wrapper.py /usr/local/bin
+RUN chmod +x /usr/local/bin/nethogs-wrapper.py
+
+ENV PATH="/soykb:${PATH}"
diff --git a/Dockerfile.alpine b/Dockerfile.alpine
new file mode 100644
index 0000000..3367977
--- /dev/null
+++ b/Dockerfile.alpine
@@ -0,0 +1,24 @@
+# FROM alpine:3.11
+FROM frolvlad/alpine-glibc
+MAINTAINER Bartosz Balis
+
+ENV HYPERFLOW_JOB_EXECUTOR_VERSION=v1.0.11
+
+RUN apk --update add openjdk7-jre \
+    && apk add curl bash ncurses ncurses5 ncurses5-libs npm \
+#   && apk add --no-cache --repository http://dl-cdn.alpinelinux.org/alpine/v3.11/main/ nodejs=10.14.2-r0 \
+    && apk add python3 libpcap libpcap-dev util-linux
+
+RUN npm install -g https://github.com/hyperflow-wms/hyperflow-job-executor/archive/${HYPERFLOW_JOB_EXECUTOR_VERSION}.tar.gz
+
+RUN ln -s /usr/lib/libncurses.so.5 /usr/lib/libtinfo.so.5
+
+WORKDIR /soykb
+COPY software/software.tar.gz .
+RUN tar zxvf software.tar.gz
+COPY software/*-wrapper ./
+COPY software/libnethogs.so.0.8.5-63-g68033bf /usr/local/lib
+COPY software/nethogs-wrapper.py /usr/local/bin
+RUN chmod +x /usr/local/bin/nethogs-wrapper.py
+
+ENV PATH="/soykb:${PATH}"
diff --git a/Makefile b/Makefile
index 0743019..86ac629 100644
--- a/Makefile
+++ b/Makefile
@@ -1,13 +1,16 @@
 TAG = $(shell git describe --tags --always)
-PREFIX = hyperflowwms
-REPO_NAME = soykb-workflow-worker
+# PREFIX = $(shell git config --get remote.origin.url | tr ':.' '/' | rev | cut -d '/' -f 3 | rev)
+# REPO_NAME = $(shell git config --get remote.origin.url | tr ':.' '/' | rev | cut -d '/' -f 2 | rev)
+
+REPO_NAME = 'soykb-worker'
+PREFIX = 'hyperflowwms'
 
 all: push
 
 container: image
 
 image:
-	docker build -t $(PREFIX)/$(REPO_NAME) . # Build new image and automatically tag it as latest
+	docker build --no-cache -t $(PREFIX)/$(REPO_NAME) . # Build new image and automatically tag it as latest
 	docker tag $(PREFIX)/$(REPO_NAME) $(PREFIX)/$(REPO_NAME):$(TAG) # Add the version tag to the latest image
 
 push: image
diff --git a/README.md b/README.md
index 424d9cf..7c4cf04 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,16 @@
 # Soykb workflow for HyperFlow
 [![](https://images.microbadger.com/badges/version/hyperflowwms/soykb-workflow-worker.svg)](https://microbadger.com/images/hyperflowwms/soykb-workflow-worker "Get your own version badge on microbadger.com")
 
+## Generate example workflows
+
+Generate an example workflow with:
+- `genwf-size2.sh` (size 2)
+
+The scripts invoke Docker images and create:
+- a `data` subdirectory with the workflow graph `workflow.json` and `haplotype-files.list`
+
+You can also use the `hyperflowwms/soykb-generator` image directly to generate other workflows; see the scripts for example commands. For instance, to generate smaller workflows, use a smaller value for the fastq files parameter.
+
 ## Build and publish image
 
 HyperFlow Docker image contains Soykb binaries and HyperFlow job executor
diff --git a/genwf-size2.sh b/genwf-size2.sh
new file mode 100755
index 0000000..46c86fe
--- /dev/null
+++ b/genwf-size2.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+
+docker run -v $PWD:/workdir hyperflowwms/soykb-generator sh -c 'generate-workflow 2'
\ No newline at end of file
diff --git a/run.sh b/run.sh
new file mode 100755
index 0000000..452dc09
--- /dev/null
+++ b/run.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# This script runs everything in containers, so that you only need Docker on your host machine
+
+echo Before running this script, start Redis container as follows:
+echo docker run -d --name redis redis --bind 127.0.0.1
+echo
+
+docker run -a stdout -a stderr --rm --network container:redis -e HF_VAR_WORKER_CONTAINER="hyperflowwms/soykb-workflow-worker" -e HF_VAR_WORK_DIR="$PWD/data" -e HF_VAR_HFLOW_IN_CONTAINER="true" -e HF_VAR_function="redisCommand" -e REDIS_URL="redis://127.0.0.1:6379" --name hyperflow -v /var/run/docker.sock:/var/run/docker.sock -v $PWD:/wfdir --entrypoint "/bin/sh" hyperflowwms/hyperflow:v1.3.23 -c "apk add docker && hflow run /wfdir"
diff --git a/software/bwa-wrapper b/software/bwa-wrapper
index 6d15a87..fe3cf39 100755
--- a/software/bwa-wrapper
+++ b/software/bwa-wrapper
@@ -2,5 +2,7 @@
 
 set -e
 
+export TMPDIR=`pwd`
+
 /soykb/software/bwa-0.7.4/bwa "$@"
 
diff --git a/software/gatk-wrapper b/software/gatk-wrapper
index a8d4a74..ab6b463 100755
--- a/software/gatk-wrapper
+++ b/software/gatk-wrapper
@@ -4,14 +4,16 @@
 export TMPDIR=`pwd`
 
 OUTFILE=`mktemp -t gatk-output.XXXXXXXXXX` || exit 1
-# memory depends on what subsystem we call
-OPTIONS="-Xmx2g -XX:+UseSerialGC"
-#if (echo "'$@'" | grep -i "HaplotypeCaller") >/dev/null; then
-#    OPTIONS="-Xmx2g -XX:+UseSerialGC"
-#elif (echo "'$@'" | grep -i "CombineGVCFs") >/dev/null; then
-#    OPTIONS="-Xmx17g -XX:+UseSerialGC"
-#fi
-OPTIONS="$OPTIONS -Djava.io.tmpdir=$TMPDIR"
+# first argument is the memory limit in GB, the rest are GATK args
+MEM_TOTAL=$1
+shift
+
+# Java -Xmx should be a little lower than the requested memory
+MEM_JAVA_MX=$(($MEM_TOTAL - 2))
+
+OPTIONS="-Xmx${MEM_JAVA_MX}g -XX:+UseSerialGC"
+
+OPTIONS="-Djava.io.tmpdir=$TMPDIR $OPTIONS"
 
 java $OPTIONS \
 	-jar /soykb/software/GenomeAnalysisTK-3.0.0/GenomeAnalysisTK.jar \
diff --git a/software/gunzip-wrapper b/software/gunzip-wrapper
new file mode 100755
index 0000000..1ca980c
--- /dev/null
+++ b/software/gunzip-wrapper
@@ -0,0 +1,7 @@
+#!/bin/bash
+
+set -e
+
+gunzip -c $1 > $2
+
+
diff --git a/software/picard-wrapper b/software/picard-wrapper
index 81d41f2..4ab9377 100755
--- a/software/picard-wrapper
+++ b/software/picard-wrapper
@@ -1,6 +1,7 @@
 #!/bin/bash
 
 set -e
+export TMPDIR=`pwd`
 
-java -Xmx2g -XX:+UseSerialGC -jar /soykb/software/picard-tools-1.92/"$@"
+java -Djava.io.tmpdir=$TMPDIR -Xmx15g -XX:+UseSerialGC -jar /soykb/software/picard-tools-1.92/"$@"
 
diff --git a/software/samtools-wrapper b/software/samtools-wrapper
new file mode 100755
index 0000000..2b66661
--- /dev/null
+++ b/software/samtools-wrapper
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+set -e
+
+export TMPDIR=`pwd`
+
+# sometimes we are asked to "merge" only one file
+if [ "X$1" = "Xmerge" -a "X$4" = "X" ]; then
+    # just copy
+    cp "$3" "$2"
+    exit 0
+fi
+
+/soykb/software/samtools-1.0/samtools "$@"
+
diff --git a/software/software-wrapper b/software/software-wrapper
new file mode 100755
index 0000000..43cdd4f
--- /dev/null
+++ b/software/software-wrapper
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+set -e
+
+if [ ! -d /soykb/software ]; then
+    tar xzf /soykb/software.tar.gz
+fi
+
+# fix for leftover files in the home directory at TACC
+find ~/ -maxdepth 1 -name slurm.\* -mtime +5 -exec rm -f {} \; || /bin/true
+find ~/ -maxdepth 1 -name gram\*.log -mtime +5 -exec rm -f {} \; || /bin/true
+
+
diff --git a/software/software.tar.gz b/software/software.tar.gz
index 892e014..119e38f 100644
Binary files a/software/software.tar.gz and b/software/software.tar.gz differ
diff --git a/workflow-generator/.gitignore b/workflow-generator/.gitignore
new file mode 100644
index 0000000..52ff8b6
--- /dev/null
+++ b/workflow-generator/.gitignore
@@ -0,0 +1,3 @@
+*.pyc
+software
+software.tar.gz
diff --git a/workflow-generator/Dockerfile b/workflow-generator/Dockerfile
new file mode 100644
index 0000000..4bc1c0d
--- /dev/null
+++ b/workflow-generator/Dockerfile
@@ -0,0 +1,15 @@
+FROM mhart/alpine-node:12
+
+LABEL maintainer "Mateusz Plinta "
+
+RUN apk add python-dev
+
+RUN mkdir /soykb-workflow
+
+ADD . /soykb-workflow/
+
+RUN npm install https://github.com/hyperflow-wms/pegasus-hyperflow-converter/archive/master.tar.gz /
+
+ENV PATH /soykb-workflow:/node_modules/.bin:$PATH
+ENV PYTHONPATH=/soykb-workflow
+WORKDIR /soykb-workflow
diff --git a/workflow-generator/Makefile b/workflow-generator/Makefile
new file mode 100644
index 0000000..ea8f0f5
--- /dev/null
+++ b/workflow-generator/Makefile
@@ -0,0 +1,20 @@
+TAG = $(shell git describe --tags --always)
+# PREFIX = $(shell git config --get remote.origin.url | tr ':.' '/' | rev | cut -d '/' -f 3 | rev)
+# REPO_NAME = $(shell git config --get remote.origin.url | tr ':.' '/' | rev | cut -d '/' -f 2 | rev)
+
+REPO_NAME = 'soykb-generator'
+PREFIX = 'hyperflowwms'
+
+all: push
+
+container: image
+
+image:
+	docker build -t $(PREFIX)/$(REPO_NAME) .
# Build new image and automatically tag it as latest + docker tag $(PREFIX)/$(REPO_NAME) $(PREFIX)/$(REPO_NAME):$(TAG) # Add the version tag to the latest image + +push: image + docker push $(PREFIX)/$(REPO_NAME) # Push image tagged as latest to repository + docker push $(PREFIX)/$(REPO_NAME):$(TAG) # Push version tagged image to repository (since this image is already pushed it will simply create or update version tag) + +clean: diff --git a/workflow-generator/Pegasus/AutoADAG.py b/workflow-generator/Pegasus/AutoADAG.py new file mode 100644 index 0000000..ce59c74 --- /dev/null +++ b/workflow-generator/Pegasus/AutoADAG.py @@ -0,0 +1,93 @@ +# # +# Copyright 2007-2012 University Of Southern California +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# # + +__author__ = 'Rajiv Mayani' + +import logging + +try: + from Pegasus.DAX3 import ADAG, Job, File, Executable, PFN, Link, When, DuplicateError +except ImportError, e: + logging.error('Include Pegasus Python libraries in your PYTHONPATH') + + +class AutoADAG(object, ADAG): + """ + Automatically determine the dependencies between jobs based on the file usages. + All jobs consuming a file F depend on the singular job that produces that file. + """ + def __init__(self, name, count=None, index=None): + ADAG.__init__(self, name, count, index) + + def writeXML(self, out): + + mapping = {} + + def addOutput(job, file_obj): + + if file_obj: + file_obj = file_obj.name + + if file_obj not in mapping: + mapping[file_obj] = (set(), set()) + + mapping[file_obj][1].add(job) + + # Automatically determine dependencies + + # Traverse each job + for job_id, job in self.jobs.iteritems(): + file_used = job.used + + # If job produces to stdout, identify it as an output file + addOutput(job, job.stdout) + # If job produces to stderr, identify it as an output file + addOutput(job, job.stderr) + + # If job consumes from stdin, identify it as an input file + if job.stdin: + if job.stdin.name not in mapping: + mapping[job.stdin.name] = (set(), set()) + + mapping[job.stdin.name][0].add(job) + + + for file in file_used: + + if file.name not in mapping: + mapping[file.name] = (set(), set()) + + if file.link == Link.INPUT: + mapping[file.name][0].add(job) + else: + mapping[file.name][1].add(job) + + for file_name, io in mapping.iteritems(): + + # Go through the mapping and for each file add dependencies between the + # job producing a file and the jobs consuming the file + inputs = io[0] + + if len(io[1]) > 0: + output = io[1].pop() + + for input in inputs: + try: + self.depends(parent=output, child=input) + except DuplicateError: + pass + + super(AutoADAG, self).writeXML(out) diff --git a/workflow-generator/Pegasus/DAX2.py b/workflow-generator/Pegasus/DAX2.py new file mode 100644 index 0000000..8b930f8 --- /dev/null +++ b/workflow-generator/Pegasus/DAX2.py @@ -0,0 +1,902 @@ +# Copyright 2009 University Of Southern California +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""API for generating Pegasus DAXes + +The classes in this module can be used to generate DAXes that can be +read by Pegasus. + +The official DAX schema is here: http://pegasus.isi.edu/schema/dax-2.1.xsd +""" +from __future__ import print_function + +__author__ = "Gideon Juve " +__all__ = ["DAX","Filename","Profile","Job","Namespace","LFN", + "parse","parseString"] +__version__ = "2.1" + +import datetime, pwd, os +from cStringIO import StringIO +import xml.sax +import xml.sax.handler +import shlex + +SCHEMA_NAMESPACE = u"http://pegasus.isi.edu/schema/DAX" +SCHEMA_LOCATION = u"http://pegasus.isi.edu/schema/dax-2.1.xsd" +SCHEMA_VERSION = u"2.1" + + +class Namespace: + """Namespace values recognized by Pegasus. You can use these, or just + pass your own value when creating a Profile object (see Profile). + """ + + PEGASUS = u'pegasus' + CONDOR = u'condor' + DAGMAN = u'dagman' + ENV = u'env' + HINTS = u'hints' + GLOBUS = u'globus' + SELECTOR = u'selector' + + +class LFN: + """Logical file name attributes. These include: + + Linkage Attributes: + NONE, INPUT, OUTPUT, INOUT + Type Attributes: + TYPE_DATA, TYPE_EXECUTABLE, TYPE_PATTERN + Transfer Attributes: + XFER_NOT, XFER_OPTIONAL, XFER_MANDATORY + """ + + # Linkage + NONE = u'none' + INPUT = u'input' + OUTPUT = u'output' + INOUT = u'inout' + + # File type + TYPE_DATA = u'data' + TYPE_EXECUTABLE = u'executable' + TYPE_PATTERN = u'pattern' + + # Transfer + XFER_NOT = u'false' + XFER_OPTIONAL = u'optional' + XFER_MANDATORY = u'true' + + +class Filename: + """Filename(filename[,type][,link][,register][,transfer][,optional][,varname]) + + A logical file name. + + Examples: + input = Filename('input.txt',link=LFN.INPUT,transfer=True) + intermediate = Filename('intermediate.txt',link=LFN.OUTPUT) + result = Filename('result.txt',link=LFN.OUTPUT,register=True,transfer=True) + opt = Filename('optional.txt',link=LFN.OUTPUT,optional=True) + binary = Filename('bin/binary',link=LFN.INPUT,type=LFN.TYPE_EXECUTABLE,transfer=True) + """ + + def __init__(self, filename, type=LFN.TYPE_DATA, link=LFN.NONE, + register=False, transfer=LFN.XFER_NOT, optional=None, varname=None): + """ + All arguments specify the workflow-level behavior of this Filename. Job-level + behavior can be defined when adding the Filename to a Job's uses. If the + properties are not overridden at the job-level, then the workflow-level + values are used as defaults. + + If this LFN is to be used as a job's stdin/stdout/stderr then the value + of link is ignored when generating the tags. + + Arguments: + filename: The name of the file (required) + type: The file type (see LFN) + link: Is this file a workflow-level input/output/both? 
(see LFN) + register: The default value for register (True/False) + transfer: The default value for transfer (see LFN, or True/False) + optional: The default value for optional (True/False) + type: The file type (see LFN) + varname: Only used for stdio files + """ + if filename is None: + raise ValueError('filename required') + self.filename = filename + self.link = link + self.register = register + self.transfer = transfer + self.optional = optional + self.type = type + self.varname = varname + + def getFilename(self): + return self.filename + def setFilename(self, filename): + self.filename = filename + def getType(self): + return self.type + def setType(self, type): + self.type = type + def getLink(self): + return self.link + def setLink(self, link): + self.link = link + def getRegister(self): + return self.register + def setRegister(self, register): + self.register = register + def getTransfer(self): + return self.transfer + def setTransfer(self, transfer): + self.transfer = transfer + def getOptional(self): + return self.optional + def setOptional(self, optional): + self.optional = optional + def getVarname(self): + return self.varname + def setVarname(self, varname): + self.varname = varname + + def __str__(self): + """Returns argument-style version of the filename XML tag""" + return self.toArgumentXML() + + def toArgumentXML(self): + """Returns an XML representation of this file as a short filename + tag for use in job arguments""" + return u'' % (self.filename) + + def toFilenameXML(self): + """Returns an XML representation of this file as a filename tag""" + xml = StringIO() + + xml.write(u'') + + result = xml.getvalue() + xml.close() + return result + + def toStdioXML(self, tag): + """Returns an XML representation of this file as a stdin/out/err tag""" + xml = StringIO() + xml.write(u'<%s file="%s"' % (tag, self.filename)) + if self.varname is not None: + xml.write(u' varname="%s"' % self.varname) + if tag is 'stdin': + xml.write(u' link="input"') # stdin is always input + else: + xml.write(u' link="output"') # stdout/stderr are always output + xml.write(u'/>') + + result = xml.getvalue() + xml.close() + return result + + +class Profile: + """Profile(namespace,key,value[,origin]) + + A Profile captures scheduler-, system-, and environment-specific + parameters in a uniform fashion. Each profile declaration assigns a value + to a key within a namespace. The origin records what entity is responsible + for setting the profile and is optional. + + Examples: + path = Profile(Namespace.ENV,'PATH','/bin') + vanilla = Profile(Namespace.CONDOR,'universe','vanilla') + path = Profile(namespace='env',key='PATH',value='/bin') + path = Profile('env','PATH','/bin') + """ + + def __init__(self, namespace, key, value, origin=None): + """ + Arguments: + namespace: The namespace of the profile (see Namespace) + key: The key name. Can be anything that responds to str(). + value: The value for the profile. Can be anything that responds to str(). 
+ origin: The entity responsible for setting this profile (optional) + """ + self.namespace = namespace + self.key = key + self.value = value + self.origin = origin + + def toXML(self): + """Return an XML representation of this profile""" + xml = StringIO() + xml.write(u'') + xml.write(unicode(self.value)) + xml.write(u'') + result = xml.getvalue() + xml.close() + return result + + def __str__(self): + return u'%s:%s = %s' % (self.namespace, self.key, self.value) + + +class Job: + """Job(name[,id][,namespace][,version][,dv_name][,dv_namespace][,dv_version][,level][,compound]) + + This class defines the specifics of a job to run in an abstract manner. + All filename references still refer to logical files. All references + transformations also refer to logical transformations, though + physical location hints can be passed through profiles. + + Examples: + sleep = Job(id="ID0001",name="sleep") + jbsim = Job(id="ID0002",name="jbsim",namespace="cybershake",version="2.1") + merge = Job(name="merge",level=2) + + Several arguments can be added at the same time: + input = Filename(...) + output = Filename(...) + job.addArguments("-i",input,"-o",output) + + Profiles are added similarly: + job.addProfile(Profile(Namespace.ENV,key='PATH',value='/bin')) + + Adding file uses is simple, and you can override global Filename attributes: + job.addUses(input,LFN.INPUT) + job.addUses(output,LFN.OUTPUT,transfer=True,register=True) + """ + + class Use: + """Use(file[,link][,register][,transfer][,optional][,temporaryHint]) + + Use of a logical file name. Used for referencing LFNs in the DAX. + + Note: This class is used internally. You shouldn't need to use it in + your code. You should use Job.addUses(...). + """ + + def __init__(self, file, link=None, register=None, transfer=None, + optional=None, temporaryHint=None): + if file is None: + raise ValueError('file required') + self.file = file + self.link = link + self.optional = optional + self.register = register + self.transfer = transfer + self.temporaryHint = temporaryHint + + def toXML(self): + xml = StringIO() + + if self.link is None: link = self.file.getLink() + else: link = self.link + if self.optional is None: optional = self.file.getOptional() + else: optional = self.optional + if self.register is None: register = self.file.getRegister() + else: register = self.register + if self.transfer is None: transfer = self.file.getTransfer() + else: transfer = self.transfer + type = self.file.getType() + temporaryHint = self.temporaryHint + + xml.write(u'') + + result = xml.getvalue() + xml.close() + return result + + def __init__(self, name, id=None, namespace=None, version=None, + dv_name=None, dv_namespace=None, dv_version=None, + level=None, compound=None): + """The ID for each job should be unique in the DAX. If it is None, then + it will be automatically generated when the job is added to the DAX. + As far as I can tell this ID is only used for uniqueness during + planning, and is otherwise ignored. For example, when Condor is running + the job there doesn't seem to be a way to use this ID to trace the + running job back to its entry in the DAX. + + The name, namespace, and version should match what you have in your + transformation catalog. For example, if namespace="foo" name="bar" + and version="1.0", then the transformation catalog should have an + entry for "foo::bar:1.0". + + Level is the level in the workflow. 
So if you have a workflow with + three jobs--A, B, and C--and you have dependencies between A->B and + B->C, then A is level 1, B is level 2, and C is level 3. You don't + need to specify this because Pegasus calculates it automatically. + + I have no idea what 'compound' does, or what the 'dv_' stuff does. + + Arguments: + name: The transformation name (required) + id: A unique identifier for the job (autogenerated if None) + namespace: The namespace of the transformation + version: The transformation version + dv_name: ? + dv_namespace: ? + dv_version: ? + level: The level of the job in the workflow + compound: ? + """ + if name is None: + raise ValueError('name required') + self.name = name + self.namespace = namespace + self.version = version + self.id = id + self.dv_namespace = dv_namespace + self.dv_name = dv_name + self.dv_version = dv_version + self.level = level + self.compound = compound + + self.arguments = [] + self.profiles = [] + self.uses = [] + + self.stdout = None + self.stderr = None + self.stdin = None + + + def addArguments(self, *arguments): + """Add several arguments to the job""" + self.arguments.extend(arguments) + + def addArgument(self, arg): + """Add an argument to the job""" + self.addArguments(arg) + + def addProfile(self,profile): + """Add a profile to the job""" + self.profiles.append(profile) + + def addUses(self, file, link=None, register=None, transfer=None, + optional=None, temporaryHint=None): + """Add a logical filename that the job uses. + + Optional arguments to this method specify job-level attributes of + the 'uses' tag in the DAX. If not specified, these values default + to those specified when creating the Filename object. + + I don't know what 'temporaryHint' does. + + Arguments: + file: A Filename object representing the logical file name + link: Is this file a job input, output or both (See LFN) + register: Should this file be registered in RLS? (True/False) + transfer: Should this file be transferred? (True/False or See LFN) + optional: Is this file optional, or should its absence be an error? + temporaryHint: ? 
+ """ + use = Job.Use(file,link,register,transfer,optional) + self.uses.append(use) + + def setStdout(self, filename): + """Redirect stdout to a file""" + self.stdout = filename + + def setStderr(self, filename): + """Redirect stderr to a file""" + self.stderr = filename + + def setStdin(self, filename): + """Redirect stdin from a file""" + self.stdin = filename + + def setID(self, id): + """Set the ID of this job""" + self.id = id + + def getID(self): + """Return the job ID""" + return self.id + + def setNamespace(self, namespace): + """Set the transformation namespace for this job""" + self.namespace = namespace + + def getNamespace(self): + """Get the transformation namespace for this job""" + return self.namespace + + def setName(self, name): + """Set the transformation name of this job""" + self.name = name + + def getName(self): + """Get the transformation name of this job""" + return self.name + + def setVersion(self, version): + """Set the version of the transformation""" + self.version = version + + def getVersion(self): + """Get the version of the transformation""" + return self.version + + def toXML(self,level=0,indent=u'\t'): + """Return an XML representation of this job + + Arguments: + level: The level of indentation + indent: The indentation string + """ + xml = StringIO() + indentation = u''.join(indent for x in range(0,level)) + + # Open tag + xml.write(indentation) + xml.write(u'\n') + + # Arguments + if len(self.arguments) > 0: + xml.write(indentation) + xml.write(indent) + xml.write(u'') + xml.write(u' '.join(unicode(x) for x in self.arguments)) + xml.write(u'\n') + + # Profiles + if len(self.profiles) > 0: + for pro in self.profiles: + xml.write(indentation) + xml.write(indent) + xml.write(u'%s\n' % pro.toXML()) + + # Stdin/xml/err + if self.stdin is not None: + xml.write(indentation) + xml.write(indent) + xml.write(self.stdin.toStdioXML('stdin')) + xml.write(u'\n') + if self.stdout is not None: + xml.write(indentation) + xml.write(indent) + xml.write(self.stdout.toStdioXML('stdout')) + xml.write(u'\n') + if self.stderr is not None: + xml.write(indentation) + xml.write(indent) + xml.write(self.stderr.toStdioXML('stderr')) + xml.write(u'\n') + + # Uses + if len(self.uses) > 0: + for use in self.uses: + xml.write(indentation) + xml.write(indent) + xml.write(use.toXML()) + xml.write(u'\n') + + # Close tag + xml.write(indentation) + xml.write(u'') + + result = xml.getvalue() + xml.close() + return result + + +class DAX: + """DAX(name[,count][,index]) + + Representation of a directed acyclic graph in XML (DAX). + + Examples: + dax = DAX('diamond') + part5 = DAX('partition_5',count=10,index=5) + + Adding jobs: + a = Job(...) + dax.addJob(a) + + Adding parent-child control-flow dependency: + dax.addDependency(a,b) + dax.addDependency(a,c) + dax.addDependency(b,d) + dax.addDependency(c,d) + + Adding Filenames (this is not required to produce a valid DAX): + input = Filename(...) 
+ dax.addFilename(input) + + Writing a DAX out to a file: + f = open('diamond.dax','w') + dax.writeXML(f) + f.close() + """ + + class Dependency: + """A control-flow dependency between a child and its parents""" + def __init__(self,child): + self.child = child + self.parents = [] + + def addParent(self, parent): + self.parents.append(parent) + + def toXML(self, level=0, indent=u'\t'): + xml = StringIO() + indentation = ''.join([indent for x in range(0,level)]) + + xml.write(indentation) + xml.write(u'\n' % self.child.getID()) + for parent in self.parents: + xml.write(indentation) + xml.write(indent) + xml.write(u'\n' % parent.getID()) + xml.write(indentation) + xml.write(u'') + + result = xml.getvalue() + xml.close() + return result + + def __init__(self, name, count=1, index=0): + """ + Arguments: + name: The name of the workflow + count: Total number of DAXes that will be created + index: Zero-based index of this DAX + """ + self.name = name + self.count = count + self.index = index + + # This is used to generate unique ID numbers + self.sequence = 1 + + self.jobs = [] + self.filenames = [] + self.lookup = {} # A lookup table for dependencies + self.dependencies = [] + + def getName(self): + return self.name + + def setName(self,name): + self.name = name + + def getCount(self): + return self.count + + def setCount(self,count): + self.count = count + + def getIndex(self): + return self.index + + def setIndex(self,index): + self.index = index + + def addJob(self,job): + """Add a job to the list of jobs in the DAX""" + # Add an auto-generated ID if the job doesn't have one + if job.getID() is None: + job.setID("ID%07d" % self.sequence) + self.sequence += 1 + self.jobs.append(job) + + def addFilename(self, filename): + """Add a filename""" + self.filenames.append(filename) + + def addDependency(self, parent, child): + """Add a control flow dependency""" + if not child in self.lookup: + dep = DAX.Dependency(child) + self.lookup[child] = dep + self.dependencies.append(dep) + self.lookup[child].addParent(parent) + + def writeXML(self, out, indent='\t'): + """Write the DAX as XML to a stream""" + + # Preamble + out.write(u'\n') + + # Metadata + out.write(u'\n' % datetime.datetime.now()) + out.write(u'\n' % pwd.getpwuid(os.getuid())[0]) + out.write(u'\n') + + # Open tag + out.write(u'\n' % (len(self.jobs), len(self.filenames), len(self.dependencies))) + + # Files + out.write(u'\n%s\n' % indent) + for filename in self.filenames: + out.write(indent) + out.write(filename.toFilenameXML()) + out.write('\n') + + # Jobs + out.write(u'\n%s\n' % indent) + for job in self.jobs: + out.write(job.toXML(level=1,indent=indent)) + out.write(u'\n') + + # Dependencies + out.write(u'\n%s\n' % indent) + for dep in self.dependencies: + out.write(dep.toXML(level=1,indent=indent)) + out.write(u'\n') + + # Close tag + out.write(u'\n') + + +class DAXHandler(xml.sax.handler.ContentHandler): + """ + This is a DAX parser + """ + def __init__(self): + self.dax = None + self.jobmap = {} + self.filemap = {} + self.lastJob = None + self.lastChild = None + self.lastArguments = None + self.lastProfile = None + + def startElement(self, name, attrs): + if name == "adag": + name = attrs.get("name") + count = int(attrs.get("count","1")) + index = int(attrs.get("index","0")) + self.dax = DAX(name,count,index) + elif name == "filename": + if self.lastJob is None: + file = attrs.get("file") + link = attrs.get("link") + optional = attrs.get("optional") + f = Filename(file, type=None, link=link, register=None, + transfer=None, 
optional=optional) + self.dax.addFilename(f) + self.filemap[name] = f + else: + name = attrs.get("file") + if name in self.filemap: + f = self.filemap[name] + else: + f = Filename(name) + self.filemap[name] = f + if self.lastProfile is None: + self.lastArguments.append(f) + else: + self.lastProfile.value = f + elif name == "job": + id = attrs.get("id") + namespace = attrs.get("namespace") + name = attrs.get("name") + version = attrs.get("version") + dv_namespace = attrs.get("dv-namespace") + dv_name = attrs.get("dv-name") + dv_version = attrs.get("dv-version") + level = attrs.get("level") + compound = attrs.get("compound") + job = Job(id=id, namespace=namespace, name=name, version=version, + dv_namespace=dv_namespace, dv_name=dv_name, dv_version=dv_version, + level=level, compound=compound) + self.dax.addJob(job) + self.lastJob = job + self.jobmap[job.getID()] = job + elif name == "argument": + self.lastArguments = [] + elif name == "profile": + namespace = attrs.get("namespace") + key = attrs.get("key") + self.lastProfile = Profile(namespace,key,"") + self.lastJob.addProfile(self.lastProfile) + elif name in ["stdin","stdout","stderr"]: + file = attrs.get("file") + link = attrs.get("link") + varname = attrs.get("varname") + if file in self.filemap: + f = self.filemap[file] + else: + f = Filename(file,link=link) + self.filemap[file] = f + if varname is not None: + if f.varname is None: + f.varname = varname + if name == "stdin": + self.lastJob.setStdin(f) + elif name == "stdout": + self.lastJob.setStdout(f) + else: + self.lastJob.setStderr(f) + elif name == "uses": + file = attrs.get("file") + link = attrs.get("link") + register = attrs.get("register") + transfer = attrs.get("transfer") + type = attrs.get("type") + temporaryHint = attrs.get("temporaryHint") + if file in self.filemap: + f = self.filemap[file] + if f.type is None: + f.type = type + else: + f = Filename(file, type=type, link=link, + register=register, transfer=transfer) + self.filemap[file] = f + self.lastJob.addUses(f,link=link,register=register, + transfer=transfer,temporaryHint=temporaryHint) + elif name == "child": + id = attrs.get("ref") + child = self.jobmap[id] + self.lastChild = child + elif name == "parent": + id = attrs.get("ref") + parent = self.jobmap[id] + self.dax.addDependency(parent, self.lastChild) + + def characters(self, chars): + if self.lastArguments is not None: + self.lastArguments += [unicode(a) for a in shlex.split(chars)] + elif self.lastProfile is not None: + self.lastProfile.value += chars + + def endElement(self, name): + if name == "child": + self.lastChild = None + elif name == "job": + self.lastJob = None + elif name == "argument": + self.lastJob.addArguments(*self.lastArguments) + self.lastArguments = None + elif name == "profile": + self.lastProfile = None + + +def parse(fname): + """ + Parse DAX from a Pegasus DAX file. 
+ """ + handler = DAXHandler() + xml.sax.parse(fname, handler) + return handler.dax + + +def parseString(string): + """ + Parse DAX from a string + """ + handler = DAXHandler() + xml.sax.parseString(string, handler) + return handler.dax + + +if __name__ == '__main__': + """An example of using the DAX API""" + + # Create a DAX + diamond = DAX("diamond") + + # Create some logical file names + a = Filename("f.a",link=LFN.INPUT,transfer=True) + b1 = Filename("f.b1",link=LFN.OUTPUT,transfer=True) + b2 = Filename("f.b2",link=LFN.OUTPUT,transfer=True) + c1 = Filename("f.c1",link=LFN.OUTPUT,transfer=True) + c2 = Filename("f.c2",link=LFN.OUTPUT,transfer=True) + d = Filename("f.d",link=LFN.OUTPUT,transfer=True,register=True) + + # Add the filenames to the DAX (this is not strictly required) + diamond.addFilename(a) + diamond.addFilename(d) + + # Add a preprocess job + preprocess = Job(namespace="diamond",name="preprocess",version="2.0") + preprocess.addArguments("-a preprocess","-T60","-i",a,"-o",b1,b2) + preprocess.addUses(a,link=LFN.INPUT) + preprocess.addUses(b1,link=LFN.OUTPUT) + preprocess.addUses(b2,link=LFN.OUTPUT) + diamond.addJob(preprocess) + + # Add left Findrange job + frl = Job(namespace="diamond",name="findrange",version="2.0") + frl.addArguments("-a findrange","-T60","-i",b1,"-o",c1) + frl.addUses(b1,link=LFN.INPUT) + frl.addUses(c1,link=LFN.OUTPUT) + diamond.addJob(frl) + + # Add right Findrange job + frr = Job(namespace="diamond",name="findrange",version="2.0") + frr.addArguments("-a findrange","-T60","-i",b2,"-o",c2) + frr.addUses(b2,link=LFN.INPUT) + frr.addUses(c2,link=LFN.OUTPUT) + diamond.addJob(frr) + + # Add Analyze job + analyze = Job(namespace="diamond",name="analyze",version="2.0") + analyze.addArguments("-a analyze","-T60","-i",c1,c2,"-o",d) + analyze.addUses(c1,link=LFN.INPUT) + analyze.addUses(c2,link=LFN.INPUT) + analyze.addUses(d,link=LFN.OUTPUT) + diamond.addJob(analyze) + + # Add control-flow dependencies + diamond.addDependency(parent=preprocess, child=frl) + diamond.addDependency(parent=preprocess, child=frr) + diamond.addDependency(parent=frl, child=analyze) + diamond.addDependency(parent=frr, child=analyze) + + out = StringIO() + diamond.writeXML(out) + foo1 = out.getvalue() + out.close() + + diamond = parseString(foo1) + + out = StringIO() + diamond.writeXML(out) + foo2 = out.getvalue() + out.close() + + print(foo1) + print(foo2) + diff --git a/workflow-generator/Pegasus/DAX3.py b/workflow-generator/Pegasus/DAX3.py new file mode 100644 index 0000000..e41b9be --- /dev/null +++ b/workflow-generator/Pegasus/DAX3.py @@ -0,0 +1,2334 @@ +# Copyright 2010 University Of Southern California +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""API for generating Pegasus DAXes + +The classes in this module can be used to generate DAXes that can be +read by Pegasus. 
+ +The official DAX schema is here: http://pegasus.isi.edu/schema/ + +Here is an example showing how to create the diamond DAX using this API: + +# Create a DAX +diamond = ADAG("diamond") + +# Add some metadata +diamond.metadata("name", "diamond") +diamond.metadata("createdby", "Gideon Juve") + +# Add input file to the DAX-level replica catalog +a = File("f.a") +a.addPFN(PFN("gsiftp://site.com/inputs/f.a","site")) +a.metadata("size", "1024") +diamond.addFile(a) + +# Add executables to the DAX-level replica catalog +e_preprocess = Executable(namespace="diamond", name="preprocess", version="4.0", os="linux", arch="x86_64") +e_preprocess.metadata("size", "2048") +e_preprocess.addPFN(PFN("gsiftp://site.com/bin/preprocess","site")) +diamond.addExecutable(e_preprocess) + +e_findrange = Executable(namespace="diamond", name="findrange", version="4.0", os="linux", arch="x86_64") +e_findrange.addPFN(PFN("gsiftp://site.com/bin/findrange","site")) +diamond.addExecutable(e_findrange) + +e_analyze = Executable(namespace="diamond", name="analyze", version="4.0", os="linux", arch="x86_64") +e_analyze.addPFN(PFN("gsiftp://site.com/bin/analyze","site")) +diamond.addExecutable(e_analyze) + +# Add a preprocess job +preprocess = Job(e_preprocess) +preprocess.metadata("time", "60") +b1 = File("f.b1") +b2 = File("f.b2") +preprocess.addArguments("-a preprocess","-T60","-i",a,"-o",b1,b2) +preprocess.uses(a, link=Link.INPUT) +preprocess.uses(b1, link=Link.OUTPUT, transfer=True) +preprocess.uses(b2, link=Link.OUTPUT, transfer=True) +diamond.addJob(preprocess) + +# Add left Findrange job +frl = Job(e_findrange) +frl.metadata("time", "60") +c1 = File("f.c1") +frl.addArguments("-a findrange","-T60","-i",b1,"-o",c1) +frl.uses(b1, link=Link.INPUT) +frl.uses(c1, link=Link.OUTPUT, transfer=True) +diamond.addJob(frl) + +# Add right Findrange job +frr = Job(e_findrange) +frr.metadata("time", "60") +c2 = File("f.c2") +frr.addArguments("-a findrange","-T60","-i",b2,"-o",c2) +frr.uses(b2, link=Link.INPUT) +frr.uses(c2, link=Link.OUTPUT, transfer=True) +diamond.addJob(frr) + +# Add Analyze job +analyze = Job(e_analyze) +analyze.metadata("time", "60") +d = File("f.d") +analyze.addArguments("-a analyze","-T60","-i",c1,c2,"-o",d) +analyze.uses(c1, link=Link.INPUT) +analyze.uses(c2, link=Link.INPUT) +analyze.uses(d, link=Link.OUTPUT, transfer=True, register=True) +diamond.addJob(analyze) + +# Add dependencies +diamond.depends(parent=preprocess, child=frl) +diamond.depends(parent=preprocess, child=frr) +diamond.depends(parent=frl, child=analyze) +diamond.depends(parent=frr, child=analyze) + +# Write the DAX to stdout +import sys +diamond.writeXML(sys.stdout) + +# Write the DAX to a file +f = open("diamond.dax","w") +diamond.writeXML(f) +f.close() +""" + +__author__ = "Gideon Juve , Rafael Ferreira da Silva " + +__version__ = "3.6" + +__all__ = [ + "DAX3Error", + "DuplicateError", + "NotFoundError", + "FormatError", + "ParseError", + "Element", + "Namespace", + "ContainerType", + "Arch", + "Link", + "Transfer", + "OS", + "When", + "Invoke", + "InvokeMixin", + "ProfileMixin", + "MetadataMixin", + "PFNMixin", + "CatalogType", + "File", + "Executable", + "Container", + "Metadata", + "PFN", + "Profile", + "Use", + "UseMixin", + "Transformation", + "AbstractJob", + "Job", + "DAX", + "DAG", + "Dependency", + "ADAG", + "parseString", + "parse" +] + +import datetime, os, sys +import codecs +import shlex +import codecs +import warnings + +if sys.version_info >= (3, 0): + # compatibility with Python 3 + from past.builtins import basestring + 
+try: + from StringIO import StringIO +except ImportError: + from io import StringIO + +SCHEMA_NAMESPACE = "http://pegasus.isi.edu/schema/DAX" +SCHEMA_LOCATION = "http://pegasus.isi.edu/schema/dax-3.6.xsd" +SCHEMA_VERSION = "3.6" + + +class DAX3Error(Exception): pass + + +class DuplicateError(DAX3Error): pass + + +class NotFoundError(DAX3Error): pass + + +class FormatError(DAX3Error): pass + + +class ParseError(DAX3Error): pass + + +class Element: + """Representation of an XML element for formatting output""" + + def __init__(self, name, attrs=[]): + self.name = name + self.attrs = [] + for attr, value in attrs: + if value is not None: + if isinstance(value, bool): + value = str(value).lower() + elif not isinstance(value, basestring): + value = repr(value) + attr = attr.replace('__', ':') + self.attrs.append((attr, value)) + self.children = [] + self.flat = False + + def _escape(self, text): + """Escape special characters in XML""" + o = [] + for c in text: + if c == '"': + o.append(""") + elif c == "'": + o.append("'") + elif c == "<": + o.append("<") + elif c == ">": + o.append(">") + elif c == "&": + o.append("&") + else: + o.append(c) + return ''.join(o) + + def element(self, element): + self.children.append(element) + return element + + def text(self, value): + if not isinstance(value, basestring): + value = str(value) + self.children.append(self._escape(value)) + return self + + def comment(self, message): + self.children.append("" % self._escape(message)) + + def flatten(self): + self.flat = True + return self + + def __unicode__(self): + s = StringIO() + self.write(s) + x = s.getvalue() + s.close() + return unicode(x) + + def __str__(self): + return unicode(self).encode('utf-8') + + def write(self, stream=sys.stdout, level=0, flatten=False): + flat = self.flat or flatten + + stream.write('<%s' % self.name) + + for attr, value in self.attrs: + value = self._escape(value) + stream.write(' %s="%s"' % (attr, value)) + + if len(self.children) == 0: + stream.write('/>') + else: + stream.write('>') + if not flat: + stream.write('\n') + for child in self.children: + if not flat: + stream.write('\t' * (level + 1)) + if isinstance(child, basestring): + stream.write(child) + else: + child.write(stream, level + 1, flat) + if not flat: + stream.write('\n') + if not flat: + stream.write('\t' * level) + stream.write('' % self.name) + + +class Namespace: + """ + Namespace values recognized by Pegasus. See Executable, + Transformation, and Job. + """ + PEGASUS = 'pegasus' + CONDOR = 'condor' + DAGMAN = 'dagman' + ENV = 'env' + HINTS = 'hints' + GLOBUS = 'globus' + SELECTOR = 'selector' + STAT = 'stat' + + +class Arch: + """ + Architecture types. See Executable. + """ + X86 = 'x86' + X86_64 = 'x86_64' + PPC = 'ppc' + PPC_64 = 'ppc_64' + IA64 = 'ia64' + SPARCV7 = 'sparcv7' + SPARCV9 = 'sparcv9' + AMD64 = 'amd64' + + +class Link: + """ + Linkage attributes. See File, Executable and uses(). + """ + NONE = 'none' + INPUT = 'input' + OUTPUT = 'output' + INOUT = 'inout' + CHECKPOINT = 'checkpoint' + + +class Transfer: + """ + Transfer types for uses. See Executable, File. + """ + FALSE = 'false' + OPTIONAL = 'optional' + TRUE = 'true' + + +class OS: + """ + OS types. See Executable. + """ + LINUX = 'linux' + SUNOS = 'sunos' + AIX = 'aix' + MACOS = 'macos' + WINDOWS = 'windows' + + +class When: + """ + Job states for notifications. See Job/DAX/DAG.invoke(). 
+ """ + NEVER = 'never' + START = 'start' + ON_ERROR = 'on_error' + ON_SUCCESS = 'on_success' + AT_END = 'at_end' + ALL = 'all' + + +class ContainerType: + """ + Container types. See Container. + """ + DOCKER = 'docker' + SINGULARITY = 'singularity' + + +class Invoke: + def __init__(self, when, what): + if not when: + raise FormatError("invalid when", when) + if not what: + raise FormatError("invalid what", what) + self.when = when + self.what = what + + def __unicode__(self): + return u"" % (self.when, self.what) + + def __str__(self): + return unicode(self).encode('utf-8') + + def __hash__(self): + return hash((self.when, self.what)) + + def __eq__(self, other): + if isinstance(other, Invoke): + return self.when == other.when and self.what == other.what + return False + + def toXML(self): + e = Element('invoke', [('when', self.when)]) + e.text(self.what) + e.flatten() + return e + + +class InvokeMixin: + def addInvoke(self, invoke): + """Add invoke to this object""" + if self.hasInvoke(invoke): + raise DuplicateError("Duplicate Invoke %s" % invoke) + self.invocations.add(invoke) + + def hasInvoke(self, invoke): + """Test to see if this object has invoke""" + return invoke in self.invocations + + def removeInvoke(self, invoke): + """Remove invoke from this object""" + if not self.hasInvoke(invoke): + raise NotFoundError("Invoke not found", invoke) + self.invocations.remove(invoke) + + def clearInvokes(self): + """Remove all Invoke objects""" + self.invocations.clear() + + def invoke(self, when, what): + """ + Invoke executable 'what' when job reaches status 'when'. The value of + 'what' should be a command that can be executed on the submit host. + + The list of valid values for 'when' is: + + WHEN MEANING + ========== ======================================================= + never never invoke + start invoke just before job gets submitted. + on_error invoke after job finishes with failure (exitcode != 0). + on_success invoke after job finishes with success (exitcode == 0). + at_end invoke after job finishes, regardless of exit status. + all like start and at_end combined. 
+ + Examples: + obj.invoke('at_end','/usr/bin/mail -s "job done" juve@usc.edu') + obj.invoke('on_error','/usr/bin/update_db -failure') + """ + self.addInvoke(Invoke(when, what)) + + +class ProfileMixin: + def addProfile(self, profile): + """Add a profile to this object""" + if self.hasProfile(profile): + raise DuplicateError("Duplicate profile %s" % profile) + self.profiles.add(profile) + + def hasProfile(self, profile): + """Does this object have profile?""" + return profile in self.profiles + + def removeProfile(self, profile): + """Remove profile from this object""" + if not self.hasProfile(profile): + raise NotFoundError("Profile not found", profile) + self.profiles.remove(profile) + + def clearProfiles(self): + """Remove all profiles from this object""" + self.profiles.clear() + + def profile(self, namespace, key, value): + """Declarative profile addition""" + self.addProfile(Profile(namespace, key, value)) + + +class MetadataMixin: + def addMetadata(self, metadata): + """Add metadata to this object""" + if self.hasMetadata(metadata): + raise DuplicateError("Duplicate Metadata %s" % metadata) + self._metadata.add(metadata) + + def removeMetadata(self, metadata): + """Remove meta from this object""" + if not self.hasMetadata(metadata): + raise NotFoundError("Metadata not found", metadata) + self._metadata.remove(metadata) + + def hasMetadata(self, metadata): + """Does this object have metadata?""" + return metadata in self._metadata + + def clearMetadata(self): + """Remove all metadata from this object""" + self._metadata.clear() + + def metadata(self, key, value): + """Declarative metadata addition""" + self.addMetadata(Metadata(key, value)) + + +class PFNMixin: + def addPFN(self, pfn): + """Add a PFN to this object""" + if self.hasPFN(pfn): + raise DuplicateError("Duplicate PFN %s" % pfn) + self.pfns.add(pfn) + + def removePFN(self, pfn): + """Remove PFN from this object""" + if not self.hasPFN(pfn): + raise NotFoundError("PFN not found", pfn) + self.pfns.remove(pfn) + + def hasPFN(self, pfn): + """Does this object have pfn?""" + return pfn in self.pfns + + def clearPFNs(self): + """Remove all PFNs from this object""" + self.pfns.clear() + + def PFN(self, url, site=None): + """Declarative PFN addition""" + self.addPFN(PFN(url, site)) + + +class CatalogType(ProfileMixin, MetadataMixin, PFNMixin): + """Base class for File and Executable""" + + def __init__(self, name): + """ + All arguments specify the workflow-level behavior of this File. Job-level + behavior can be defined when adding the File to a Job's uses. If the + properties are not overridden at the job-level, then the workflow-level + values are used as defaults. + + If this LFN is to be used as a job's stdin/stdout/stderr then the value + of link is ignored when generating the tags. + + Arguments: + name: The name of the file (required) + """ + if not name: + raise FormatError('name required') + self.name = name + self.profiles = set() + self._metadata = set() + self.pfns = set() + + def innerXML(self, parent): + for p in self.profiles: + parent.element(p.toXML()) + for m in self._metadata: + parent.element(m.toXML()) + for p in self.pfns: + parent.element(p.toXML()) + + +class File(CatalogType): + """File(name) + + A file entry for the DAX-level replica catalog, or a reference to a logical file + used by the workflow. 
+ + Examples: + input = File('input.txt') + + Example use in job: + input = File('input.txt') + output = File('output.txt') + job = Job(name="compute") + job.uses(input, link=Link.INPUT, transfer=True) + job.uses(output, link=Link.OUTPUT, transfer=True, register=True) + """ + + def __init__(self, name): + """ + All arguments specify the workflow-level behavior of this File. Job-level + behavior can be defined when adding the File to a Job's uses. If the + properties are not overridden at the job-level, then the workflow-level + values are used as defaults. + + If this LFN is to be used as a job's stdin/stdout/stderr then the value + of link is ignored when generating the tags. + + Arguments: + name: The name of the file (required) + """ + CatalogType.__init__(self, name) + + def __unicode__(self): + return u"" % self.name + + def __str__(self): + return unicode(self).encode('utf-8') + + def __hash__(self): + return hash(self.name) + + def __eq__(self, other): + return isinstance(other, File) and self.name == other.name + + def toArgumentXML(self): + """Returns an XML representation of this File with no inner elements""" + return Element('file', [('name', self.name)]) + + def toStdioXML(self, tag): + """Returns an XML representation of this file as a stdin/out/err tag""" + if tag is 'stdin': + link = "input" # stdin is always input + elif tag in ['stdout', 'stderr']: + link = "output" # stdout/stderr are always output + else: + raise FormatError("invalid tag", tag, "should be one of stdin, stdout, stderr") + + return Element(tag, [ + ('name', self.name), + ('link', link) + ]) + + def toXML(self): + """Return the XML representation of this File with inner elements""" + e = self.toArgumentXML() + self.innerXML(e) + return e + + +class Executable(CatalogType, InvokeMixin): + """Executable(name[,namespace][,version][,arch][,os][,osrelease][,osversion][,glibc][,installed]) + + An entry for an executable in the DAX-level transformation catalog. 
+ + Examples: + grep = Executable("grep") + grep = Executable(namespace="os",name="grep",version="2.3") + grep = Executable(namespace="os",name="grep",version="2.3",arch=Arch.X86) + grep = Executable(namespace="os",name="grep",version="2.3",arch=Arch.X86,os=OS.LINUX) + """ + + def __init__(self, name, namespace=None, version=None, arch=None, os=None, + osrelease=None, osversion=None, glibc=None, installed=None, + container=None): + """ + Arguments: + name: Logical name of executable + namespace: Executable namespace + version: Executable version + arch: Architecture that this exe was compiled for + os: Name of os that this exe was compiled for + osrelease: Release of os that this exe was compiled for + osversion: Version of os that this exe was compiled for + glibc: Version of glibc this exe was compiled against + installed: Is the executable installed (true), or stageable (false) + container: Optional attribute to specify the container to use + """ + CatalogType.__init__(self, name) + self.namespace = namespace + self.version = version + self.arch = arch + self.os = os + self.osrelease = osrelease + self.osversion = osversion + self.glibc = glibc + self.installed = installed + self.container = container + self.invocations = set() + + def __unicode__(self): + return u"" % (self.namespace, self.name, self.version) + + def __str__(self): + return unicode(self).encode('utf-8') + + def __hash__(self): + return hash((self.name, + self.namespace, + self.version, + self.arch, + self.os, + self.osrelease, + self.osversion, + self.glibc, + self.installed, + self.container)) + + def __eq__(self, other): + if isinstance(other, Executable): + return self.name == other.name and \ + self.namespace == other.namespace and \ + self.version == other.version and \ + self.arch == other.arch and \ + self.os == other.os and \ + self.osrelease == other.osrelease and \ + self.osversion == other.osversion and \ + self.glibc == other.glibc and \ + self.installed == other.installed and \ + self.container == other.container + return False + + def toXML(self): + """Returns an XML representation of this file as a filename tag""" + e = Element('executable', [ + ('name', self.name), + ('namespace', self.namespace), + ('version', self.version), + ('arch', self.arch), + ('os', self.os), + ('osrelease', self.osrelease), + ('osversion', self.osversion), + ('glibc', self.glibc), + ('installed', self.installed) + # containers are not support by the DAX3 schema + ]) + self.innerXML(e) + + if self.container: + warnings.warn('The DAX API extensions do not support references for containers.') + + # Invocations + for inv in self.invocations: + e.element(inv.toXML()) + + return e + + +class Container(ProfileMixin): + """Container(name,type,image[,image_site]) + + An entry for a container in the DAX-level transformation catalog. 
+ + Examples: + mycontainer = Container("myapp", type="docker", image="docker:///rynge/montage:latest") + """ + + def __init__(self, name, type, image, imagesite=None, dockerfile=None, mount=None): + """ + Arguments: + name: Container name + type: Container type (see ContainerType) + image: URL to image in a container hub OR URL to an existing container image + imagesite: optional site attribute to tell pegasus which site tar file exist + dockerfile: a url to an existing docker file to build container image from scratch + mount: list of volumes to be mounted + """ + if not name: + raise FormatError("Invalid name", name) + if not type: + raise FormatError("Invalid container type", type) + if not image: + raise FormatError("Invalid image", image) + self.name = name + self.type = type + self.image = image + self.imagesite = imagesite + self.dockerfile = dockerfile + self.mount = mount if mount else [] + self.profiles = set() + + def __unicode__(self): + return u"" % (self.name, self.type) + + def __str__(self): + return unicode(self).encode('utf-8') + + def __hash__(self): + return hash((self.name, + self.type, + self.image, + self.imagesite, + self.dockerfile)) + + def __eq__(self, other): + if isinstance(other, Container): + return self.name == other.name and \ + self.type == other.type and \ + self.image == other.image and \ + self.imagesite == other.imagesite and \ + self.dockerfile == other.dockerfile + return False + + +class Metadata: + """Metadata(key,value) + + A way to add metadata to File and Executable objects. This is + useful if you want to annotate the DAX with things like file + sizes, application-specific attributes, etc. + + There is currently no restriction on the type. + + Examples: + s = Metadata('size','12') + a = Metadata('algorithm','plav') + """ + + def __init__(self, key, value): + """ + Arguments: + key: The key name of the item + value: The value of the item + """ + if not key: + raise FormatError("Invalid key", key) + if not value: + raise FormatError("Invalid value", value) + self.key = key + self.value = value + + def __unicode__(self): + return u"" % (self.key, self.value) + + def __str__(self): + return unicode(self).encode('utf-8') + + def __hash__(self): + return hash(self.key) + + def __eq__(self, other): + return isinstance(other, Metadata) and self.key == other.key + + def toXML(self): + m = Element('metadata', [ + ('key', self.key) + ]) + m.text(self.value).flatten() + return m + + +class PFN(ProfileMixin): + """PFN(url[,site]) + + A physical file name. Used to provide URLs for files and executables + in the DAX-level replica catalog. + + PFNs can be added to File and Executable. + + Examples: + PFN('http://site.com/path/to/file.txt','site') + PFN('http://site.com/path/to/file.txt',site='site') + PFN('http://site.com/path/to/file.txt') + """ + + def __init__(self, url, site=None): + """ + Arguments: + url: The url of the file. + site: The name of the site. 
[default: local] + """ + if not url: + raise FormatError("Invalid url", url) + if not site: + raise FormatError("Invalid site", site) + self.url = url + self.site = site + self.profiles = set() + + def __unicode__(self): + return u"" % (self.site, self.url) + + def __str__(self): + return unicode(self).encode('utf-8') + + def __hash__(self): + return hash((self.url, self.site)) + + def __eq__(self, other): + return isinstance(other, PFN) and \ + self.url == other.url and \ + self.site == other.site + + def toXML(self): + pfn = Element('pfn', [ + ('url', self.url), + ('site', self.site) + ]) + for p in self.profiles: + pfn.element(p.toXML()) + return pfn + + +class Profile: + """Profile(namespace,key,value) + + A Profile captures scheduler-, system-, and environment-specific + parameters in a uniform fashion. Each profile declaration assigns a value + to a key within a namespace. + + Profiles can be added to Job, DAX, DAG, File, Executable, and PFN. + + Examples: + path = Profile(Namespace.ENV,'PATH','/bin') + vanilla = Profile(Namespace.CONDOR,'universe','vanilla') + path = Profile(namespace='env',key='PATH',value='/bin') + path = Profile('env','PATH','/bin') + """ + + def __init__(self, namespace, key, value): + """ + Arguments: + namespace: The namespace of the profile (see Namespace) + key: The key name. Can be anything that responds to str(). + value: The value for the profile. Can be anything that responds to str(). + """ + self.namespace = namespace + self.key = key + self.value = value + + def __unicode__(self): + return u"" % (self.namespace, self.key, self.value) + + def __str__(self): + return unicode(self).encode('utf-8') + + def __hash__(self): + return hash((self.namespace, self.key)) + + def __eq__(self, other): + return isinstance(other, Profile) and \ + self.namespace == other.namespace and \ + self.key == other.key + + def toXML(self): + """Return an XML element for this profile""" + p = Element("profile", [ + ('namespace', self.namespace), + ('key', self.key) + ]) + p.text(self.value).flatten() + return p + + +class Use(MetadataMixin): + """Use(file[,link][,register][,transfer][,optional] + [,namespace][,version][,executable][,size]) + + Use of a logical file name. Used for referencing files in the DAX. + + Attributes: + file: A string, File or Executable representing the logical file + link: Is this file a job input, output or both (See LFN) (optional) + register: Should this file be registered in RLS? (True/False) (optional) + transfer: Should this file be transferred? (True/False or See LFN) (optional) + optional: Is this file optional, or should its absence be an error? (optional) + namespace: Namespace of executable (optional) + version: version of executable (optional) + executable: Is file an executable? (True/False) (optional) + size: The size of the file (optional) + + For Use objects that are added to Transformations, the attributes 'link', 'register', + 'transfer', 'optional' and 'size' are ignored. + + If a File object is passed in as 'file', then the default value for executable + is 'false'. Similarly, if an Executable object is passed in, then the default + value for executable is 'true'. 
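+
+    Use objects are normally created indirectly, through the uses() method of
+    the UseMixin defined below, rather than constructed by hand. A minimal,
+    illustrative sketch (the file and job names here are hypothetical):
+
+        input = File("reads.fastq")
+        job = Job("bwa-wrapper")
+        job.uses(input, link=Link.INPUT)
+        job.uses(File("aligned.sam"), link=Link.OUTPUT, transfer=True, register=False)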
+ """ + + def __init__(self, name, link=None, register=None, transfer=None, + optional=None, namespace=None, version=None, executable=None, + size=None): + if not name: + raise FormatError('Invalid name', name) + + self.name = name + self.link = link + self.optional = optional + self.register = register + self.transfer = transfer + self.namespace = namespace + self.version = version + self.executable = executable + self.size = size + + self._metadata = set() + + def __unicode__(self): + return u"" % (self.namespace, self.name, self.version) + + def __str__(self): + return unicode(self).encode("utf-8") + + def __hash__(self): + return hash((self.namespace, self.name, self.version)) + + def __eq__(self, other): + if isinstance(other, Use): + return self.namespace == other.namespace and \ + self.name == other.name and \ + self.version == other.version + + def toTransformationXML(self): + e = Element('uses', [ + ('namespace', self.namespace), + ('name', self.name), + ('version', self.version), + ('executable', self.executable) + ]) + + for m in self._metadata: + e.element(m.toXML()) + + return e + + def toJobXML(self): + e = Element('uses', [ + ('namespace', self.namespace), + ('name', self.name), + ('version', self.version), + ('link', self.link), + ('register', self.register), + ('transfer', self.transfer), + ('optional', self.optional), + ('executable', self.executable), + ('size', self.size) + ]) + + for m in self._metadata: + e.element(m.toXML()) + + return e + + +class UseMixin: + def addUse(self, use): + """Add Use to this object""" + if self.hasUse(use): + raise DuplicateError("Duplicate Use %s" % use) + self.used.add(use) + + def removeUse(self, use): + """Remove use from this object""" + if not self.hasUse(use): + raise NotFoundError("No such Use", use) + self.used.remove(use) + + def hasUse(self, use): + """Test to see if this object has use""" + return use in self.used + + def clearUses(self): + """Remove all uses from this object""" + self.used.clear() + + def uses(self, arg, link=None, register=None, transfer=None, + optional=None, namespace=None, version=None, executable=None, + size=None): + + if isinstance(arg, CatalogType): + _name = arg.name + else: + _name = arg + + _namespace = None + _version = None + _executable = None + + if isinstance(arg, Executable): + _namespace = arg.namespace + _version = arg.version + # We only need to set this for jobs + # the default is True for Transformations + if isinstance(self, AbstractJob): + _executable = True + + if isinstance(arg, File): + # We only need to set this for transformations + # The default is False for Jobs + if isinstance(self, Transformation): + _executable = False + + if namespace is not None: + _namespace = namespace + if version is not None: + _version = str(version) + if executable is not None: + _executable = executable + + use = Use(_name, link, register, transfer, optional, _namespace, + _version, _executable, size) + + # Copy metadata from File or Executable + # XXX Maybe we only want this if link!=input + if isinstance(arg, CatalogType): + for m in arg._metadata: + use.addMetadata(m) + + self.addUse(use) + + +class Transformation(UseMixin, InvokeMixin, MetadataMixin): + """Transformation((name|executable)[,namespace][,version]) + + A logical transformation. This is basically defining one or more + entries in the transformation catalog. You can think of it like a macro + for adding to your jobs. You can define a transformation that + uses several files and/or executables, and refer to it when creating + a job. 
If you do, then all of the uses defined for that transformation + will be copied to the job during planning. + + This code: + in = File("input.txt") + exe = Executable("exe") + t = Transformation(namespace="foo", name="bar", version="baz") + t.uses(in) + t.uses(exe) + j = Job(t) + + is equivalent to: + in = File("input.txt") + exe = Executable("exe") + j = Job(namespace="foo", name="bar", version="baz") + j.uses(in) + j.uses(exe) + + Examples: + Transformation(name='mDiff') + Transformation(namespace='montage',name='mDiff') + Transformation(namespace='montage',name='mDiff',version='3.0') + + Using one executable: + mProjectPP = Executable(namespace="montage",name="mProjectPP",version="3.0") + x_mProjectPP = Transformation(mProjectPP) + + Using several executables: + mDiff = Executable(namespace="montage",name="mProjectPP",version="3.0") + mFitplane = Executable(namespace="montage",name="mFitplane",version="3.0") + mDiffFit = Executable(namespace="montage",name="mDiffFit",version="3.0") + x_mDiffFit = Transformation(mDiffFit) + x_mDiffFit.uses(mDiff) + x_mDiffFit.uses(mFitplane) + + Config files too: + conf = File("jbsim.conf") + jbsim = Executable(namespace="scec",name="jbsim") + x_jbsim = Transformation(jbsim) + x_jbsim.uses(conf) + """ + + def __init__(self, name, namespace=None, version=None): + """ + The name argument can be either a string or an Executable object. + If it is an Executable object, then the Transformation inherits + its name, namespace and version from the Executable, and the + Transformation is set to use the Executable with link=input, + transfer=true, and register=False. + + Arguments: + name: The name of the transformation + namespace: The namespace of the xform (optional) + version: The version of the xform (optional) + """ + self.name = None + self.namespace = None + self.version = None + self.used = set() + self.invocations = set() + self._metadata = set() + if isinstance(name, Executable): + self.name = name.name + self.namespace = name.namespace + self.version = name.version + else: + self.name = name + if namespace: self.namespace = namespace + if version: self.version = version + + def __unicode__(self): + return u"" % (self.namespace, self.name, self.version) + + def __str__(self): + return unicode(self).encode("utf-8") + + def __hash__(self): + return hash((self.namespace, self.name, self.version)) + + def __eq__(self, other): + if isinstance(other, Transformation): + return self.namespace == other.namespace and \ + self.name == other.name and \ + self.version == other.version + + def toXML(self): + """Return an XML representation of this transformation""" + e = Element('transformation', [ + ('namespace', self.namespace), + ('name', self.name), + ('version', self.version) + ]) + + # Metadata + for m in self._metadata: + e.element(m.toXML()) + + # Uses + def getlink(a): + if a.link is not None: + return a.link + # Python 3 - make sure we return a string + return "" + + used = list(self.used) + used.sort(key=getlink) + for u in used: + e.element(u.toTransformationXML()) + + # Invocations + for inv in self.invocations: + e.element(inv.toXML()) + + return e + + +class AbstractJob(ProfileMixin, UseMixin, InvokeMixin, MetadataMixin): + """The base class for Job, DAX, and DAG""" + + def __init__(self, id=None, node_label=None): + self.id = id + self.node_label = node_label + + self.arguments = [] + self.profiles = set() + self.used = set() + self.invocations = set() + self._metadata = set() + + self.stdout = None + self.stderr = None + self.stdin = None + + def 
addArguments(self, *arguments): + """Add one or more arguments to the job (this will add whitespace)""" + for arg in arguments: + if not isinstance(arg, (File, basestring)): + raise FormatError("Invalid argument", arg) + for arg in arguments: + if len(self.arguments) > 0: + self.arguments.append(' ') + self.arguments.append(arg) + + def addRawArguments(self, *arguments): + """Add one or more arguments to the job (whitespace will NOT be added)""" + for arg in arguments: + if not isinstance(arg, (File, basestring)): + raise FormatError("Invalid argument", arg) + self.arguments.extend(arguments) + + def clearArguments(self): + """Remove all arguments from this job""" + self.arguments = [] + + def getArguments(self): + """Get the arguments of this job""" + args = [] + for a in self.arguments: + if isinstance(a, File): + args.append(unicode(a.toArgumentXML())) + else: + args.append(a) + return ''.join(args) + + def setStdout(self, filename): + """Redirect stdout to a file""" + if isinstance(filename, File): + self.stdout = filename + else: + self.stdout = File(filename) + + def clearStdout(self): + """Remove stdout file""" + self.stdout = None + + def setStderr(self, filename): + """Redirect stderr to a file""" + if isinstance(filename, File): + self.stderr = filename + else: + self.stderr = File(filename) + + def clearStderr(self): + """Remove stderr file""" + self.stderr = None + + def setStdin(self, filename): + """Redirect stdin from a file""" + if isinstance(filename, File): + self.stdin = filename + else: + self.stdin = File(filename) + + def clearStdin(self): + """Remove stdin file""" + self.stdin = None + + def innerXML(self, element): + """Return an XML representation of this job""" + # Arguments + if len(self.arguments) > 0: + args = Element('argument').flatten() + for x in self.arguments: + if isinstance(x, File): + args.element(x.toArgumentXML()) + else: + args.text(x) + element.element(args) + + # Metadata + for m in self._metadata: + element.element(m.toXML()) + + # Profiles + for pro in self.profiles: + element.element(pro.toXML()) + + # Stdin/xml/err + if self.stdin is not None: + element.element(self.stdin.toStdioXML('stdin')) + if self.stdout is not None: + element.element(self.stdout.toStdioXML('stdout')) + if self.stderr is not None: + element.element(self.stderr.toStdioXML('stderr')) + + # Uses + def getlink(a): + if a.link is not None: + return a.link + # Python 3 - make sure we return a string + return "" + + used = list(self.used) + used.sort(key=getlink) + for use in used: + element.element(use.toJobXML()) + + # Invocations + for inv in self.invocations: + element.element(inv.toXML()) + + +class Job(AbstractJob): + """Job((name|Executable|Transformation)[,id][,namespace][,version][,node_label]) + + This class defines the specifics of a job to run in an abstract manner. + All filename references still refer to logical files. All references + transformations also refer to logical transformations, though + physical location hints can be passed through profiles. + + Examples: + sleep = Job(id="ID0001",name="sleep") + jbsim = Job(id="ID0002",name="jbsim",namespace="cybershake",version="2.1") + merge = Job("jbsim") + + You can create a Job based on a Transformation: + mDiff_xform = Transformation("mDiff", ...) + mDiff_job = Job(mDiff_xform) + + Or an Executable: + mDiff_exe = Executable("mDiff", ...) + mDiff_job = Job(mDiff_exe) + + Several arguments can be added at the same time: + input = File(...) + output = File(...) 
+ job.addArguments("-i",input,"-o",output) + + Profiles are added similarly: + job.addProfile(Profile(Namespace.ENV, key='PATH', value='/bin')) + job.profile(Namespace.ENV, "PATH", "/bin") + + Adding file uses is simple, and you can override global File attributes: + job.uses(input, Link.INPUT) + job.uses(output, Link.OUTPUT, transfer=True, register=True) + """ + + def __init__(self, name, id=None, namespace=None, version=None, node_label=None): + """The ID for each job should be unique in the DAX. If it is None, then + it will be automatically generated when the job is added to the DAX. + + The name, namespace, and version should match what you have in your + transformation catalog. For example, if namespace="foo" name="bar" + and version="1.0", then the transformation catalog should have an + entry for "foo::bar:1.0". + + The name argument can be either a string, or a Transformation object. If + it is a Transformation object, then the job will inherit the name, namespace, + and version from the Transformation. + + Arguments: + name: The transformation name or Transformation object (required) + id: A unique identifier for the job (optional) + namespace: The namespace of the transformation (optional) + version: The transformation version (optional) + node_label: The label for this job to use in graphing (optional) + """ + self.namespace = None + self.version = None + if isinstance(name, (Transformation, Executable)): + self.name = name.name + self.namespace = name.namespace + self.version = name.version + elif isinstance(name, basestring): + self.name = name + else: + raise FormatError("Name must be a string, Transformation or Executable") + if not self.name: + raise FormatError("Invalid name", self.name) + AbstractJob.__init__(self, id=id, node_label=node_label) + if namespace: self.namespace = namespace + if version: self.version = version + + def __unicode__(self): + return u"" % (self.id, self.namespace, self.name, self.version) + + def __str__(self): + return unicode(self).encode("utf-8") + + def toXML(self): + e = Element('job', [ + ('id', self.id), + ('namespace', self.namespace), + ('name', self.name), + ('version', self.version), + ('node-label', self.node_label) + ]) + self.innerXML(e) + return e + + +class DAX(AbstractJob): + """DAX(file[,id][,node_label]) + + This job represents a sub-DAX that will be planned and executed by + the workflow. + + Examples: + daxjob1 = DAX("foo.dax") + + daxfile = File("foo.dax") + daxjob2 = DAX(daxfile) + """ + + def __init__(self, file, id=None, node_label=None): + """ + + The name argument can be either a string, or a File object. If + it is a File object, then this job will inherit its name from the + File and the File will be added in a with transfer=True, + register=False, and link=input. 
+ + Arguments: + file: The logical name of the DAX file or the DAX File object + id: The id of the DAX job [default: autogenerated] + node_label: The label for this job to use in graphing + """ + if isinstance(file, File): + self.file = file + elif isinstance(file, str) or isinstance(file, unicode): + self.file = File(name=file) + else: + raise FormatError("invalid file", file) + AbstractJob.__init__(self, id=id, node_label=node_label) + + def __unicode__(self): + return u"" % (self.id, self.file.name) + + def __str__(self): + return unicode(self).encode("utf-8") + + def toXML(self): + """Return an XML representation of this job""" + e = Element('dax', [ + ('id', self.id), + ('file', self.file.name), + ('node-label', self.node_label) + ]) + self.innerXML(e) + return e + + +class DAG(AbstractJob): + """DAG(file[,id][,node_label]) + + This job represents a sub-DAG that will be executed by this + workflow. + + Examples: + dagjob1 = DAG(file="foo.dag") + + dagfile = File("foo.dag") + dagjob2 = DAG(dagfile) + """ + + def __init__(self, file, id=None, node_label=None): + """ + The name argument can be either a string, or a File object. If + it is a File object, then this job will inherit its name from the + File and the File will be added in a with transfer=True, + register=False, and link=input. + + Arguments: + file: The logical name of the DAG file, or the DAG File object + id: The ID of the DAG job [default: autogenerated] + node_label: The label for this job to use in graphing + """ + if isinstance(file, File): + self.file = file + elif isinstance(file, str) or isinstance(file, unicode): + self.file = File(name=file) + else: + raise FormatError("Invalid file", file) + AbstractJob.__init__(self, id=id, node_label=node_label) + + def __unicode__(self): + return u"" % (self.id, self.file.name) + + def __str__(self): + return unicode(self).encode("utf-8") + + def toXML(self): + """Return an XML representation of this DAG""" + e = Element('dag', [ + ('id', self.id), + ('file', self.file.name), + ('node-label', self.node_label) + ]) + self.innerXML(e) + return e + + +class Dependency: + """A dependency between two nodes in the ADAG""" + + def __init__(self, parent, child, edge_label=None): + if isinstance(parent, AbstractJob): + if not parent.id: + raise FormatError("Parent job has no id", parent) + self.parent = parent.id + elif parent: + self.parent = parent + else: + raise FormatError("Invalid parent", parent) + if isinstance(child, AbstractJob): + if not child.id: + raise FormatError("Child job has no id", child) + self.child = child.id + elif child: + self.child = child + else: + raise FormatError("Invalid child", child) + if self.parent == self.child: + raise FormatError("No self edges allowed", (self.parent, self.child)) + self.edge_label = edge_label + + def __unicode__(self): + return " %s>" % (self.parent, self.child) + + def __str__(self): + return unicode(self).encode("utf-8") + + def __hash__(self): + return hash((self.parent, self.child)) + + def __eq__(self, other): + """Equal dependencies have the same parent and child""" + if isinstance(other, Dependency): + return self.parent == other.parent and self.child == other.child + return False + + +class ADAG(InvokeMixin, MetadataMixin): + """ADAG(name[,count][,index]) + + Representation of a directed acyclic graph in XML (DAX). + + Examples: + dax = ADAG('diamond') + or, if you want to use the old style count/index partitioning stuff: + part5 = ADAG('partition_5',count=10,index=5) + + Adding jobs: + a = Job(...) 
+ dax.addJob(a) + + Adding parent-child control-flow dependency: + dax.addDependency(Dependency(parent=a,child=b)) + dax.addDependency(Dependency(parent=a,child=c)) + dax.addDependency(Dependency(parent=b,child=d)) + dax.addDependency(Dependency(parent=c,child=d)) + or: + dax.depends(child=b, parent=a) + + Adding Files (not required if you have a replica catalog): + input = File(...) + dax.addFile(input) + + Adding Executables (not required if you have a transformation catalog): + exe = Executable(...) + dax.addExecutable(exe) + + Adding Transformations (not required if you have a transformation catalog): + xform = Transformation(...) + dax.addTransformation(xform) + + Writing a DAX out to a file: + f = open('diamond.dax','w') + dax.writeXML(f) + f.close() + """ + + def __init__(self, name, count=None, index=None, auto=False): + """ + Arguments: + name: The name of the workflow + count: Total number of DAXes that will be created + index: Zero-based index of this DAX + """ + if not name: + raise FormatError("Invalid ADAG name", name) + self.name = name + if count: count = int(count) + if index: index = int(index) + self.count = count + self.index = index + self._auto = auto if auto is True else False + + # This is used to generate unique ID numbers + self.sequence = 1 + + self.jobs = {} + self.files = set() + self.executables = set() + self.dependencies = set() + self.transformations = set() + self.invocations = set() + self._metadata = set() + + # PM-1311 always associate dax.api metadata + self.metadata("dax.api", "python") + + def __unicode__(self): + return u"" % self.name + + def __str__(self): + return unicode(self).encode("utf-8") + + def nextJobID(self): + """Get an autogenerated ID for the next job""" + next = None + while not next or next in self.jobs: + next = "ID%07d" % self.sequence + self.sequence += 1 + return next + + def getJob(self, jobid): + """Get a Job/DAG/DAX""" + if not jobid in self.jobs: + raise NotFoundError("Job not found", jobid) + return self.jobs[jobid] + + def addJob(self, job): + """Add a job to this ADAG""" + # Add an auto-generated ID if the job doesn't have one + if job.id is None: + job.id = self.nextJobID() + if self.hasJob(job): + raise DuplicateError("Duplicate job %s" % job) + self.jobs[job.id] = job + + def hasJob(self, job): + """Test to see if job is in this ADAG + The job parameter can be an object or a job ID + """ + if isinstance(job, AbstractJob): + return job.id in self.jobs + else: + return job in self.jobs + + def removeJob(self, job): + """Remove job from this ADAG""" + if not self.hasJob(job): + raise NotFoundError("Job not found", job) + if isinstance(job, AbstractJob): + del self.jobs[job.id] + else: + del self.jobs[job] + + def clearJobs(self): + """Remove all jobs""" + self.jobs = {} + + def addDAX(self, dax): + """Add a sub-DAX (synonym for addJob)""" + if not isinstance(dax, DAX): + raise FormatError("Not a DAX", dax) + self.addJob(dax) + + def addDAG(self, dag): + """Add a sub-DAG (synonym for addJob)""" + if not isinstance(dag, DAG): + raise FormatError("Not a DAG", dag) + self.addJob(dag) + + def addFile(self, file): + """Add a file to the DAX""" + if not isinstance(file, File): + raise FormatError("Invalid File", file) + if self.hasFile(file): + raise DuplicateError("Duplicate file %s" % file) + self.files.add(file) + + def hasFile(self, file): + """Check to see if file is in this ADAG""" + return file in self.files + + def removeFile(self, file): + """Remove file from this ADAG""" + if not self.hasFile(file): + raise 
NotFoundError("File not found", file) + self.files.remove(file) + + def clearFiles(self): + """Remove all files""" + self.files.clear() + + def addExecutable(self, executable): + """Add an executable to this ADAG""" + if self.hasExecutable(executable): + raise DuplicateError("Duplicate executable %s" % executable) + self.executables.add(executable) + + def hasExecutable(self, executable): + """Check if executable is in this ADAG""" + return executable in self.executables + + def removeExecutable(self, executable): + """Remove executable from this ADAG""" + if not self.hasExecutable(executable): + raise NotFoundError("Executable not found %s" % executable) + self.executables.remove(executable) + + def clearExecutables(self): + """Remove all executables""" + self.executables.clear() + + def addTransformation(self, transformation): + """Add a transformation to this ADAG""" + if self.hasTransformation(transformation): + raise DuplicateError("Duplicate tranformation %s" % transformation) + self.transformations.add(transformation) + + def hasTransformation(self, transformation): + """Check to see if transformation is in this ADAG""" + return transformation in self.transformations + + def removeTransformation(self, transformation): + """Remove transformation from this ADAG""" + if not self.hasTransformation(transformation): + raise NotFoundError("Transformation not found %s" % transformation) + self.transformations.remove(transformation) + + def clearTransformations(self): + """Remove all transformations""" + self.transformations.clear() + + def depends(self, child, parent, edge_label=None): + """Add a dependency to the workflow + Arguments: + child: The child job/dax/dag or id + parent: The parent job/dax/dag or id + edge_label: A label for the edge (optional) + """ + d = Dependency(parent, child, edge_label) + self.addDependency(d) + + def addDependency(self, dep): + """Add a dependency to the workflow + + The old way to call this method is no longer valid. Please change: + adag.addDependency(parent="ID01", child="ID02", edge_label="E01") + to be: + adag.addDependency(Dependency(parent="ID01", child="ID02", edge_label="E01")) + or: + adag.depends(parent="ID01", child="ID02", edge_label="E01") + + """ + if self.hasDependency(dep): + raise DuplicateError("Duplicate dependency %s" % dep) + # Check the jobs + if dep.parent not in self.jobs: + raise NotFoundError("Parent not found", dep.parent) + if dep.child not in self.jobs: + raise NotFoundError("Child not found", dep.child) + self.dependencies.add(dep) + + def hasDependency(self, dep): + """Check to see if dependency exists""" + return dep in self.dependencies + + def removeDependency(self, dep): + """Remove dependency from workflow""" + if not self.hasDependency(dep): + raise NotFoundError("Dependency not found", dep) + self.dependencies.remove(dep) + + def clearDependencies(self): + """Remove all dependencies""" + self.dependencies.clear() + + def toXML(self): + """Get the XML string for this ADAG + This is primarily intended for testing. If you have a large ADAG + you should use writeXML instead. 
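+
+        Illustrative usage (assuming 'dax' is an existing ADAG instance):
+
+            xml_string = dax.toXML()          # whole DAX as an XML string, for tests
+            dax.writeXMLFile("diamond.dax")   # preferred for large DAXes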
+ """ + s = StringIO() + self.writeXML(s) + xml = s.getvalue() + s.close() + return xml + + def writeXMLFile(self, filename): + """Write the ADAG to an XML file""" + file = codecs.open(filename, "w", "utf-8") + self.writeXML(file) + file.close() + + def _autoDependencies(self): + """Automatically compute job dependencies based on input/output files used by a job""" + if self._auto is False: + return + + mapping = {} + + def addOutput(job, file_obj): + if file_obj: + file_obj = file_obj.name + + if file_obj not in mapping: + mapping[file_obj] = (set(), set()) + + mapping[file_obj][1].add(job) + + # Automatically determine dependencies + + # Traverse each job + for job_id, job in self.jobs.items(): + file_used = job.used + + # If job produces to stdout, identify it as an output file + addOutput(job, job.stdout) + # If job produces to stderr, identify it as an output file + addOutput(job, job.stderr) + + # If job consumes from stdin, identify it as an input file + if job.stdin: + if job.stdin.name not in mapping: + mapping[job.stdin.name] = (set(), set()) + + mapping[job.stdin.name][0].add(job) + + for f in file_used: + if f.name not in mapping: + mapping[f.name] = (set(), set()) + + if f.link == Link.INPUT: + mapping[f.name][0].add(job) + else: + mapping[f.name][1].add(job) + + for file_name, io in mapping.items(): + # Go through the mapping and for each file add dependencies between the + # job producing a file and the jobs consuming the file + inputs = io[0] + + if len(io[1]) > 0: + output = io[1].pop() + + for _input in inputs: + try: + self.depends(parent=output, child=_input) + except DuplicateError: + pass + + def writeXML(self, out): + """Write the ADAG as XML to a stream""" + self._autoDependencies() + + # Preamble + out.write('\n') + + out.write('\n' % datetime.datetime.now()) + if os.name == 'posix': + import pwd + username = pwd.getpwuid(os.getuid())[0] + elif os.name == 'nt': + username = os.getenv("USERNAME", "N/A") + else: + username = "N/A" + out.write('\n' % username) + out.write('\n') + + # Open tag + out.write('\n') + + # Metadata + for m in self._metadata: + out.write('\t') + m.toXML().write(stream=out, level=1) + out.write('\n') + + # Invocations + for i in self.invocations: + out.write('\t') + i.toXML().write(stream=out, level=1) + out.write('\n') + + # Files + for f in self.files: + out.write('\t') + f.toXML().write(stream=out, level=1) + out.write('\n') + + # Executables + for e in self.executables: + out.write('\t') + e.toXML().write(stream=out, level=1) + out.write('\n') + + # Transformations + for t in self.transformations: + out.write('\t') + t.toXML().write(stream=out, level=1) + out.write('\n') + + # Jobs + keys = self.jobs.keys() + keys = sorted(keys) + for job_id in keys: + job = self.jobs[job_id] + out.write('\t') + job.toXML().write(stream=out, level=1) + out.write('\n') + + # Dependencies + # Since we store dependencies as tuples, but we need to print them as nested elements + # we first build a map of all the children that maps child -> [(parent,label),...] 
+ children = {} + for dep in self.dependencies: + if not dep.child in children: + children[dep.child] = [] + children[dep.child].append((dep.parent, dep.edge_label)) + + # Now output all the xml in sorted order by child, then parent + keys = children.keys() + keys = sorted(keys) + for child in keys: + out.write('\t') + c = Element("child", [("ref", child)]) + parents = children[child] + parents = sorted(parents) + for parent, edge_label in parents: + p = Element("parent", [ + ("ref", parent), + ("edge-label", edge_label) + ]) + c.element(p) + c.write(stream=out, level=1) + out.write('\n') + + # Close tag + out.write('\n') + + +def parseString(string): + s = StringIO(string) + return parse(s) + + +def parse(infile): + try: + import xml.etree.cElementTree as etree + except: + try: + import xml.etree.ElementTree as etree + except: + try: + import elementtree.ElementTree as etree + except: + raise Exception("Please install elementtree") + + NS = "{http://pegasus.isi.edu/schema/DAX}" + + def QN(tag): + return NS + tag + + def badattr(e, exc): + return ParseError("Attribute '%s' is required for element %s" % (exc.args[0], e.tag)) + + def parse_invoke(e): + try: + return Invoke(when=e.attrib["when"], what=e.text) + except KeyError as ke: + raise badattr(e, ke) + + def parse_adag(e): + try: + name = e.attrib['name'] + count = e.get("count", None) + index = e.get("index", None) + return ADAG(name=name, count=count, index=index) + except KeyError as ke: + raise badattr(e, ke) + + def parse_profile(e): + try: + return Profile( + namespace=e.attrib["namespace"], + key=e.attrib["key"], + value=e.text) + except KeyError as ke: + raise badattr(e, ke) + + def parse_metadata(e): + try: + return Metadata( + key=e.attrib['key'], + value=e.text) + except KeyError as ke: + raise badattr(e, ke) + + def parse_pfn(e): + try: + p = PFN( + url=e.attrib['url'], + site=e.get("site", None) + ) + except KeyError as ke: + raise badattr(e, ke) + for pr in e.findall(QN("profile")): + p.addProfile(parse_profile(pr)) + return p + + def parse_catalog(e, f): + for p in e.findall(QN("profile")): + f.addProfile(parse_profile(p)) + for m in e.findall(QN("metadata")): + f.addMetadata(parse_metadata(m)) + for p in e.findall(QN("pfn")): + f.addPFN(parse_pfn(p)) + return f + + def parse_file(e): + try: + f = File(e.attrib['name']) + except KeyError as ke: + raise badattr(e, ke) + return parse_catalog(e, f) + + def parse_executable(e): + try: + exe = Executable( + name=e.attrib['name'], + namespace=e.get("namespace", None), + version=e.get("version", None), + arch=e.get("arch", None), + os=e.get("os", None), + osrelease=e.get("osrelease", None), + osversion=e.get("osversion", None), + glibc=e.get("glibc", None), + installed=e.get("installed", None) + ) + except KeyError as ke: + raise badattr(e, ke) + parse_catalog(e, exe) + for i in e.findall(QN("invoke")): + exe.addInvoke(parse_invoke(i)) + return exe + + def parse_uses(e): + try: + u = Use( + e.attrib['name'], + namespace=e.get('namespace', None), + version=e.get('version', None), + link=e.get('link', None), + register=e.get('register', None), + transfer=e.get('transfer', None), + optional=e.get('optional', None), + executable=e.get('executable', None) + ) + except KeyError as ke: + raise badattr(e, ke) + for m in e.findall(QN("metadata")): + u.addMetadata(parse_metadata(m)) + return u + + def parse_transformation(e): + try: + t = Transformation( + namespace=e.get("namespace", None), + name=e.attrib['name'], + version=e.get("version", None)) + except KeyError as ke: + raise 
badattr(e, ke) + for u in e.findall(QN("uses")): + t.addUse(parse_uses(u)) + for i in e.findall(QN("invoke")): + t.addInvoke(parse_invoke(i)) + for m in e.findall(QN("metadata")): + t.addMetadata(parse_metadata(m)) + return t + + def iterelem(e): + if e.text: + yield e.text + for f in e: + if f.text: + yield f.text + yield f + if f.tail: + yield f.tail + + def parse_absjob(e, j): + args = e.find(QN("argument")) + if args is not None: + for i in iterelem(args): + if isinstance(i, basestring): + j.addRawArguments(i) + else: + j.addRawArguments(File(i.attrib['name'])) + + try: + s = e.find(QN("stdin")) + if s is not None: + j.setStdin(s.attrib['name']) + + s = e.find(QN("stdout")) + if s is not None: + j.setStdout(s.attrib['name']) + + s = e.find(QN("stderr")) + if s is not None: + j.setStderr(s.attrib['name']) + except KeyError as ke: + raise badattr(s, ke) + + for p in e.findall(QN("profile")): + j.addProfile(parse_profile(p)) + + for u in e.findall(QN("uses")): + j.addUse(parse_uses(u)) + + for i in e.findall(QN("invoke")): + j.addInvoke(parse_invoke(i)) + + for m in e.findall(QN("metadata")): + j.addMetadata(parse_metadata(m)) + + return j + + def parse_job(e): + try: + j = Job( + name=e.attrib["name"], + id=e.attrib["id"], + namespace=e.get("namespace", None), + version=e.get("version", None), + node_label=e.get("node-label", None) + ) + except KeyError as ke: + raise badattr(e, ke) + return parse_absjob(e, j) + + def parse_dax(e): + try: + d = DAX( + file=e.attrib["file"], + id=e.attrib["id"], + node_label=e.get("node-label", None) + ) + except KeyError as ke: + raise badattr(e, ke) + return parse_absjob(e, d) + + def parse_dag(e): + try: + d = DAG( + file=e.attrib["file"], + id=e.attrib["id"], + node_label=e.get("node-label", None) + ) + except KeyError as ke: + raise badattr(e, ke) + return parse_absjob(e, d) + + def parse_dependencies(e): + try: + child = e.attrib["ref"] + except KeyError as ke: + raise badattr(e, ke) + for p in e.findall(QN("parent")): + try: + parent = p.attrib["ref"] + label = p.attrib.get("edge-label", None) + yield Dependency(parent, child, label) + except KeyError as ke: + raise badattr(p, ke) + + # We use iterparse because we don't have to read in the + # entire document + iterator = etree.iterparse(infile, events=("start", "end")) + iterator = iter(iterator) + + # Get the document element (should be ) + event, root = next(iterator) + adag = parse_adag(root) + + # This function reads all the children of "node" + def expand(node): + event, elem = next(iterator) + while elem != node: + event, elem = next(iterator) + + # We clear the document element to prevent + # the memory usage from growing + root.clear() + + for ev, elem in iterator: + if ev == "end": + continue + + # Read in the entire element and children + expand(elem) + + if elem.tag == QN("job"): + j = parse_job(elem) + adag.addJob(j) + elif elem.tag == QN("child"): + for d in parse_dependencies(elem): + adag.addDependency(d) + elif elem.tag == QN("file"): + f = parse_file(elem) + adag.addFile(f) + elif elem.tag == QN("executable"): + e = parse_executable(elem) + adag.addExecutable(e) + elif elem.tag == QN("transformation"): + t = parse_transformation(elem) + adag.addTransformation(t) + elif elem.tag == QN("dag"): + d = parse_dag(elem) + adag.addJob(d) + elif elem.tag == QN("dax"): + d = parse_dax(elem) + adag.addJob(d) + elif elem.tag == QN("invoke"): + adag.addInvoke(parse_invoke(elem)) + elif elem.tag == QN("metadata"): + adag.addMetadata(parse_metadata(elem)) + else: + raise ParseError("Unknown 
tag", elem.tag) + + return adag + + +def main(): + """Simple smoke test""" + # Create a DAX + diamond = ADAG("diamond") + + # Add some metadata + diamond.metadata("name", "diamond") + diamond.metadata("createdby", "Gideon Juve") + + # add some invoke condition + diamond.invoke('on_error', '/usr/bin/update_db -failure') + + # Add input file to the DAX-level replica catalog + a = File("f.a") + a.addPFN(PFN("gsiftp://site.com/inputs/f.a", "site")) + a.metadata("size", "1024") + diamond.addFile(a) + + # Add executables to the DAX-level replica catalog + e_preprocess = Executable(namespace="diamond", name="preprocess", version="4.0", os="linux", arch="x86_64") + e_preprocess.metadata("size", "2048") + e_preprocess.addPFN(PFN("gsiftp://site.com/bin/preprocess", "site")) + diamond.addExecutable(e_preprocess) + + e_findrange = Executable(namespace="diamond", name="findrange", version="4.0", os="linux", arch="x86_64") + e_findrange.addPFN(PFN("gsiftp://site.com/bin/findrange", "site")) + diamond.addExecutable(e_findrange) + + e_analyze = Executable(namespace="diamond", name="analyze", version="4.0", os="linux", arch="x86_64") + e_analyze.addPFN(PFN("gsiftp://site.com/bin/analyze", "site")) + e_analyze.addProfile(Profile(namespace="env", key="APP_HOME", value="/app")) + diamond.addExecutable(e_analyze) + + # Add a preprocess job + preprocess = Job(e_preprocess) + preprocess.metadata("time", "60") + b1 = File("f.b1") + b2 = File("f.b2") + preprocess.addArguments("-a preprocess", "-T60", "-i", a, "-o", b1, b2) + preprocess.uses(a, link=Link.INPUT) + preprocess.uses(b1, link=Link.OUTPUT, transfer=True) + preprocess.uses(b2, link=Link.OUTPUT, transfer=True) + diamond.addJob(preprocess) + + # Add left Findrange job + frl = Job(e_findrange) + frl.metadata("time", "60") + c1 = File("f.c1") + frl.addArguments("-a findrange", "-T60", "-i", b1, "-o", c1) + frl.uses(b1, link=Link.INPUT) + frl.uses(c1, link=Link.OUTPUT, transfer=True) + diamond.addJob(frl) + + # Add right Findrange job + frr = Job(e_findrange) + frr.metadata("time", "60") + c2 = File("f.c2") + frr.addArguments("-a findrange", "-T60", "-i", b2, "-o", c2) + frr.uses(b2, link=Link.INPUT) + frr.uses(c2, link=Link.OUTPUT, transfer=True) + diamond.addJob(frr) + + # Add Analyze job + analyze = Job(e_analyze) + analyze.metadata("time", "60") + d = File("f.d") + analyze.addArguments("-a analyze", "-T60", "-i", c1, c2, "-o", d) + analyze.uses(c1, link=Link.INPUT) + analyze.uses(c2, link=Link.INPUT) + analyze.uses(d, link=Link.OUTPUT, transfer=True, register=True) + diamond.addJob(analyze) + + # Add dependencies + diamond.depends(parent=preprocess, child=frl) + diamond.depends(parent=preprocess, child=frr) + diamond.depends(parent=frl, child=analyze) + diamond.depends(parent=frr, child=analyze) + + # Get generated diamond dax + import sys + diamond.writeXML(sys.stdout) + + +if __name__ == '__main__': + main() diff --git a/workflow-generator/Pegasus/__init__.py b/workflow-generator/Pegasus/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/workflow-generator/README.md b/workflow-generator/README.md new file mode 100644 index 0000000..7bc68c0 --- /dev/null +++ b/workflow-generator/README.md @@ -0,0 +1 @@ +docker run -v $PWD:/workdir hyperflowwms/soykb-generator sh -c 'generate-workflow 2' diff --git a/workflow-generator/chromosomes.txt b/workflow-generator/chromosomes.txt new file mode 100644 index 0000000..76c659e --- /dev/null +++ b/workflow-generator/chromosomes.txt @@ -0,0 +1,20 @@ +>Chr01 +>Chr02 +>Chr03 +>Chr04 +>Chr05 +>Chr06 
+>Chr07 +>Chr08 +>Chr09 +>Chr10 +>Chr11 +>Chr12 +>Chr13 +>Chr14 +>Chr15 +>Chr16 +>Chr17 +>Chr18 +>Chr19 +>Chr20 diff --git a/workflow-generator/conf/.soybean-workflow.conf b/workflow-generator/conf/.soybean-workflow.conf new file mode 100644 index 0000000..bbd4106 --- /dev/null +++ b/workflow-generator/conf/.soybean-workflow.conf @@ -0,0 +1,26 @@ +# local refers to the submit host. Specify paths to a directory +# which can be used by the workflow as work space, and locations +# for local software installs. +[local] + +work_dir = data + +irods_bin = irods_bin + +# tacc refers to configuration for the TACC Stampede +# supercomputer. To use this machine, you need an allocation +# (start with TG-) and you also need to know your username +# and storage group name for the system. The easiest way to +# obtain those is to log into the system, and run: +# cds; pwd +# This should return a path like: /scratch/00384/rynge. The +# storage group is the second level, and your username is +# last level. +[tacc] + +allocation = TG-ABC1234 + +username = rynge + +storage_group = 00384 + diff --git a/workflow-generator/conf/distributed/pegasus.conf b/workflow-generator/conf/distributed/pegasus.conf new file mode 100644 index 0000000..3ee6a94 --- /dev/null +++ b/workflow-generator/conf/distributed/pegasus.conf @@ -0,0 +1,22 @@ +pegasus.metrics.app = Soykb + +pegasus.catalog.site.file = sites.catalog + +pegasus.catalog.transformation.file = transformations.catalog + +pegasus.catalog.replica = File +pegasus.catalog.replica.file = replica.catalog + +pegasus.dir.useTimestamp = true +pegasus.dir.storage.mapper = Flat +pegasus.dir.storage.deep = true +pegasus.condor.logs.symlink = false + +pegasus.data.configuration = nonsharedfs + +pegasus.transfer.threads = 4 +pegasus.transfer.lite.threads = 4 +pegasus.stagein.clusters = 2 +pegasus.stageout.clusters = 2 + + diff --git a/workflow-generator/conf/distributed/replica.catalog b/workflow-generator/conf/distributed/replica.catalog new file mode 100644 index 0000000..e69de29 diff --git a/workflow-generator/conf/distributed/site.conf b/workflow-generator/conf/distributed/site.conf new file mode 100644 index 0000000..84d2139 --- /dev/null +++ b/workflow-generator/conf/distributed/site.conf @@ -0,0 +1,11 @@ + + +[exec_environment] + +staging_site = isi_workflow + +output_site = isi_workflow + +job_clustering = + + diff --git a/workflow-generator/conf/distributed/sites.catalog.template b/workflow-generator/conf/distributed/sites.catalog.template new file mode 100644 index 0000000..fd80d75 --- /dev/null +++ b/workflow-generator/conf/distributed/sites.catalog.template @@ -0,0 +1,39 @@ + + + + + + + + + + $pegasus_bin:$irods_bin:/usr/bin:/bin + $home/.ssh/workflow + + + condor + vanilla + isUndefined(GLIDEIN_Entry_Name) + "SoyKB" + $irods_bin:/usr/bin:/bin + /tmp + + + + + + $home/irods.iplant.env + + + + + + + + + + + $home/.ssh/workflow + + + diff --git a/workflow-generator/conf/distributed/transformations.catalog b/workflow-generator/conf/distributed/transformations.catalog new file mode 100644 index 0000000..e69de29 diff --git a/workflow-generator/conf/main.conf b/workflow-generator/conf/main.conf new file mode 100644 index 0000000..d32b9cf --- /dev/null +++ b/workflow-generator/conf/main.conf @@ -0,0 +1,11 @@ + +[main] + +# single-end or pair-end +inputs-style = pair-end + +# example: QD < 2.0 || FS > 200.0 || MQ < 40 || Haplotypescore > 20.0 +snp_filter = QD < 2.0 || FS > 60.0 || MQ < 40.0 +indel_filter = QD < 2.0 || FS > 200.0 || MQ < 40 + + diff --git 
a/workflow-generator/conf/missouri/pegasus.conf b/workflow-generator/conf/missouri/pegasus.conf new file mode 100644 index 0000000..3ee6a94 --- /dev/null +++ b/workflow-generator/conf/missouri/pegasus.conf @@ -0,0 +1,22 @@ +pegasus.metrics.app = Soykb + +pegasus.catalog.site.file = sites.catalog + +pegasus.catalog.transformation.file = transformations.catalog + +pegasus.catalog.replica = File +pegasus.catalog.replica.file = replica.catalog + +pegasus.dir.useTimestamp = true +pegasus.dir.storage.mapper = Flat +pegasus.dir.storage.deep = true +pegasus.condor.logs.symlink = false + +pegasus.data.configuration = nonsharedfs + +pegasus.transfer.threads = 4 +pegasus.transfer.lite.threads = 4 +pegasus.stagein.clusters = 2 +pegasus.stageout.clusters = 2 + + diff --git a/workflow-generator/conf/missouri/replica.catalog b/workflow-generator/conf/missouri/replica.catalog new file mode 100644 index 0000000..e69de29 diff --git a/workflow-generator/conf/missouri/site.conf b/workflow-generator/conf/missouri/site.conf new file mode 100644 index 0000000..f88a486 --- /dev/null +++ b/workflow-generator/conf/missouri/site.conf @@ -0,0 +1,11 @@ + + +[exec_environment] + +staging_site = staging + +output_site = local + +job_clustering = + + diff --git a/workflow-generator/conf/missouri/sites.catalog.template b/workflow-generator/conf/missouri/sites.catalog.template new file mode 100644 index 0000000..359e79b --- /dev/null +++ b/workflow-generator/conf/missouri/sites.catalog.template @@ -0,0 +1,31 @@ + + + + + + + + + + $pegasus_bin:$irods_bin:/usr/bin:/bin + $home/.ssh/workflow + + + condor + vanilla + /opt/java/jdk1.7.0_09/bin:/usr/bin:/bin + + + + + + $home/irods.iplant.json + + + + + + $home/.ssh/workflow + + + diff --git a/workflow-generator/conf/missouri/transformations.catalog b/workflow-generator/conf/missouri/transformations.catalog new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/workflow-generator/conf/missouri/transformations.catalog @@ -0,0 +1 @@ + diff --git a/workflow-generator/conf/psc-bridges/README.md b/workflow-generator/conf/psc-bridges/README.md new file mode 100644 index 0000000..84ea990 --- /dev/null +++ b/workflow-generator/conf/psc-bridges/README.md @@ -0,0 +1,43 @@ + + +## Editing sites.catalog.template + +Please note that `sites.catalog.template` needs to be updated based on the user who will run the glideins on PSC Bridges. The section which needs to be updated is: + + + + + + +Determine the shared directory assigned to you on Bridges by logging in and running `echo $SCRATCH`. Update the two paths in the section above such that they have your scratch directory, plus `/workflow-runs`. Do _not_ use environment variables here - only full expanded paths are allowed. + + +## Glideins on PSC Bridges + +This setup is based on a PyGlidein setup (https://pegasus.isi.edu/documentation/pyglidein.php) + +To get setup up, first log in to your PSC Bridges account, and then copy the configuration to your home directory: + + $ cd ~ + $ cp ~rynge/rnaseq ~/ + +Edit `~/rnaseq/config/rnaseq-bridges.config`. And the minimum, change `user` and the location of `tarball`, replacing `rynge` with your username. Also update the `#SBATCH --account=` line with a project you want the glidein to charge to. + +Set up the Python virtual environment, and try submitting your first glidein (assuming you already have a workflow submitted on workflow.isi.edu - pyglidein will check for demand before submitting new glideins): + + $ module load python2 + $ cd ~/rnaseq/ + $ . 
venv/bin/activate + $ pyglidein_client --config=$HOME/rnaseq/config/rnaseq-bridges.config --secrets=$HOME/rnaseq/config/secrets + +The output should state that a glidein was submitted: + + 2018-08-29 17:29:55,788 DEBUG {u'count': 1, u'cpus': 1, u'memory': 0, u'gpus': 0, u'disk': 0.001, u'os': None} + 2018-08-29 17:29:55,788 DEBUG {u'jsonrpc': u'2.0', u'result': [{u'count': 1, u'cpus': 1, u'memory': 0, u'gpus': 0, u'disk': 0.001, u'os': None}], u'id': 0} + Submitted batch job 3865303 + 2018-08-29 17:29:55,846 INFO launched 1 glideins on RM + +After a few minutes, you should be able to see the glidein by running `condor_status` on `workflow.isi.edu`. + +PSC Bridges no longer allows cron jobs, so you have to use the pyglidein_client to start glideins manually. + diff --git a/workflow-generator/conf/psc-bridges/pegasus.conf b/workflow-generator/conf/psc-bridges/pegasus.conf new file mode 100644 index 0000000..4a97249 --- /dev/null +++ b/workflow-generator/conf/psc-bridges/pegasus.conf @@ -0,0 +1,19 @@ +pegasus.metrics.app = Soykb + +pegasus.catalog.site.file = sites.catalog + +pegasus.catalog.transformation.file = transformations.catalog + +pegasus.catalog.replica = File +pegasus.catalog.replica.file = replica.catalog + +pegasus.dir.useTimestamp = true +pegasus.dir.storage.mapper = Flat +pegasus.dir.storage.deep = true +pegasus.condor.logs.symlink = false + +pegasus.data.configuration = sharedfs + +pegasus.transfer.*.remote.sites = execution + + diff --git a/workflow-generator/conf/psc-bridges/replica.catalog b/workflow-generator/conf/psc-bridges/replica.catalog new file mode 100644 index 0000000..e69de29 diff --git a/workflow-generator/conf/psc-bridges/site.conf b/workflow-generator/conf/psc-bridges/site.conf new file mode 100644 index 0000000..a5c3b2e --- /dev/null +++ b/workflow-generator/conf/psc-bridges/site.conf @@ -0,0 +1,11 @@ + + +[exec_environment] + +staging_site = + +output_site = isi_workflow + +job_clustering = + + diff --git a/workflow-generator/conf/psc-bridges/sites.catalog.template b/workflow-generator/conf/psc-bridges/sites.catalog.template new file mode 100644 index 0000000..1ab4ebf --- /dev/null +++ b/workflow-generator/conf/psc-bridges/sites.catalog.template @@ -0,0 +1,48 @@ + + + + + + + + + + + $pegasus_bin:$irods_bin:/usr/bin:/bin + $home/.ssh/workflow + + + + + + + condor + vanilla + regexp("psc.edu", TARGET.FileSystemDomain) + TimeToLive + True + /home/rynge/software/pegasus/pegasus-4.8.3 + + + + + + + + $home/irods.iplant.json + + + + + + + + + + + + $home/.ssh/workflow + + + + diff --git a/workflow-generator/conf/psc-bridges/transformations.catalog b/workflow-generator/conf/psc-bridges/transformations.catalog new file mode 100644 index 0000000..e69de29 diff --git a/workflow-generator/conf/rc4/pegasus.conf b/workflow-generator/conf/rc4/pegasus.conf new file mode 100644 index 0000000..3ee6a94 --- /dev/null +++ b/workflow-generator/conf/rc4/pegasus.conf @@ -0,0 +1,22 @@ +pegasus.metrics.app = Soykb + +pegasus.catalog.site.file = sites.catalog + +pegasus.catalog.transformation.file = transformations.catalog + +pegasus.catalog.replica = File +pegasus.catalog.replica.file = replica.catalog + +pegasus.dir.useTimestamp = true +pegasus.dir.storage.mapper = Flat +pegasus.dir.storage.deep = true +pegasus.condor.logs.symlink = false + +pegasus.data.configuration = nonsharedfs + +pegasus.transfer.threads = 4 +pegasus.transfer.lite.threads = 4 +pegasus.stagein.clusters = 2 +pegasus.stageout.clusters = 2 + + diff --git a/workflow-generator/conf/rc4/replica.catalog 
b/workflow-generator/conf/rc4/replica.catalog new file mode 100644 index 0000000..e69de29 diff --git a/workflow-generator/conf/rc4/site.conf b/workflow-generator/conf/rc4/site.conf new file mode 100644 index 0000000..3d5e3de --- /dev/null +++ b/workflow-generator/conf/rc4/site.conf @@ -0,0 +1,11 @@ + + +[exec_environment] + +staging_site = execution + +output_site = local + +job_clustering = + + diff --git a/workflow-generator/conf/rc4/sites.catalog.template b/workflow-generator/conf/rc4/sites.catalog.template new file mode 100644 index 0000000..3069600 --- /dev/null +++ b/workflow-generator/conf/rc4/sites.catalog.template @@ -0,0 +1,32 @@ + + + + + + + + + + $pegasus_bin:$irods_bin:/usr/bin:/bin + $home/.ssh/workflow + + + + + + glite + true + batch slurm + /local/scratch/$username + /local/scratch/$username + /home/rynge/software/jdk1.7.0_09/bin:/usr/bin:/bin + /home/rynge/software/pegasus-4.6.2dev + + + + + + $home/irods.iplant.json + + + diff --git a/workflow-generator/conf/rc4/transformations.catalog b/workflow-generator/conf/rc4/transformations.catalog new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/workflow-generator/conf/rc4/transformations.catalog @@ -0,0 +1 @@ + diff --git a/workflow-generator/conf/tacc-stampede/pegasus.conf b/workflow-generator/conf/tacc-stampede/pegasus.conf new file mode 100644 index 0000000..d9a1e96 --- /dev/null +++ b/workflow-generator/conf/tacc-stampede/pegasus.conf @@ -0,0 +1,21 @@ +pegasus.metrics.app = Soykb + +pegasus.catalog.site.file = sites.catalog + +pegasus.catalog.transformation.file = transformations.catalog + +pegasus.catalog.replica = File +pegasus.catalog.replica.file = replica.catalog + +pegasus.dir.useTimestamp = true +pegasus.dir.storage.mapper = Flat +pegasus.dir.storage.deep = true +pegasus.condor.logs.symlink = false + +pegasus.data.configuration = sharedfs + +pegasus.transfer.threads = 2 +pegasus.transfer.lite.threads = 8 +pegasus.stagein.clusters = 3 +pegasus.stageout.clusters = 4 + diff --git a/workflow-generator/conf/tacc-stampede/replica.catalog b/workflow-generator/conf/tacc-stampede/replica.catalog new file mode 100644 index 0000000..e69de29 diff --git a/workflow-generator/conf/tacc-stampede/site.conf b/workflow-generator/conf/tacc-stampede/site.conf new file mode 100644 index 0000000..2fc1abc --- /dev/null +++ b/workflow-generator/conf/tacc-stampede/site.conf @@ -0,0 +1,11 @@ + + +[exec_environment] + +staging_site = + +output_site = irods_iplant + +job_clustering = label + + diff --git a/workflow-generator/conf/tacc-stampede/sites.catalog.template b/workflow-generator/conf/tacc-stampede/sites.catalog.template new file mode 100644 index 0000000..48fe4d2 --- /dev/null +++ b/workflow-generator/conf/tacc-stampede/sites.catalog.template @@ -0,0 +1,40 @@ + + + + + + + + + + $pegasus_bin:$irods_bin:/usr/bin:/bin + $home/.ssh/workflow + + + + + + + + mpiexec + /home1/00384/rynge/software/pegasus/4.4.0cvs + /home1/00384/rynge/software/pegasus/4.4.0cvs/bin:/home1/00384/rynge/software/irods/3.2/bin:/scratch/projects/xsede/globus-5.0.4-r1/bin:/usr/bin:/bin + /scratch/projects/xsede/globus-5.0.4-r1/lib + /tmp + $tacc_allocation + + + + + + $home/irods.iplant.env + + + + + + + $home/.ssh/workflow + + + diff --git a/workflow-generator/conf/tacc-stampede/transformations.catalog b/workflow-generator/conf/tacc-stampede/transformations.catalog new file mode 100644 index 0000000..0d3768a --- /dev/null +++ b/workflow-generator/conf/tacc-stampede/transformations.catalog @@ -0,0 +1,19 @@ +tr pegasus::transfer { + site execution { + 
pfn "/home1/00384/rynge/software/pegasus/4.4.0cvs/bin/pegasus-transfer" + arch "x86_64" + os "linux" + type "INSTALLED" + profile globus "maxwalltime" "1440" + } +} +tr pegasus::mpiexec { + site execution { + pfn "/home1/00384/rynge/software/pegasus-mpi-cluster/pegasus-mpi-cluster" + arch "x86_64" + os "linux" + type "INSTALLED" + profile globus "jobtype" "mpi" + profile globus "maxwalltime" "2880" + } +} diff --git a/workflow-generator/conf/tacc-wrangler/pegasus.conf b/workflow-generator/conf/tacc-wrangler/pegasus.conf new file mode 100644 index 0000000..6820faf --- /dev/null +++ b/workflow-generator/conf/tacc-wrangler/pegasus.conf @@ -0,0 +1,24 @@ +pegasus.metrics.app = Soykb + +pegasus.catalog.site.file = sites.catalog + +pegasus.catalog.transformation.file = transformations.catalog + +pegasus.catalog.replica = File +pegasus.catalog.replica.file = replica.catalog + +pegasus.dir.useTimestamp = true +pegasus.dir.storage.mapper = Flat +pegasus.dir.storage.deep = true +pegasus.condor.logs.symlink = false + +pegasus.data.configuration = sharedfs + +pegasus.transfer.*.remote.sites = execution + +pegasus.transfer.threads = 1 +pegasus.stagein.clusters = 20 +pegasus.stageout.clusters = 20 +pegasus.file.cleanup.clusters.size = 4 + + diff --git a/workflow-generator/conf/tacc-wrangler/replica.catalog b/workflow-generator/conf/tacc-wrangler/replica.catalog new file mode 100644 index 0000000..e69de29 diff --git a/workflow-generator/conf/tacc-wrangler/site.conf b/workflow-generator/conf/tacc-wrangler/site.conf new file mode 100644 index 0000000..a54fd51 --- /dev/null +++ b/workflow-generator/conf/tacc-wrangler/site.conf @@ -0,0 +1,11 @@ + + +[exec_environment] + +staging_site = + +output_site = irods_iplant + +job_clustering = + + diff --git a/workflow-generator/conf/tacc-wrangler/sites.catalog.template b/workflow-generator/conf/tacc-wrangler/sites.catalog.template new file mode 100644 index 0000000..143e4f3 --- /dev/null +++ b/workflow-generator/conf/tacc-wrangler/sites.catalog.template @@ -0,0 +1,46 @@ + + + + + + + + + + $pegasus_bin:$irods_bin:/usr/bin:/bin + $home/.ssh/workflow + + + + + + + + + condor + vanilla + TARGET.FileSystemDomain == "wrangler.tacc.utexas.edu" + TimeToLive + True + /home/00384/rynge/software/pegasus/4.5.1 + /tmp + + + + + + $home/irods.iplant.json + + + + + + + + + + + $home/.ssh/workflow + + + diff --git a/workflow-generator/conf/tacc-wrangler/transformations.catalog b/workflow-generator/conf/tacc-wrangler/transformations.catalog new file mode 100644 index 0000000..e69de29 diff --git a/workflow-generator/editWorkflow.py b/workflow-generator/editWorkflow.py new file mode 100755 index 0000000..a4f77ab --- /dev/null +++ b/workflow-generator/editWorkflow.py @@ -0,0 +1,18 @@ +#!/usr/bin/env python2 + +import json +import argparse +parser = argparse.ArgumentParser() +parser.add_argument('-p', '--path', help='path to workflow.json', default="workflow.json") +parser.add_argument('-n', '--name', help='name of workflow', default="soykb") +parser.add_argument('-v', '--version', help='version of workflow', default="1.0.0") +args = parser.parse_args() + +with open(args.path, "r") as file: + contents = file.read() +wf = json.loads(contents) +wf["name"] = args.name +wf["version"] = args.version + +with open(args.path, "w") as file: + json.dump(wf, file, indent=4, sort_keys=True) \ No newline at end of file diff --git a/workflow-generator/fillFastqFile.py b/workflow-generator/fillFastqFile.py new file mode 100755 index 0000000..f208a2c --- /dev/null +++ 
b/workflow-generator/fillFastqFile.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python2
+import argparse
+prefix = "http://workflow.isi.edu/SoyKB/sample-inputs-3/"
+fastq_data = [
+    "USB-001_1.fastq", "USB-001_2.fastq", "USB-002_1.fastq", "USB-002_2.fastq", "USB-003_1.fastq", "USB-003_2.fastq", "USB-004_1.fastq", "USB-004_2.fastq", "USB-005_1.fastq", "USB-005_2.fastq", "USB-006_1.fastq", "USB-006_2.fastq", "USB-007_1.fastq", "USB-007_2.fastq", "USB-008_1.fastq", "USB-008_2.fastq", "USB-009_1.fastq", "USB-009_2.fastq", "USB-010_1.fastq", "USB-010_2.fastq", "USB-011_1.fastq", "USB-011_2.fastq", "USB-012_1.fastq", "USB-012_2.fastq", "USB-013_1.fastq", "USB-013_2.fastq", "USB-014_1.fastq", "USB-014_2.fastq", "USB-015_1.fastq", "USB-015_2.fastq", "USB-016_1.fastq", "USB-016_2.fastq", "USB-017_1.fastq", "USB-017_2.fastq", "USB-018_1.fastq", "USB-018_2.fastq", "USB-019_1.fastq", "USB-019_2.fastq", "USB-020_1.fastq", "USB-020_2.fastq", "USB-021_1.fastq", "USB-021_2.fastq", "USB-022_1.fastq", "USB-022_2.fastq", "USB-023_1.fastq", "USB-023_2.fastq", "USB-024_1.fastq", "USB-024_2.fastq", "USB-025_1.fastq", "USB-025_2.fastq"
+]
+
+parser = argparse.ArgumentParser()
+parser.add_argument('-p', '--path', help='path to inputs-fastq.txt file', default="inputs-fastq.txt")
+parser.add_argument('-s', '--size', help='size to fill inputs-fastq.txt with. Should be divisible by 2 for the workflow to work. Max available value: {0}'.format(len(fastq_data)), default="2")
+args = parser.parse_args()
+
+
+lines = [prefix + fastq_data[i] for i in range(int(args.size))]
+
+with open(args.path, "w") as file:
+    file.writelines('\n'.join(lines))
+    file.close()
\ No newline at end of file
diff --git a/workflow-generator/generate-workflow b/workflow-generator/generate-workflow
new file mode 100755
index 0000000..d8df3dc
--- /dev/null
+++ b/workflow-generator/generate-workflow
@@ -0,0 +1,17 @@
+#!/bin/sh
+if [ $# -eq 0 ]; then
+    echo -e "Usage: generate-workflow <size>\nSize should be divisible by 2 for this configuration of soykb."
+    exit 1
+fi
+if [ -d "/workdir" ]; then
+    echo "Installing config files in /workdir..."
+else + exit 1 +fi + +python2 fillFastqFile.py -p inputs-fastq.txt -s $1 +python2 workflow-generator --exec-env tacc-stampede && hflow-convert-dax data/soykb.dax > data/workflow.json +python2 editWorkflow.py -p data/workflow.json -n soykb -v 1.0.0 +cp inputs-fastq.txt data/ + +cp -r data /workdir/ diff --git a/workflow-generator/inputs-fastq.txt b/workflow-generator/inputs-fastq.txt new file mode 100644 index 0000000..b6e7328 --- /dev/null +++ b/workflow-generator/inputs-fastq.txt @@ -0,0 +1,2 @@ +http://workflow.isi.edu/SoyKB/sample-inputs-3/USB-001_1.fastq +http://workflow.isi.edu/SoyKB/sample-inputs-3/USB-001_2.fastq \ No newline at end of file diff --git a/workflow-generator/inputs-ref.txt b/workflow-generator/inputs-ref.txt new file mode 100644 index 0000000..10c6e9f --- /dev/null +++ b/workflow-generator/inputs-ref.txt @@ -0,0 +1 @@ +http://workflow.isi.edu/SoyKB/ref/Gmax_275_v2.0.fa diff --git a/workflow-generator/workflow-generator b/workflow-generator/workflow-generator new file mode 100755 index 0000000..262344b --- /dev/null +++ b/workflow-generator/workflow-generator @@ -0,0 +1,1037 @@ +#!/usr/bin/env python + +from Pegasus.AutoADAG import * +import ConfigParser +from Pegasus.DAX3 import * +import getpass +import logging +import math +import optparse +import os +import re +import socket +import string +import subprocess +import sys +import time + + + +# to setup python lib dir for importing Pegasus PYTHON DAX API +# pegasus_config = os.path.join("pegasus-config") + " --noeoln --python" +# lib_dir = subprocess.Popen(pegasus_config, + # stdout=subprocess.PIPE, + # shell=True).communicate()[0] +#Insert this directory in our search path +# os.sys.path.insert(0, lib_dir) + + +# --- global variables ---------------------------------------------------------------- + +logger = logging.getLogger("my_logger") +conf = None +added_execs = [] + + +# --- classes ------------------------------------------------------------------------- + +class ComputeJob(Job): + """ A Pegasus DAX Job with extra information such as cpu and memory + requirements, for both single and peagaus-mpi-cluster execution + """ + + def __init__(self, name, cores=1, mem_gb=2, partition="part1"): + Job.__init__(self, name=name) + + # label based clustering + self.addProfile(Profile(Namespace.PEGASUS, + key="label", + value=partition)) + + # standard resource requirements for all jobs + mem_mb = mem_gb * 1000 + self.addProfile(Profile(Namespace.CONDOR, + key="request_cpus", + value=str(cores))) + self.addProfile(Profile(Namespace.PEGASUS, + key="pmc_request_cpus", + value=str(cores))) + self.addProfile(Profile(Namespace.CONDOR, + key="request_memory", + value=str(mem_mb))) + self.addProfile(Profile(Namespace.PEGASUS, + key="pmc_request_memory", + value=str(mem_mb))) + self.addProfile(Profile(Namespace.CONDOR, + key="request_disk", + value=str(20*1024*1024))) + self.addProfile(Profile(Namespace.GLOBUS, + key="totalmemory", + value=str(mem_mb))) + + # special sauce for TACC - we want smaller jobs to go to the normal + # compute nodes and the large memory ones to go to the large memory + # nodes + if re.search('stampede', conf.get("local", "exec_env")): + hosts = conf.get("exec_environment", "hosts_" + partition) + cores = str(16 * int(hosts)) + self.addProfile(Profile(Namespace.GLOBUS, + key="queue", + value="normal")) + self.addProfile(Profile(Namespace.GLOBUS, + key="hostcount", + value=hosts)) + self.addProfile(Profile(Namespace.GLOBUS, + key="count", + value=cores)) + self.addProfile(Profile(Namespace.ENV, + 
key="PMC_HOST_MEMORY", + value="29000")) + + # let the GATK jobs know how much memory to use (requested - 2GB for Java) + #gatk_memory = mem_gb - 2 + + # required for the Pegasus accounting + self.addProfile(Profile(Namespace.PEGASUS, + key="cores", + value=str(cores))) + + + +# --- functions ----------------------------------------------------------------------- + + +def setup_logger(verbose): + """ Use a console logger for all output to the user """ + + # log to the console + console = logging.StreamHandler() + + # default log level - make logger/console match + logger.setLevel(logging.INFO) + console.setLevel(logging.INFO) + + if verbose: + logger.setLevel(logging.DEBUG) + console.setLevel(logging.DEBUG) + + # formatter + formatter = logging.Formatter("%(asctime)s %(levelname)7s: %(message)s") + console.setFormatter(formatter) + logger.addHandler(console) + logger.debug("Logger has been configured") + + +def myexec(cmd_line): + """ Convenience function as we are shelling out a fair amount """ + + sys.stdout.flush() + p = subprocess.Popen(cmd_line + " 2>&1", shell=True) + stdoutdata, stderrdata = p.communicate() + r = p.returncode + if r != 0: + raise RuntimeError("Command '%s' failed with error code %s" \ + % (cmd_line, r)) + + +def proxy_check(): + """ Verify that the user has a proxy and it is valid for a long time """ + p = subprocess.Popen("grid-proxy-info -timeleft", shell=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + stdoutdata, stderrdata = p.communicate() + r = p.returncode + if r != 0: + logger.error(stderrdata) + raise RuntimeError("Unable to run the grid-proxy-info command." + \ + "Do you have a valid proxy?") + timeleft = int(stdoutdata) + + # two weeks minimum lifetime + if timeleft < (60*60*24*10): + raise RuntimeError("There is not enough time left on your grid" + + " proxy. Please renew, then run this command" + + " again") + + +def generate_site_catalog(): + """ Uses a templete file to produce the Pegasus site catalog """ + + logger.info("Generating sites.catalog") + inf = open("conf/" + conf.get("local", "exec_env") + + "/sites.catalog.template", 'r') + template = string.Template(inf.read()) + inf.close() + + outf = open(conf.get("local", "work_dir") + "/sites.catalog", "w") + outf.write(template.substitute( + submit_host = socket.gethostname(), + username = getpass.getuser(), + home = os.path.expanduser('~'), + top_dir = conf.get("local", "top_dir"), + work_dir = conf.get("local", "work_dir"), + pegasus_bin = conf.get("local", "pegasus_bin"), + irods_bin = conf.get("local", "irods_bin"), + tacc_allocation = conf.get("tacc", "allocation"), + tacc_username = conf.get("tacc", "username"), + tacc_storage_group = conf.get("tacc", "storage_group"), + )) + outf.close() + + +def read_input_lists(ref_urls, chromosomes, fastq_urls): + """ The user provides a list of reference file URLs and pairs of fastq + URLs to be processed. 
+    """
+
+    # first the reference
+    inf = open(conf.get("local", "top_dir") + "/inputs-ref.txt", "r")
+    for line in inf.readlines():
+        line = line.rstrip('\n')
+        if len(line) > 0:
+            ref_urls.append(line)
+    inf.close()
+
+    # chromosomes
+    inf = open(conf.get("local", "top_dir") + "/chromosomes.txt", "r")
+    for line in inf.readlines():
+        line = line.rstrip('\n')
+        line = re.sub("^> *", "", line)
+        line = re.sub(" .*", "", line)
+        if len(line) > 0:
+            logger.info(" Added chromosome: " + line)
+            chromosomes.append(line)
+    inf.close()
+
+    inf = open(conf.get("local", "top_dir") + "/inputs-fastq.txt", "r")
+    for line in inf.readlines():
+        line = line.rstrip('\n')
+        if len(line) > 0:
+            fastq_urls.append(line)
+    inf.close()
+
+    # sanity checks
+    if len(ref_urls) != 1:
+        logger.error("Only one reference genome can be listed in inputs-ref.txt")
+        sys.exit(1)
+    if len(fastq_urls) == 0:
+        logger.error("Did not find fastq files")
+        sys.exit(1)
+    if conf.get("main", "inputs-style") == "pair-end" and len(fastq_urls) % 2 != 0:
+        logger.error("Found an uneven number of fastq files in input list")
+        sys.exit(1)
+
+
+def extract_lfn(url):
+    """ determine a logical file name (basename) from a given URL """
+    return re.sub(".*/", "", url)
+
+
+def local_pfn(path):
+    """ generates a full pfn given a local path """
+    pfn = PFN("scp://" + getpass.getuser() + "@" + socket.gethostname() + "/" + path, "local")
+    if re.search('rc4', conf.get("local", "exec_env")):
+        pfn = PFN("file://" + path, "execution")
+    return pfn
+
+
+def extract_fasta_basefile(file_list):
+    """ find the base fasta file given a list of reference files """
+    for f in file_list:
+        if re.search("(.fa|.fasta)$", f.name):
+            return f
+
+
+def extract_sample_name(url):
+    """ sample name is the first part of the base name (for example:
+        HN001 from HN001_FCD1P1JACXX_L6_SZAIPI024836-36_1.fq)
+    """
+    name = re.sub(".*/", "", url)
+    name = re.sub("_.*", "", name)
+    name = re.sub("\..*", "", name)
+    return name
+
+
+def add_executable(dax, logical_name, wrapper_name):
+    """ adds executables to the DAX-level replica catalog """
+    global added_execs
+
+    if logical_name in added_execs:
+        return
+
+    wrapper = Executable(name=logical_name,
+                         arch="x86_64",
+                         installed=False)
+    wrapper.addPFN(local_pfn(conf.get("local", "top_dir") + "/wrappers/" + wrapper_name))
+    dax.addExecutable(wrapper)
+
+    added_execs.append(logical_name)
+
+
+def gunzip_job(dax, software_tar, f_in, f_out, parent_jobs=None):
+    """ adds a job to gunzip an input file
+    """
+    add_executable(dax, "gunzip", "gunzip-wrapper")
+    j = ComputeJob("gunzip", cores = 1, mem_gb = 2, partition = "part1")
+    j.uses(software_tar, link=Link.INPUT)
+    j.uses(f_in, link=Link.INPUT)
+    j.uses(f_out, link=Link.OUTPUT, transfer=False)
+    j.addArguments(f_in, f_out)
+    dax.addJob(j)
+    if parent_jobs is not None:
+        for parent in parent_jobs:
+            dax.depends(parent=parent, child=j)
+
+
+def prepare_ref_genome(dax, software_job, software_tar, ref_url, ref_files):
+
+    # temp list of files we need to keep track of
+    files = {}
+
+    lfn = extract_lfn(ref_url)
+    lfn_base = re.sub("\.[a-zA-Z0-9]+$", "", lfn)
+    lfn_ext = re.sub(".*\.", "", lfn)
+
+    j1 = None
+    if lfn_ext == "gz":
+        # add a job to gunzip the input
+        in_f = File(lfn)
+        in_f.addPFN(PFN(ref_url, "irods_iplant"))
+        dax.addFile(in_f)
+
+        fa_f = File(lfn_base + ".fa")
+        gunzip_job(dax, software_tar, in_f, fa_f, [software_job])
+
+        # update the lfn for subsequent jobs
+        lfn = lfn_base + ".fa"
+        lfn_ext = "fa"
+    elif lfn_ext == "fa":
+        # already a .fa file
+        fa_f = File(lfn)
+ fa_f.addPFN(PFN(ref_url, "irods_iplant")) + dax.addFile(fa_f) + else: + logger.error("Unable to handle reference genome with %s extension" %(lfn_ext)) + sys.exit(1) + files[fa_f.name] = fa_f + + # bwa index + add_executable(dax, "bwa-index", "bwa-wrapper") + j2 = ComputeJob("bwa-index", cores = 1, mem_gb = 4, partition = "part1") + j2.uses(software_tar, link=Link.INPUT) + j2.uses(fa_f, link=Link.INPUT) + for ext in [".amb", ".ann", ".bwt", ".pac", ".sa"]: + f = File(lfn + ext) + files[f.name] = f + j2.uses(f, link=Link.OUTPUT, transfer=False) + j2.addArguments("index", fa_f) + dax.addJob(j2) + dax.depends(parent=software_job, child=j2) + + # samtools faidx + add_executable(dax, "faidx", "samtools-wrapper") + j3 = ComputeJob("faidx", cores = 1, mem_gb = 4, partition = "part1") + j3.uses(software_tar, link=Link.INPUT) + j3.uses(fa_f, link=Link.INPUT) + f = File(lfn_base + ".fa.fai") + files[f.name] = f + j3.uses(f, link=Link.OUTPUT, transfer=False) + j3.addArguments("faidx", fa_f) + dax.addJob(j3) + dax.depends(parent=software_job, child=j3) + + # picard sequence dictionary + add_executable(dax, "seq_dict", "picard-wrapper") + j4 = ComputeJob("seq_dict", cores = 1, mem_gb = 4, partition = "part1") + j4.uses(software_tar, link=Link.INPUT) + j4.uses(fa_f, link=Link.INPUT) + f = File(lfn_base + ".dict") + files[f.name] = f + j4.uses(f, link=Link.OUTPUT, transfer=False) + j4.addArguments("CreateSequenceDictionary.jar", + "REFERENCE=" + fa_f.name, + "OUTPUT=" + f.name) + dax.addJob(j4) + dax.depends(parent=software_job, child=j4) + + # populate the reference file list which the rest of the workflow needs + for key, f in files.iteritems(): + ref_files.append(f) + + +def alignment_to_reference(sample_name, dax, software_tar, chromosomes, ref_files, tracked_files): + + # Note that the cores we give Pegasus and the -t does not match. + # Oversubscriptions is ok, as bwa can not keep all the cores busy 100% + # of the time. 
+ add_executable(dax, "alignment_to_reference", "bwa-wrapper") + j = ComputeJob("alignment_to_reference", cores = 6, mem_gb = 8, + partition = "part1") + + # determine which is the fasta file + for f in ref_files: + j.uses(f, link=Link.INPUT) + j.uses(software_tar, link=Link.INPUT) + j.uses(tracked_files['sam'], link=Link.OUTPUT, transfer=False) + j.setStdout(tracked_files['sam']) + + if conf.get("main", "inputs-style") == "single-end": + # single-end inputs + j.uses(tracked_files['fastq_input'], link=Link.INPUT) + j.addArguments("mem", + "-t", "12", + "-M", extract_fasta_basefile(ref_files), + tracked_files['fastq_input']) + else: + # pair-end inputs + j.uses(tracked_files['paired_read1_fastq'], link=Link.INPUT) + j.uses(tracked_files['paired_read2_fastq'], link=Link.INPUT) + j.addArguments("mem", + "-t", "12", + "-M", extract_fasta_basefile(ref_files), + tracked_files['paired_read1_fastq'], + tracked_files['paired_read2_fastq']) + dax.addJob(j) + + # next step + sortsam_job = sort_sam(sample_name, dax, software_tar, chromosomes, ref_files, tracked_files) + + return j + + +def sort_sam(sample_name, dax, software_tar, chromosomes, ref_files, tracked_files): + + add_executable(dax, "sort_sam", "picard-wrapper") + j = ComputeJob("sort_sam", cores = 1, mem_gb = 16, partition = "part1") + j.uses(software_tar, link=Link.INPUT) + j.uses(tracked_files['sam'], link=Link.INPUT) + j.uses(tracked_files['sorted_reads'], link=Link.OUTPUT, transfer=False) + j.uses(tracked_files['sorted_index'], link=Link.OUTPUT, transfer=False) + + j.addArguments("SortSam.jar", + "CREATE_INDEX=TRUE", + "MAX_RECORDS_IN_RAM=5000000", + "I=" + tracked_files['sam'].name, + "O=" + tracked_files['sorted_reads'].name, + "SO=coordinate", + "VALIDATION_STRINGENCY=LENIENT") + dax.addJob(j) + + dedup_job = dedup(sample_name, dax, software_tar, chromosomes, ref_files, tracked_files) + + + +def dedup(sample_name, dax, software_tar, chromosomes, ref_files, tracked_files): + add_executable(dax, "dedup", "picard-wrapper") + j = ComputeJob("dedup", cores = 1, mem_gb = 16, + partition = "part1") + j.uses(software_tar, link=Link.INPUT) + j.uses(tracked_files['sorted_reads'], link=Link.INPUT) + j.uses(tracked_files['sorted_index'], link=Link.INPUT) + j.uses(tracked_files['deduped_reads'], link=Link.OUTPUT, transfer=False) + j.uses(tracked_files['deduped_index'], link=Link.OUTPUT, transfer=False) + #j.uses(tracked_files['deduped_metrics'], link=Link.OUTPUT, transfer=False) + + j.addArguments("MarkDuplicates.jar", + "CREATE_INDEX=TRUE", + "MAX_RECORDS_IN_RAM=5000000", + "I=" + tracked_files['sorted_reads'].name, + "O=" + tracked_files['deduped_reads'].name, + "METRICS_FILE=" + tracked_files['deduped_metrics'].name, + "VALIDATION_STRINGENCY=LENIENT") + + dax.addJob(j) + + add_replace(sample_name, dax, software_tar, chromosomes, ref_files, tracked_files) + + +def add_replace(sample_name, dax, software_tar, chromosomes, ref_files, tracked_files): + add_executable(dax, "add_replace", "picard-wrapper") + j = ComputeJob("add_replace", cores = 1, mem_gb = 16, + partition = "part1") + j.uses(software_tar, link=Link.INPUT) + j.uses(tracked_files['deduped_reads'], link=Link.INPUT) + j.uses(tracked_files['deduped_index'], link=Link.INPUT) + j.uses(tracked_files['addrepl_reads'], link=Link.OUTPUT, transfer=True) + j.uses(tracked_files['addrepl_index'], link=Link.OUTPUT, transfer=True) + + j.addArguments("AddOrReplaceReadGroups.jar", + "MAX_RECORDS_IN_RAM=5000000", + "I=" + tracked_files['deduped_reads'].name, + "O=" + 
tracked_files['addrepl_reads'].name, + "RGID=" + sample_name, + "LB=" + sample_name, + "PL=Illumina", + "SM=" + sample_name, + "CN=BGI", + "RGPU=" + sample_name, + "VALIDATION_STRINGENCY=LENIENT", + "SORT_ORDER=coordinate", + "CREATE_INDEX=TRUE") + dax.addJob(j) + + realign_target_creator(sample_name, dax, software_tar, chromosomes, ref_files, tracked_files) + + +def realign_target_creator(sample_name, dax, software_tar, chromosomes, ref_files, tracked_files): + add_executable(dax, "realign_target_creator", "gatk-wrapper") + j = ComputeJob("realign_target_creator", cores = 15, mem_gb = 10, + partition = "part1") + j.uses(software_tar, link=Link.INPUT) + for f in ref_files: + j.uses(f, link=Link.INPUT) + j.uses(tracked_files['addrepl_reads'], link=Link.INPUT) + j.uses(tracked_files['addrepl_index'], link=Link.INPUT) + j.uses(tracked_files['intervals'], link=Link.OUTPUT, transfer=False) + + j.addArguments("10", # memory + "-T", "RealignerTargetCreator", + "-nt", "15", + "-R", extract_fasta_basefile(ref_files), + "-I", tracked_files['addrepl_reads'], + "-o", tracked_files['intervals']) + dax.addJob(j) + + indel_realign(sample_name, dax, software_tar, chromosomes, ref_files, tracked_files) + + +def indel_realign(sample_name, dax, software_tar, chromosomes, ref_files, tracked_files): + # IndelRealigner can only be run single threaded + add_executable(dax, "indel_realign", "gatk-wrapper") + j = ComputeJob("indel_realign", cores = 1, mem_gb = 10, + partition = "part1") + j.uses(software_tar, link=Link.INPUT) + for f in ref_files: + j.uses(f, link=Link.INPUT) + j.uses(tracked_files['addrepl_reads'], link=Link.INPUT) + j.uses(tracked_files['addrepl_index'], link=Link.INPUT) + j.uses(tracked_files['intervals'], link=Link.INPUT) + j.uses(tracked_files['indel_realigned_reads'], link=Link.OUTPUT, transfer=True) + j.uses(tracked_files['indel_realigned_index'], link=Link.OUTPUT, transfer=True) + + j.addArguments("10", # memory + "-T", "IndelRealigner", + "-R", extract_fasta_basefile(ref_files), + "-I", tracked_files['addrepl_reads'], + "-targetIntervals", tracked_files['intervals'], + "-o", tracked_files['indel_realigned_reads']) + dax.addJob(j) + + for chr in chromosomes: + haplotype_caller(sample_name, dax, software_tar, + ref_files, tracked_files, chr) + + +def select_and_filter_snp(dax, software_file, ref_files, tracked_files, + in_file, out_file, out_idx): + + # we need an intermediate file + intername = re.sub(".*/", "", in_file.name) + "_snp_only.vcf" + tracked_files[intername] = File(intername) + + add_executable(dax, "select_variants_snp", "gatk-wrapper") + j = ComputeJob("select_variants_snp", cores = 14, mem_gb = 10, + partition = "part3") + + # inputs + j.uses(software_file, link=Link.INPUT) + for f in ref_files: + j.uses(f, link=Link.INPUT) + j.uses(in_file, link=Link.INPUT) + + # outputs + j.uses(tracked_files[intername], link=Link.OUTPUT, transfer=False) + + j.addArguments("10", # memory + "-T", "SelectVariants", + "-nt", "15", + "-R", extract_fasta_basefile(ref_files), + "-selectType", "SNP", + "-V", in_file, + "-o", tracked_files[intername]) + + dax.addJob(j) + + add_executable(dax, "filtering_snp", "gatk-wrapper") + j = ComputeJob("filtering_snp", cores = 1, mem_gb = 10, + partition = "part3") + + # inputs + j.uses(software_file, link=Link.INPUT) + for f in ref_files: + j.uses(f, link=Link.INPUT) + j.uses(tracked_files[intername], link=Link.INPUT) + + # outputs + j.uses(out_file, link=Link.OUTPUT, transfer=True) + j.uses(out_idx, link=Link.OUTPUT, transfer=True) + + j.addArguments("10", 
# memory + "-T", "VariantFiltration", + "-R", extract_fasta_basefile(ref_files), + "-V", tracked_files[intername], + "--filterExpression", "'" + conf.get("main", "snp_filter") + "'", + "--filterName", "my_snp_filter", + "-o", out_file) + + dax.addJob(j) + + +def select_and_filter_indel(dax, software_file, ref_files, tracked_files, + in_file, out_file, out_idx): + + # we need an intermediate file + intername = re.sub(".*/", "", in_file.name) + "_indel_only.vcf" + tracked_files[intername] = File(intername) + + + add_executable(dax, "select_variants_indel", "gatk-wrapper") + j = ComputeJob("select_variants_indel", cores = 14, mem_gb = 10, + partition = "part3") + + # inputs + j.uses(software_file, link=Link.INPUT) + for f in ref_files: + j.uses(f, link=Link.INPUT) + j.uses(in_file, link=Link.INPUT) + + # outputs + j.uses(tracked_files[intername], link=Link.OUTPUT, transfer=False) + + j.addArguments("10", # memory + "-T", "SelectVariants", + "-nt", "15", + "-R", extract_fasta_basefile(ref_files), + "-selectType", "INDEL", + "-V", in_file, + "-o", tracked_files[intername]) + + dax.addJob(j) + + add_executable(dax, "filtering_indel", "gatk-wrapper") + j = ComputeJob("filtering_indel", cores = 1, mem_gb = 10, + partition = "part3") + + # inputs + j.uses(software_file, link=Link.INPUT) + for f in ref_files: + j.uses(f, link=Link.INPUT) + j.uses(tracked_files[intername], link=Link.INPUT) + + # outputs + j.uses(out_file, link=Link.OUTPUT, transfer=True) + j.uses(out_idx, link=Link.OUTPUT, transfer=True) + + j.addArguments("10", # memory + "-T", "VariantFiltration", + "-R", extract_fasta_basefile(ref_files), + "-V", tracked_files[intername], + "--filterExpression", "'" + conf.get("main", "indel_filter") + "'", + "--filterName", "my_indel_filter", + "-o", out_file) + + dax.addJob(j) + + +def haplotype_caller(sample_name, dax, software_file, ref_files, tracked_files, + chromosome): + + add_executable(dax, "haplotype_caller", "gatk-wrapper") + j = ComputeJob("haplotype_caller", cores = 1, mem_gb = 3, partition = "part2") + + # inputs + j.uses(software_file, link=Link.INPUT) + for f in ref_files: + j.uses(f, link=Link.INPUT) + j.uses(tracked_files['indel_realigned_reads'], link=Link.INPUT) + j.uses(tracked_files['indel_realigned_index'], link=Link.INPUT) + + # outputs + fname = conf.get("local", "run_id") + "-" + sample_name + "_" + chromosome + ".vcf" + tracked_files[fname] = File(fname) + j.uses(tracked_files[fname], link=Link.OUTPUT, transfer=False) + tracked_files[fname + ".idx"] = File(fname + ".idx") + j.uses(tracked_files[fname + ".idx"], link=Link.OUTPUT, transfer=False) + + j.addArguments("4", # memory + "-T", "HaplotypeCaller", + "--emitRefConfidence", "GVCF", + "--variant_index_type", "LINEAR", + "--variant_index_parameter", "128000", + "-L", chromosome, + "-R", extract_fasta_basefile(ref_files), + "-I", tracked_files['indel_realigned_reads'], + "-o", tracked_files[fname]) + dax.addJob(j) + + +def merge_gvcf(dax, software_file, chromosomes, ref_files, tracked_files, sample_names): + + # memory and cores based on what system we are targetting + cores = 1 + mem_gb = 20 + if re.search('wrangler', conf.get("local", "exec_env")): + mem_gb = 80 + + add_executable(dax, "merge_gcvf", "gatk-wrapper") + j = ComputeJob("merge_gcvf", cores = cores, mem_gb = mem_gb, partition = "part3") + + # inputs + files = [] + j.uses(software_file, link=Link.INPUT) + for f in ref_files: + j.uses(f, link=Link.INPUT) + for s in sample_names: + for chr in chromosomes: + fname = "%s-%s_%s.vcf" % (conf.get("local", 
"run_id"), s, chr) + j.uses(tracked_files[fname + ".idx"], link=Link.INPUT) + j.uses(tracked_files[fname], link=Link.INPUT) + files.append(fname) + + # create filelist to minimize the length of the command line + fd = open(conf.get("local", "work_dir") + "/haplotype-files.list", "w") + for f in files: + fd.write("%s\n" %(f)) + fd.close() + hf = File("haplotype-files.list") + hf.addPFN(local_pfn(conf.get("local", "work_dir") + "/haplotype-files.list")) + dax.addFile(hf) + j.uses("haplotype-files.list", link=Link.INPUT) + + # outputs + fname = conf.get("local", "run_id") + "-mergeGVCF.vcf" + tracked_files[fname] = File(fname) + j.uses(tracked_files[fname], link=Link.OUTPUT, transfer=True) + tracked_files[fname + ".idx"] = File(fname + ".idx") + j.uses(tracked_files[fname + ".idx"], link=Link.OUTPUT, transfer=True) + + j.addArguments(str(mem_gb), # first argument is memory + "-T", "CombineGVCFs", + "-R", extract_fasta_basefile(ref_files), + "-o", tracked_files[fname], + "--variant", "haplotype-files.list") + + dax.addJob(j) + + +def genotype_gvcfs(dax, software_file, ref_files, tracked_files, sample_names, + chromosome): + + add_executable(dax, "genotype_gvcfs", "gatk-wrapper") + j = ComputeJob("genotype_gvcfs", cores = 1, mem_gb = 10, + partition = "part2") + + # inputs + variant_files = [] + j.uses(software_file, link=Link.INPUT) + for f in ref_files: + j.uses(f, link=Link.INPUT) + for sname in sample_names: + fname = conf.get("local", "run_id") + "-" + sname + "_" + chromosome + ".vcf" + f = tracked_files[fname] + j.uses(f, link=Link.INPUT) + j.uses(tracked_files[fname + ".idx"], link=Link.INPUT) + variant_files.append(f) + + # outputs + fname = conf.get("local", "run_id") + "-" + "GVCF_" + chromosome + ".vcf" + tracked_files[fname] = File(fname) + j.uses(tracked_files[fname], link=Link.OUTPUT, transfer=False) + tracked_files[fname + ".idx"] = File(fname + ".idx") + j.uses(tracked_files[fname + ".idx"], link=Link.OUTPUT, transfer=False) + + j.addArguments("10", # memory + "-T", "GenotypeGVCFs", + "-R", extract_fasta_basefile(ref_files), + "-o", tracked_files[fname], + "-L", chromosome) + for f in variant_files: + j.addArguments("--variant", f) + + dax.addJob(j) + + +def combine_variants(dax, software_file, chromosomes, ref_files, tracked_files): + + add_executable(dax, "combine_variants", "gatk-wrapper") + j = ComputeJob("combine_variants", cores = 1, mem_gb = 10, + partition = "part3") + + # inputs + j.uses(software_file, link=Link.INPUT) + for f in ref_files: + j.uses(f, link=Link.INPUT) + for chr in chromosomes: + fname = conf.get("local", "run_id") + "-" + "GVCF_%s.vcf" % (chr) + j.uses(tracked_files[fname], link=Link.INPUT) + j.uses(tracked_files[fname + ".idx"], link=Link.INPUT) + + # outputs + fname = conf.get("local", "run_id") + "-" + "All.vcf" + tracked_files[fname] = File(fname) + j.uses(tracked_files[fname], link=Link.OUTPUT, transfer=True) + tracked_files[fname + ".idx"] = File(fname + ".idx") + j.uses(tracked_files[fname + ".idx"], link=Link.OUTPUT, transfer=True) + + j.addArguments("10", # memory + "-T", "CombineVariants", + "--genotypemergeoption", "UNIQUIFY", + "-R", extract_fasta_basefile(ref_files), + "-o", tracked_files[fname]) + for chr in chromosomes: + fname = conf.get("local", "run_id") + "-" + "GVCF_%s.vcf" % (chr) + j.addArguments("--variant", tracked_files[fname]) + + dax.addJob(j) + + # filter the results + tracked_files[conf.get("local", "run_id") + "-" + 'All_filtered_snp.vcf'] = \ + File(conf.get("local", "run_id") + "-" + "All_filtered_snp.vcf") + 
tracked_files[conf.get("local", "run_id") + "-" + 'All_filtered_snp.vcf.idx'] = \ + File(conf.get("local", "run_id") + "-" + "All_filtered_snp.vcf.idx") + select_and_filter_snp(dax, software_file, ref_files, tracked_files, + tracked_files[conf.get("local", "run_id") + "-" + 'All.vcf'], + tracked_files[conf.get("local", "run_id") + "-" + 'All_filtered_snp.vcf'], + tracked_files[conf.get("local", "run_id") + "-" + 'All_filtered_snp.vcf.idx']) + + tracked_files[conf.get("local", "run_id") + "-" + 'All_filtered_indel.vcf'] = \ + File(conf.get("local", "run_id") + "-" + "All_filtered_indel.vcf") + tracked_files[conf.get("local", "run_id") + "-" + 'All_filtered_indel.vcf.idx'] = \ + File(conf.get("local", "run_id") + "-" + "All_filtered_indel.vcf.idx") + select_and_filter_indel(dax, software_file, ref_files, tracked_files, + tracked_files[conf.get("local", "run_id") + "-" + 'All.vcf'], + tracked_files[conf.get("local", "run_id") + "-" + 'All_filtered_indel.vcf'], + tracked_files[conf.get("local", "run_id") + "-" + 'All_filtered_indel.vcf.idx']) + + +def generate_dax(): + """ generates the Pegasus DAX (directed acyclic graph - abstract XML) + which is a description of a workflow """ + + logger.info("Generating abstract workflow (DAX)") + + dax = AutoADAG("soykb") + + # The key to adding jobs to this workflow is the AutoADAG - it allows you + # to add jobs with listed input and output files, and then the AutoADAG + # will figure out the relationships between the jobs. There is no need + # to list parent/child relationships, but you can do that if you feel it + # makes the relationships more clear than just specifying the + # inputs/outputs. + + # email notificiations for when the state of the workflow changes + dax.invoke('all', conf.get("local", "top_dir") + "/email-notify") + + ref_urls = [] + chromosomes = [] + fastq_urls = [] + read_input_lists(ref_urls, chromosomes, fastq_urls) + + # determine how many TACC compute nodes we need + num_inputs_in_set = min(len(fastq_urls) / 2, 100) + conf.set("exec_environment", "hosts_part1", str( (num_inputs_in_set // 16 + 1) * 4 )) + conf.set("exec_environment", "hosts_part2", str( (num_inputs_in_set // 16 + 1) * 4 )) + conf.set("exec_environment", "hosts_part3", str( 1 )) + + # we need to bring a copy of the software with us + software_tar = File("software.tar.gz") + software_tar.addPFN(local_pfn(conf.get("local", "work_dir") + "/software.tar.gz")) + dax.addFile(software_tar) + add_executable(dax, "software-wrapper", "software-wrapper") + software_job = ComputeJob("software-wrapper", cores=1, mem_gb=1) + software_job.uses(software_tar, link=Link.INPUT) + dax.addJob(software_job) + + # we need to track files across jobs + tracked_files = {} + sample_names = [] + + # reference genome - add some jobs to prepare reference genome + ref_files = [] + prepare_ref_genome(dax, software_job, software_tar, ref_urls[0], ref_files) + + lane_count = len(fastq_urls) + if conf.get("main", "inputs-style") == "pair-end": + lane_count = len(fastq_urls) / 2 + + for lane in range(lane_count): + + # input files for this lane + if conf.get("main", "inputs-style") == "single-end": + lfn = extract_lfn(fastq_urls[lane]) + if re.search("\.gz$", lfn): + f_gz = File(extract_lfn(fastq_urls[lane])) + f_gz.addPFN(PFN(fastq_urls[lane], "irods_iplant")) + dax.addFile(f_gz) + fa_name = extract_lfn(fastq_urls[lane]) + fa_name = re.sub("\..*", ".fa", fa_name) + f_fa = File(fa_name) + gunzip_job(dax, software_tar, f_gz, f_fa, [software_job]) + else: + f_fa = File(extract_lfn(fastq_urls[lane])) + 
f_fa.addPFN(PFN(fastq_urls[lane], "irods_iplant")) + dax.addFile(f_fa) + tracked_files['fastq_input'] = f_fa + sample_name = extract_sample_name(tracked_files['fastq_input'].name) + else: + tracked_files['paired_read1_fastq'] = File(extract_lfn(fastq_urls[lane * 2])) + tracked_files['paired_read1_fastq'].addPFN(PFN(fastq_urls[lane * 2], "irods_iplant")) + dax.addFile(tracked_files['paired_read1_fastq']) + + tracked_files['paired_read2_fastq'] = File(extract_lfn(fastq_urls[lane * 2 + 1])) + tracked_files['paired_read2_fastq'].addPFN(PFN(fastq_urls[lane * 2 + 1], "irods_iplant")) + dax.addFile(tracked_files['paired_read2_fastq']) + + sample_name = extract_sample_name(tracked_files['paired_read1_fastq'].name) + + # files we need to track + tracked_files['sam'] = File(conf.get("local", "run_id") + "-" + sample_name + "_aligned_reads.sam") + tracked_files['sorted_reads'] = File(conf.get("local", "run_id") + "-" + sample_name + "_sorted_reads.bam") + tracked_files['sorted_index'] = File(conf.get("local", "run_id") + "-" + sample_name + "_sorted_reads.bai") + tracked_files['deduped_reads'] = File(conf.get("local", "run_id") + "-" + sample_name + "_deduped_reads.bam") + tracked_files['deduped_index'] = File(conf.get("local", "run_id") + "-" + sample_name + "_deduped_reads.bai") + tracked_files['deduped_metrics'] = File(conf.get("local", "run_id") + "-" + sample_name + "_deduped.metrics") + tracked_files['addrepl_reads'] = File(conf.get("local", "run_id") + "-" + sample_name + "_addrepl.bam") + tracked_files['addrepl_index'] = File(conf.get("local", "run_id") + "-" + sample_name + "_addrepl.bai") + tracked_files['intervals'] = File(conf.get("local", "run_id") + "-" + sample_name + "_intervals.list") + tracked_files['indel_realigned_reads'] = File(conf.get("local", "run_id") + "-" + sample_name + "_indel_realigned.bam") + tracked_files['indel_realigned_index'] = File(conf.get("local", "run_id") + "-" + sample_name + "_indel_realigned.bai") + + # Step 1 - dependent jobs are now added in the parent jobs + align_job = alignment_to_reference(sample_name, + dax, + software_tar, + chromosomes, + ref_files, + tracked_files) + dax.depends(parent=software_job, child=align_job) + + # keep a list of samples for the GenotypeGVCFs call + sample_names.append(sample_name) + + # combine all haplotype_caller outputs into one merged file for output + merge_gvcf(dax, software_tar, chromosomes, ref_files, tracked_files, sample_names) + + # run genotype_gvcfs per chromosome + for chr in chromosomes: + genotype_gvcfs(dax, software_tar, ref_files, tracked_files, + sample_names, chr) + + combine_variants(dax, software_tar, chromosomes, ref_files, tracked_files) + + # write out the dax + dax_file = open(conf.get("local", "work_dir") + "/soykb.dax", "w") + dax.writeXML(dax_file) + dax_file.close() + + +def main(): + global conf + + setup_logger(True) + + # Configure command line option parser + prog_usage = "usage: workflow-generator [options]" + parser = optparse.OptionParser(usage=prog_usage) + + parser.add_option("-e", "--exec-env", action = "store", dest = "exec_env", + help = "Handle for the target execution environment.") + + # Parse command line options + (options, args) = parser.parse_args() + if options.exec_env == None: + logger.fatal("Please specify an execution environment with --exec-env") + sys.exit(1) + + # read the config file and add those settings to the option object + conf = ConfigParser.SafeConfigParser({'username': getpass.getuser()}) + r = conf.read(["conf/.soybean-workflow.conf", \ + "conf/main.conf", 
\
+                   "conf/%s/site.conf" % options.exec_env])
+    logger.debug(["conf/.soybean-workflow.conf", \
+                   "conf/main.conf", \
+                   "conf/%s/site.conf" % options.exec_env])
+    if len(r) != 3:
+        logger.fatal("Unable to read configuration files for that environment")
+        sys.exit(1)
+
+    if conf.get("main", "inputs-style") != "single-end" and \
+       conf.get("main", "inputs-style") != "pair-end":
+        logger.fatal("Valid choices for the main/inputs-style configuration are" + \
+                     " single-end or pair-end")
+        sys.exit(1)
+
+    conf.set("local", "username", getpass.getuser())
+    conf.set("local", "exec_env", options.exec_env)
+    conf.set("local", "top_dir", os.path.dirname(os.path.realpath( __file__ )))
+
+    # run id
+    conf.set("local", "run_id", time.strftime("%Y%m%d-%H%M%S", time.gmtime()))
+
+    # add the run id to the work dir
+    # conf.set("local", "work_dir", conf.get("local", "work_dir") + "/" +
+    #          conf.get("local", "run_id"))
+
+    # local Pegasus environment
+    # pegasus_config = os.path.join("pegasus-config") + " --noeoln --bin"
+    # pegasus_bin_dir = subprocess.Popen(pegasus_config,
+    #                                    stdout=subprocess.PIPE,
+    #                                    shell=True).communicate()[0]
+    # conf.set("local", "pegasus_bin", pegasus_bin_dir)
+    conf.set("local", "pegasus_bin", "pegasus_bin_dir")
+
+    # check proxy before doing anything else
+    #proxy_check()
+
+    # create a local work directory for the workflow
+    logger.info("Setting up work directory at %s" \
+                %(conf.get("local", "work_dir")))
+    if os.path.exists(conf.get("local", "work_dir")):
+        logger.fatal("Work directory already exists")
+        sys.exit(1)
+    os.makedirs(conf.get("local", "work_dir"))
+
+    # tar up the software
+    # logger.info("Tarring up software directory to send with jobs")
+    # myexec("tar czf " + conf.get("local", "work_dir") + \
+    #        "/software.tar.gz software")
+
+    generate_site_catalog()
+
+    # FIXME: what should we copy / keep in the top dir?
+    myexec("cp conf/" + conf.get("local", "exec_env") +
+           "/transformations.catalog " +
+           conf.get("local", "work_dir") + "/transformations.catalog")
+    myexec("cp conf/" + conf.get("local", "exec_env") +
+           "/replica.catalog " +
+           conf.get("local", "work_dir") + "/replica.catalog")
+
+    generate_dax()
+
+    # submit
+    logger.info("Planning workflow...")
+    os.chdir(conf.get("local", "work_dir"))
+    # cmd = "pegasus-plan " + \
+    #       " --conf " + conf.get("local", "top_dir") + \
+    #       "/conf/" + conf.get("local", "exec_env") + "/pegasus.conf" + \
+    #       " --dir ." + \
+    #       " --relative-dir wf-" + conf.get("local", "run_id") + \
+    #       " --sites execution"
+
+    # if conf.get("exec_environment", "output_site") != "":
+    #     cmd += " --output-site " + conf.get("exec_environment", "output_site")
+
+    # if conf.get("exec_environment", "staging_site") != "":
+    #     cmd += " --staging " + conf.get("exec_environment", "staging_site")
+
+    # if conf.get("exec_environment", "job_clustering") != "":
+    #     cmd += " --cluster " + conf.get("exec_environment", "job_clustering")
+
+    # cmd += " --dax soykb.dax" + \
+    #       " --submit"
+    # logger.info(cmd)
+    # myexec(cmd + " 2>&1 | tee pegasus-plan.out")
+
+
+if __name__ == "__main__":
+    main()
+
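For reference, the GATK steps in workflow-generator all pass the requested memory (in GB) as the first argument to the gatk-wrapper (see the "# memory" comments and the str(mem_gb) argument in merge_gvcf), alongside the request_memory / pmc_request_memory profiles that ComputeJob derives from the same figure. The following is a minimal sketch of that pattern using the Pegasus DAX3 API imported by the generator; the job name, file names, and the 10 GB value are illustrative assumptions, not taken from the repository (the real generator also uses AutoADAG("soykb") rather than a plain ADAG).

# Illustrative sketch only: one GATK-style job whose memory figure feeds both
# the Condor resource profile and the wrapper's first argument.
from Pegasus.DAX3 import ADAG, File, Job, Link, Namespace, Profile

dax = ADAG("soykb-sketch")   # plain ADAG used here for a self-contained example
mem_gb = 10                  # example figure, like the "10" literals in the jobs above

j = Job(name="realign_target_creator")
j.addProfile(Profile(Namespace.CONDOR, key="request_memory", value=str(mem_gb * 1000)))

reads = File("sample_addrepl.bam")        # hypothetical logical file names
intervals = File("sample_intervals.list")
j.uses(reads, link=Link.INPUT)
j.uses(intervals, link=Link.OUTPUT, transfer=False)

# first argument is the memory in GB, consumed by the gatk-wrapper
j.addArguments(str(mem_gb), "-T", "RealignerTargetCreator",
               "-I", reads, "-o", intervals)
dax.addJob(j)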