From 16fe5b8d1e3adb8435c43477ed1fee081a436a84 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 10 Nov 2014 14:46:27 -0500 Subject: [PATCH 001/221] Initial commit, proposed reference implementation. --- README.md | 9 +++ cwltool/__init__.py | 1 + cwltool/__main__.py | 3 + cwltool/main.py | 29 ++++++++ cwltool/ref_resolver.py | 154 +++++++++++++++++++++++++++++++++++++++ cwltool/schemas | 1 + cwltool/tool.py | 157 ++++++++++++++++++++++++++++++++++++++++ setup.py | 21 ++++++ tests/__init__.py | 0 9 files changed, 375 insertions(+) create mode 100644 README.md create mode 100644 cwltool/__init__.py create mode 100644 cwltool/__main__.py create mode 100644 cwltool/main.py create mode 100644 cwltool/ref_resolver.py create mode 120000 cwltool/schemas create mode 100644 cwltool/tool.py create mode 100644 setup.py create mode 100644 tests/__init__.py diff --git a/README.md b/README.md new file mode 100644 index 000000000..c104ff993 --- /dev/null +++ b/README.md @@ -0,0 +1,9 @@ +Install + +libv8-dev +libboost-python-dev +python setup.py + +Run + +python -mcwltool --tool [uri] --job-order [uri] diff --git a/cwltool/__init__.py b/cwltool/__init__.py new file mode 100644 index 000000000..70d587a69 --- /dev/null +++ b/cwltool/__init__.py @@ -0,0 +1 @@ +__author__ = 'peter.amstutz@curoverse.com' diff --git a/cwltool/__main__.py b/cwltool/__main__.py new file mode 100644 index 000000000..a952ff500 --- /dev/null +++ b/cwltool/__main__.py @@ -0,0 +1,3 @@ +import main + +main.main() diff --git a/cwltool/main.py b/cwltool/main.py new file mode 100644 index 000000000..3b37b7015 --- /dev/null +++ b/cwltool/main.py @@ -0,0 +1,29 @@ +import tool +import argparse +from ref_resolver import from_url +import jsonschema + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument("tool", type=str) + parser.add_argument("job_order", type=str) + parser.add_argument("-x", action="store_true", help="Execute") + + args = parser.parse_args() + + try: + t = tool.Tool(from_url(args.tool)) + except jsonschema.exceptions.ValidationError as e: + print "Tool definition failed validation" + print e + return + + try: + job = t.job(from_url(args.job_order)) + print '%s%s%s' % (' '.join(job.command_line), + ' < %s' % (job.stdin) if job.stdin else '', + ' > %s' % (job.stdout) if job.stdout else '') + except jsonschema.exceptions.ValidationError as e: + print "Job order failed validation" + print e + return diff --git a/cwltool/ref_resolver.py b/cwltool/ref_resolver.py new file mode 100644 index 000000000..106980a21 --- /dev/null +++ b/cwltool/ref_resolver.py @@ -0,0 +1,154 @@ +import os +import json +import yaml +import copy +import hashlib +import logging +import collections +import requests +import urlparse + +log = logging.getLogger(__name__) + + +class NormDict(dict): + def __init__(self, normalize=unicode): + super(NormDict, self).__init__() + self.normalize = normalize + + def __getitem__(self, key): + return super(NormDict, self).__getitem__(self.normalize(key)) + + def __setitem__(self, key, value): + return super(NormDict, self).__setitem__(self.normalize(key), value) + + def __delitem__(self, key): + return super(NormDict, self).__delitem__(self.normalize(key)) + + +class Loader(object): + def __init__(self): + normalize = lambda url: urlparse.urlsplit(url).geturl() + self.fetched = NormDict(normalize) + self.resolved = NormDict(normalize) + self.resolving = NormDict(normalize) + + def load(self, url, base_url=None): + base_url = base_url or 'file://%s/' % os.path.abspath('.') + return 
self.resolve_ref({'$ref': url}, base_url) + + def resolve_ref(self, obj, base_url): + ref, mixin, checksum = obj.pop('$ref', None), obj.pop('$mixin', None), obj.pop('$checksum', None) + ref = ref or mixin + url = urlparse.urljoin(base_url, ref) + if url in self.resolved: + return self.resolved[url] + if url in self.resolving: + raise RuntimeError('Circular reference for url %s' % url) + self.resolving[url] = True + doc_url, pointer = urlparse.urldefrag(url) + document = self.fetch(doc_url) + fragment = copy.deepcopy(resolve_pointer(document, pointer)) + try: + self.verify_checksum(checksum, fragment) + if isinstance(fragment, dict) and mixin: + fragment = dict(obj, **fragment) + result = self.resolve_all(fragment, doc_url) + finally: + del self.resolving[url] + return result + + def resolve_all(self, document, base_url): + if isinstance(document, list): + iterator = enumerate(document) + elif isinstance(document, dict): + if '$ref' in document or '$mixin' in document: + return self.resolve_ref(document, base_url) + iterator = document.iteritems() + else: + return document + for key, val in iterator: + document[key] = self.resolve_all(val, base_url) + return document + + def fetch(self, url): + if url in self.fetched: + return self.fetched[url] + split = urlparse.urlsplit(url) + scheme, path = split.scheme, split.path + + if scheme in ['http', 'https'] and requests: + resp = requests.get(url) + try: + resp.raise_for_status() + except Exception as e: + raise RuntimeError(url, cause=e) + result = resp.json() + elif scheme == 'file': + try: + with open(path) as fp: + result = yaml.load(fp) + except (OSError, IOError) as e: + raise RuntimeError('Failed for %s: %s' % (url, e)) + else: + raise ValueError('Unsupported scheme: %s' % scheme) + self.fetched[url] = result + return result + + def verify_checksum(self, checksum, document): + if not checksum: + return + hash_method, hexdigest = checksum.split('$') + if hexdigest != self.checksum(document, hash_method): + raise RuntimeError('Checksum does not match: %s' % checksum) + + def checksum(self, document, method='sha1'): + if method not in ('md5', 'sha1'): + raise NotImplementedError('Unsupported hash method: %s' % method) + normalized = json.dumps(document, sort_keys=True, separators=(',', ':')) + return getattr(hashlib, method)(normalized).hexdigest + + +POINTER_DEFAULT = object() + + +def resolve_pointer(document, pointer, default=POINTER_DEFAULT): + parts = urlparse.unquote(pointer.lstrip('/#')).split('/') \ + if pointer else [] + for part in parts: + if isinstance(document, collections.Sequence): + try: + part = int(part) + except ValueError: + pass + try: + document = document[part] + except: + if default != POINTER_DEFAULT: + return default + else: + raise ValueError('Unresolvable JSON pointer: %r' % pointer) + return document + + +loader = Loader() + + +def to_json(obj, fp=None): + default = lambda o: (o.__json__() if callable(getattr(o, '__json__', None)) + else str(o)) + kwargs = dict(default=default, indent=2, sort_keys=True) + return json.dump(obj, fp, **kwargs) if fp else json.dumps(obj, **kwargs) + + +def from_url(url, base_url=None): + return loader.load(url, base_url) + + +def test_tmap(): + path = os.path.join(os.path.dirname(__file__), '../examples/tmap.yml') + expected_path = os.path.join(os.path.dirname(__file__), '../examples/tmap_resolved.json') + doc = loader.load(path) + with open(expected_path) as fp: + expected = json.load(fp) + assert doc == expected diff --git a/cwltool/schemas b/cwltool/schemas new file mode 120000 
index 000000000..30aed58dd --- /dev/null +++ b/cwltool/schemas @@ -0,0 +1 @@ +../../schemas \ No newline at end of file diff --git a/cwltool/tool.py b/cwltool/tool.py new file mode 100644 index 000000000..099551509 --- /dev/null +++ b/cwltool/tool.py @@ -0,0 +1,157 @@ +import os +import pprint +import json +import execjs +import pprint +import copy + +from jsonschema.validators import Draft4Validator +from ref_resolver import from_url, resolve_pointer + +module_dir = os.path.dirname(os.path.abspath(__file__)) + +with open(os.path.join(module_dir, 'schemas/tool.json')) as f: + tool_schema = json.load(f) +with open(os.path.join(module_dir, 'schemas/metaschema.json')) as f: + metaschema = json.load(f) +tool_schema["properties"]["inputs"]["$ref"] = "file:%s/schemas/metaschema.json" % module_dir +tool_schema["properties"]["outputs"]["$ref"] = "file:%s/schemas/metaschema.json" % module_dir +tool_schema = Draft4Validator(tool_schema) + +class Job(object): + def run(self): + pass + +def each(l): + if l is None: + return [] + if isinstance(l, (list, tuple)): + return l + else: + return [l] + +# http://rightfootin.blogspot.com/2006/09/more-on-python-flatten.html +def flatten(l, ltypes=(list, tuple)): + if l is None: + return [] + if not isinstance(l, ltypes): + return [l] + + ltype = type(l) + l = list(l) + i = 0 + while i < len(l): + while isinstance(l[i], ltypes): + if not l[i]: + l.pop(i) + i -= 1 + break + else: + l[i:i + 1] = l[i] + i += 1 + return ltype(l) + +def fix_file_type(t): + if 'type' in t and t['type'] == "file": + for a in metaschema["definitions"]["file"]: + t[a] = metaschema["definitions"]["file"][a] + for k in t: + if isinstance(t[k], dict): + fix_file_type(t[k]) + +def jseval(expression=None, job=None): + if expression.startswith('{'): + exp_tpl = '''function () { + $job = %s; + return function()%s();}() + ''' + else: + exp_tpl = '''function () { + $job = %s; + return %s;}() + ''' + exp = exp_tpl % (json.dumps(job['job']), expression) + return execjs.eval(exp) + +def to_cli(value): + if isinstance(value, dict) and 'path' in value: + return value["path"] + else: + return str(value) + +def adapt(adapter, job): + if "value" in adapter: + if "$expr" in adapter["value"]: + value = jseval(adapter["value"]["$expr"]["value"], job) + else: + value = adapter["value"] + elif "valueFrom" in adapter: + value = resolve_pointer(job, adapter["valueFrom"]) + + sep = adapter["separator"] if "separator" in adapter else '' + + value = [to_cli(v) for v in each(value)] + + if 'itemSeparator' in adapter: + if adapter["prefix"]: + l = [adapter["prefix"] + adapter['itemSeparator'].join(value)] + else: + l = [adapter['itemSeparator'].join(value)] + elif 'prefix' in adapter: + l = [] + for v in each(value): + if sep == " ": + l.append(adapter["prefix"]) + l.append(v) + else: + l.append(adapter["prefix"] + sep + v) + else: + l = [value] + + return l + +class Tool(object): + def __init__(self, toolpath_object): + self.tool = toolpath_object["tool"] + fix_file_type(self.tool) + tool_schema.validate(self.tool) + + def job(self, joborder): + inputs = joborder["job"]['inputs'] + Draft4Validator(self.tool['inputs']).validate(inputs) + + adapter = self.tool["adapter"] + adapters = [{"order": -1000000, "value": adapter['baseCmd']}] + + for a in adapter["args"]: + adapters.append(a) + + for k, v in self.tool['inputs']['properties'].items(): + if 'adapter' in v: + a = copy.copy(v['adapter']) + else: + a = {} + + if not 'value' in a: + a['valueFrom'] = "#/job/inputs/"+ k + if not "order" in a: + a["order"] = 1000000 + 
adapters.append(a) + + adapters.sort(key=lambda a: a["order"]) + pprint.pprint(adapters) + + j = Job() + j.command_line = flatten(map(lambda adapter: adapt(adapter, joborder), adapters)) + + if 'stdin' in adapter: + j.stdin = flatten(adapt({"value": adapter['stdin']}, joborder))[0] + else: + j.stdin = None + + if 'stdout' in adapter: + j.stdout = flatten(adapt({"value": adapter['stdout']}, joborder))[0] + else: + j.stdout = None + + return j diff --git a/setup.py b/setup.py new file mode 100644 index 000000000..58cdf054e --- /dev/null +++ b/setup.py @@ -0,0 +1,21 @@ +from setuptools import setup, find_packages + +setup(name='cwllib', + version='1.0', + description='Common workflow language reference implementation', + author='Common workflow language working group', + author_email='common-workflow-language@googlegroups.com', + url="https://github.com/curoverse/common-workflow-language", + download_url="https://github.com/curoverse/common-workflow-language", + license='Apache 2.0', + packages=find_packages(), + scripts=[ + ], + install_requires=[ + 'jsonschema', + 'pyexecjs' + ], + test_suite='tests', + tests_require=[], + zip_safe=False + ) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 000000000..e69de29bb From d069342c5d3d1fce75541c5bafbfd8adfba291b2 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 10 Nov 2014 14:58:42 -0500 Subject: [PATCH 002/221] Add a test --- README.md | 4 +--- tests/test_examples.py | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 3 deletions(-) create mode 100644 tests/test_examples.py diff --git a/README.md b/README.md index c104ff993..5466b63e0 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,6 @@ Install -libv8-dev -libboost-python-dev -python setup.py +python setup.py tests Run diff --git a/tests/test_examples.py b/tests/test_examples.py new file mode 100644 index 000000000..84537602a --- /dev/null +++ b/tests/test_examples.py @@ -0,0 +1,21 @@ +import unittest +from cwltool import tool +from cwltool.ref_resolver import from_url, resolve_pointer + +class TestExamples(unittest.TestCase): + def test_job_order(self): + t = tool.Tool(from_url("../examples/bwa-mem-tool.json")) + job = t.job(from_url("../examples/bwa-mem-job.json")) + self.assertEqual(job.command_line, ['bwa', + 'mem', + '-t4', + '-m', + '3', + '-I1,2,3,4', + './rabix/tests/test-files/chr20.fa', + './rabix/tests/test-files/example_human_Illumina.pe_1.fastq', + './rabix/tests/test-files/example_human_Illumina.pe_2.fastq']) + + +if __name__ == '__main__': + unittest.main() From a16d90274aa4f5173eb6a7d4be9dbb403305c0ef Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 10 Nov 2014 17:07:42 -0500 Subject: [PATCH 003/221] Adapters use additional schema information to determine how to build inputs. 
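For orientation, a minimal sketch of the schema-driven collection this patch introduces in `adapt_inputs`: walk the input schema next to the concrete inputs and gather one adapter per annotated field. The schema and input document here are hypothetical, and the helper is simplified (no `oneOf` handling, no nested ordering):

```
# Simplified model of adapt_inputs: collect one adapter per schema leaf
# that carries 'adapter' metadata, defaulting order and value.
import copy

def collect_adapters(schema, inp):
    adapters = []
    if isinstance(inp, dict) and "properties" in schema:
        for name, value in inp.items():
            adapters.extend(collect_adapters(schema["properties"][name], value))
    elif isinstance(inp, list) and "items" in schema:
        for value in inp:
            adapters.extend(collect_adapters(schema["items"], value))
    if "adapter" in schema:
        a = copy.copy(schema["adapter"])
        a.setdefault("order", 1000000)   # fields without an explicit order sort last
        a.setdefault("value", inp)
        adapters.append(a)
    return adapters

schema = {"properties": {"threads": {"type": "integer",
                                     "adapter": {"prefix": "-t", "order": 1}}}}
assert collect_adapters(schema, {"threads": 4}) == [
    {"prefix": "-t", "order": 1, "value": 4}]
```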
--- cwltool/tool.py | 108 ++++++++++++++++++++++++++++++++++++------------ 1 file changed, 81 insertions(+), 27 deletions(-) diff --git a/cwltool/tool.py b/cwltool/tool.py index 099551509..17e19cecb 100644 --- a/cwltool/tool.py +++ b/cwltool/tool.py @@ -2,7 +2,6 @@ import pprint import json import execjs -import pprint import copy from jsonschema.validators import Draft4Validator @@ -11,12 +10,21 @@ module_dir = os.path.dirname(os.path.abspath(__file__)) with open(os.path.join(module_dir, 'schemas/tool.json')) as f: - tool_schema = json.load(f) + tool_schema_doc = json.load(f) with open(os.path.join(module_dir, 'schemas/metaschema.json')) as f: metaschema = json.load(f) -tool_schema["properties"]["inputs"]["$ref"] = "file:%s/schemas/metaschema.json" % module_dir -tool_schema["properties"]["outputs"]["$ref"] = "file:%s/schemas/metaschema.json" % module_dir -tool_schema = Draft4Validator(tool_schema) + +def fix_metaschema(m): + if '$ref' in m and m['$ref'].startswith("metaschema.json"): + m['$ref'] = "file:%s/schemas/%s" % (module_dir, m['$ref']) + else: + for k in m: + if isinstance(m[k], dict): + fix_metaschema(m[k]) + +fix_metaschema(tool_schema_doc) + +tool_schema = Draft4Validator(tool_schema_doc) class Job(object): def run(self): @@ -55,6 +63,7 @@ def fix_file_type(t): if 'type' in t and t['type'] == "file": for a in metaschema["definitions"]["file"]: t[a] = metaschema["definitions"]["file"][a] + t["_type"] = "file" for k in t: if isinstance(t[k], dict): fix_file_type(t[k]) @@ -73,24 +82,71 @@ def jseval(expression=None, job=None): exp = exp_tpl % (json.dumps(job['job']), expression) return execjs.eval(exp) -def to_cli(value): - if isinstance(value, dict) and 'path' in value: - return value["path"] +def adapt_inputs(schema, inp): + adapters = [] + + if not 'adapter' in schema: + if isinstance(inp, dict): + for i in inp: + adapters.extend(adapt_inputs(schema["properties"][i], inp[i])) + return adapters + elif isinstance(inp, list): + for i in inp: + adapters.extend(adapt_inputs(schema["items"], i)) + return adapters + + if 'adapter' in schema: + a = copy.copy(schema['adapter']) else: - return str(value) + a = {} + + if not 'value' in a: + a['value'] = inp + if not "order" in a: + a["order"] = 1000000 + a["schema"] = schema + + adapters.append(a) + return adapters + +def to_str(schema, value): + if "$ref" in schema: + schema = from_url(schema["$ref"]) + + if 'oneOf' in schema: + for a in schema['oneOf']: + v = to_str(a, value) + if v is not None: + return v + return None + elif 'type' in schema: + if schema["type"] == "array" and isinstance(value, list): + return [to_str(schema["items"], v) for v in value] + elif schema["type"] == "object" and isinstance(value, dict): + if "path" in value: + return value["path"] + else: + raise Exception("Not expecting a dict") + elif schema["type"] in ("string", "number", "integer"): + return str(value) + elif schema["boolean"]: + # need special handling for flags + return str(value) + + return None def adapt(adapter, job): if "value" in adapter: - if "$expr" in adapter["value"]: + if isinstance(adapter["value"], dict) and "$expr" in adapter["value"]: value = jseval(adapter["value"]["$expr"]["value"], job) else: value = adapter["value"] elif "valueFrom" in adapter: value = resolve_pointer(job, adapter["valueFrom"]) - sep = adapter["separator"] if "separator" in adapter else '' + value = to_str(adapter["schema"], value) - value = [to_cli(v) for v in each(value)] + sep = adapter["separator"] if "separator" in adapter else '' if 'itemSeparator' in adapter: 
if adapter["prefix"]: @@ -110,47 +166,45 @@ def adapt(adapter, job): return l + class Tool(object): def __init__(self, toolpath_object): self.tool = toolpath_object["tool"] fix_file_type(self.tool) tool_schema.validate(self.tool) + def job(self, joborder): inputs = joborder["job"]['inputs'] Draft4Validator(self.tool['inputs']).validate(inputs) adapter = self.tool["adapter"] - adapters = [{"order": -1000000, "value": adapter['baseCmd']}] + adapters = [{"order": -1000000, + "schema": tool_schema_doc["properties"]["adapter"]["properties"]["baseCmd"], + "value": adapter['baseCmd']}] for a in adapter["args"]: + a = copy.copy(a) + a["schema"] = tool_schema_doc["definitions"]["strOrExpr"] adapters.append(a) - for k, v in self.tool['inputs']['properties'].items(): - if 'adapter' in v: - a = copy.copy(v['adapter']) - else: - a = {} - - if not 'value' in a: - a['valueFrom'] = "#/job/inputs/"+ k - if not "order" in a: - a["order"] = 1000000 - adapters.append(a) + adapters.extend(adapt_inputs(self.tool['inputs'], inputs)) adapters.sort(key=lambda a: a["order"]) - pprint.pprint(adapters) j = Job() j.command_line = flatten(map(lambda adapter: adapt(adapter, joborder), adapters)) if 'stdin' in adapter: - j.stdin = flatten(adapt({"value": adapter['stdin']}, joborder))[0] + j.stdin = flatten(adapt({"value": adapter['stdin'], + "schema": tool_schema_doc["properties"]["adapter"]["properties"]["stdin"] + }, joborder))[0] else: j.stdin = None if 'stdout' in adapter: - j.stdout = flatten(adapt({"value": adapter['stdout']}, joborder))[0] + j.stdout = flatten(adapt({"value": adapter['stdout'], + "schema": tool_schema_doc["properties"]["adapter"]["properties"]["stdout"]}, joborder))[0] else: j.stdout = None From 61b32c0a7686ffebe991f46585a3776715586dcd Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 10 Nov 2014 21:32:11 -0500 Subject: [PATCH 004/221] Initial conformance test framework. --- cwltool/main.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) mode change 100644 => 100755 cwltool/main.py diff --git a/cwltool/main.py b/cwltool/main.py old mode 100644 new mode 100755 index 3b37b7015..cdbf87c21 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -1,12 +1,16 @@ +#!/usr/bin/env python + import tool import argparse from ref_resolver import from_url import jsonschema +import json def main(): parser = argparse.ArgumentParser() parser.add_argument("tool", type=str) parser.add_argument("job_order", type=str) + parser.add_argument("--conformance-test", action="store_true") parser.add_argument("-x", action="store_true", help="Execute") args = parser.parse_args() @@ -20,10 +24,16 @@ def main(): try: job = t.job(from_url(args.job_order)) - print '%s%s%s' % (' '.join(job.command_line), - ' < %s' % (job.stdin) if job.stdin else '', - ' > %s' % (job.stdout) if job.stdout else '') + if args.conformance_test: + print json.dumps(job.command_line) + else: + print '%s%s%s' % (' '.join(job.command_line), + ' < %s' % (job.stdin) if job.stdin else '', + ' > %s' % (job.stdout) if job.stdout else '') except jsonschema.exceptions.ValidationError as e: print "Job order failed validation" print e return + +if __name__ == "__main__": + main() From a18878dd25f8e5b00f9c0988d81b6c1cc14a5379 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 10 Nov 2014 22:00:21 -0500 Subject: [PATCH 005/221] Remove default "tool": { } and "job": { } keys at the top document level. 
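Concretely, tool and job documents are now consumed unwrapped; the change below just drops one level of indexing. A before/after sketch with a hypothetical job order:

```
# Before: the job order nested its content under a top-level "job" key.
old_job_order = {"job": {"inputs": {"reads": {"path": "input.fastq"}}}}
# After: the inputs live at the top level of the document.
new_job_order = {"inputs": {"reads": {"path": "input.fastq"}}}

assert old_job_order["job"]["inputs"] == new_job_order["inputs"]
```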
--- README.md | 2 +- cwltool/tool.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 5466b63e0..f77f84e4d 100644 --- a/README.md +++ b/README.md @@ -4,4 +4,4 @@ python setup.py tests Run -python -mcwltool --tool [uri] --job-order [uri] +python cwltool [tool] [job] diff --git a/cwltool/tool.py b/cwltool/tool.py index 17e19cecb..2f07a0f3d 100644 --- a/cwltool/tool.py +++ b/cwltool/tool.py @@ -79,7 +79,7 @@ def jseval(expression=None, job=None): $job = %s; return %s;}() ''' - exp = exp_tpl % (json.dumps(job['job']), expression) + exp = exp_tpl % (json.dumps(job), expression) return execjs.eval(exp) def adapt_inputs(schema, inp): @@ -169,13 +169,13 @@ def adapt(adapter, job): class Tool(object): def __init__(self, toolpath_object): - self.tool = toolpath_object["tool"] + self.tool = toolpath_object fix_file_type(self.tool) tool_schema.validate(self.tool) def job(self, joborder): - inputs = joborder["job"]['inputs'] + inputs = joborder['inputs'] Draft4Validator(self.tool['inputs']).validate(inputs) adapter = self.tool["adapter"] From bae08ccd3ec3be5ad1ca7070a630c80e4ff2e5d0 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 11 Nov 2014 11:34:26 -0500 Subject: [PATCH 006/221] Cwltool outputs json object with stdin and stdout. Test includes stdout. Better logging by test running. --- cwltool/main.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/cwltool/main.py b/cwltool/main.py index cdbf87c21..33bc88ed5 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -25,7 +25,12 @@ def main(): try: job = t.job(from_url(args.job_order)) if args.conformance_test: - print json.dumps(job.command_line) + a = {"args": job.command_line} + if job.stdin: + a["stdin"] = job.stdin + if job.stdout: + a["stdout"] = job.stdout + print json.dumps(a) else: print '%s%s%s' % (' '.join(job.command_line), ' < %s' % (job.stdin) if job.stdin else '', From 69648f5e251c334a18a72d0cc5a3836f4d1b8f64 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 11 Nov 2014 14:01:15 -0500 Subject: [PATCH 007/221] Setup.py works. Improve documentation. --- README.md | 23 +++++++++++++++++++---- setup.py | 14 +++++++++----- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index f77f84e4d..268e1c86c 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,22 @@ -Install +# Common workflow language tool description reference implementation -python setup.py tests +This is intended to be a lightweight reference implementation of the common +workflow language tool description. -Run +## Install -python cwltool [tool] [job] +``` +$ easy_install . 
+``` + +## Run on the command line + +``` +$ cwltool [tool] [job] +``` + +## Use as a library + +``` +import cwltool +``` \ No newline at end of file diff --git a/setup.py b/setup.py index 58cdf054e..de6ab1303 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,6 @@ -from setuptools import setup, find_packages +from setuptools import setup -setup(name='cwllib', +setup(name='cwltool', version='1.0', description='Common workflow language reference implementation', author='Common workflow language working group', @@ -8,7 +8,9 @@ url="https://github.com/curoverse/common-workflow-language", download_url="https://github.com/curoverse/common-workflow-language", license='Apache 2.0', - packages=find_packages(), + packages=["cwltool"], + package_data={'cwltool': ['schemas/*.json']}, + include_package_data=True, scripts=[ ], install_requires=[ @@ -17,5 +19,7 @@ ], test_suite='tests', tests_require=[], - zip_safe=False - ) + entry_points={ + 'console_scripts': [ "cwltool=cwltool.main:main" ] + } +) From 1e956492a02db4c7a6e00ab4ecab26d40930f6dd Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 11 Nov 2014 17:10:34 -0500 Subject: [PATCH 008/221] Cwltool supports nested adapters. Added $job (sort of like $ref but references the job document.) Added tmap to conformance test. --- cwltool/tool.py | 111 +++++++++++++++++++++++++++++++----------------- setup.py | 2 - 2 files changed, 71 insertions(+), 42 deletions(-) diff --git a/cwltool/tool.py b/cwltool/tool.py index 2f07a0f3d..7dd893a4e 100644 --- a/cwltool/tool.py +++ b/cwltool/tool.py @@ -3,6 +3,8 @@ import json import execjs import copy +import sys +import jsonschema.exceptions from jsonschema.validators import Draft4Validator from ref_resolver import from_url, resolve_pointer @@ -15,12 +17,15 @@ metaschema = json.load(f) def fix_metaschema(m): - if '$ref' in m and m['$ref'].startswith("metaschema.json"): - m['$ref'] = "file:%s/schemas/%s" % (module_dir, m['$ref']) - else: - for k in m: - if isinstance(m[k], dict): + if isinstance(m, dict): + if '$ref' in m and m['$ref'].startswith("metaschema.json"): + m['$ref'] = "file:%s/schemas/%s" % (module_dir, m['$ref']) + else: + for k in m: fix_metaschema(m[k]) + if isinstance(m, list): + for k in m: + fix_metaschema(k) fix_metaschema(tool_schema_doc) @@ -68,7 +73,7 @@ def fix_file_type(t): if isinstance(t[k], dict): fix_file_type(t[k]) -def jseval(expression=None, job=None): +def jseval(job=None, expression=None): if expression.startswith('{'): exp_tpl = '''function () { $job = %s; @@ -82,31 +87,49 @@ def jseval(expression=None, job=None): exp = exp_tpl % (json.dumps(job), expression) return execjs.eval(exp) -def adapt_inputs(schema, inp): +def adapt_inputs(schema, inp, key): adapters = [] - if not 'adapter' in schema: - if isinstance(inp, dict): - for i in inp: - adapters.extend(adapt_inputs(schema["properties"][i], inp[i])) - return adapters - elif isinstance(inp, list): + if 'oneOf' in schema: + for one in schema["oneOf"]: + try: + Draft4Validator(one).validate(inp) + schema = one + break + except jsonschema.exceptions.ValidationError: + pass + + if isinstance(inp, dict): + if "properties" in schema: for i in inp: - adapters.extend(adapt_inputs(schema["items"], i)) - return adapters + a = adapt_inputs(schema["properties"][i], inp[i], i) + adapters.extend(a) + elif isinstance(inp, list): + for n, i in enumerate(inp): + a = adapt_inputs(schema["items"], i, format(n, '06')) + for x in a: + x["order"].insert(0, n) + adapters.extend(a) if 'adapter' in schema: a = 
copy.copy(schema['adapter']) - else: - a = {} - if not 'value' in a: - a['value'] = inp - if not "order" in a: - a["order"] = 1000000 - a["schema"] = schema + if "order" in a: + a["order"] = [a["order"], key] + else: + a["order"] = [1000000, key] + + a["schema"] = schema + + for x in adapters: + x["order"] = a["order"] + x["order"] + + if not 'value' in a and len(adapters) == 0: + a['value'] = inp + + if len(adapters) == 0 or "value" in a: + adapters.insert(0, a) - adapters.append(a) return adapters def to_str(schema, value): @@ -126,10 +149,10 @@ def to_str(schema, value): if "path" in value: return value["path"] else: - raise Exception("Not expecting a dict") + raise Exception("Not expecting a dict %s" % (value)) elif schema["type"] in ("string", "number", "integer"): return str(value) - elif schema["boolean"]: + elif schema["type"] == "boolean": # need special handling for flags return str(value) @@ -137,12 +160,12 @@ def to_str(schema, value): def adapt(adapter, job): if "value" in adapter: - if isinstance(adapter["value"], dict) and "$expr" in adapter["value"]: - value = jseval(adapter["value"]["$expr"]["value"], job) - else: - value = adapter["value"] - elif "valueFrom" in adapter: - value = resolve_pointer(job, adapter["valueFrom"]) + value = adapter["value"] + if isinstance(value, dict): + if "$expr" in value: + value = jseval(job, value["$expr"]["value"]) + elif "$job" in value: + value = resolve_pointer(job, value["$job"]) value = to_str(adapter["schema"], value) @@ -179,16 +202,24 @@ def job(self, joborder): Draft4Validator(self.tool['inputs']).validate(inputs) adapter = self.tool["adapter"] - adapters = [{"order": -1000000, + adapters = [{"order": [-1000000], "schema": tool_schema_doc["properties"]["adapter"]["properties"]["baseCmd"], - "value": adapter['baseCmd']}] - - for a in adapter["args"]: - a = copy.copy(a) - a["schema"] = tool_schema_doc["definitions"]["strOrExpr"] - adapters.append(a) - - adapters.extend(adapt_inputs(self.tool['inputs'], inputs)) + "value": adapter['baseCmd'], + #"_key": "0" + }] + + if "args" in adapter: + for i, a in enumerate(adapter["args"]): + a = copy.copy(a) + if "order" in a: + a["order"] = [a["order"]] + else: + a["order"] = [0] + a["schema"] = tool_schema_doc["definitions"]["strOrExpr"] + #a["_key"] = "!" + format(i, '06') + adapters.append(a) + + adapters.extend(adapt_inputs(self.tool['inputs'], inputs, "")) adapters.sort(key=lambda a: a["order"]) diff --git a/setup.py b/setup.py index de6ab1303..4f43a2ef1 100644 --- a/setup.py +++ b/setup.py @@ -11,8 +11,6 @@ packages=["cwltool"], package_data={'cwltool': ['schemas/*.json']}, include_package_data=True, - scripts=[ - ], install_requires=[ 'jsonschema', 'pyexecjs' From 624a9259b1195e8174da4b06846f23372dbef438 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 12 Nov 2014 16:23:10 -0500 Subject: [PATCH 009/221] Adding more examples. Started work on running jobs in Docker. Working on capturing and remapping file paths. Fiddling with ref resolving of cross references in schema files. 
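Among the pieces below is `resolve_eval`, which lets an adapter value of the form `{"$job": <pointer>}` pull data out of the job document. A self-contained sketch of that lookup, using a simplified pointer resolver and a hypothetical job document:

```
# Resolve a JSON-pointer-style reference such as "#/inputs/reference/path"
# against the job order, the way a {"$job": ...} value is dereferenced.
def resolve_pointer(document, pointer):
    for part in pointer.lstrip('/#').split('/'):
        if isinstance(document, list):
            part = int(part)
        document = document[part]
    return document

job = {"inputs": {"reference": {"path": "chr20.fa"}}}
assert resolve_pointer(job, "#/inputs/reference/path") == "chr20.fa"
```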
--- cwltool/main.py | 13 +++-- cwltool/tool.py | 136 +++++++++++++++++++++++++++++++++++++----------- 2 files changed, 116 insertions(+), 33 deletions(-) diff --git a/cwltool/main.py b/cwltool/main.py index 33bc88ed5..3b2f60a4d 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -5,6 +5,8 @@ from ref_resolver import from_url import jsonschema import json +import os +import sys def main(): parser = argparse.ArgumentParser() @@ -20,7 +22,7 @@ def main(): except jsonschema.exceptions.ValidationError as e: print "Tool definition failed validation" print e - return + return 1 try: job = t.job(from_url(args.job_order)) @@ -38,7 +40,12 @@ def main(): except jsonschema.exceptions.ValidationError as e: print "Job order failed validation" print e - return + return 1 + + if args.x: + job.run() + + return 0 if __name__ == "__main__": - main() + sys.exit(main()) diff --git a/cwltool/tool.py b/cwltool/tool.py index 7dd893a4e..a187eae84 100644 --- a/cwltool/tool.py +++ b/cwltool/tool.py @@ -5,13 +5,15 @@ import copy import sys import jsonschema.exceptions +from job import Job from jsonschema.validators import Draft4Validator from ref_resolver import from_url, resolve_pointer module_dir = os.path.dirname(os.path.abspath(__file__)) -with open(os.path.join(module_dir, 'schemas/tool.json')) as f: +toolpath = os.path.join(module_dir, 'schemas/tool.json') +with open(toolpath) as f: tool_schema_doc = json.load(f) with open(os.path.join(module_dir, 'schemas/metaschema.json')) as f: metaschema = json.load(f) @@ -31,10 +33,6 @@ def fix_metaschema(m): tool_schema = Draft4Validator(tool_schema_doc) -class Job(object): - def run(self): - pass - def each(l): if l is None: return [] @@ -87,7 +85,15 @@ def jseval(job=None, expression=None): exp = exp_tpl % (json.dumps(job), expression) return execjs.eval(exp) -def adapt_inputs(schema, inp, key): +def resolve_eval(job, v): + if isinstance(v, dict): + if "$expr" in v: + return jseval(job, v["$expr"]["value"]) + elif "$job" in v: + return resolve_pointer(job, v["$job"]) + return v + +def adapt_inputs(schema, job, inp, key): adapters = [] if 'oneOf' in schema: @@ -102,11 +108,11 @@ def adapt_inputs(schema, inp, key): if isinstance(inp, dict): if "properties" in schema: for i in inp: - a = adapt_inputs(schema["properties"][i], inp[i], i) + a = adapt_inputs(schema["properties"][i], job, inp[i], i) adapters.extend(a) elif isinstance(inp, list): for n, i in enumerate(inp): - a = adapt_inputs(schema["items"], i, format(n, '06')) + a = adapt_inputs(schema["items"], job, i, format(n, '06')) for x in a: x["order"].insert(0, n) adapters.extend(a) @@ -132,22 +138,22 @@ def adapt_inputs(schema, inp, key): return adapters -def to_str(schema, value): +def to_str(schema, value, base_url, path_mapper): if "$ref" in schema: - schema = from_url(schema["$ref"]) + schema = from_url(schema["$ref"], base_url) if 'oneOf' in schema: for a in schema['oneOf']: - v = to_str(a, value) + v = to_str(a, value, base_url, path_mapper) if v is not None: return v return None elif 'type' in schema: if schema["type"] == "array" and isinstance(value, list): - return [to_str(schema["items"], v) for v in value] + return [to_str(schema["items"], v, base_url, path_mapper) for v in value] elif schema["type"] == "object" and isinstance(value, dict): if "path" in value: - return value["path"] + return path_mapper(value["path"]) else: raise Exception("Not expecting a dict %s" % (value)) elif schema["type"] in ("string", "number", "integer"): @@ -158,16 +164,42 @@ def to_str(schema, value): return None -def 
adapt(adapter, job): +def find_files(adapter, job): + if "value" in adapter: + value = resolve_eval(job, adapter["value"]) + else: + return None + + schema = adapter["schema"] + + if "$ref" in schema: + schema = from_url(schema["$ref"], adapter.get("$ref_base_url")) + + if 'oneOf' in schema: + for a in schema['oneOf']: + v = find_files(a, value) + if v is not None: + return v + return None + elif 'type' in schema: + if schema["type"] == "array" and isinstance(value, list): + return [find_files(schema["items"], v) for v in value] + elif schema["type"] == "object" and isinstance(value, dict): + if "path" in value: + return value["path"] + else: + raise Exception("Not expecting a dict %s" % (value)) + + return None + + +def adapt(adapter, job, path_mapper): if "value" in adapter: - value = adapter["value"] - if isinstance(value, dict): - if "$expr" in value: - value = jseval(job, value["$expr"]["value"]) - elif "$job" in value: - value = resolve_pointer(job, value["$job"]) + value = resolve_eval(job, adapter["value"]) + else: + raise Exception("No value in adapter") - value = to_str(adapter["schema"], value) + value = to_str(adapter["schema"], value, adapter.get("$ref_base_url"), path_mapper) sep = adapter["separator"] if "separator" in adapter else '' @@ -189,6 +221,22 @@ def adapt(adapter, job): return l +class PathMapper(object): + def __init__(self, basedir): + self.basedir = basedir + self._pathmap = {} + + def mapper(self, src): + if not os.path.isabs(src): + src = os.path.join(self.basedir, src) + self._pathmap[src] = src + return self._pathmap[src] + + def pathmap(self): + return self._pathmap + +class DockerPathMapper(PathMapper): + pass class Tool(object): def __init__(self, toolpath_object): @@ -196,8 +244,7 @@ def __init__(self, toolpath_object): fix_file_type(self.tool) tool_schema.validate(self.tool) - - def job(self, joborder): + def job(self, joborder, basedir=""): inputs = joborder['inputs'] Draft4Validator(self.tool['inputs']).validate(inputs) @@ -205,7 +252,7 @@ def job(self, joborder): adapters = [{"order": [-1000000], "schema": tool_schema_doc["properties"]["adapter"]["properties"]["baseCmd"], "value": adapter['baseCmd'], - #"_key": "0" + "$ref_base_url": "file:"+toolpath }] if "args" in adapter: @@ -216,27 +263,56 @@ def job(self, joborder): else: a["order"] = [0] a["schema"] = tool_schema_doc["definitions"]["strOrExpr"] - #a["_key"] = "!" 
+ format(i, '06') adapters.append(a) - adapters.extend(adapt_inputs(self.tool['inputs'], inputs, "")) + adapters.extend(adapt_inputs(self.tool['inputs'], inputs, inputs, "")) adapters.sort(key=lambda a: a["order"]) + referenced_files = filter(lambda a: a is not None, flatten(map(lambda a: find_files(a, joborder), adapters))) + print >>sys.stderr, referenced_files + j = Job() - j.command_line = flatten(map(lambda adapter: adapt(adapter, joborder), adapters)) + j.tool = self + + j.container = None if 'stdin' in adapter: j.stdin = flatten(adapt({"value": adapter['stdin'], - "schema": tool_schema_doc["properties"]["adapter"]["properties"]["stdin"] - }, joborder))[0] + "schema": tool_schema_doc["properties"]["adapter"]["properties"]["stdin"], + "$ref_base_url": "file:"+toolpath + }, joborder, None))[0] + referenced_files.append(j.stdin) else: j.stdin = None if 'stdout' in adapter: j.stdout = flatten(adapt({"value": adapter['stdout'], - "schema": tool_schema_doc["properties"]["adapter"]["properties"]["stdout"]}, joborder))[0] + "schema": tool_schema_doc["properties"]["adapter"]["properties"]["stdout"], + "$ref_base_url": "file:"+toolpath + }, joborder, None))[0] + + if os.path.isabs(j.stdout): + raise Exception("stdout must be a relative path") else: j.stdout = None + d = None + a = self.tool.get("requirements") + if a: + b = a.get("environment") + if b: + c = b.get("container") + if c: + if c.get("type") == "docker": + d = DockerPathMapper(basedir) + j.container = c + + if d is None: + d = PathMapper(basedir) + + j.command_line = flatten(map(lambda a: adapt(a, joborder, d.mapper), adapters)) + + j.pathmap = d.pathmap() + return j From e4ef50386c596e85f45d834d34e5c0254673c356 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 12 Nov 2014 17:01:05 -0500 Subject: [PATCH 010/221] Remap file paths to absolute paths by default. Accomodate this in conformance test. 
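The `PathMapper` rework below precomputes the whole source-to-absolute table when the job is built instead of resolving lazily. A usage sketch mirroring the patched class, with hypothetical paths:

```
# Map each referenced input file to an absolute path, resolving relative
# paths against the directory the job order came from.
import os

class PathMapper(object):
    def __init__(self, referenced_files, basedir):
        self._pathmap = {}
        for src in referenced_files:
            self._pathmap[src] = src if os.path.isabs(src) else os.path.join(basedir, src)

    def mapper(self, src):
        return self._pathmap[src]

pm = PathMapper(["reads_1.fastq", "/data/chr20.fa"], "/home/user/job1")
assert pm.mapper("reads_1.fastq") == "/home/user/job1/reads_1.fastq"
assert pm.mapper("/data/chr20.fa") == "/data/chr20.fa"
```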
--- cwltool/job.py | 20 ++++++++++++++++++++ cwltool/main.py | 2 +- cwltool/tool.py | 24 ++++++++++++++---------- 3 files changed, 35 insertions(+), 11 deletions(-) create mode 100644 cwltool/job.py diff --git a/cwltool/job.py b/cwltool/job.py new file mode 100644 index 000000000..e7692ad05 --- /dev/null +++ b/cwltool/job.py @@ -0,0 +1,20 @@ +import subprocess + +class Job(object): + def remap_files(): + pass + + def run(self): + runtime = [] + + print self.pathmap + + if self.container: + runtime = ["docker", "run", self.container["imageId"]] + + stdin = None + stdout = None + + sp = subprocess.Popen(runtime + self.command_line, shell=False, stdin=stdin, stdout=stdout) + + sp.wait() diff --git a/cwltool/main.py b/cwltool/main.py index 3b2f60a4d..dada36748 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -25,7 +25,7 @@ def main(): return 1 try: - job = t.job(from_url(args.job_order)) + job = t.job(from_url(args.job_order), os.path.abspath(os.path.dirname(args.job_order))) if args.conformance_test: a = {"args": job.command_line} if job.stdin: diff --git a/cwltool/tool.py b/cwltool/tool.py index a187eae84..c2b8fa7d9 100644 --- a/cwltool/tool.py +++ b/cwltool/tool.py @@ -183,7 +183,8 @@ def find_files(adapter, job): return None elif 'type' in schema: if schema["type"] == "array" and isinstance(value, list): - return [find_files(schema["items"], v) for v in value] + return [find_files({"value": v, + "schema": schema["items"]}, job) for v in value] elif schema["type"] == "object" and isinstance(value, dict): if "path" in value: return value["path"] @@ -222,14 +223,15 @@ def adapt(adapter, job, path_mapper): return l class PathMapper(object): - def __init__(self, basedir): - self.basedir = basedir + def __init__(self, referenced_files, basedir): self._pathmap = {} + for src in referenced_files: + dest = src + if not os.path.isabs(dest): + dest = os.path.join(basedir, src) + self._pathmap[src] = dest def mapper(self, src): - if not os.path.isabs(src): - src = os.path.join(self.basedir, src) - self._pathmap[src] = src return self._pathmap[src] def pathmap(self): @@ -244,7 +246,7 @@ def __init__(self, toolpath_object): fix_file_type(self.tool) tool_schema.validate(self.tool) - def job(self, joborder, basedir=""): + def job(self, joborder, basedir): inputs = joborder['inputs'] Draft4Validator(self.tool['inputs']).validate(inputs) @@ -270,7 +272,6 @@ def job(self, joborder, basedir=""): adapters.sort(key=lambda a: a["order"]) referenced_files = filter(lambda a: a is not None, flatten(map(lambda a: find_files(a, joborder), adapters))) - print >>sys.stderr, referenced_files j = Job() j.tool = self @@ -305,11 +306,14 @@ def job(self, joborder, basedir=""): c = b.get("container") if c: if c.get("type") == "docker": - d = DockerPathMapper(basedir) + d = DockerPathMapper(referenced_files, basedir) j.container = c if d is None: - d = PathMapper(basedir) + d = PathMapper(referenced_files, basedir) + + if j.stdin: + j.stdin = d.mapper(j.stdin) j.command_line = flatten(map(lambda a: adapt(a, joborder, d.mapper), adapters)) From bb0d88c84a9f8b000c173117306300116d5d92f2 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 13 Nov 2014 09:47:05 -0500 Subject: [PATCH 011/221] Added filename mapping for running inside docker. 
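The `DockerPathMapper` added below gives every host directory holding input files a unique mount point under `/tmp` inside the container. A simplified sketch of just the renaming scheme (the fixed prefix and paths are hypothetical; the real class uses a random per-job prefix and also collapses nested directories first):

```
# Assign each host directory a collision-free container-side name, the way
# DockerPathMapper numbers duplicate basenames.
import os

def container_dirs(host_dirs, prefix="job42_"):
    names, mapping = set(), {}
    for d in host_dirs:
        name = os.path.join("/tmp", prefix + os.path.basename(d))
        i = 1
        while name in names:
            i += 1
            name = os.path.join("/tmp", prefix + os.path.basename(d) + str(i))
        names.add(name)
        mapping[d] = name
    return mapping

m = container_dirs(["/home/user/data", "/mnt/data"])
assert m["/home/user/data"] == "/tmp/job42_data"
assert m["/mnt/data"] == "/tmp/job42_data2"
```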
--- cwltool/job.py | 12 ++++++---- cwltool/tool.py | 59 +++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 58 insertions(+), 13 deletions(-) diff --git a/cwltool/job.py b/cwltool/job.py index e7692ad05..a38a214a1 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -1,4 +1,5 @@ import subprocess +import os class Job(object): def remap_files(): @@ -7,14 +8,17 @@ def remap_files(): def run(self): runtime = [] - print self.pathmap - - if self.container: - runtime = ["docker", "run", self.container["imageId"]] + if self.container and self.container.get("type") == "docker": + runtime = ["docker", "run"] + for d in self.pathmapper.dirs: + runtime.append("--volume=%s:%s:ro" % (d, self.pathmapper.dirs[d])) + runtime.append(self.container["imageId"]) stdin = None stdout = None + print runtime + self.command_line + sp = subprocess.Popen(runtime + self.command_line, shell=False, stdin=stdin, stdout=stdout) sp.wait() diff --git a/cwltool/tool.py b/cwltool/tool.py index c2b8fa7d9..6e6928e74 100644 --- a/cwltool/tool.py +++ b/cwltool/tool.py @@ -5,6 +5,7 @@ import copy import sys import jsonschema.exceptions +import random from job import Job from jsonschema.validators import Draft4Validator @@ -223,22 +224,62 @@ def adapt(adapter, job, path_mapper): return l class PathMapper(object): + # Maps files to their absolute path def __init__(self, referenced_files, basedir): self._pathmap = {} for src in referenced_files: - dest = src - if not os.path.isabs(dest): - dest = os.path.join(basedir, src) - self._pathmap[src] = dest + if os.path.isabs(src): + abs = src + else: + abs = os.path.join(basedir, src) + + self._pathmap[src] = abs def mapper(self, src): return self._pathmap[src] - def pathmap(self): - return self._pathmap -class DockerPathMapper(PathMapper): - pass +class DockerPathMapper(object): + def __init__(self, referenced_files, basedir): + self._pathmap = {} + self.dirs = {} + for src in referenced_files: + abs = src if os.path.isabs(src) else os.path.join(basedir, src) + dir, fn = os.path.split(abs) + + subdir = False + for d in self.dirs: + if dir.startswith(d): + subdir = True + break + + if not subdir: + for d in list(self.dirs): + if d.startswith(dir): + # 'dir' is a parent of 'd' + del self.dirs[d] + self.dirs[dir] = True + + prefix = "job" + str(random.randint(1, 1000000000)) + "_" + + names = set() + for d in self.dirs: + name = os.path.join("/tmp", prefix + os.path.basename(d)) + i = 1 + while name in names: + i += 1 + name = os.path.join("/tmp", prefix + os.path.basename(d) + str(i)) + names.add(name) + self.dirs[d] = name + + for src in referenced_files: + abs = src if os.path.isabs(src) else os.path.join(basedir, src) + for d in self.dirs: + if abs.startswith(d): + self._pathmap[src] = os.path.join(self.dirs[d], abs[len(d)+1:]) + + def mapper(self, src): + return self._pathmap[src] class Tool(object): def __init__(self, toolpath_object): @@ -317,6 +358,6 @@ def job(self, joborder, basedir): j.command_line = flatten(map(lambda a: adapt(a, joborder, d.mapper), adapters)) - j.pathmap = d.pathmap() + j.pathmapper = d return j From 333924bb9bb2e784ce58a177d5832508d3fa093e Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 13 Nov 2014 10:33:35 -0500 Subject: [PATCH 012/221] * Docker running works! Supports bind mounting input and output directories and stdin and stdout redirection. * Conformance testing now requires --basedir (to standardize the absolute directory path that is generated) and --no-container to suppress container bind mount name remapping. 
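For reference, the shape of the `docker run` prefix assembled in `job.py` below, extracted into a standalone sketch (the directory mapping and image id are hypothetical). It reproduces the patch as written, where the output directory is also mounted `:ro`:

```
# Build the container runtime prefix that gets prepended to the tool's
# command line: one read-only bind mount per input directory, plus the
# output directory, working directory and user id.
import os

def docker_runtime(dirs, outdir, image_id):
    runtime = ["docker", "run", "-i"]
    for host, cont in dirs.items():
        runtime.append("--volume=%s:%s:ro" % (host, cont))
    runtime.append("--volume=%s:%s:ro" % (outdir, "/tmp/job_output"))
    runtime.append("--workdir=%s" % "/tmp/job_output")
    runtime.append("--user=%s" % os.geteuid())
    runtime.append(image_id)
    return runtime

runtime = docker_runtime({"/home/user/job1": "/tmp/job42_job1"},
                         "/tmp/outXYZ", "example/bwa:latest")
# subprocess.Popen(runtime + job.command_line, ...) then runs the tool.
```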
--- cwltool/job.py | 25 +++++++++++++++++++++++-- cwltool/main.py | 6 +++++- cwltool/tool.py | 12 ++++-------- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/cwltool/job.py b/cwltool/job.py index a38a214a1..f0456cc56 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -1,24 +1,45 @@ import subprocess import os +import tempfile class Job(object): def remap_files(): pass def run(self): + outdir = tempfile.mkdtemp() + runtime = [] if self.container and self.container.get("type") == "docker": - runtime = ["docker", "run"] + runtime = ["docker", "run", "-i"] for d in self.pathmapper.dirs: runtime.append("--volume=%s:%s:ro" % (d, self.pathmapper.dirs[d])) + runtime.append("--volume=%s:%s:ro" % (outdir, "/tmp/job_output")) + runtime.append("--workdir=%s" % ("/tmp/job_output")) + runtime.append("--user=%s" % (os.geteuid())) runtime.append(self.container["imageId"]) + else: + os.chdir(outdir) stdin = None stdout = None + if self.stdin: + stdin = open(self.stdin, "rb") + + if self.stdout: + stdout = open(os.path.join(outdir, self.stdout), "wb") + print runtime + self.command_line sp = subprocess.Popen(runtime + self.command_line, shell=False, stdin=stdin, stdout=stdout) - sp.wait() + + if stdin: + stdin.close() + + if stdout: + stdout.close() + + print "Output directory is %s" % outdir diff --git a/cwltool/main.py b/cwltool/main.py index dada36748..d1c9992a5 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -13,6 +13,8 @@ def main(): parser.add_argument("tool", type=str) parser.add_argument("job_order", type=str) parser.add_argument("--conformance-test", action="store_true") + parser.add_argument("--basedir", type=str) + parser.add_argument("--no-container", action="store_true") parser.add_argument("-x", action="store_true", help="Execute") args = parser.parse_args() @@ -24,8 +26,10 @@ def main(): print e return 1 + basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(args.job_order)) + try: - job = t.job(from_url(args.job_order), os.path.abspath(os.path.dirname(args.job_order))) + job = t.job(from_url(args.job_order), basedir, use_container=(not args.no_container)) if args.conformance_test: a = {"args": job.command_line} if job.stdin: diff --git a/cwltool/tool.py b/cwltool/tool.py index 6e6928e74..0dbe7a00a 100644 --- a/cwltool/tool.py +++ b/cwltool/tool.py @@ -228,11 +228,7 @@ class PathMapper(object): def __init__(self, referenced_files, basedir): self._pathmap = {} for src in referenced_files: - if os.path.isabs(src): - abs = src - else: - abs = os.path.join(basedir, src) - + abs = src if os.path.isabs(src) else os.path.join(basedir, src) self._pathmap[src] = abs def mapper(self, src): @@ -287,7 +283,7 @@ def __init__(self, toolpath_object): fix_file_type(self.tool) tool_schema.validate(self.tool) - def job(self, joborder, basedir): + def job(self, joborder, basedir, use_container=True): inputs = joborder['inputs'] Draft4Validator(self.tool['inputs']).validate(inputs) @@ -345,7 +341,7 @@ def job(self, joborder, basedir): b = a.get("environment") if b: c = b.get("container") - if c: + if use_container and c: if c.get("type") == "docker": d = DockerPathMapper(referenced_files, basedir) j.container = c @@ -354,7 +350,7 @@ def job(self, joborder, basedir): d = PathMapper(referenced_files, basedir) if j.stdin: - j.stdin = d.mapper(j.stdin) + j.stdin = j.stdin if os.path.isabs(j.stdin) else os.path.join(basedir, j.stdin) j.command_line = flatten(map(lambda a: adapt(a, joborder, d.mapper), adapters)) From 28d6dfa286061e8aa01c296d1461ab586578ca43 Mon Sep 17 
00:00:00 2001 From: Ward Vandewege Date: Tue, 18 Nov 2014 10:51:49 -0500 Subject: [PATCH 013/221] Update setup.py so that package version numbering is more flexible. Rename README.md to README.rst so that it also renders on pypi No issue # --- README.md | 22 ---------------------- README.rst | 35 +++++++++++++++++++++++++++++++++++ setup.py | 30 +++++++++++++++++++++++++++--- 3 files changed, 62 insertions(+), 25 deletions(-) delete mode 100644 README.md create mode 100644 README.rst diff --git a/README.md b/README.md deleted file mode 100644 index 268e1c86c..000000000 --- a/README.md +++ /dev/null @@ -1,22 +0,0 @@ -# Common workflow language tool description reference implementation - -This is intended to be a lightweight reference implementation of the common -workflow language tool description. - -## Install - -``` -$ easy_install . -``` - -## Run on the command line - -``` -$ cwltool [tool] [job] -``` - -## Use as a library - -``` -import cwltool -``` \ No newline at end of file diff --git a/README.rst b/README.rst new file mode 100644 index 000000000..2e62afd6c --- /dev/null +++ b/README.rst @@ -0,0 +1,35 @@ +================================================================== +Common workflow language tool description reference implementation +================================================================== + +This is intended to be a lightweight reference implementation of the common +workflow language tool description. + +Install +------- + +From source:: + + git clone https://github.com/curoverse/common-workflow-language.git + cd common-workflow-language/reference + easy_install . + +With pip:: + + pip install cwltool + + +Run on the command line +----------------------- + + ``cwltool [tool] [job]`` + +Use as a library +---------------- + +Add:: + + import cwltool + +to your script. + diff --git a/setup.py b/setup.py index 4f43a2ef1..6251a2961 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,31 @@ -from setuptools import setup +#!/usr/bin/env python + +import os +import subprocess +import time + +from setuptools import setup, find_packages + +SETUP_DIR = os.path.dirname(__file__) +README = os.path.join(SETUP_DIR, 'README.rst') + +cmd_opts = {'egg_info': {}} +try: + git_tags = subprocess.check_output( + ['git', 'log', '--first-parent', '--max-count=1', + '--format=format:%ct %h', SETUP_DIR]).split() + assert len(git_tags) == 2 +except (AssertionError, OSError, subprocess.CalledProcessError): + pass +else: + git_tags[0] = time.strftime('%Y%m%d%H%M%S', time.gmtime(int(git_tags[0]))) + cmd_opts['egg_info']['tag_build'] = '.{}.{}'.format(*git_tags) + setup(name='cwltool', - version='1.0', + version='0.1', description='Common workflow language reference implementation', + long_description=open(README).read(), author='Common workflow language working group', author_email='common-workflow-language@googlegroups.com', url="https://github.com/curoverse/common-workflow-language", @@ -19,5 +42,6 @@ tests_require=[], entry_points={ 'console_scripts': [ "cwltool=cwltool.main:main" ] - } + }, + options=cmd_opts, ) From 4ccf283c25f2fc408d3c3f6db8bf46a9b1cd17f8 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 18 Nov 2014 11:35:09 -0500 Subject: [PATCH 014/221] Remove yaml from ref_resolver, only support JSON in cwltool. 
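With YAML gone, both fetch paths return parsed JSON. A condensed sketch of the resulting scheme dispatch in `Loader.fetch` (error handling trimmed; the Python 3 import branch is an addition for portability, the tree itself is Python 2):

```
# Condensed fetch: HTTP(S) responses parse via resp.json(), local files
# via json.load; everything else is rejected.
import json
try:
    from urllib.parse import urlsplit   # Python 3
except ImportError:
    from urlparse import urlsplit        # Python 2, as used in this tree

def fetch(url):
    split = urlsplit(url)
    if split.scheme in ("http", "https"):
        import requests
        resp = requests.get(url)
        resp.raise_for_status()
        return resp.json()
    elif split.scheme == "file":
        with open(split.path) as fp:
            return json.load(fp)
    raise ValueError("Unsupported scheme: %s" % split.scheme)

# fetch("file:///path/to/tool.json") -> parsed JSON document
```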
--- cwltool/ref_resolver.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cwltool/ref_resolver.py b/cwltool/ref_resolver.py index 106980a21..aae572d94 100644 --- a/cwltool/ref_resolver.py +++ b/cwltool/ref_resolver.py @@ -1,6 +1,5 @@ import os import json -import yaml import copy import hashlib import logging @@ -87,7 +86,7 @@ def fetch(self, url): elif scheme == 'file': try: with open(path) as fp: - result = yaml.load(fp) + result = json.load(fp) except (OSError, IOError) as e: raise RuntimeError('Failed for %s: %s' % (url, e)) else: From e48f139783027d49974dc91ccbd1515e0aa41ddc Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 18 Nov 2014 11:36:26 -0500 Subject: [PATCH 015/221] Change paths to refer to rabix/common-workflow-language instead of curoverse/common-workflow-language. --- README.rst | 3 +-- setup.py | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 2e62afd6c..2f118a64c 100644 --- a/README.rst +++ b/README.rst @@ -10,7 +10,7 @@ Install From source:: - git clone https://github.com/curoverse/common-workflow-language.git + git clone https://github.com/rabix/common-workflow-language.git cd common-workflow-language/reference easy_install . @@ -32,4 +32,3 @@ Add:: import cwltool to your script. - diff --git a/setup.py b/setup.py index 6251a2961..d21df1753 100644 --- a/setup.py +++ b/setup.py @@ -28,8 +28,8 @@ long_description=open(README).read(), author='Common workflow language working group', author_email='common-workflow-language@googlegroups.com', - url="https://github.com/curoverse/common-workflow-language", - download_url="https://github.com/curoverse/common-workflow-language", + url="https://github.com/rabix/common-workflow-language", + download_url="https://github.com/rabix/common-workflow-language", license='Apache 2.0', packages=["cwltool"], package_data={'cwltool': ['schemas/*.json']}, From 736fc2078cc3500feab1d1b9909ce39ce2f8cc24 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 18 Nov 2014 13:53:49 -0500 Subject: [PATCH 016/221] Add version constraints to setup.py --- setup.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index d21df1753..d3f6b2505 100644 --- a/setup.py +++ b/setup.py @@ -35,8 +35,8 @@ package_data={'cwltool': ['schemas/*.json']}, include_package_data=True, install_requires=[ - 'jsonschema', - 'pyexecjs' + 'jsonschema >= 2.4.0', + 'pyexecjs >= 1.0.5' ], test_suite='tests', tests_require=[], From e47522bb665e840ce6fcfc851c1ba0ffe94415dc Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 18 Nov 2014 22:08:40 -0500 Subject: [PATCH 017/221] Populate the 'fetched' cache of ref_resolver with schemas so they do not need to be downloaded. Update schemas with full URIs (no more abbreviated metaschema.json). Cwltool now runs the job by default, use --conformance-test or --dry-run to just print out what it is going to do. 
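The cache seeding below works because `Loader.fetch` consults its `fetched` map before touching the network, so registering a bundled document under its canonical URL short-circuits the download. A minimal sketch of the mechanism (toy loader, real URL):

```
# Pre-populate a loader's fetch cache so a well-known schema URL resolves
# to a locally bundled document instead of an HTTP request.
class Loader(object):
    def __init__(self):
        self.fetched = {}

    def fetch(self, url):
        if url in self.fetched:
            return self.fetched[url]
        raise RuntimeError("would fetch %s over the network" % url)

loader = Loader()
loader.fetched["http://json-schema.org/draft-04/schema"] = {"title": "local copy"}
assert loader.fetch("http://json-schema.org/draft-04/schema")["title"] == "local copy"
```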
--- cwltool/job.py | 32 ++++++++++++++++++-------------- cwltool/main.py | 5 ++--- cwltool/ref_resolver.py | 2 +- cwltool/tool.py | 21 +++++++++------------ 4 files changed, 30 insertions(+), 30 deletions(-) diff --git a/cwltool/job.py b/cwltool/job.py index f0456cc56..19bd1a743 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -6,8 +6,11 @@ class Job(object): def remap_files(): pass - def run(self): - outdir = tempfile.mkdtemp() + def run(self, dry_run=False): + if not dry_run: + outdir = tempfile.mkdtemp() + else: + outdir = "/tmp" runtime = [] @@ -25,21 +28,22 @@ def run(self): stdin = None stdout = None - if self.stdin: - stdin = open(self.stdin, "rb") + print runtime + self.command_line - if self.stdout: - stdout = open(os.path.join(outdir, self.stdout), "wb") + if not dry_run: + if self.stdin: + stdin = open(self.stdin, "rb") - print runtime + self.command_line + if self.stdout: + stdout = open(os.path.join(outdir, self.stdout), "wb") - sp = subprocess.Popen(runtime + self.command_line, shell=False, stdin=stdin, stdout=stdout) - sp.wait() + sp = subprocess.Popen(runtime + self.command_line, shell=False, stdin=stdin, stdout=stdout) + sp.wait() - if stdin: - stdin.close() + if stdin: + stdin.close() - if stdout: - stdout.close() + if stdout: + stdout.close() - print "Output directory is %s" % outdir + print "Output directory is %s" % outdir diff --git a/cwltool/main.py b/cwltool/main.py index d1c9992a5..d4f3dca23 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -15,7 +15,7 @@ def main(): parser.add_argument("--conformance-test", action="store_true") parser.add_argument("--basedir", type=str) parser.add_argument("--no-container", action="store_true") - parser.add_argument("-x", action="store_true", help="Execute") + parser.add_argument("--dry-run", action="store_true", help="Do not execute") args = parser.parse_args() @@ -46,8 +46,7 @@ def main(): print e return 1 - if args.x: - job.run() + job.run(dry_run=args.dry_run) return 0 diff --git a/cwltool/ref_resolver.py b/cwltool/ref_resolver.py index aae572d94..090f051ad 100644 --- a/cwltool/ref_resolver.py +++ b/cwltool/ref_resolver.py @@ -81,7 +81,7 @@ def fetch(self, url): try: resp.raise_for_status() except Exception as e: - raise RuntimeError(url, cause=e) + raise RuntimeError(url, e) result = resp.json() elif scheme == 'file': try: diff --git a/cwltool/tool.py b/cwltool/tool.py index 0dbe7a00a..6afd644c8 100644 --- a/cwltool/tool.py +++ b/cwltool/tool.py @@ -9,28 +9,25 @@ from job import Job from jsonschema.validators import Draft4Validator +import ref_resolver from ref_resolver import from_url, resolve_pointer module_dir = os.path.dirname(os.path.abspath(__file__)) +jsonschemapath = os.path.join(module_dir, 'schemas/json-schema-draft-04.json') +with open(jsonschemapath) as f: + jsonschemapath_doc = json.load(f) + +ref_resolver.loader.fetched["http://json-schema.org/draft-04/schema"] = jsonschemapath_doc + toolpath = os.path.join(module_dir, 'schemas/tool.json') with open(toolpath) as f: tool_schema_doc = json.load(f) with open(os.path.join(module_dir, 'schemas/metaschema.json')) as f: metaschema = json.load(f) -def fix_metaschema(m): - if isinstance(m, dict): - if '$ref' in m and m['$ref'].startswith("metaschema.json"): - m['$ref'] = "file:%s/schemas/%s" % (module_dir, m['$ref']) - else: - for k in m: - fix_metaschema(m[k]) - if isinstance(m, list): - for k in m: - fix_metaschema(k) - -fix_metaschema(tool_schema_doc) 
+ref_resolver.loader.fetched["https://raw.githubusercontent.com/rabix/common-workflow-language/master/schemas/tool.json"] = tool_schema_doc +ref_resolver.loader.fetched["https://raw.githubusercontent.com/rabix/common-workflow-language/master/schemas/metaschema.json"] = metaschema tool_schema = Draft4Validator(tool_schema_doc) From 42445bcbb180169ef4e0ed06122f125d8bb006a6 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 18 Nov 2014 22:17:26 -0500 Subject: [PATCH 018/221] Example documents were being read in with an overly permissive json parser and actually had syntax errors. Fixed. Don't try to run the job when --conformance-tests is specified. Tests pass again. --- cwltool/main.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cwltool/main.py b/cwltool/main.py index d4f3dca23..7b4265b24 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -41,13 +41,12 @@ def main(): print '%s%s%s' % (' '.join(job.command_line), ' < %s' % (job.stdin) if job.stdin else '', ' > %s' % (job.stdout) if job.stdout else '') + job.run(dry_run=args.dry_run) except jsonschema.exceptions.ValidationError as e: print "Job order failed validation" print e return 1 - job.run(dry_run=args.dry_run) - return 0 if __name__ == "__main__": From 77aa8ff12120a16f056e0c4803c12c2fdc4acb6a Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 25 Nov 2014 13:41:03 -0500 Subject: [PATCH 019/221] Javascript expressions now run in a Node.js sandbox. Drop dependency on pyexecjs, but depends implicitly on nodejs being installed. --- cwltool/sandboxjs.py | 30 ++++++++++++++++++++++++++++++ cwltool/tool.py | 16 ++++++++-------- setup.py | 3 +-- 3 files changed, 39 insertions(+), 10 deletions(-) create mode 100644 cwltool/sandboxjs.py diff --git a/cwltool/sandboxjs.py b/cwltool/sandboxjs.py new file mode 100644 index 000000000..6a1243829 --- /dev/null +++ b/cwltool/sandboxjs.py @@ -0,0 +1,30 @@ +import subprocess +import json +import threading + +class JavascriptException(Exception): + pass + +def execjs(js): + nodejs = subprocess.Popen(["nodejs"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + fn = "\"use strict\";\n(function()%s)()" % (js if isinstance(js, basestring) and len(js) > 1 and js[0] == '{' else ("{return (%s);}" % js)) + script = "console.log(JSON.stringify(require(\"vm\").runInNewContext(%s, {})))" % json.dumps(fn) + + def term(): + try: + nodejs.terminate() + except OSError: + pass + + # Time out after 5 seconds + tm = threading.Timer(5, term) + tm.start() + + stdoutdata, stderrdata = nodejs.communicate(script) + tm.cancel() + + if stderrdata.strip() or nodejs.returncode != 0: + raise JavascriptException(script + "\n" + stderrdata) + else: + return json.loads(stdoutdata) diff --git a/cwltool/tool.py b/cwltool/tool.py index 6afd644c8..b95fe9df7 100644 --- a/cwltool/tool.py +++ b/cwltool/tool.py @@ -1,7 +1,7 @@ import os import pprint import json -import execjs +import sandboxjs import copy import sys import jsonschema.exceptions @@ -71,17 +71,17 @@ def fix_file_type(t): def jseval(job=None, expression=None): if expression.startswith('{'): - exp_tpl = '''function () { - $job = %s; - return function()%s();}() + exp_tpl = '''{ + var $job = %s; + return function()%s();} ''' else: - exp_tpl = '''function () { - $job = %s; - return %s;}() + exp_tpl = '''{ + var $job = %s; + return %s;} ''' exp = exp_tpl % (json.dumps(job), expression) - return execjs.eval(exp) + return sandboxjs.execjs(exp) def resolve_eval(job, v): if isinstance(v, dict): diff --git a/setup.py b/setup.py 
index d3f6b2505..c48e52fca 100644 --- a/setup.py +++ b/setup.py @@ -35,8 +35,7 @@ package_data={'cwltool': ['schemas/*.json']}, include_package_data=True, install_requires=[ - 'jsonschema >= 2.4.0', - 'pyexecjs >= 1.0.5' + 'jsonschema >= 2.4.0' ], test_suite='tests', tests_require=[], From b9c74462cb9d1940b1990365bc4e75eaab572840 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 28 Nov 2014 15:53:39 -0500 Subject: [PATCH 020/221] Initial support for "outputs" section of tool file in reference cwltool. --- cwltool/job.py | 26 +++++++++++++++++++++++--- cwltool/main.py | 2 +- cwltool/tool.py | 1 + setup.py | 4 ++-- 4 files changed, 27 insertions(+), 6 deletions(-) diff --git a/cwltool/job.py b/cwltool/job.py index 19bd1a743..addc8f7dc 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -1,11 +1,10 @@ import subprocess import os import tempfile +import tool +import glob class Job(object): - def remap_files(): - pass - def run(self, dry_run=False): if not dry_run: outdir = tempfile.mkdtemp() @@ -47,3 +46,24 @@ def run(self, dry_run=False): stdout.close() print "Output directory is %s" % outdir + return self.collect_outputs(self.tool.tool["outputs"], outdir) + else: + return None + + def collect_outputs(self, schema, outdir): + r = None + if isinstance(schema, dict): + if "adapter" in schema: + adapter = schema["adapter"] + if "glob" in adapter: + r = [{"path": g} for g in glob.glob(os.path.join(outdir, adapter["glob"]))] + if "value" in adapter: + r = tool.resolve_eval(self.joborder, adapter["value"]) + if not r and "properties" in schema: + r = {} + for k, v in schema["properties"].items(): + out = self.collect_outputs(v, outdir) + if out: + r[k] = out + + return r diff --git a/cwltool/main.py b/cwltool/main.py index 7b4265b24..0b307c572 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -41,7 +41,7 @@ def main(): print '%s%s%s' % (' '.join(job.command_line), ' < %s' % (job.stdin) if job.stdin else '', ' > %s' % (job.stdout) if job.stdout else '') - job.run(dry_run=args.dry_run) + print job.run(dry_run=args.dry_run) except jsonschema.exceptions.ValidationError as e: print "Job order failed validation" print e diff --git a/cwltool/tool.py b/cwltool/tool.py index b95fe9df7..9e98d7d85 100644 --- a/cwltool/tool.py +++ b/cwltool/tool.py @@ -308,6 +308,7 @@ def job(self, joborder, basedir, use_container=True): referenced_files = filter(lambda a: a is not None, flatten(map(lambda a: find_files(a, joborder), adapters))) j = Job() + j.joborder = joborder j.tool = self j.container = None diff --git a/setup.py b/setup.py index c48e52fca..423c80cfc 100644 --- a/setup.py +++ b/setup.py @@ -28,8 +28,8 @@ long_description=open(README).read(), author='Common workflow language working group', author_email='common-workflow-language@googlegroups.com', - url="https://github.com/rabix/common-workflow-language", - download_url="https://github.com/rabix/common-workflow-language", + url="https://github.com/common-workflow-language/common-workflow-language", + download_url="https://github.com/common-workflow-language/common-workflow-language", license='Apache 2.0', packages=["cwltool"], package_data={'cwltool': ['schemas/*.json']}, From e2ce606cac4c5af34844e16b6f2e67d7a9a8919b Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 28 Nov 2014 16:04:37 -0500 Subject: [PATCH 021/221] "glob" adapter only returns first hit unless type is "array" --- cwltool/job.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cwltool/job.py 
b/cwltool/job.py index addc8f7dc..03b4aa076 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -57,6 +57,11 @@ def collect_outputs(self, schema, outdir): adapter = schema["adapter"] if "glob" in adapter: r = [{"path": g} for g in glob.glob(os.path.join(outdir, adapter["glob"]))] + if not ("type" in schema and schema["type"] == "array"): + if r: + r = r[0] + else: + r = None if "value" in adapter: r = tool.resolve_eval(self.joborder, adapter["value"]) if not r and "properties" in schema: From 3752a8876ede27d99610ea781c31b540783ade04 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 5 Dec 2014 11:20:16 -0500 Subject: [PATCH 022/221] Require that tool description documents say what tool schema version they are using. --- cwltool/tool.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cwltool/tool.py b/cwltool/tool.py index 9e98d7d85..3d5d4c781 100644 --- a/cwltool/tool.py +++ b/cwltool/tool.py @@ -26,8 +26,12 @@ with open(os.path.join(module_dir, 'schemas/metaschema.json')) as f: metaschema = json.load(f) -ref_resolver.loader.fetched["https://raw.githubusercontent.com/rabix/common-workflow-language/master/schemas/tool.json"] = tool_schema_doc -ref_resolver.loader.fetched["https://raw.githubusercontent.com/rabix/common-workflow-language/master/schemas/metaschema.json"] = metaschema +SCHEMA_URL_PREFIX = "https://raw.githubusercontent.com/common-workflow-language/common-workflow-language/draft-1/schemas/" +TOOL_SCHEMA_URL = SCHEMA_URL_PREFIX + "tool.json" +METASCHEMA_SCHEMA_URL = SCHEMA_URL_PREFIX + "metaschema.json" + +ref_resolver.loader.fetched[TOOL_SCHEMA_URL] = tool_schema_doc +ref_resolver.loader.fetched[METASCHEMA_SCHEMA_URL] = metaschema tool_schema = Draft4Validator(tool_schema_doc) @@ -278,6 +282,8 @@ class Tool(object): def __init__(self, toolpath_object): self.tool = toolpath_object fix_file_type(self.tool) + if "schema" not in self.tool or self.tool["schema"] != TOOL_SCHEMA_URL: + raise Exception("Missing or invalid 'schema' field in tool description document, must be %s" % TOOL_SCHEMA_URL) tool_schema.validate(self.tool) def job(self, joborder, basedir, use_container=True): From da45892482407778c4861a63e90c0c920afa46b1 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 5 Dec 2014 14:46:42 -0500 Subject: [PATCH 023/221] Spec defines $ref, $mixin and $expr. Decided $expr didn't need a "lang" field and made it simpler by eliminating the inner object. Replaced references to "rabix" github organization with "common-workflow-language". Updated schema references to specifically refer to draft-1 branch. --- README.rst | 2 +- cwltool/tool.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 2f118a64c..6a60be272 100644 --- a/README.rst +++ b/README.rst @@ -10,7 +10,7 @@ Install From source:: - git clone https://github.com/rabix/common-workflow-language.git + git clone https://github.com/common-workflow-language/common-workflow-language.git cd common-workflow-language/reference easy_install . 
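To illustrate the $expr simplification described in the commit message above (a sketch with a hypothetical expression; the exact document shapes are assumptions inferred from the accessor change in the tool.py diff below):

    # Old shape: $expr carried an inner object, read via v["$expr"]["value"].
    old_style = {"$expr": {"value": "$job.x + 1"}}
    # New shape: the inner object is eliminated; $expr is the expression itself.
    new_style = {"$expr": "$job.x + 1"}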
diff --git a/cwltool/tool.py b/cwltool/tool.py index 3d5d4c781..034e24577 100644 --- a/cwltool/tool.py +++ b/cwltool/tool.py @@ -90,7 +90,7 @@ def jseval(job=None, expression=None): def resolve_eval(job, v): if isinstance(v, dict): if "$expr" in v: - return jseval(job, v["$expr"]["value"]) + return jseval(job, v["$expr"]) elif "$job" in v: return resolve_pointer(job, v["$job"]) return v From 5fcb67f922ac3cead49675dca4c669bf64866b96 Mon Sep 17 00:00:00 2001 From: Nebojsa Tijanic Date: Mon, 15 Dec 2014 17:04:07 +0100 Subject: [PATCH 024/221] See previous commit --- cwltool/job.py | 11 +++++++++++ tests/test_examples.py | 7 +++++++ 2 files changed, 18 insertions(+) diff --git a/cwltool/job.py b/cwltool/job.py index 03b4aa076..1b816a78c 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -3,6 +3,8 @@ import tempfile import tool import glob +import json + class Job(object): def run(self, dry_run=False): @@ -11,6 +13,9 @@ def run(self, dry_run=False): else: outdir = "/tmp" + with open(os.path.join(outdir, "job.cwl.json"), "w") as fp: + json.dump(self.joborder, fp) + runtime = [] if self.container and self.container.get("type") == "docker": @@ -51,6 +56,12 @@ def run(self, dry_run=False): return None def collect_outputs(self, schema, outdir): + result_path = os.path.join(outdir, "result.cwl.json") + if os.path.isfile(result_path): + print "Result file found." + with open(result_path) as fp: + return json.load(fp) + r = None if isinstance(schema, dict): if "adapter" in schema: diff --git a/tests/test_examples.py b/tests/test_examples.py index 84537602a..891b9f5fb 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -16,6 +16,13 @@ def test_job_order(self): './rabix/tests/test-files/example_human_Illumina.pe_1.fastq', './rabix/tests/test-files/example_human_Illumina.pe_2.fastq']) + def test_no_adapters(self): + t = tool.Tool(from_url("../examples/add_ints-tool.json")) + job = t.job(from_url("../examples/add_ints-job.json"), basedir='.') + result = job.run() + print result + self.assertEqual(result['c'], 3) + if __name__ == '__main__': unittest.main() From 58910919cff382e63f2282bc1f651c057f2e3ada Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 22 Jan 2015 22:19:15 -0500 Subject: [PATCH 025/221] Fix uri in bwa example. Cwltool now runs "docker pull" before running the job. --- cwltool/job.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cwltool/job.py b/cwltool/job.py index 1b816a78c..026b2c140 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -19,6 +19,8 @@ def run(self, dry_run=False): runtime = [] if self.container and self.container.get("type") == "docker": + if "uri" in self.container: + subprocess.call("docker", "pull", self.container["uri"]) runtime = ["docker", "run", "-i"] for d in self.pathmapper.dirs: runtime.append("--volume=%s:%s:ro" % (d, self.pathmapper.dirs[d])) From 02b17c7b53eaa2bf5a44e6cbee6a657fd36467e9 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 23 Jan 2015 16:31:20 -0500 Subject: [PATCH 026/221] Add $import and $apply directives, expressionlib and generatefiles section. Now loads as YAML instead of json. Fixed boolean handling. Updated spec. 
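A rough standalone sketch of how an $apply directive can be compiled into a JavaScript call expression, mirroring in simplified form the resolve_eval logic added in the tool.py diff below (the add example and the pass-through resolver are hypothetical): the first list element names a function, and the remaining elements are resolved recursively and serialized as JSON argument literals.

    import json

    def apply_to_js(resolve, job, directive):
        # First list element names the function; the rest are arguments,
        # resolved recursively and serialized as JSON literals.
        parts = directive["$apply"]
        args = [json.dumps(resolve(job, p)) for p in parts[1:]]
        return "%s(%s)" % (parts[0], ",".join(args))

    resolve = lambda job, v: v  # pass-through resolver for the sketch
    print(apply_to_js(resolve, {}, {"$apply": ["add", 1, 2]}))  # -> add(1,2)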
--- cwltool/job.py | 21 ++- cwltool/main.py | 7 +- cwltool/ref_resolver.py | 14 +- cwltool/sandboxjs.py | 6 +- cwltool/tool.py | 334 +++++++++++++++++++++++----------------- setup.py | 4 +- 6 files changed, 220 insertions(+), 166 deletions(-) diff --git a/cwltool/job.py b/cwltool/job.py index 026b2c140..2295acbcf 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -4,10 +4,10 @@ import tool import glob import json - +import yaml class Job(object): - def run(self, dry_run=False): + def run(self, dry_run=False, pull_image=True): if not dry_run: outdir = tempfile.mkdtemp() else: @@ -19,8 +19,8 @@ def run(self, dry_run=False): runtime = [] if self.container and self.container.get("type") == "docker": - if "uri" in self.container: - subprocess.call("docker", "pull", self.container["uri"]) + if "uri" in self.container and pull_image: + subprocess.call(["docker", "pull", self.container["uri"]]) runtime = ["docker", "run", "-i"] for d in self.pathmapper.dirs: runtime.append("--volume=%s:%s:ro" % (d, self.pathmapper.dirs[d])) @@ -43,6 +43,10 @@ def run(self, dry_run=False): if self.stdout: stdout = open(os.path.join(outdir, self.stdout), "wb") + for t in self.generatefiles: + with open(os.path.join(outdir, t), "w") as f: + f.write(self.generatefiles[t]) + sp = subprocess.Popen(runtime + self.command_line, shell=False, stdin=stdin, stdout=stdout) sp.wait() @@ -53,16 +57,17 @@ def run(self, dry_run=False): stdout.close() print "Output directory is %s" % outdir - return self.collect_outputs(self.tool.tool["outputs"], outdir) - else: - return None + if 'outputs' in self.tool.tool: + return self.collect_outputs(self.tool.tool["outputs"], outdir) + + return None def collect_outputs(self, schema, outdir): result_path = os.path.join(outdir, "result.cwl.json") if os.path.isfile(result_path): print "Result file found." 
with open(result_path) as fp: - return json.load(fp) + return yaml.load(fp) r = None if isinstance(schema, dict): diff --git a/cwltool/main.py b/cwltool/main.py index 0b307c572..e50470516 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -14,7 +14,8 @@ def main(): parser.add_argument("job_order", type=str) parser.add_argument("--conformance-test", action="store_true") parser.add_argument("--basedir", type=str) - parser.add_argument("--no-container", action="store_true") + parser.add_argument("--no-container", action="store_true", help="Do not execute in a Docker container, even if one is specified in the tool file") + parser.add_argument("--no-pull", default=False, action="store_true", help="Do not try to pull the Docker image") parser.add_argument("--dry-run", action="store_true", help="Do not execute") args = parser.parse_args() @@ -36,12 +37,14 @@ def main(): a["stdin"] = job.stdin if job.stdout: a["stdout"] = job.stdout + if job.generatefiles: + a["generatefiles"] = job.generatefiles print json.dumps(a) else: print '%s%s%s' % (' '.join(job.command_line), ' < %s' % (job.stdin) if job.stdin else '', ' > %s' % (job.stdout) if job.stdout else '') - print job.run(dry_run=args.dry_run) + print "Output json is " + json.dumps(job.run(dry_run=args.dry_run, pull_image=(not args.no_pull))) except jsonschema.exceptions.ValidationError as e: print "Job order failed validation" print e diff --git a/cwltool/ref_resolver.py b/cwltool/ref_resolver.py index 090f051ad..c71f7cdb2 100644 --- a/cwltool/ref_resolver.py +++ b/cwltool/ref_resolver.py @@ -6,6 +6,7 @@ import collections import requests import urlparse +import yaml log = logging.getLogger(__name__) @@ -82,11 +83,11 @@ def fetch(self, url): resp.raise_for_status() except Exception as e: raise RuntimeError(url, e) - result = resp.json() + result = yaml.load(resp.text) elif scheme == 'file': try: with open(path) as fp: - result = json.load(fp) + result = yaml.load(fp) except (OSError, IOError) as e: raise RuntimeError('Failed for %s: %s' % (url, e)) else: @@ -142,12 +143,3 @@ def to_json(obj, fp=None): def from_url(url, base_url=None): return loader.load(url, base_url) - - -def test_tmap(): - path = os.path.join(os.path.dirname(__file__), '../examples/tmap.yml') - expected_path = os.path.join(os.path.dirname(__file__), '../examples/tmap_resolved.json') - doc = loader.load(path) - with open(expected_path) as fp: - expected = json.load(fp) - assert doc == expected diff --git a/cwltool/sandboxjs.py b/cwltool/sandboxjs.py index 6a1243829..752ff02db 100644 --- a/cwltool/sandboxjs.py +++ b/cwltool/sandboxjs.py @@ -5,12 +5,14 @@ class JavascriptException(Exception): pass -def execjs(js): +def execjs(js, jslib): nodejs = subprocess.Popen(["nodejs"], stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) - fn = "\"use strict\";\n(function()%s)()" % (js if isinstance(js, basestring) and len(js) > 1 and js[0] == '{' else ("{return (%s);}" % js)) + fn = "\"use strict\";%s\n(function()%s)()" % (jslib, js if isinstance(js, basestring) and len(js) > 1 and js[0] == '{' else ("{return (%s);}" % js)) script = "console.log(JSON.stringify(require(\"vm\").runInNewContext(%s, {})))" % json.dumps(fn) + #print script + def term(): try: nodejs.terminate() diff --git a/cwltool/tool.py b/cwltool/tool.py index 034e24577..c3bf8c336 100644 --- a/cwltool/tool.py +++ b/cwltool/tool.py @@ -6,6 +6,8 @@ import sys import jsonschema.exceptions import random +import requests +import urlparse from job import Job from jsonschema.validators import Draft4Validator @@ 
-73,156 +75,194 @@ def fix_file_type(t): if isinstance(t[k], dict): fix_file_type(t[k]) -def jseval(job=None, expression=None): - if expression.startswith('{'): - exp_tpl = '''{ - var $job = %s; - return function()%s();} - ''' - else: - exp_tpl = '''{ - var $job = %s; - return %s;} - ''' - exp = exp_tpl % (json.dumps(job), expression) - return sandboxjs.execjs(exp) - -def resolve_eval(job, v): - if isinstance(v, dict): - if "$expr" in v: - return jseval(job, v["$expr"]) - elif "$job" in v: - return resolve_pointer(job, v["$job"]) - return v - -def adapt_inputs(schema, job, inp, key): - adapters = [] - - if 'oneOf' in schema: - for one in schema["oneOf"]: - try: - Draft4Validator(one).validate(inp) - schema = one - break - except jsonschema.exceptions.ValidationError: - pass +class Builder(object): - if isinstance(inp, dict): - if "properties" in schema: - for i in inp: - a = adapt_inputs(schema["properties"][i], job, inp[i], i) - adapters.extend(a) - elif isinstance(inp, list): - for n, i in enumerate(inp): - a = adapt_inputs(schema["items"], job, i, format(n, '06')) - for x in a: - x["order"].insert(0, n) - adapters.extend(a) - - if 'adapter' in schema: - a = copy.copy(schema['adapter']) - - if "order" in a: - a["order"] = [a["order"], key] + def jseval(self, job=None, expression=None): + if expression.startswith('{'): + exp_tpl = '''{ + return function()%s();} + ''' else: - a["order"] = [1000000, key] + exp_tpl = '''{ + return %s;} + ''' + exp = exp_tpl % (expression) + return sandboxjs.execjs(exp, "var $job = %s;%s" % (json.dumps(job), self.jslib)) + + def resolve_eval(self, job, v): + if isinstance(v, dict): + if "$expr" in v: + # Support $import of the $expr + return self.jseval(job, self.resolve_eval(job, v["$expr"])) + if "$apply" in v: + # Support $import of the $expr + ex = "" + for i, p in enumerate(v["$apply"]): + if i == 0: + ex += p + "(" + else: + ex += json.dumps(self.resolve_eval(job, p)) + if i < len(v["$apply"])-1: + ex += "," + ex += ")" + return self.jseval(job, ex) + elif "$job" in v: + return resolve_pointer(job, v["$job"]) + elif "$import" in v: + # TODO: check checksum + url = urlparse.urljoin(self.base_url, v["$import"]) + split = urlparse.urlsplit(url) + scheme, path = split.scheme, split.path + if scheme in ['http', 'https']: + resp = requests.get(url) + try: + resp.raise_for_status() + except Exception as e: + raise RuntimeError(url, e) + return resp.text + elif scheme == 'file': + try: + with open(path) as fp: + return fp.read() + except (OSError, IOError) as e: + raise RuntimeError('Failed for %s: %s' % (url, e)) + else: + raise ValueError('Unsupported scheme: %s' % scheme) + return v + + def adapt_inputs(self, schema, job, inp, key): + adapters = [] + + if 'oneOf' in schema: + for one in schema["oneOf"]: + try: + Draft4Validator(one).validate(inp) + schema = one + break + except jsonschema.exceptions.ValidationError: + pass + + if isinstance(inp, dict): + if "properties" in schema: + for i in inp: + a = self.adapt_inputs(schema["properties"][i], job, inp[i], i) + adapters.extend(a) + elif isinstance(inp, list): + for n, i in enumerate(inp): + a = self.adapt_inputs(schema["items"], job, i, format(n, '06')) + for x in a: + x["order"].insert(0, n) + adapters.extend(a) - a["schema"] = schema + if 'adapter' in schema: + a = copy.copy(schema['adapter']) - for x in adapters: - x["order"] = a["order"] + x["order"] + if "order" in a: + a["order"] = [a["order"], key] + else: + a["order"] = [1000000, key] - if not 'value' in a and len(adapters) == 0: - a['value'] = inp 
+ a["schema"] = schema - if len(adapters) == 0 or "value" in a: - adapters.insert(0, a) + for x in adapters: + x["order"] = a["order"] + x["order"] - return adapters + if not 'value' in a and len(adapters) == 0: + a['value'] = inp -def to_str(schema, value, base_url, path_mapper): - if "$ref" in schema: - schema = from_url(schema["$ref"], base_url) + if len(adapters) == 0 or "value" in a: + adapters.insert(0, a) - if 'oneOf' in schema: - for a in schema['oneOf']: - v = to_str(a, value, base_url, path_mapper) - if v is not None: - return v - return None - elif 'type' in schema: - if schema["type"] == "array" and isinstance(value, list): - return [to_str(schema["items"], v, base_url, path_mapper) for v in value] - elif schema["type"] == "object" and isinstance(value, dict): - if "path" in value: - return path_mapper(value["path"]) - else: - raise Exception("Not expecting a dict %s" % (value)) - elif schema["type"] in ("string", "number", "integer"): - return str(value) - elif schema["type"] == "boolean": - # need special handling for flags - return str(value) - - return None - -def find_files(adapter, job): - if "value" in adapter: - value = resolve_eval(job, adapter["value"]) - else: - return None + return adapters - schema = adapter["schema"] + def to_str(self, schema, value, path_mapper): + if "$ref" in schema: + schema = from_url(schema["$ref"], self.ref_base_url) - if "$ref" in schema: - schema = from_url(schema["$ref"], adapter.get("$ref_base_url")) + if 'oneOf' in schema: + for a in schema['oneOf']: + v = self.to_str(a, value, path_mapper) + if v is not None: + return v + return None + elif 'type' in schema: + if schema["type"] == "array" and isinstance(value, list): + return [self.to_str(schema["items"], v, path_mapper) for v in value] + elif schema["type"] == "object" and isinstance(value, dict): + if "path" in value: + return path_mapper(value["path"]) + else: + raise Exception("Not expecting a dict %s" % (value)) + elif schema["type"] in ("string", "number", "integer"): + return str(value) + elif schema["type"] == "boolean": + # handled specially by adapt() + return value - if 'oneOf' in schema: - for a in schema['oneOf']: - v = find_files(a, value) - if v is not None: - return v return None - elif 'type' in schema: - if schema["type"] == "array" and isinstance(value, list): - return [find_files({"value": v, - "schema": schema["items"]}, job) for v in value] - elif schema["type"] == "object" and isinstance(value, dict): - if "path" in value: - return value["path"] - else: - raise Exception("Not expecting a dict %s" % (value)) - return None + def find_files(self, adapter, job): + if "value" in adapter: + value = self.resolve_eval(job, adapter["value"]) + else: + return None + + schema = adapter["schema"] + + if "$ref" in schema: + schema = from_url(schema["$ref"], self.ref_base_url) + + if 'oneOf' in schema: + for a in schema['oneOf']: + v = self.find_files(a, value) + if v is not None: + return v + return None + elif 'type' in schema: + if schema["type"] == "array" and isinstance(value, list): + return [self.find_files({"value": v, + "schema": schema["items"]}, job) for v in value] + elif schema["type"] == "object" and isinstance(value, dict): + if "path" in value: + return value["path"] + else: + raise Exception("Not expecting a dict %s" % (value)) + + return None -def adapt(adapter, job, path_mapper): - if "value" in adapter: - value = resolve_eval(job, adapter["value"]) - else: - raise Exception("No value in adapter") + def adapt(self, adapter, job, path_mapper): + if "value" 
in adapter: + value = self.resolve_eval(job, adapter["value"]) + else: + raise Exception("No value in adapter") - value = to_str(adapter["schema"], value, adapter.get("$ref_base_url"), path_mapper) + value = self.to_str(adapter["schema"], value, path_mapper) - sep = adapter["separator"] if "separator" in adapter else '' + sep = adapter["separator"] if "separator" in adapter else " " - if 'itemSeparator' in adapter: - if adapter["prefix"]: - l = [adapter["prefix"] + adapter['itemSeparator'].join(value)] - else: - l = [adapter['itemSeparator'].join(value)] - elif 'prefix' in adapter: - l = [] - for v in each(value): - if sep == " ": + if 'itemSeparator' in adapter: + if adapter["prefix"]: + l = [adapter["prefix"] + adapter['itemSeparator'].join(value)] + else: + l = [adapter['itemSeparator'].join(value)] + elif 'prefix' in adapter: + l = [] + if value is True: l.append(adapter["prefix"]) - l.append(v) + elif value is False: + pass else: - l.append(adapter["prefix"] + sep + v) - else: - l = [value] + for v in each(value): + if sep == " ": + l.append(adapter["prefix"]) + l.append(v) + else: + l.append(adapter["prefix"] + sep + v) + else: + l = [value] - return l + return l class PathMapper(object): # Maps files to their absolute path @@ -293,10 +333,19 @@ def job(self, joborder, basedir, use_container=True): adapter = self.tool["adapter"] adapters = [{"order": [-1000000], "schema": tool_schema_doc["properties"]["adapter"]["properties"]["baseCmd"], - "value": adapter['baseCmd'], - "$ref_base_url": "file:"+toolpath + "value": adapter['baseCmd'] }] + builder = Builder() + builder.base_url = "file:"+os.path.abspath(basedir)+"/" + builder.ref_base_url = "file:"+toolpath + + requirements = self.tool.get("requirements") + builder.jslib = '' + if requirements and 'expressionlib' in requirements: + for ex in requirements['expressionlib']: + builder.jslib += builder.resolve_eval(joborder, ex) + "\n" + if "args" in adapter: for i, a in enumerate(adapter["args"]): a = copy.copy(a) @@ -307,11 +356,11 @@ def job(self, joborder, basedir, use_container=True): a["schema"] = tool_schema_doc["definitions"]["strOrExpr"] adapters.append(a) - adapters.extend(adapt_inputs(self.tool['inputs'], inputs, inputs, "")) + adapters.extend(builder.adapt_inputs(self.tool['inputs'], inputs, inputs, "")) adapters.sort(key=lambda a: a["order"]) - referenced_files = filter(lambda a: a is not None, flatten(map(lambda a: find_files(a, joborder), adapters))) + referenced_files = filter(lambda a: a is not None, flatten(map(lambda a: builder.find_files(a, joborder), adapters))) j = Job() j.joborder = joborder @@ -320,18 +369,16 @@ def job(self, joborder, basedir, use_container=True): j.container = None if 'stdin' in adapter: - j.stdin = flatten(adapt({"value": adapter['stdin'], - "schema": tool_schema_doc["properties"]["adapter"]["properties"]["stdin"], - "$ref_base_url": "file:"+toolpath + j.stdin = flatten(builder.adapt({"value": adapter['stdin'], + "schema": tool_schema_doc["properties"]["adapter"]["properties"]["stdin"] }, joborder, None))[0] referenced_files.append(j.stdin) else: j.stdin = None if 'stdout' in adapter: - j.stdout = flatten(adapt({"value": adapter['stdout'], - "schema": tool_schema_doc["properties"]["adapter"]["properties"]["stdout"], - "$ref_base_url": "file:"+toolpath + j.stdout = flatten(builder.adapt({"value": adapter['stdout'], + "schema": tool_schema_doc["properties"]["adapter"]["properties"]["stdout"] }, joborder, None))[0] if os.path.isabs(j.stdout): @@ -339,10 +386,13 @@ def job(self, joborder, basedir, 
use_container=True): else: j.stdout = None + j.generatefiles = {} + for t in adapter.get("generatefiles", []): + j.generatefiles[builder.resolve_eval(inputs, t["name"])] = builder.resolve_eval(inputs, t["value"]) + d = None - a = self.tool.get("requirements") - if a: - b = a.get("environment") + if requirements: + b = requirements.get("environment") if b: c = b.get("container") if use_container and c: @@ -356,7 +406,7 @@ def job(self, joborder, basedir, use_container=True): if j.stdin: j.stdin = j.stdin if os.path.isabs(j.stdin) else os.path.join(basedir, j.stdin) - j.command_line = flatten(map(lambda a: adapt(a, joborder, d.mapper), adapters)) + j.command_line = flatten(map(lambda a: builder.adapt(a, joborder, d.mapper), adapters)) j.pathmapper = d diff --git a/setup.py b/setup.py index 423c80cfc..32fc6275f 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,9 @@ package_data={'cwltool': ['schemas/*.json']}, include_package_data=True, install_requires=[ - 'jsonschema >= 2.4.0' + 'jsonschema >= 2.4.0', + 'requests', + 'PyYAML' ], test_suite='tests', tests_require=[], From b3ca4cea2f97b8ed6b2155b44dd4cc056df42b4d Mon Sep 17 00:00:00 2001 From: Nebojsa Tijanic Date: Fri, 30 Jan 2015 14:34:54 +0100 Subject: [PATCH 027/221] fixed jsons, most conformance tests passing --- cwltool/tool_new.py | 156 +++++++++++++++++++++++++++ cwltool/workflow.py | 252 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 408 insertions(+) create mode 100755 cwltool/tool_new.py create mode 100644 cwltool/workflow.py diff --git a/cwltool/tool_new.py b/cwltool/tool_new.py new file mode 100755 index 000000000..a13b16f9e --- /dev/null +++ b/cwltool/tool_new.py @@ -0,0 +1,156 @@ +#!/usr/bin/env python + +import os +import sys +import json +from collections import namedtuple +from tool import resolve_pointer, flatten +import sandboxjs + +Args = namedtuple('Args', ['position', 'args']) +merge_args = lambda args: flatten([a.args for a in sorted(args, key=lambda x: x.position)]) + + +def jseval(job, expression): + if expression.startswith('{'): + exp_tpl = '''{ + return function()%s();} + ''' + else: + exp_tpl = '''{ + return %s;} + ''' + exp = exp_tpl % expression + return sandboxjs.execjs(exp, "var $job = %s;" % json.dumps(job)) + + +def resolve_transform(job, val): + if not isinstance(val, dict) or val.get('@type') != 'Transform': + return val + lang = val.get('language') + expr = val.get('value') + if lang == 'javascript': + return jseval(job, expr) + elif lang == 'jsonpointer': + return resolve_pointer(job, expr) + else: + raise Exception('Unknown language for Transform: %s' % lang) + + +def get_args(job, adapter, value=None, schema=None, key=None): + position = adapter.get('position', 0) + prefix = adapter.get('prefix') + sep = adapter.get('separator', ' ') + item_sep = adapter.get('itemSeparator') + arg_val = adapter.get('argValue') + pos = [position, key] + + if isinstance(arg_val, dict) and arg_val.get('@type') == 'Transform': + value = resolve_transform(job, arg_val) + elif isinstance(value, dict) and value.get('@type') == 'File': + value = value.get('path') + + if value is None: + return Args(pos, []) + + if isinstance(value, bool): + return Args(pos, [prefix]) if value else Args(pos, []) + + if isinstance(value, dict): + if not schema: + return Args(pos, []) + args = [] + for k, v in value.iteritems(): + item_schema = filter(lambda x: x['name'] == k, schema['fields'])[0] + item_adapter = item_schema.get('adapter') + if item_adapter is not None: + args.append(get_args(job, item_adapter, v, item_schema, 
k)) + return Args(pos, merge_args(args)) + + if isinstance(value, list): + # TODO: complex item types + items = map(lambda x: unicode(x) if not isinstance(x, dict) else x['path'], value) + if item_sep: + return Args(pos, get_args(job, adapter, item_sep.join(items)).args) + if not prefix: + return Args(pos, items) + if sep == ' ': + return Args(pos, flatten([prefix, item] for item in items)) + return Args(pos, [sep.join([prefix, item]) for item in items]) + + value = unicode(value) + if not prefix: + return Args(pos, [value]) + if sep == ' ': + return Args(pos, [prefix, value]) + return Args(pos, [sep.join([prefix, value])]) + + +def get_proc_args_and_redirects(tool, job): + adaptable_inputs = [i for i in tool.get('inputs', []) if 'adapter' in i.get('schema', {})] + input_args = [] + for i in adaptable_inputs: + inp_id = i['@id'][1:] + inp_val = job['inputs'].get(inp_id) + inp_adapter = i['schema']['adapter'] + input_args.append(get_args(job, inp_adapter, inp_val, i['schema'], inp_id)) + adapter_args = [get_args(job, a) for a in tool.get('adapters', [])] + if isinstance(tool.get('baseCmd'), basestring): + tool['baseCmd'] = [tool['baseCmd']] + base_cmd = [resolve_transform(job, v) for v in tool['baseCmd']] + argv = base_cmd + merge_args(input_args + adapter_args) + stdin = resolve_transform(job, tool.get('stdin')) + stdout = resolve_transform(job, tool.get('stdout')) + return argv, stdin, stdout + + +def test(tool, job): + ex = os.path.join(os.path.dirname(__file__), '../../examples/') + with open(os.path.join(ex, tool)) as fp: + tool = json.load(fp) + with open(os.path.join(ex, job)) as fp: + job = json.load(fp) + argv, stdin, stdout = get_proc_args_and_redirects(tool, job) + print ' '.join(argv), '<', stdin, '>', stdout + + +def conformance_test(): + tool, job = filter(lambda x: x[0] != '-', sys.argv[1:]) + assert os.path.isfile(tool) + assert os.path.isfile(job) + base_dir = filter(lambda x: x.startswith('--basedir='), sys.argv[1:]) + if base_dir: + base_dir = base_dir[0][len('--basedir='):] + + with open(tool) as t, open(job) as j: + tool = json.load(t) + job = json.load(j) + + if base_dir: + job['inputs'] = map_paths(job.get('inputs', {}), base_dir) + + argv, stdin, stdout = get_proc_args_and_redirects(tool, job) + print json.dumps({ + 'args': argv, + 'stdin': stdin, + 'stdout': stdout, + }) + + +def map_paths(obj, base_dir): + if isinstance(obj, list): + return [map_paths(i, base_dir) for i in obj] + if not isinstance(obj, dict): + return obj + if obj.get('@type') == 'File': + obj['path'] = os.path.join(base_dir, obj['path']) + return obj + return {k: map_paths(v, base_dir) for k, v in obj.iteritems()} + + +if __name__ == '__main__': + if '--conformance-test' not in sys.argv: + test('bwa-mem-tool.json', 'bwa-mem-job.json') + test('cat1-tool.json', 'cat-n-job.json') + else: + conformance_test() diff --git a/cwltool/workflow.py b/cwltool/workflow.py new file mode 100644 index 000000000..66c30d90c --- /dev/null +++ b/cwltool/workflow.py @@ -0,0 +1,252 @@ +import os +import logging +import functools +import json +from datetime import datetime +from copy import deepcopy +from collections import defaultdict + +from rdflib import Graph, URIRef, Literal, RDF, XSD +from rdflib.namespace import Namespace, NamespaceManager + +from tool_new import jseval + + +log = logging.getLogger(__file__) + +CWL = Namespace('http://github.com/common-workflow-language/schema/wf#') +PROV = Namespace('http://www.w3.org/ns/prov#') +DCT = Namespace('http://purl.org/dc/terms/') + + +def 
value_for(graph, iri): + return graph.value(iri).toPython() + + +class Inputs(object): + def __init__(self, graph, tuples): + self.g = graph + self.d = {} + self.wrapped = [] + for k, v in tuples: + self[k] = v + + def __getitem__(self, item): + return self.d[item] + + def __setitem__(self, key, value): + if key not in self.d: + self.d[key] = value_for(self.g, value) + elif key in self.wrapped: + self.d[key].append(value_for(self.g, value)) + else: + self.d[key] = [self.d[key], value_for(self.g, value)] + self.wrapped.append(key) + + def to_dict(self): + return {k[k.rfind('/') + 1:]: v for k, v in self.d.iteritems()} + + +def lazy(func): + attr = '__lazy_' + func.__name__ + + @functools.wraps(func) + def wrapped(self): + if not hasattr(self, attr): + setattr(self, attr, func(self)) + return getattr(self, attr) + return property(wrapped) + + +class Process(object): + def __init__(self, graph, iri): + self.g = graph + self.iri = URIRef(iri) + + activity = lazy(lambda self: self.g.value(None, CWL.activityFor, self.iri)) + inputs = lazy(lambda self: list(self.g.objects(self.iri, CWL.inputs))) + outputs = lazy(lambda self: list(self.g.objects(self.iri, CWL.outputs))) + started = lazy(lambda self: self.g.value(self.activity, PROV.startedAtTime) if self.activity else None) + ended = lazy(lambda self: self.g.value(self.activity, PROV.endedAtTime) if self.activity else None) + has_prereqs = lazy(lambda self: all([None, CWL.producedByPort, src] in self.g for src in self.sources)) + + @lazy + def has_prereqs(self): + return all([None, CWL.producedByPort, src] in self.g for src in self.sources) + + @lazy + def sources(self): + return [x[0] for x in self.g.query(''' + select ?src + where { + <%s> cwl:inputs ?port . + ?link cwl:destination ?port ; + cwl:source ?src . + } + ''' % self.iri)] + + @lazy + def input_values(self): + return self.g.query(''' + select ?port ?val + where { + <%s> cwl:inputs ?port . + ?link cwl:destination ?port ; + cwl:source ?src . + ?val cwl:producedByPort ?src . 
+ } + ''' % self.iri) + + +class WorkflowRunner(object): + def __init__(self): + nm = NamespaceManager(Graph()) + nm.bind('cwl', CWL) + nm.bind('prov', PROV) + nm.bind('dcterms', DCT) + self.g = Graph(namespace_manager=nm) + self.wf_iri = None + self.act_iri = None + + def load(self, *args, **kwargs): + return self.g.parse(*args, **kwargs) + + def start(self, proc_iri=None): + main_act = False + if not proc_iri: + proc_iri = self.wf_iri + main_act = True + proc_iri = URIRef(proc_iri) + iri = self.iri_for_activity(proc_iri) + log.debug('Starting %s', iri) + self.g.add([iri, RDF.type, CWL.Activity]) + self.g.add([iri, CWL.activityFor, proc_iri]) + self.g.add([iri, PROV.startedAtTime, Literal(datetime.now(), datatype=XSD.datetime)]) + if main_act: + self.act_iri = iri + else: + self.g.add([self.act_iri, DCT.hasPart, iri]) + for k, v in Process(self.g, proc_iri).input_values: + val = self.g.value(v) + log.debug('Value on %s is %s', k, val.toPython()) + return iri + + def end(self, act_iri): + act_iri = URIRef(act_iri) + self.g.add([act_iri, PROV.endedAtTime, Literal(datetime.now(), datatype=XSD.datetime)]) + + def iri_for_activity(self, process_iri): + sep = '/' if '#' in process_iri else '#' + return URIRef(process_iri + sep + '__activity__') # TODO: Better IRIs + + def iri_for_value(self, port_iri): + return URIRef(port_iri + '/__value__') # TODO: Better IRIs + + def queued(self): + ps = [Process(self.g, iri) for iri in self.g.subjects(RDF.type, CWL.Process)] + return [p for p in ps if p.has_prereqs and not p.started] + + def set_value(self, port_iri, value, creator_iri=None): + if not port_iri.startswith(self.wf_iri): + port_iri = self.wf_iri + '#' + port_iri + port_iri = URIRef(port_iri) + iri = self.iri_for_value(port_iri) + self.g.add([iri, RDF.type, CWL.Value]) + self.g.add([iri, RDF.value, Literal(value)]) # TODO: complex types as cnt; add CWL.includesFile + self.g.add([iri, CWL.producedByPort, URIRef(port_iri)]) + if creator_iri: + self.g.add([iri, PROV.wasGeneratedBy, URIRef(creator_iri)]) + return iri + + def _depth_mismatch_port(self, proc, inputs): + depth_of = lambda x: 1 if isinstance(x, list) else 0 # TODO: fixme + incoming = {k: depth_of(v) for k, v in inputs.d.iteritems()} + expected = {k: self.g.value(k, CWL.depth).toPython() for k in proc.inputs} + result = None + for k, v in incoming.iteritems(): + if expected[k] != v: + if result: + log.error('\nIncoming: %s\nExpected: %s', incoming, expected) + raise NotImplementedError('More than one port has mismatching depth.') + if incoming[k] < expected[k]: + raise Exception('depth(incoming) < depth(expected); Wrapping must be done explicitly.') + result = k + return result + + def run_workflow(self): + self.start() + while self.queued(): + act = self.start(self.queued()[0].iri) + proc = Process(self.g, self.g.value(act, CWL.activityFor)) + tool = self.g.value(proc.iri, CWL.tool) + inputs = Inputs(self.g, proc.input_values) # TODO: propagate desc<->impl + dmp = self._depth_mismatch_port(proc, inputs) + if not dmp: + job = {'inputs': inputs.to_dict()} + outputs = self.run_script(tool, job) + else: + jobs, outputs = [], defaultdict(list) + for i in inputs[dmp]: + inp_copy = deepcopy(inputs) + inp_copy.d[dmp] = i + jobs.append({'inputs': inp_copy.to_dict()}) + for job in jobs: + outs = self.run_script(tool, job) + for k, v in outs.iteritems(): + outputs[k].append(v) + for k, v in outputs.iteritems(): + self.set_value(proc.iri + '/' + k, v, act) + self.end(act) + self.end(self.act_iri) + outputs = dict(self.g.query(''' + select ?port 
?val + where { + <%s> cwl:outputs ?port . + ?link cwl:destination ?port ; + cwl:source ?src . + ?val cwl:producedByPort ?src . + } + ''' % self.wf_iri)) + return {k: self.g.value(v).toPython() for k, v in outputs.iteritems()} + + def run_script(self, tool, job): + expr = self.g.value(tool, CWL.expr) + log.debug('Running expr %s\nJob: %s', expr, job) + result = jseval(job, expr) + logging.debug('Result: %s', result) + return result + + @classmethod + def from_workflow(cls, path): + wfr = cls() + wfr.load(path, format='json-ld') + wfr.wf_iri = URIRef('file://' + path) # TODO: Find a better way to do this + wfr.g.add([wfr.wf_iri, RDF.type, CWL.Process]) + for sp in wfr.g.objects(wfr.wf_iri, CWL.steps): + wfr.g.add([sp, RDF.type, CWL.Process]) + tool = wfr.g.value(sp, CWL.tool) + log.debug('Loading reference %s', tool) + wfr.g.parse(tool, format='json-ld') + return wfr + + +def aplusbtimesc(wf_name, a, b, c): + print '\n\n--- %s ---\n\n' % wf_name + path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../examples/' + wf_name)) + rnr = WorkflowRunner.from_workflow(path) + rnr.set_value('a', a) + rnr.set_value('b', b) + rnr.set_value('c', c) + outs = rnr.run_workflow() + assert outs + print '\nDone. Workflow outputs:' + for k, v in outs.iteritems(): + print k, v + assert v == (a+b)*c + return rnr + +if __name__ == '__main__': + logging.basicConfig(level=logging.DEBUG) + aplusbtimesc('wf_simple.json', 2, 3, 4) + aplusbtimesc('wf_lists.json', 2, 3, 4) + aplusbtimesc('wf_map.json', 2, 3, 4) \ No newline at end of file From bad48f0be090f788b50f6f9316b34278c97971b1 Mon Sep 17 00:00:00 2001 From: Nebojsa Tijanic Date: Wed, 4 Feb 2015 14:07:09 +0100 Subject: [PATCH 028/221] passing conformance tests with tool_new --- cwltool/tool_new.py | 64 +++++++++++++++++++++++++++++++------- cwltool/workflow.py | 76 +++++++++++++++++++++++++++------------------ 2 files changed, 98 insertions(+), 42 deletions(-) diff --git a/cwltool/tool_new.py b/cwltool/tool_new.py index a13b16f9e..e82ab7bcc 100755 --- a/cwltool/tool_new.py +++ b/cwltool/tool_new.py @@ -3,9 +3,12 @@ import os import sys import json +import logging from collections import namedtuple from tool import resolve_pointer, flatten import sandboxjs +import avro.io +import avro.schema Args = namedtuple('Args', ['position', 'args']) merge_args = lambda args: flatten([a.args for a in sorted(args, key=lambda x: x.position)]) @@ -37,7 +40,12 @@ def resolve_transform(job, val): raise Exception('Unknown language for Transform: %s' % lang) -def get_args(job, adapter, value=None, schema=None, key=None): +def get_args(job, adapter, value=None, schema=None, key=None, tool=None): + if schema and 'adapter' in schema: + adapter = schema['adapter'] + if adapter is None: + return Args(None, []) + position = adapter.get('position', 0) prefix = adapter.get('prefix') sep = adapter.get('separator', ' ') @@ -61,17 +69,23 @@ def get_args(job, adapter, value=None, schema=None, key=None): return Args(pos, []) args = [] for k, v in value.iteritems(): - item_schema = filter(lambda x: x['name'] == k, schema['fields'])[0] - item_adapter = item_schema.get('adapter') - if item_adapter is not None: - args.append(get_args(job, item_adapter, v, item_schema, k)) + field = filter(lambda x: x['name'] == k, schema['fields']) + if not field: + logging.error('Field not found in schema: "%s". 
Schema: %s', k, schema) + continue + field = field[0] + field_adapter = field.get('adapter') + field_schema = schema_by_name(field.get('type'), tool) + args.append(get_args(job, field_adapter, v, field_schema, k, tool=tool)) return Args(pos, merge_args(args)) if isinstance(value, list): - # TODO: complex item types - items = map(lambda x: unicode(x) if not isinstance(x, dict) else x['path'], value) + items = flatten([get_args(job, {}, i, schema_for_item(i, schema, tool), tool=tool).args for i in value]) if item_sep: - return Args(pos, get_args(job, adapter, item_sep.join(items)).args) + val = item_sep.join(items) + if not prefix: + return Args(pos, [val]) + return Args(pos, [prefix, val] if sep == ' ' else [sep.join([prefix, val])]) if not prefix: return Args(pos, items) if sep == ' ': @@ -86,6 +100,31 @@ def get_args(job, adapter, value=None, schema=None, key=None): return Args(pos, [sep.join([prefix, value])]) +def schema_by_name(type_name, tool): + if isinstance(type_name, dict): + return type_name + tds = filter(lambda x: x['name'] == type_name, tool.get('schemaDefs', [])) + return tds[0] if tds else None + + +def schema_for_item(value, array_schema, tool): + if not array_schema: + return None + opts = array_schema.get('items', []) + if not opts: + return None + if not isinstance(opts, list): + opts = [opts] + opts = [schema_by_name(opt, tool) for opt in opts] + if len(opts) == 1: + return opts[0] + for opt in opts: + sch = avro.schema.parse(json.dumps(opt)) + if avro.io.validate(sch, value): + return opt + return None + + def get_proc_args_and_redirects(tool, job): adaptable_inputs = [i for i in tool.get('inputs', []) if 'adapter' in i.get('schema', {})] input_args = [] @@ -93,8 +132,8 @@ def get_proc_args_and_redirects(tool, job): inp_id = i['@id'][1:] inp_val = job['inputs'].get(inp_id) inp_adapter = i['schema']['adapter'] - input_args.append(get_args(job, inp_adapter, inp_val, i['schema'], inp_id)) - adapter_args = [get_args(job, a) for a in tool.get('adapters', [])] + input_args.append(get_args(job, inp_adapter, inp_val, i['schema'], inp_id, tool=tool)) + adapter_args = [get_args(job, a, tool=tool) for a in tool.get('adapters', [])] if isinstance(tool.get('baseCmd'), basestring): tool['baseCmd'] = [tool['baseCmd']] base_cmd = [resolve_transform(job, v) for v in tool['baseCmd']] @@ -150,7 +189,8 @@ def map_paths(obj, base_dir): if __name__ == '__main__': if '--conformance-test' not in sys.argv: - test('bwa-mem-tool.json', 'bwa-mem-job.json') - test('cat1-tool.json', 'cat-n-job.json') + # test('bwa-mem-tool.json', 'bwa-mem-job.json') + # test('cat1-tool.json', 'cat-n-job.json') + test('tmap-tool.json', 'tmap-job.json') else: conformance_test() diff --git a/cwltool/workflow.py b/cwltool/workflow.py index 66c30d90c..6300af23b 100644 --- a/cwltool/workflow.py +++ b/cwltool/workflow.py @@ -14,15 +14,27 @@ log = logging.getLogger(__file__) -CWL = Namespace('http://github.com/common-workflow-language/schema/wf#') +CWL = Namespace('http://github.com/common-workflow-language/') PROV = Namespace('http://www.w3.org/ns/prov#') DCT = Namespace('http://purl.org/dc/terms/') +CNT = Namespace('http://www.w3.org/2011/content#') -def value_for(graph, iri): +def get_value(graph, iri): + chars = graph.value(iri, CNT.chars) + if chars: + return json.load(chars.toPython()) return graph.value(iri).toPython() +def set_value(graph, iri, val): + # TODO: add CWL.includesFile + if isinstance(val, (dict, list)): + graph.add(iri, CNT.chars, json.dumps(val)) + else: + 
graph.add(iri, RDF.value, Literal(val)) + + class Inputs(object): def __init__(self, graph, tuples): self.g = graph @@ -36,11 +48,11 @@ def __getitem__(self, item): def __setitem__(self, key, value): if key not in self.d: - self.d[key] = value_for(self.g, value) + self.d[key] = get_value(self.g, value) elif key in self.wrapped: - self.d[key].append(value_for(self.g, value)) + self.d[key].append(get_value(self.g, value)) else: - self.d[key] = [self.d[key], value_for(self.g, value)] + self.d[key] = [self.d[key], get_value(self.g, value)] self.wrapped.append(key) def to_dict(self): @@ -99,17 +111,26 @@ def input_values(self): class WorkflowRunner(object): - def __init__(self): + def __init__(self, path): nm = NamespaceManager(Graph()) nm.bind('cwl', CWL) nm.bind('prov', PROV) - nm.bind('dcterms', DCT) + nm.bind('dct', DCT) + nm.bind('cnt', CNT) self.g = Graph(namespace_manager=nm) self.wf_iri = None self.act_iri = None - - def load(self, *args, **kwargs): - return self.g.parse(*args, **kwargs) + self._load(path) + + def _load(self, path): + self.g.parse(path) + self.wf_iri = URIRef('file://' + path) # TODO: Find a better way to do this + self.g.add([self.wf_iri, RDF.type, CWL.Process]) + for sp in self.g.objects(self.wf_iri, CWL.steps): + self.g.add([sp, RDF.type, CWL.Process]) + tool = self.g.value(sp, CWL.tool) + log.debug('Loading reference %s', tool) + self.g.parse(tool, format='json-ld') def start(self, proc_iri=None): main_act = False @@ -151,8 +172,8 @@ def set_value(self, port_iri, value, creator_iri=None): port_iri = self.wf_iri + '#' + port_iri port_iri = URIRef(port_iri) iri = self.iri_for_value(port_iri) + set_value(self.g, iri, value) self.g.add([iri, RDF.type, CWL.Value]) - self.g.add([iri, RDF.value, Literal(value)]) # TODO: complex types as cnt; add CWL.includesFile self.g.add([iri, CWL.producedByPort, URIRef(port_iri)]) if creator_iri: self.g.add([iri, PROV.wasGeneratedBy, URIRef(creator_iri)]) @@ -167,12 +188,20 @@ def _depth_mismatch_port(self, proc, inputs): if expected[k] != v: if result: log.error('\nIncoming: %s\nExpected: %s', incoming, expected) - raise NotImplementedError('More than one port has mismatching depth.') + raise Exception('More than one port has mismatching depth.') if incoming[k] < expected[k]: raise Exception('depth(incoming) < depth(expected); Wrapping must be done explicitly.') + if incoming[k] - expected[k] > 1: + raise NotImplementedError('Only handling one nesting level at the moment.') result = k return result + def run_component(self, tool, job): + cmp_type = self.g.value(tool, RDF.type) + if cmp_type == CWL.SimpleTransformTool: + return self.run_script(tool, job) + raise Exception('Unrecognized component type: %s' % cmp_type) + def run_workflow(self): self.start() while self.queued(): @@ -183,7 +212,7 @@ def run_workflow(self): dmp = self._depth_mismatch_port(proc, inputs) if not dmp: job = {'inputs': inputs.to_dict()} - outputs = self.run_script(tool, job) + outputs = self.run_component(tool, job) else: jobs, outputs = [], defaultdict(list) for i in inputs[dmp]: @@ -191,7 +220,7 @@ def run_workflow(self): inp_copy.d[dmp] = i jobs.append({'inputs': inp_copy.to_dict()}) for job in jobs: - outs = self.run_script(tool, job) + outs = self.run_component(tool, job) for k, v in outs.iteritems(): outputs[k].append(v) for k, v in outputs.iteritems(): @@ -207,33 +236,20 @@ def run_workflow(self): ?val cwl:producedByPort ?src . 
} ''' % self.wf_iri)) - return {k: self.g.value(v).toPython() for k, v in outputs.iteritems()} + return {k: get_value(self.g, v) for k, v in outputs.iteritems()} def run_script(self, tool, job): - expr = self.g.value(tool, CWL.expr) + expr = self.g.value(self.g.value(tool, CWL.script)).toPython() log.debug('Running expr %s\nJob: %s', expr, job) result = jseval(job, expr) logging.debug('Result: %s', result) return result - @classmethod - def from_workflow(cls, path): - wfr = cls() - wfr.load(path, format='json-ld') - wfr.wf_iri = URIRef('file://' + path) # TODO: Find a better way to do this - wfr.g.add([wfr.wf_iri, RDF.type, CWL.Process]) - for sp in wfr.g.objects(wfr.wf_iri, CWL.steps): - wfr.g.add([sp, RDF.type, CWL.Process]) - tool = wfr.g.value(sp, CWL.tool) - log.debug('Loading reference %s', tool) - wfr.g.parse(tool, format='json-ld') - return wfr - def aplusbtimesc(wf_name, a, b, c): print '\n\n--- %s ---\n\n' % wf_name path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../examples/' + wf_name)) - rnr = WorkflowRunner.from_workflow(path) + rnr = WorkflowRunner(path) rnr.set_value('a', a) rnr.set_value('b', b) rnr.set_value('c', c) From b397ea7b5ca885f799e206f1849a7c0208f04f18 Mon Sep 17 00:00:00 2001 From: Nebojsa Tijanic Date: Thu, 5 Feb 2015 12:47:29 +0100 Subject: [PATCH 029/221] updated examples, added @context --- cwltool/tool_new.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/cwltool/tool_new.py b/cwltool/tool_new.py index e82ab7bcc..e6c215214 100755 --- a/cwltool/tool_new.py +++ b/cwltool/tool_new.py @@ -62,6 +62,8 @@ def get_args(job, adapter, value=None, schema=None, key=None, tool=None): return Args(pos, []) if isinstance(value, bool): + if not prefix: + raise Exception('Boolean value without prefix in adapter') return Args(pos, [prefix]) if value else Args(pos, []) if isinstance(value, dict): @@ -133,13 +135,14 @@ def get_proc_args_and_redirects(tool, job): inp_val = job['inputs'].get(inp_id) inp_adapter = i['schema']['adapter'] input_args.append(get_args(job, inp_adapter, inp_val, i['schema'], inp_id, tool=tool)) - adapter_args = [get_args(job, a, tool=tool) for a in tool.get('adapters', [])] - if isinstance(tool.get('baseCmd'), basestring): - tool['baseCmd'] = [tool['baseCmd']] - base_cmd = [resolve_transform(job, v) for v in tool['baseCmd']] + cli_adapter = tool['cliAdapter'] + adapter_args = [get_args(job, a, tool=tool) for a in cli_adapter.get('argAdapters', [])] + if isinstance(cli_adapter.get('baseCmd'), basestring): + cli_adapter['baseCmd'] = [cli_adapter['baseCmd']] + base_cmd = [resolve_transform(job, v) for v in cli_adapter['baseCmd']] argv = base_cmd + merge_args(input_args + adapter_args) - stdin = resolve_transform(job, tool.get('stdin')) - stdout = resolve_transform(job, tool.get('stdout')) + stdin = resolve_transform(job, cli_adapter.get('stdin')) + stdout = resolve_transform(job, cli_adapter.get('stdout')) return argv, stdin, stdout From 99bb84698684c78ce034ec037a1d3a05edfd1cf7 Mon Sep 17 00:00:00 2001 From: Nebojsa Tijanic Date: Thu, 5 Feb 2015 13:05:02 +0100 Subject: [PATCH 030/221] context in expressions --- cwltool/tool_new.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/cwltool/tool_new.py b/cwltool/tool_new.py index e6c215214..a83259b2c 100755 --- a/cwltool/tool_new.py +++ b/cwltool/tool_new.py @@ -14,7 +14,7 @@ merge_args = lambda args: flatten([a.args for a in sorted(args, key=lambda x: x.position)]) -def jseval(job, expression): +def jseval(job, expression, 
context=None): if expression.startswith('{'): exp_tpl = '''{ return function()%s();} @@ -24,16 +24,16 @@ def jseval(job, expression): return %s;} ''' exp = exp_tpl % expression - return sandboxjs.execjs(exp, "var $job = %s;" % json.dumps(job)) + return sandboxjs.execjs(exp, "var $job = %s, $self = %s;" % (json.dumps(job), json.dumps(context))) -def resolve_transform(job, val): +def resolve_transform(job, val, context=None): if not isinstance(val, dict) or val.get('@type') != 'Transform': return val lang = val.get('language') expr = val.get('value') if lang == 'javascript': - return jseval(job, expr) + return jseval(job, expr, context) elif lang == 'jsonpointer': return resolve_pointer(job, expr) else: @@ -54,7 +54,7 @@ def get_args(job, adapter, value=None, schema=None, key=None, tool=None): pos = [position, key] if isinstance(arg_val, dict) and arg_val.get('@type') == 'Transform': - value = resolve_transform(job, arg_val) + value = resolve_transform(job, arg_val, value) elif isinstance(value, dict) and value.get('@type') == 'File': value = value.get('path') From 616982f4d4339a893fe93189d271ab7092aee046 Mon Sep 17 00:00:00 2001 From: Nebojsa Tijanic Date: Fri, 6 Feb 2015 17:17:07 +0100 Subject: [PATCH 031/221] added @context to examples --- cwltool/tool_new.py | 23 +++++++++++++++- cwltool/workflow.py | 66 ++++++++++++++++++++++++++++----------------- 2 files changed, 63 insertions(+), 26 deletions(-) diff --git a/cwltool/tool_new.py b/cwltool/tool_new.py index a83259b2c..0ca9751b8 100755 --- a/cwltool/tool_new.py +++ b/cwltool/tool_new.py @@ -4,6 +4,7 @@ import sys import json import logging +import tempfile from collections import namedtuple from tool import resolve_pointer, flatten import sandboxjs @@ -190,10 +191,30 @@ def map_paths(obj, base_dir): return {k: map_paths(v, base_dir) for k, v in obj.iteritems()} +def run(tool_path, job_path): + with open(tool_path) as fpt, open(job_path) as fpj: + tool = json.load(fpt) + job = json.load(fpj) + job = map_paths(job, os.path.join(os.path.dirname(__file__), '../../examples/')) + argv, stdin, stdout = get_proc_args_and_redirects(tool, job) + line = ' '.join(argv) + if stdin: + line += ' < ' + stdin + if stdout: + line += ' > ' + stdout + print line + job_dir = tempfile.mkdtemp() + os.chdir(job_dir) + if os.system(line): + raise Exception('Process failed.') + print os.listdir('.') + + if __name__ == '__main__': if '--conformance-test' not in sys.argv: + run(*sys.argv[1:]) # test('bwa-mem-tool.json', 'bwa-mem-job.json') # test('cat1-tool.json', 'cat-n-job.json') - test('tmap-tool.json', 'tmap-job.json') + # test('tmap-tool.json', 'tmap-job.json') else: conformance_test() diff --git a/cwltool/workflow.py b/cwltool/workflow.py index 6300af23b..0a95c5602 100644 --- a/cwltool/workflow.py +++ b/cwltool/workflow.py @@ -15,6 +15,7 @@ log = logging.getLogger(__file__) CWL = Namespace('http://github.com/common-workflow-language/') +WFD = Namespace('http://purl.org/wf4ever/wfdesc#') PROV = Namespace('http://www.w3.org/ns/prov#') DCT = Namespace('http://purl.org/dc/terms/') CNT = Namespace('http://www.w3.org/2011/content#') @@ -28,11 +29,10 @@ def get_value(graph, iri): def set_value(graph, iri, val): - # TODO: add CWL.includesFile if isinstance(val, (dict, list)): - graph.add(iri, CNT.chars, json.dumps(val)) + graph.set([iri, CNT.chars, Literal(json.dumps(val))]) else: - graph.add(iri, RDF.value, Literal(val)) + graph.set([iri, RDF.value, Literal(val)]) class Inputs(object): @@ -76,8 +76,8 @@ def __init__(self, graph, iri): self.iri = 
URIRef(iri) activity = lazy(lambda self: self.g.value(None, CWL.activityFor, self.iri)) - inputs = lazy(lambda self: list(self.g.objects(self.iri, CWL.inputs))) - outputs = lazy(lambda self: list(self.g.objects(self.iri, CWL.outputs))) + inputs = lazy(lambda self: list(self.g.objects(self.iri, WFD.hasInput))) + outputs = lazy(lambda self: list(self.g.objects(self.iri, WFD.hasOutput))) started = lazy(lambda self: self.g.value(self.activity, PROV.startedAtTime) if self.activity else None) ended = lazy(lambda self: self.g.value(self.activity, PROV.endedAtTime) if self.activity else None) has_prereqs = lazy(lambda self: all([None, CWL.producedByPort, src] in self.g for src in self.sources)) @@ -91,9 +91,9 @@ def sources(self): return [x[0] for x in self.g.query(''' select ?src where { - <%s> cwl:inputs ?port . - ?link cwl:destination ?port ; - cwl:source ?src . + <%s> wfd:hasInput ?port . + ?link wfd:hasSink ?port ; + wfd:hasSource ?src . } ''' % self.iri)] @@ -102,9 +102,9 @@ def input_values(self): return self.g.query(''' select ?port ?val where { - <%s> cwl:inputs ?port . - ?link cwl:destination ?port ; - cwl:source ?src . + <%s> wfd:hasInput ?port . + ?link wfd:hasSink ?port ; + wfd:hasSource ?src . ?val cwl:producedByPort ?src . } ''' % self.iri) @@ -114,6 +114,7 @@ class WorkflowRunner(object): def __init__(self, path): nm = NamespaceManager(Graph()) nm.bind('cwl', CWL) + nm.bind('wfd', WFD) nm.bind('prov', PROV) nm.bind('dct', DCT) nm.bind('cnt', CNT) @@ -123,12 +124,12 @@ def __init__(self, path): self._load(path) def _load(self, path): - self.g.parse(path) + self.g.parse(path, format='json-ld') self.wf_iri = URIRef('file://' + path) # TODO: Find a better way to do this - self.g.add([self.wf_iri, RDF.type, CWL.Process]) - for sp in self.g.objects(self.wf_iri, CWL.steps): - self.g.add([sp, RDF.type, CWL.Process]) - tool = self.g.value(sp, CWL.tool) + self.g.add([self.wf_iri, RDF.type, WFD.Process]) + for sp in self.g.objects(self.wf_iri, WFD.hasSubProcess): + self.g.add([sp, RDF.type, WFD.Process]) + tool = self.g.value(sp, CWL.hasImplementation) log.debug('Loading reference %s', tool) self.g.parse(tool, format='json-ld') @@ -182,7 +183,7 @@ def set_value(self, port_iri, value, creator_iri=None): def _depth_mismatch_port(self, proc, inputs): depth_of = lambda x: 1 if isinstance(x, list) else 0 # TODO: fixme incoming = {k: depth_of(v) for k, v in inputs.d.iteritems()} - expected = {k: self.g.value(k, CWL.depth).toPython() for k in proc.inputs} + expected = {k: self.g.value(k, CWL.hasDepth).toPython() for k in proc.inputs} result = None for k, v in incoming.iteritems(): if expected[k] != v: @@ -207,7 +208,7 @@ def run_workflow(self): while self.queued(): act = self.start(self.queued()[0].iri) proc = Process(self.g, self.g.value(act, CWL.activityFor)) - tool = self.g.value(proc.iri, CWL.tool) + tool = self.g.value(proc.iri, CWL.hasImplementation) inputs = Inputs(self.g, proc.input_values) # TODO: propagate desc<->impl dmp = self._depth_mismatch_port(proc, inputs) if not dmp: @@ -230,16 +231,16 @@ def run_workflow(self): outputs = dict(self.g.query(''' select ?port ?val where { - <%s> cwl:outputs ?port . - ?link cwl:destination ?port ; - cwl:source ?src . + <%s> wfd:hasOutput ?port . + ?link wfd:hasSink ?port ; + wfd:hasSource ?src . ?val cwl:producedByPort ?src . 
} ''' % self.wf_iri)) return {k: get_value(self.g, v) for k, v in outputs.iteritems()} def run_script(self, tool, job): - expr = self.g.value(self.g.value(tool, CWL.script)).toPython() + expr = self.g.value(self.g.value(tool, CWL.hasScript)).toPython() log.debug('Running expr %s\nJob: %s', expr, job) result = jseval(job, expr) logging.debug('Result: %s', result) @@ -261,8 +262,23 @@ def aplusbtimesc(wf_name, a, b, c): assert v == (a+b)*c return rnr + +def count_lines(): + examples = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../examples')) + wf_path = os.path.join(examples, 'wf-count-lines.json') + job_path = os.path.join(examples, 'wf-count-lines-job.json') + with open(job_path) as fp: + inputs = json.load(fp)['inputs'] + rnr = WorkflowRunner(wf_path) + for k, v in inputs.iteritems(): + rnr.set_value(k, v) + print rnr.run_workflow() + return rnr + + if __name__ == '__main__': logging.basicConfig(level=logging.DEBUG) - aplusbtimesc('wf_simple.json', 2, 3, 4) - aplusbtimesc('wf_lists.json', 2, 3, 4) - aplusbtimesc('wf_map.json', 2, 3, 4) \ No newline at end of file + # aplusbtimesc('wf_simple.json', 2, 3, 4) + # aplusbtimesc('wf_lists.json', 2, 3, 4) + # aplusbtimesc('wf_map.json', 2, 3, 4) + count_lines() From dd5feb74523952ecfef94b26b3f117c25dfea188 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 9 Feb 2015 15:53:40 -0500 Subject: [PATCH 032/221] Completed avro schema. Added Avro validate function that reports better errors than the default. --- cwltool/validate.py | 109 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 cwltool/validate.py diff --git a/cwltool/validate.py b/cwltool/validate.py new file mode 100644 index 000000000..04e80a3cf --- /dev/null +++ b/cwltool/validate.py @@ -0,0 +1,109 @@ +import avro.schema +import json +import pprint + +module_dir = os.path.dirname(os.path.abspath(__file__)) +names = avro.schema.Names() +cwl_avsc = os.path.join(module_dir, 'schemas/draft-2/cwl.avsc') +with open(cwl_avsc) as f: + j = json.load(f) + for t in j: + avro.schema.make_avsc_object(t, names) + +class ValidationException(Exception): + pass + +def validate(expected_schema, datum): + try: + return validate_ex(expected_schema, datum) + except ValidationException: + return False + +INT_MIN_VALUE = -(1 << 31) +INT_MAX_VALUE = (1 << 31) - 1 +LONG_MIN_VALUE = -(1 << 63) +LONG_MAX_VALUE = (1 << 63) - 1 + +def validate_ex(expected_schema, datum): + """Determine if a python datum is an instance of a schema.""" + schema_type = expected_schema.type + if schema_type == 'null': + if datum is None: + return True + else: + raise ValidationException("'%s' is not None" % datum) + elif schema_type == 'boolean': + if isinstance(datum, bool): + return True + else: + raise ValidationException("'%s' is not bool" % datum) + elif schema_type == 'string': + if isinstance(datum, basestring): + return True + else: + raise ValidationException("'%s' is not string" % datum) + elif schema_type == 'bytes': + if isinstance(datum, str): + return True + else: + raise ValidationException("'%s' is not bytes" % datum) + elif schema_type == 'int': + if ((isinstance(datum, int) or isinstance(datum, long)) + and INT_MIN_VALUE <= datum <= INT_MAX_VALUE): + return True + else: + raise ValidationException("'%s' is not int" % datum) + elif schema_type == 'long': + if ((isinstance(datum, int) or isinstance(datum, long)) + and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE): + return True + else: + raise ValidationException("'%s' is not long" % datum) + elif 
schema_type in ['float', 'double']: + if (isinstance(datum, int) or isinstance(datum, long) + or isinstance(datum, float)): + return True + else: + raise ValidationException("'%s' is not float or double" % datum) + elif schema_type == 'fixed': + if isinstance(datum, str) and len(datum) == expected_schema.size: + return True + else: + raise ValidationException("'%s' is not fixed" % datum) + elif schema_type == 'enum': + if datum in expected_schema.symbols: + return True + else: + raise ValidationException("'%s'\n is not a valid enum symbol\n %s" % (pprint.pformat(datum), pprint.pformat(expected_schema.symbols))) + elif schema_type == 'array': + if (isinstance(datum, list) and + False not in [validate(expected_schema.items, d) for d in datum]): + return True + else: + raise ValidationException("'%s'\n is not a valid list item\n %s" % (pprint.pformat(datum), expected_schema.items)) + elif schema_type == 'map': + if (isinstance(datum, dict) and + False not in [isinstance(k, basestring) for k in datum.keys()] and + False not in + [validate(expected_schema.values, v) for v in datum.values()]): + return True + else: + raise ValidationException("'%s' is not a valid map value %s" % (pprint.pformat(datum), pprint.pformat(expected_schema.values))) + elif schema_type in ['union', 'error_union']: + if True in [validate(s, datum) for s in expected_schema.schemas]: + return True + else: + raise ValidationException("'%s' is not a valid union %s" % (pprint.pformat(datum), pprint.pformat(expected_schema.schemas))) + elif schema_type in ['record', 'error', 'request']: + if (isinstance(datum, dict) and + False not in + [validate(f.type, datum.get(f.name)) for f in expected_schema.fields]): + return True + else: + if not isinstance(datum, dict): + raise ValidationException("'%s'\n is not a dict" % pprint.pformat(datum)) + [validate_ex(f.type, datum.get(f.name)) for f in expected_schema.fields] + raise ValidationException("Unrecognized schema_type %s" % schema_type) + +def validate_tool(j): + validate_ex(names.get_name("CommandLineTool", ""), j) From 2a6125e82e6e74719fd9e6780cc3ec85e2eab6e1 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 9 Feb 2015 15:59:32 -0500 Subject: [PATCH 033/221] Renamed "tool.py" to "draft1tool.py" --- cwltool/{tool.py => draft1tool.py} | 0 cwltool/{validate.py => draft2tool.py} | 0 cwltool/job.py | 4 ++-- cwltool/main.py | 4 ++-- 4 files changed, 4 insertions(+), 4 deletions(-) rename cwltool/{tool.py => draft1tool.py} (100%) rename cwltool/{validate.py => draft2tool.py} (100%) diff --git a/cwltool/tool.py b/cwltool/draft1tool.py similarity index 100% rename from cwltool/tool.py rename to cwltool/draft1tool.py diff --git a/cwltool/validate.py b/cwltool/draft2tool.py similarity index 100% rename from cwltool/validate.py rename to cwltool/draft2tool.py diff --git a/cwltool/job.py b/cwltool/job.py index 2295acbcf..6670712d3 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -1,7 +1,7 @@ import subprocess import os import tempfile -import tool +import draft1tool import glob import json import yaml @@ -81,7 +81,7 @@ def collect_outputs(self, schema, outdir): else: r = None if "value" in adapter: - r = tool.resolve_eval(self.joborder, adapter["value"]) + r = draft1tool.resolve_eval(self.joborder, adapter["value"]) if not r and "properties" in schema: r = {} for k, v in schema["properties"].items(): diff --git a/cwltool/main.py b/cwltool/main.py index e50470516..e361200bf 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -1,6 +1,6 @@ #!/usr/bin/env python -import tool +import 
draft1tool import argparse from ref_resolver import from_url import jsonschema @@ -21,7 +21,7 @@ def main(): args = parser.parse_args() try: - t = tool.Tool(from_url(args.tool)) + t = draft1tool.Tool(from_url(args.tool)) except jsonschema.exceptions.ValidationError as e: print "Tool definition failed validation" print e From 08e47b2a5d754da4be49000c10d8c6713b24a038 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 9 Feb 2015 23:29:51 -0500 Subject: [PATCH 034/221] Working on generating cli from binding --- cwltool/draft1tool.py | 83 ++------------------ cwltool/draft2tool.py | 172 +++++++++++++++++++++++++++++++++++++++--- cwltool/flatten.py | 20 +++++ cwltool/main.py | 11 ++- cwltool/pathmapper.py | 55 ++++++++++++++ 5 files changed, 253 insertions(+), 88 deletions(-) create mode 100644 cwltool/flatten.py create mode 100644 cwltool/pathmapper.py diff --git a/cwltool/draft1tool.py b/cwltool/draft1tool.py index c3bf8c336..740219e63 100644 --- a/cwltool/draft1tool.py +++ b/cwltool/draft1tool.py @@ -8,7 +8,9 @@ import random import requests import urlparse +from pathmapper import PathMapper, DockerPathMapper from job import Job +from flatten import flatten from jsonschema.validators import Draft4Validator import ref_resolver @@ -16,16 +18,16 @@ module_dir = os.path.dirname(os.path.abspath(__file__)) -jsonschemapath = os.path.join(module_dir, 'schemas/json-schema-draft-04.json') +jsonschemapath = os.path.join(module_dir, 'schemas/draft-1/json-schema-draft-04.json') with open(jsonschemapath) as f: jsonschemapath_doc = json.load(f) ref_resolver.loader.fetched["http://json-schema.org/draft-04/schema"] = jsonschemapath_doc -toolpath = os.path.join(module_dir, 'schemas/tool.json') +toolpath = os.path.join(module_dir, 'schemas/draft-1/tool.json') with open(toolpath) as f: tool_schema_doc = json.load(f) -with open(os.path.join(module_dir, 'schemas/metaschema.json')) as f: +with open(os.path.join(module_dir, 'schemas/draft-1/metaschema.json')) as f: metaschema = json.load(f) SCHEMA_URL_PREFIX = "https://raw.githubusercontent.com/common-workflow-language/common-workflow-language/draft-1/schemas/" @@ -45,27 +47,6 @@ def each(l): else: return [l] -# http://rightfootin.blogspot.com/2006/09/more-on-python-flatten.html -def flatten(l, ltypes=(list, tuple)): - if l is None: - return [] - if not isinstance(l, ltypes): - return [l] - - ltype = type(l) - l = list(l) - i = 0 - while i < len(l): - while isinstance(l[i], ltypes): - if not l[i]: - l.pop(i) - i -= 1 - break - else: - l[i:i + 1] = l[i] - i += 1 - return ltype(l) - def fix_file_type(t): if 'type' in t and t['type'] == "file": for a in metaschema["definitions"]["file"]: @@ -264,60 +245,6 @@ def adapt(self, adapter, job, path_mapper): return l -class PathMapper(object): - # Maps files to their absolute path - def __init__(self, referenced_files, basedir): - self._pathmap = {} - for src in referenced_files: - abs = src if os.path.isabs(src) else os.path.join(basedir, src) - self._pathmap[src] = abs - - def mapper(self, src): - return self._pathmap[src] - - -class DockerPathMapper(object): - def __init__(self, referenced_files, basedir): - self._pathmap = {} - self.dirs = {} - for src in referenced_files: - abs = src if os.path.isabs(src) else os.path.join(basedir, src) - dir, fn = os.path.split(abs) - - subdir = False - for d in self.dirs: - if dir.startswith(d): - subdir = True - break - - if not subdir: - for d in list(self.dirs): - if d.startswith(dir): - # 'dir' is a parent of 'd' - del self.dirs[d] - self.dirs[dir] = True - - prefix = "job" + 
str(random.randint(1, 1000000000)) + "_" - - names = set() - for d in self.dirs: - name = os.path.join("/tmp", prefix + os.path.basename(d)) - i = 1 - while name in names: - i += 1 - name = os.path.join("/tmp", prefix + os.path.basename(d) + str(i)) - names.add(name) - self.dirs[d] = name - - for src in referenced_files: - abs = src if os.path.isabs(src) else os.path.join(basedir, src) - for d in self.dirs: - if abs.startswith(d): - self._pathmap[src] = os.path.join(self.dirs[d], abs[len(d)+1:]) - - def mapper(self, src): - return self._pathmap[src] - class Tool(object): def __init__(self, toolpath_object): self.tool = toolpath_object diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 04e80a3cf..0605642be 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -1,14 +1,13 @@ import avro.schema import json import pprint +import copy +from flatten import flatten +import os + +TOOL_CONTEXT_URL = "https://raw.githubusercontent.com/common-workflow-language/common-workflow-language/draft-2-pa/schemas/draft-2/context.json" module_dir = os.path.dirname(os.path.abspath(__file__)) -names = avro.schema.Names() -cwl_avsc = os.path.join(module_dir, 'schemas/draft-2/cwl.avsc') -with open(cwl_avsc) as f: - j = json.load(f) - for t in j: - avro.schema.make_avsc_object(t, names) class ValidationException(Exception): pass @@ -102,8 +101,163 @@ def validate_ex(expected_schema, datum): else: if not isinstance(datum, dict): raise ValidationException("'%s'\n is not a dict" % pprint.pformat(datum)) - [validate_ex(f.type, datum.get(f.name)) for f in expected_schema.fields] + try: + [validate_ex(f.type, datum.get(f.name)) for f in expected_schema.fields] + except ValidationException as v: + raise ValidationException("%s\nValidating record %s" % (v, pprint.pformat(datum))) raise ValidationException("Unrecognized schema_type %s" % schema_type) -def validate_tool(j): - validate_ex(names.get_name("CommandLineTool", ""), j) +class Builder(object): + def jseval(self, expression): + if expression.startswith('{'): + exp_tpl = '{return function()%s();}' + else: + exp_tpl = '{return %s;}' + exp = exp_tpl % (expression) + return sandboxjs.execjs(exp, "var $job = %s;%s" % (json.dumps(self.job), self.jslib)) + + def do_eval(self, s): + if isinstance(ex, dict): + if ex.get("@type") == "JavascriptExpression": + return jseval(ex["value"]) + elif ex.get("@id"): + with open(os.path.join(basedir, ex["@id"]), "r") as f: + return f.read() + else: + return ex + + def input_binding(self, schema, datum, key): + bindings = [] + # Handle union types + if isinstance(schema["type"], list): + for t in schema["type"]: + if validate(t, datum): + return input_binding(t, datum) + raise ValidationException("'%s' is not a valid union %s" % (pprint.pformat(datum), pprint.pformat(schema["type"]))) + + if schema["type"] == "record": + for f in schema["fields"]: + bindings.extend(self.input_binding(f, datum[f["name"]], f["name"])) + + if schema["type"] == "map": + for v in datum: + bindings.extend(self.input_binding(schema["values"], datum[v], v)) + + if schema["type"] == "array": + for n, item in enumerate(datum): + b = self.input_binding(schema["items"], item, format(n, '06')) + bindings.extend(b) + + if schema["type"] == "File": + self.files.append(datum) + + if schema.get("binding"): + b = copy.copy(schema["binding"]) + + if b.get("position"): + b["position"] = [b["position"], key] + else: + b["position"] = [0, key] + + # Position to front of the sort key + for bi in bindings: + bi["position"] = b["position"] + 
bi["position"] + + if "valueFrom" not in b: + b["valueFrom"] = datum + + bindings.append(b) + + return bindings + + def bind(self, binding): + value = self.do_eval(binding["valueFrom"]) + + ls = [] + + if isinstance(value, list): + if binding.get("itemSeparator"): + l = [binding["itemSeparator"].join(value)] + else: + pass + elif isinstance(value, dict): + pass + elif isinstance(value, bool): + if value and binding.get("prefix"): + sv = binding["prefix"] + + +class Tool(object): + def __init__(self, toolpath_object): + self.names = avro.schema.Names() + cwl_avsc = os.path.join(module_dir, 'schemas/draft-2/cwl.avsc') + with open(cwl_avsc) as f: + j = json.load(f) + for t in j: + avro.schema.make_avsc_object(t, self.names) + + self.tool = toolpath_object + if self.tool.get("@context") != TOOL_CONTEXT_URL: + raise Exception("Missing or invalid '@context' field in tool description document, must be %s" % TOOL_CONTEXT_URL) + + # Validate tool documument + validate_ex(self.names.get_name("CommandLineTool", ""), self.tool) + + # Import schema defs + if self.tool.get("schemaDefs"): + for i in self.tool["schemaDefs"]: + avro.schema.make_avsc_object(i, self.names) + + # Build record schema from inputs + self.inputs_record_schema = {"name": "input_record_schema", "type": "record", "fields": []} + for i in self.tool["inputs"]: + c = copy.copy(i) + c["name"] = c["port"][1:] + del c["port"] + self.inputs_record_schema["fields"].append(c) + avro.schema.make_avsc_object(self.inputs_record_schema, self.names) + + self.outputs_record_schema = {"name": "outputs_record_schema", "type": "record", "fields": []} + for i in self.tool["outputs"]: + c = copy.copy(i) + c["name"] = c["port"][1:] + del c["port"] + self.outputs_record_schema["fields"].append(c) + avro.schema.make_avsc_object(self.outputs_record_schema, self.names) + + def job(self, joborder, basedir, use_container=True): + # Validate job order + validate_ex(self.names.get_name("input_record_schema", ""), joborder) + + builder = Builder() + builder.job = joborder + builder.jslib = '' + builder.files = [] + builder.bindings = [{ + "position": [-1000000], + "valueFrom": self.tool["baseCommand"] + }] + + if self.tool.get("expressionDefs"): + for ex in self.tool['expressionDefs']: + builder.jslib += builder.do_eval(ex) + "\n" + + if self.tool.get("arguments"): + for i, a in enumerate(self.tool["arguments"]): + a = copy.copy(a) + if a.get("position"): + a["position"] = [a["position"], i] + else: + a["position"] = [0, i] + builder.bindings.append(a) + + builder.bindings.extend(builder.input_binding(self.inputs_record_schema, joborder, "")) + + builder.bindings.sort(key=lambda a: a["position"]) + + pprint.pprint(builder.bindings) + + # j = Job() + # j.joborder = joborder + # j.tool = self + # j.container = None diff --git a/cwltool/flatten.py b/cwltool/flatten.py new file mode 100644 index 000000000..54e918a78 --- /dev/null +++ b/cwltool/flatten.py @@ -0,0 +1,20 @@ +# http://rightfootin.blogspot.com/2006/09/more-on-python-flatten.html +def flatten(l, ltypes=(list, tuple)): + if l is None: + return [] + if not isinstance(l, ltypes): + return [l] + + ltype = type(l) + l = list(l) + i = 0 + while i < len(l): + while isinstance(l[i], ltypes): + if not l[i]: + l.pop(i) + i -= 1 + break + else: + l[i:i + 1] = l[i] + i += 1 + return ltype(l) diff --git a/cwltool/main.py b/cwltool/main.py index e361200bf..4a53c37b1 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -1,6 +1,7 @@ #!/usr/bin/env python import draft1tool +import draft2tool import argparse from 
ref_resolver import from_url import jsonschema @@ -21,11 +22,19 @@ def main(): args = parser.parse_args() try: - t = draft1tool.Tool(from_url(args.tool)) + u = from_url(args.tool) + if "schema" in u: + t = draft1tool.Tool(u) + else: + t = draft2tool.Tool(u) except jsonschema.exceptions.ValidationError as e: print "Tool definition failed validation" print e return 1 + except draft2tool.ValidationException as e: + print "Tool definition failed validation" + print e + return 1 basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(args.job_order)) diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py new file mode 100644 index 000000000..08815e5a9 --- /dev/null +++ b/cwltool/pathmapper.py @@ -0,0 +1,55 @@ +import os + +class PathMapper(object): + # Maps files to their absolute path + def __init__(self, referenced_files, basedir): + self._pathmap = {} + for src in referenced_files: + abs = src if os.path.isabs(src) else os.path.join(basedir, src) + self._pathmap[src] = abs + + def mapper(self, src): + return self._pathmap[src] + + +class DockerPathMapper(object): + def __init__(self, referenced_files, basedir): + self._pathmap = {} + self.dirs = {} + for src in referenced_files: + abs = src if os.path.isabs(src) else os.path.join(basedir, src) + dir, fn = os.path.split(abs) + + subdir = False + for d in self.dirs: + if dir.startswith(d): + subdir = True + break + + if not subdir: + for d in list(self.dirs): + if d.startswith(dir): + # 'dir' is a parent of 'd' + del self.dirs[d] + self.dirs[dir] = True + + prefix = "job" + str(random.randint(1, 1000000000)) + "_" + + names = set() + for d in self.dirs: + name = os.path.join("/tmp", prefix + os.path.basename(d)) + i = 1 + while name in names: + i += 1 + name = os.path.join("/tmp", prefix + os.path.basename(d) + str(i)) + names.add(name) + self.dirs[d] = name + + for src in referenced_files: + abs = src if os.path.isabs(src) else os.path.join(basedir, src) + for d in self.dirs: + if abs.startswith(d): + self._pathmap[src] = os.path.join(self.dirs[d], abs[len(d)+1:]) + + def mapper(self, src): + return self._pathmap[src] From 67e6d56c1b938899f2b402dd711d14d889a0e93a Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 10 Feb 2015 10:32:03 -0500 Subject: [PATCH 035/221] Still working on CLI generation --- cwltool/draft2tool.py | 106 ++++++++++++++++++++++++++++-------------- 1 file changed, 70 insertions(+), 36 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 0605642be..d92028e52 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -4,6 +4,8 @@ import copy from flatten import flatten import os +from pathmapper import PathMapper, DockerPathMapper +import sandboxjs TOOL_CONTEXT_URL = "https://raw.githubusercontent.com/common-workflow-language/common-workflow-language/draft-2-pa/schemas/draft-2/context.json" @@ -108,50 +110,56 @@ def validate_ex(expected_schema, datum): raise ValidationException("Unrecognized schema_type %s" % schema_type) class Builder(object): - def jseval(self, expression): - if expression.startswith('{'): - exp_tpl = '{return function()%s();}' + def jseval(self, expression, context): + if isinstance(expression, list): + exp = "{return %s(%s);}" % (expression[0], ",".join([self.do_eval(e) for e in expression[1:]])) + elif expression.startswith('{'): + exp = '{return function()%s();}' % (expression) else: - exp_tpl = '{return %s;}' - exp = exp_tpl % (expression) - return sandboxjs.execjs(exp, "var $job = %s;%s" % (json.dumps(self.job), self.jslib)) + exp = 
'{return %s;}' % (expression) + return sandboxjs.execjs(exp, "var $job = %s; var $self = %s; %s" % (json.dumps(self.job), json.dumps(context), self.jslib)) - def do_eval(self, s): + def do_eval(self, ex, context=None): if isinstance(ex, dict): - if ex.get("@type") == "JavascriptExpression": - return jseval(ex["value"]) - elif ex.get("@id"): - with open(os.path.join(basedir, ex["@id"]), "r") as f: + if ex.get("expressionType") == "javascript": + return self.jseval(ex["value"], context) + elif ex.get("ref"): + with open(os.path.join(basedir, ex["ref"]), "r") as f: return f.read() else: return ex - def input_binding(self, schema, datum, key): + def bind_input(self, schema, datum, key): bindings = [] + # Handle union types if isinstance(schema["type"], list): for t in schema["type"]: if validate(t, datum): - return input_binding(t, datum) + return bind_input(t, datum) raise ValidationException("'%s' is not a valid union %s" % (pprint.pformat(datum), pprint.pformat(schema["type"]))) + if isinstance(schema["type"], dict): + bindings.extend(self.bind_input(schema["type"], datum, key)) + if schema["type"] == "record": for f in schema["fields"]: - bindings.extend(self.input_binding(f, datum[f["name"]], f["name"])) + bindings.extend(self.bind_input(f, datum[f["name"]], f["name"])) if schema["type"] == "map": for v in datum: - bindings.extend(self.input_binding(schema["values"], datum[v], v)) + bindings.extend(self.bind_input(schema["values"], datum[v], v)) if schema["type"] == "array": for n, item in enumerate(datum): - b = self.input_binding(schema["items"], item, format(n, '06')) + #print n, item, schema["items"] + b = self.bind_input({"type": schema["items"], "binding": schema.get("binding")}, item, format(n, '06')) bindings.extend(b) if schema["type"] == "File": - self.files.append(datum) + self.files.append(datum["path"]) - if schema.get("binding"): + if "binding" in schema and isinstance(schema["binding"], dict): b = copy.copy(schema["binding"]) if b.get("position"): @@ -163,29 +171,50 @@ def input_binding(self, schema, datum, key): for bi in bindings: bi["position"] = b["position"] + bi["position"] - if "valueFrom" not in b: + if "valueFrom" in b: + b["valueFrom"] = self.do_eval(b["valueFrom"], datum) + else: b["valueFrom"] = datum + if schema["type"] == "File": + b["is_file"] = True + bindings.append(b) return bindings - def bind(self, binding): - value = self.do_eval(binding["valueFrom"]) - - ls = [] + def generate_arg(self, binding): + value = binding["valueFrom"] + prefix = binding.get("prefix") + sep = binding.get("separator") + l = [] if isinstance(value, list): if binding.get("itemSeparator"): - l = [binding["itemSeparator"].join(value)] - else: - pass + l = [binding["itemSeparator"].join([str(v) for v in value])] + elif prefix: + return [prefix] + elif binding.get("is_file"): + l = [self.pathmapper.mapper(value["path"])] elif isinstance(value, dict): - pass + if prefix: + return [prefix] elif isinstance(value, bool): - if value and binding.get("prefix"): - sv = binding["prefix"] + if value and prefix: + return [prefix] + else: + return [] + else: + l = [value] + + args = [] + for j in l: + if sep is None or sep == " ": + args.extend([prefix, str(j)]) + else: + args.extend([prefix + sep + str(j)]) + return [a for a in args if a is not None] class Tool(object): def __init__(self, toolpath_object): @@ -249,15 +278,20 @@ def job(self, joborder, basedir, use_container=True): a["position"] = [a["position"], i] else: a["position"] = [0, i] + a["valueFrom"] = builder.do_eval(a["valueFrom"]) 
builder.bindings.append(a) - builder.bindings.extend(builder.input_binding(self.inputs_record_schema, joborder, "")) - + builder.bindings.extend(builder.bind_input(self.inputs_record_schema, joborder, "")) builder.bindings.sort(key=lambda a: a["position"]) - pprint.pprint(builder.bindings) + builder.pathmapper = PathMapper(builder.files, basedir) + + #pprint.pprint(builder.bindings) + #pprint.pprint(builder.files) + - # j = Job() - # j.joborder = joborder - # j.tool = self - # j.container = None + j = Job() + j.joborder = joborder + j.tool = self + j.container = None + j.command_line = flatten(map(builder.generate_arg, builder.bindings)) From 79c918ffd125ee6c92ddf9c02eadebe13a079ad1 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 10 Feb 2015 11:10:04 -0500 Subject: [PATCH 036/221] Stuff runs now, need to port examples forward. --- cwltool/draft1tool.py | 2 -- cwltool/draft2tool.py | 46 +++++++++++++++++++++++++++++++++++-------- cwltool/job.py | 7 +++++-- cwltool/pathmapper.py | 1 + 4 files changed, 44 insertions(+), 12 deletions(-) diff --git a/cwltool/draft1tool.py b/cwltool/draft1tool.py index 740219e63..d7cea8714 100644 --- a/cwltool/draft1tool.py +++ b/cwltool/draft1tool.py @@ -291,8 +291,6 @@ def job(self, joborder, basedir, use_container=True): j = Job() j.joborder = joborder - j.tool = self - j.container = None if 'stdin' in adapter: diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index d92028e52..30ff54128 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -6,6 +6,7 @@ import os from pathmapper import PathMapper, DockerPathMapper import sandboxjs +from job import Job TOOL_CONTEXT_URL = "https://raw.githubusercontent.com/common-workflow-language/common-workflow-language/draft-2-pa/schemas/draft-2/context.json" @@ -262,10 +263,12 @@ def job(self, joborder, basedir, use_container=True): builder.job = joborder builder.jslib = '' builder.files = [] - builder.bindings = [{ - "position": [-1000000], - "valueFrom": self.tool["baseCommand"] - }] + builder.bindings = [] + for n, b in enumerate(self.tool["baseCommand"]): + builder.bindings.append({ + "position": [-1000000, n], + "valueFrom": b + }) if self.tool.get("expressionDefs"): for ex in self.tool['expressionDefs']: @@ -284,14 +287,41 @@ def job(self, joborder, basedir, use_container=True): builder.bindings.extend(builder.bind_input(self.inputs_record_schema, joborder, "")) builder.bindings.sort(key=lambda a: a["position"]) - builder.pathmapper = PathMapper(builder.files, basedir) - #pprint.pprint(builder.bindings) #pprint.pprint(builder.files) - j = Job() j.joborder = joborder - j.tool = self j.container = None + builder.pathmapper = None + + if self.tool.get("stdin"): + j.stdin = builder.do_eval(self.tool["stdin"]) + referenced_files.append(j.stdin) + else: + j.stdin = None + + if self.tool.get("stdout"): + j.stdout = builder.do_eval(self.tool["stdout"]) + if os.path.isabs(j.stdout): + raise Exception("stdout must be a relative path") + else: + j.stdout = None + + j.generatefiles = {} + for t in self.tool.get("fileDefs", []): + j.generatefiles[t["filename"]] = builder.do_eval(t["value"]) + + for r in self.tool.get("hints", []): + if r["requirementType"] == "DockerImage": + j.container = {} + j.container["pull"] = r.get("dockerPull") + j.container["import"] = r.get("dockerImport") + j.container["imageId"] = r.get("dockerImageId") + builder.pathmapper = DockerPathMapper(builder.files, basedir) + + if builder.pathmapper is None: + builder.pathmapper = PathMapper(builder.files, basedir) j.command_line = 
flatten(map(builder.generate_arg, builder.bindings)) + + return j diff --git a/cwltool/job.py b/cwltool/job.py index 6670712d3..895a9634c 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -19,8 +19,11 @@ def run(self, dry_run=False, pull_image=True): runtime = [] if self.container and self.container.get("type") == "docker": - if "uri" in self.container and pull_image: - subprocess.call(["docker", "pull", self.container["uri"]]) + if pull_image: + if "pull" in self.container: + subprocess.call(["docker", "pull", self.container["pull"]]) + elif "import" in self.container: + subprocess.call(["docker", "import", self.container["import"]]) runtime = ["docker", "run", "-i"] for d in self.pathmapper.dirs: runtime.append("--volume=%s:%s:ro" % (d, self.pathmapper.dirs[d])) diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py index 08815e5a9..fd64f7683 100644 --- a/cwltool/pathmapper.py +++ b/cwltool/pathmapper.py @@ -1,4 +1,5 @@ import os +import random class PathMapper(object): # Maps files to their absolute path From b5c2eb693e2d5c27a1d5d5a152606e9c768003f8 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 10 Feb 2015 17:03:28 -0500 Subject: [PATCH 037/221] Conformance tests updated and all pass! --- cwltool/draft2tool.py | 243 ++++++++++++++++++++++++------------------ 1 file changed, 138 insertions(+), 105 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 30ff54128..c9cb57978 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -27,93 +27,99 @@ def validate(expected_schema, datum): LONG_MAX_VALUE = (1 << 63) - 1 def validate_ex(expected_schema, datum): - """Determine if a python datum is an instance of a schema.""" - schema_type = expected_schema.type - if schema_type == 'null': - if datum is None: - return True - else: - raise ValidationException("'%s' is not None" % datum) - elif schema_type == 'boolean': - if isinstance(datum, bool): - return True - else: - raise ValidationException("'%s' is not bool" % datum) - elif schema_type == 'string': - if isinstance(datum, basestring): - return True - else: - raise ValidationException("'%s' is not string" % datum) - elif schema_type == 'bytes': - if isinstance(datum, str): - return True - else: - raise ValidationException("'%s' is not bytes" % datum) - elif schema_type == 'int': - if ((isinstance(datum, int) or isinstance(datum, long)) + """Determine if a python datum is an instance of a schema.""" + schema_type = expected_schema.type + if schema_type == 'null': + if datum is None: + return True + else: + raise ValidationException("`%s` is not null" % datum) + elif schema_type == 'boolean': + if isinstance(datum, bool): + return True + else: + raise ValidationException("`%s` is not boolean" % datum) + elif schema_type == 'string': + if isinstance(datum, basestring): + return True + else: + raise ValidationException("`%s` is not string" % datum) + elif schema_type == 'bytes': + if isinstance(datum, str): + return True + else: + raise ValidationException("`%s` is not bytes" % datum) + elif schema_type == 'int': + if ((isinstance(datum, int) or isinstance(datum, long)) and INT_MIN_VALUE <= datum <= INT_MAX_VALUE): - return True - else: - raise ValidationException("'%s' is not int" % datum) - elif schema_type == 'long': - if ((isinstance(datum, int) or isinstance(datum, long)) + return True + else: + raise ValidationException("`%s` is not int" % datum) + elif schema_type == 'long': + if ((isinstance(datum, int) or isinstance(datum, long)) and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE): - return True 
- else: - raise ValidationException("'%s' is not long" % datum) - elif schema_type in ['float', 'double']: - if (isinstance(datum, int) or isinstance(datum, long) + return True + else: + raise ValidationException("`%s` is not long" % datum) + elif schema_type in ['float', 'double']: + if (isinstance(datum, int) or isinstance(datum, long) or isinstance(datum, float)): - return True - else: - raise ValidationException("'%s' is not float or double" % datum) - elif schema_type == 'fixed': - if isinstance(datum, str) and len(datum) == expected_schema.size: - return True - else: - raise ValidationException("'%s' is not fixed" % datum) - elif schema_type == 'enum': - if datum in expected_schema.symbols: - return True - else: - raise ValidationException("'%s'\n is not a valid enum symbol\n %s" % (pprint.pformat(datum), pprint.pformat(expected_schema.symbols))) - elif schema_type == 'array': - if (isinstance(datum, list) and - False not in [validate(expected_schema.items, d) for d in datum]): - return True - else: - raise ValidationException("'%s'\n is not a valid list item\n %s" % (pprint.pformat(datum), expected_schema.items)) - elif schema_type == 'map': - if (isinstance(datum, dict) and - False not in [isinstance(k, basestring) for k in datum.keys()] and - False not in - [validate(expected_schema.values, v) for v in datum.values()]): - return True - else: - raise ValidationException("'%s' is not a valid map value %s" % (pprint.pformat(datum), pprint.pformat(expected_schema.values))) - elif schema_type in ['union', 'error_union']: - if True in [validate(s, datum) for s in expected_schema.schemas]: - return True - else: - raise ValidationException("'%s' is not a valid union %s" % (pprint.pformat(datum), pprint.pformat(expected_schema.schemas))) - elif schema_type in ['record', 'error', 'request']: - if (isinstance(datum, dict) and - False not in - [validate(f.type, datum.get(f.name)) for f in expected_schema.fields]): - return True - else: - if not isinstance(datum, dict): - raise ValidationException("'%s'\n is not a dict" % pprint.pformat(datum)) - try: - [validate_ex(f.type, datum.get(f.name)) for f in expected_schema.fields] - except ValidationException as v: - raise ValidationException("%s\nValidating record %s" % (v, pprint.pformat(datum))) - raise ValidationException("Unrecognized schema_type %s" % schema_type) + return True + else: + raise ValidationException("`%s` is not float or double" % datum) + elif schema_type == 'fixed': + if isinstance(datum, str) and len(datum) == expected_schema.size: + return True + else: + raise ValidationException("`%s` is not fixed" % datum) + elif schema_type == 'enum': + if datum in expected_schema.symbols: + return True + else: + raise ValidationException("`%s`\n is not a valid enum symbol, expected\n %s" % (pprint.pformat(datum), pprint.pformat(expected_schema.symbols))) + elif schema_type == 'array': + if isinstance(datum, list): + for i, d in enumerate(datum): + try: + validate_ex(expected_schema.items, d) + except ValidationException as v: + raise ValidationException("%s\n while validating item at position %i `%s`" % (v, i, d)) + return True + else: + raise ValidationException("`%s`\n is not a list, expected list of\n %s" % (pprint.pformat(datum), expected_schema.items)) + elif schema_type == 'map': + if (isinstance(datum, dict) and + False not in [isinstance(k, basestring) for k in datum.keys()] and + False not in [validate(expected_schema.values, v) for v in datum.values()]): + return True + else: + raise ValidationException("`%s` is not a valid map 
value, expected\n %s" % (pprint.pformat(datum), pprint.pformat(expected_schema.values))) + elif schema_type in ['union', 'error_union']: + if True in [validate(s, datum) for s in expected_schema.schemas]: + return True + else: + errors = [] + for s in expected_schema.schemas: + try: + validate_ex(s, datum) + except ValidationException as e: + errors.append(str(e)) + raise ValidationException("`%s`\n is not valid, expected one of:\n\n%s\n\n the individual errors are:\n%s" % (pprint.pformat(datum), ",\n\n ".join([str(s) for s in expected_schema.schemas]), ";\n\n".join(errors))) + elif schema_type in ['record', 'error', 'request']: + if not isinstance(datum, dict): + raise ValidationException("`%s`\n is not a dict" % pprint.pformat(datum)) + try: + for f in expected_schema.fields: + validate_ex(f.type, datum.get(f.name)) + return True + except ValidationException as v: + raise ValidationException("%s\n while validating field `%s`" % (v, f.name)) + raise ValidationException("Unrecognized schema_type %s" % schema_type) class Builder(object): def jseval(self, expression, context): if isinstance(expression, list): - exp = "{return %s(%s);}" % (expression[0], ",".join([self.do_eval(e) for e in expression[1:]])) + exp = "{return %s(%s);}" % (expression[0], ",".join([json.dumps(self.do_eval(e)) for e in expression[1:]])) elif expression.startswith('{'): exp = '{return function()%s();}' % (expression) else: @@ -125,7 +131,7 @@ def do_eval(self, ex, context=None): if ex.get("expressionType") == "javascript": return self.jseval(ex["value"], context) elif ex.get("ref"): - with open(os.path.join(basedir, ex["ref"]), "r") as f: + with open(os.path.join(self.basedir, ex["ref"]), "r") as f: return f.read() else: return ex @@ -135,31 +141,45 @@ def bind_input(self, schema, datum, key): # Handle union types if isinstance(schema["type"], list): + success = False for t in schema["type"]: - if validate(t, datum): - return bind_input(t, datum) - raise ValidationException("'%s' is not a valid union %s" % (pprint.pformat(datum), pprint.pformat(schema["type"]))) - - if isinstance(schema["type"], dict): + if t in self.schemaDefs: + t = self.schemaDefs[t] + avsc = avro.schema.make_avsc_object(t, None) + if validate(avsc, datum): + if isinstance(t, basestring): + t = {"type": t} + bindings.extend(self.bind_input(t, datum, key)) + success = True + break + if not success: + raise ValidationException("'%s' is not a valid union %s" % (datum, schema["type"])) + elif isinstance(schema["type"], dict): bindings.extend(self.bind_input(schema["type"], datum, key)) + else: + if schema["type"] in self.schemaDefs: + schema = self.schemaDefs[schema["type"]] - if schema["type"] == "record": - for f in schema["fields"]: - bindings.extend(self.bind_input(f, datum[f["name"]], f["name"])) + if schema["type"] == "record": + for f in schema["fields"]: + if f["name"] in datum: + bindings.extend(self.bind_input(f, datum[f["name"]], f["name"])) - if schema["type"] == "map": - for v in datum: - bindings.extend(self.bind_input(schema["values"], datum[v], v)) + if schema["type"] == "map": + for v in datum: + bindings.extend(self.bind_input(schema["values"], datum[v], v)) - if schema["type"] == "array": - for n, item in enumerate(datum): - #print n, item, schema["items"] - b = self.bind_input({"type": schema["items"], "binding": schema.get("binding")}, item, format(n, '06')) - bindings.extend(b) + if schema["type"] == "array": + for n, item in enumerate(datum): + b = self.bind_input({"type": schema["items"], "binding": schema.get("binding")}, 
item, "") + for bi in b: + bi["position"].insert(0, n) + bindings.extend(b) - if schema["type"] == "File": - self.files.append(datum["path"]) + if schema["type"] == "File": + self.files.append(datum["path"]) + b = None if "binding" in schema and isinstance(schema["binding"], dict): b = copy.copy(schema["binding"]) @@ -179,7 +199,6 @@ def bind_input(self, schema, datum, key): if schema["type"] == "File": b["is_file"] = True - bindings.append(b) return bindings @@ -234,9 +253,11 @@ def __init__(self, toolpath_object): validate_ex(self.names.get_name("CommandLineTool", ""), self.tool) # Import schema defs + self.schemaDefs = {} if self.tool.get("schemaDefs"): for i in self.tool["schemaDefs"]: avro.schema.make_avsc_object(i, self.names) + self.schemaDefs[i["name"]] = i # Build record schema from inputs self.inputs_record_schema = {"name": "input_record_schema", "type": "record", "fields": []} @@ -262,12 +283,21 @@ def job(self, joborder, basedir, use_container=True): builder = Builder() builder.job = joborder builder.jslib = '' + builder.basedir = basedir builder.files = [] builder.bindings = [] - for n, b in enumerate(self.tool["baseCommand"]): + builder.schemaDefs = self.schemaDefs + + if isinstance(self.tool["baseCommand"], list): + for n, b in enumerate(self.tool["baseCommand"]): + builder.bindings.append({ + "position": [-1000000, n], + "valueFrom": b + }) + else: builder.bindings.append({ - "position": [-1000000, n], - "valueFrom": b + "position": [-1000000], + "valueFrom": self.tool["baseCommand"] }) if self.tool.get("expressionDefs"): @@ -297,7 +327,7 @@ def job(self, joborder, basedir, use_container=True): if self.tool.get("stdin"): j.stdin = builder.do_eval(self.tool["stdin"]) - referenced_files.append(j.stdin) + builder.files.append(j.stdin) else: j.stdin = None @@ -313,7 +343,7 @@ def job(self, joborder, basedir, use_container=True): j.generatefiles[t["filename"]] = builder.do_eval(t["value"]) for r in self.tool.get("hints", []): - if r["requirementType"] == "DockerImage": + if r["requirementType"] == "DockerImage" and use_container: j.container = {} j.container["pull"] = r.get("dockerPull") j.container["import"] = r.get("dockerImport") @@ -323,5 +353,8 @@ def job(self, joborder, basedir, use_container=True): if builder.pathmapper is None: builder.pathmapper = PathMapper(builder.files, basedir) j.command_line = flatten(map(builder.generate_arg, builder.bindings)) + if j.stdin: + j.stdin = j.stdin if os.path.isabs(j.stdin) else os.path.join(basedir, j.stdin) + return j From 99f4e04c52dbf63557a015d2ad76f31686a3d6f9 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 16 Feb 2015 09:49:48 -0500 Subject: [PATCH 038/221] Tweak implementation of ordering for arrays. 
--- cwltool/draft2tool.py | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index c9cb57978..aa863feb8 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -136,7 +136,7 @@ def do_eval(self, ex, context=None): else: return ex - def bind_input(self, schema, datum, key): + def bind_input(self, schema, datum): bindings = [] # Handle union types @@ -149,13 +149,13 @@ def bind_input(self, schema, datum, key): if validate(avsc, datum): if isinstance(t, basestring): t = {"type": t} - bindings.extend(self.bind_input(t, datum, key)) + bindings.extend(self.bind_input(t, datum)) success = True break if not success: raise ValidationException("'%s' is not a valid union %s" % (datum, schema["type"])) elif isinstance(schema["type"], dict): - bindings.extend(self.bind_input(schema["type"], datum, key)) + bindings.extend(self.bind_input(schema["type"], datum)) else: if schema["type"] in self.schemaDefs: schema = self.schemaDefs[schema["type"]] @@ -163,15 +163,21 @@ def bind_input(self, schema, datum, key): if schema["type"] == "record": for f in schema["fields"]: if f["name"] in datum: - bindings.extend(self.bind_input(f, datum[f["name"]], f["name"])) + b = self.bind_input(f, datum[f["name"]]) + for bi in b: + bi["position"].append(f["name"]) + bindings.extend(b) if schema["type"] == "map": for v in datum: - bindings.extend(self.bind_input(schema["values"], datum[v], v)) + b = self.bind_input(schema["values"], datum[v])) + for bi in b: + bi["position"].insert(0, v) + bindings.extend(b) if schema["type"] == "array": for n, item in enumerate(datum): - b = self.bind_input({"type": schema["items"], "binding": schema.get("binding")}, item, "") + b = self.bind_input({"type": schema["items"], "binding": schema.get("binding")}, item) for bi in b: bi["position"].insert(0, n) bindings.extend(b) @@ -184,9 +190,9 @@ def bind_input(self, schema, datum, key): b = copy.copy(schema["binding"]) if b.get("position"): - b["position"] = [b["position"], key] + b["position"] = [b["position"]] else: - b["position"] = [0, key] + b["position"] = [0] # Position to front of the sort key for bi in bindings: @@ -314,7 +320,7 @@ def job(self, joborder, basedir, use_container=True): a["valueFrom"] = builder.do_eval(a["valueFrom"]) builder.bindings.append(a) - builder.bindings.extend(builder.bind_input(self.inputs_record_schema, joborder, "")) + builder.bindings.extend(builder.bind_input(self.inputs_record_schema, joborder)) builder.bindings.sort(key=lambda a: a["position"]) #pprint.pprint(builder.bindings) From df7f590e7f8b8797287481e158109eba274d7146 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Sun, 22 Feb 2015 22:57:53 -0500 Subject: [PATCH 039/221] Reorganize examples/tests into draft-1 and draft-2 directories. Update tests. Running draft 2 jobs works, but collect_output has not yet been ported. 
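Output collection in draft-2 is now glob-based: the output binding's
"glob" pattern is matched against the job's output directory and each hit
is wrapped as a File object, with single-File ports taking the first
match. A simplified standalone sketch of that logic, assuming a
hypothetical outdir and pattern (collect_file_output is not a function in
this codebase):

    import glob
    import os

    def collect_file_output(outdir, pattern, is_array):
        # Wrap every match of the glob pattern as a File object
        matches = [{"path": g} for g in glob.glob(os.path.join(outdir, pattern))]
        if is_array:
            return matches
        return matches[0] if matches else None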
--- cwltool/draft1tool.py | 31 ++++++++++++++++++++++++++ cwltool/draft2tool.py | 43 ++++++++++++++++++++++++++++++++---- cwltool/job.py | 49 +++++++++++------------------------------- cwltool/pathmapper.py | 4 ++-- setup.py | 3 ++- tests/test_examples.py | 24 +++++---------------- 6 files changed, 92 insertions(+), 62 deletions(-) diff --git a/cwltool/draft1tool.py b/cwltool/draft1tool.py index d7cea8714..05cacce14 100644 --- a/cwltool/draft1tool.py +++ b/cwltool/draft1tool.py @@ -8,6 +8,7 @@ import random import requests import urlparse +import functools from pathmapper import PathMapper, DockerPathMapper from job import Job from flatten import flatten @@ -334,5 +335,35 @@ def job(self, joborder, basedir, use_container=True): j.command_line = flatten(map(lambda a: builder.adapt(a, joborder, d.mapper), adapters)) j.pathmapper = d + j.collect_outputs = functools.partial(self.collect_outputs, self.tool["outputs"], joborder) return j + + def collect_outputs(self, schema, joborder, outdir): + result_path = os.path.join(outdir, "result.cwl.json") + if os.path.isfile(result_path): + print "Result file found." + with open(result_path) as fp: + return yaml.load(fp) + + r = None + if isinstance(schema, dict): + if "adapter" in schema: + adapter = schema["adapter"] + if "glob" in adapter: + r = [{"path": g} for g in glob.glob(os.path.join(outdir, adapter["glob"]))] + if not ("type" in schema and schema["type"] == "array"): + if r: + r = r[0] + else: + r = None + if "value" in adapter: + r = draft1tool.resolve_eval(joborder, adapter["value"]) + if not r and "properties" in schema: + r = {} + for k, v in schema["properties"].items(): + out = self.collect_outputs(v, joborder, outdir) + if out: + r[k] = out + + return r diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index aa863feb8..894269c99 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -3,6 +3,7 @@ import pprint import copy from flatten import flatten +import functools import os from pathmapper import PathMapper, DockerPathMapper import sandboxjs @@ -170,7 +171,7 @@ def bind_input(self, schema, datum): if schema["type"] == "map": for v in datum: - b = self.bind_input(schema["values"], datum[v])) + b = self.bind_input(schema["values"], datum[v]) for bi in b: bi["position"].insert(0, v) bindings.extend(b) @@ -351,16 +352,50 @@ def job(self, joborder, basedir, use_container=True): for r in self.tool.get("hints", []): if r["requirementType"] == "DockerImage" and use_container: j.container = {} - j.container["pull"] = r.get("dockerPull") - j.container["import"] = r.get("dockerImport") - j.container["imageId"] = r.get("dockerImageId") + j.container["type"] = "docker" + if "dockerPull" in r: + j.container["pull"] = r["dockerPull"] + if "dockerImport" in r: + j.container["import"] = r["dockerImport"] + if "dockerImageId" in r: + j.container["imageId"] = r["dockerImageId"] + else: + j.container["imageId"] = r["dockerPull"] builder.pathmapper = DockerPathMapper(builder.files, basedir) if builder.pathmapper is None: builder.pathmapper = PathMapper(builder.files, basedir) j.command_line = flatten(map(builder.generate_arg, builder.bindings)) + if j.stdin: j.stdin = j.stdin if os.path.isabs(j.stdin) else os.path.join(basedir, j.stdin) + j.pathmapper = builder.pathmapper + j.collect_outputs = functools.partial(self.collect_outputs, self.tool["outputs"], joborder) return j + + + def collect_outputs(self, schema, joborder, outdir): + r = None + if isinstance(schema, dict): + if "binding" in schema: + binding = schema["binding"] + if 
"glob" in binding: + r = [{"path": g} for g in glob.glob(os.path.join(outdir, binding["glob"]))] + # if not ("type" in schema and schema["type"] == "array"): + # if r: + # r = r[0] + # else: + # r = None + #if "value" in binding: + # r = draft1tool.resolve_eval(joborder, binding["value"]) + # if not r and "properties" in schema: + # r = {} + # for k, v in schema["properties"].items(): + # out = self.collect_outputs(v, joborder, outdir) + # if out: + # r[k] = out + + + return r diff --git a/cwltool/job.py b/cwltool/job.py index 895a9634c..8c9f79183 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -1,7 +1,6 @@ import subprocess import os import tempfile -import draft1tool import glob import json import yaml @@ -24,15 +23,14 @@ def run(self, dry_run=False, pull_image=True): subprocess.call(["docker", "pull", self.container["pull"]]) elif "import" in self.container: subprocess.call(["docker", "import", self.container["import"]]) + runtime = ["docker", "run", "-i"] for d in self.pathmapper.dirs: - runtime.append("--volume=%s:%s:ro" % (d, self.pathmapper.dirs[d])) + runtime.append("--volume=%s:%s:ro" % (os.path.abspath(d), self.pathmapper.dirs[d])) runtime.append("--volume=%s:%s:ro" % (outdir, "/tmp/job_output")) runtime.append("--workdir=%s" % ("/tmp/job_output")) runtime.append("--user=%s" % (os.geteuid())) runtime.append(self.container["imageId"]) - else: - os.chdir(outdir) stdin = None stdout = None @@ -43,8 +41,10 @@ def run(self, dry_run=False, pull_image=True): if self.stdin: stdin = open(self.stdin, "rb") + os.chdir(outdir) + if self.stdout: - stdout = open(os.path.join(outdir, self.stdout), "wb") + stdout = open(self.stdout, "wb") for t in self.generatefiles: with open(os.path.join(outdir, t), "w") as f: @@ -60,36 +60,13 @@ def run(self, dry_run=False, pull_image=True): stdout.close() print "Output directory is %s" % outdir - if 'outputs' in self.tool.tool: - return self.collect_outputs(self.tool.tool["outputs"], outdir) - return None + result_path = os.path.join(outdir, "result.cwl.json") + if os.path.isfile(result_path): + print "Result file found." + with open(result_path) as fp: + return yaml.load(fp) + else: + return self.collect_outputs(outdir) - def collect_outputs(self, schema, outdir): - result_path = os.path.join(outdir, "result.cwl.json") - if os.path.isfile(result_path): - print "Result file found." 
- with open(result_path) as fp: - return yaml.load(fp) - - r = None - if isinstance(schema, dict): - if "adapter" in schema: - adapter = schema["adapter"] - if "glob" in adapter: - r = [{"path": g} for g in glob.glob(os.path.join(outdir, adapter["glob"]))] - if not ("type" in schema and schema["type"] == "array"): - if r: - r = r[0] - else: - r = None - if "value" in adapter: - r = draft1tool.resolve_eval(self.joborder, adapter["value"]) - if not r and "properties" in schema: - r = {} - for k, v in schema["properties"].items(): - out = self.collect_outputs(v, outdir) - if out: - r[k] = out - - return r + return None diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py index fd64f7683..fcb783924 100644 --- a/cwltool/pathmapper.py +++ b/cwltool/pathmapper.py @@ -6,8 +6,8 @@ class PathMapper(object): def __init__(self, referenced_files, basedir): self._pathmap = {} for src in referenced_files: - abs = src if os.path.isabs(src) else os.path.join(basedir, src) - self._pathmap[src] = abs + ab = src if os.path.isabs(src) else os.path.join(basedir, src) + self._pathmap[src] = ab def mapper(self, src): return self._pathmap[src] diff --git a/setup.py b/setup.py index 32fc6275f..cf80929bd 100644 --- a/setup.py +++ b/setup.py @@ -37,7 +37,8 @@ install_requires=[ 'jsonschema >= 2.4.0', 'requests', - 'PyYAML' + 'PyYAML', + 'avro' ], test_suite='tests', tests_require=[], diff --git a/tests/test_examples.py b/tests/test_examples.py index 891b9f5fb..5cc2b7cd1 100644 --- a/tests/test_examples.py +++ b/tests/test_examples.py @@ -1,27 +1,13 @@ import unittest -from cwltool import tool -from cwltool.ref_resolver import from_url, resolve_pointer +import cwltool.draft2tool as tool +from cwltool.ref_resolver import from_url class TestExamples(unittest.TestCase): - def test_job_order(self): - t = tool.Tool(from_url("../examples/bwa-mem-tool.json")) - job = t.job(from_url("../examples/bwa-mem-job.json")) - self.assertEqual(job.command_line, ['bwa', - 'mem', - '-t4', - '-m', - '3', - '-I1,2,3,4', - './rabix/tests/test-files/chr20.fa', - './rabix/tests/test-files/example_human_Illumina.pe_1.fastq', - './rabix/tests/test-files/example_human_Illumina.pe_2.fastq']) - - def test_no_adapters(self): - t = tool.Tool(from_url("../examples/add_ints-tool.json")) - job = t.job(from_url("../examples/add_ints-job.json"), basedir='.') + def test_cat1(self): + t = tool.Tool(from_url("../examples/draft-2/cat4-tool.json")) + job = t.job(from_url("../examples/draft-2/cat-job.json"), basedir="../examples/draft-2") result = job.run() print result - self.assertEqual(result['c'], 3) if __name__ == '__main__': From 770635f6af9acb99c4f6e6028d55c64cb48c540f Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 4 Mar 2015 14:57:19 -0500 Subject: [PATCH 040/221] Add draft-1 examples and tests back in. 
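The one-line change below guards against draft-1 tools that omit the
"outputs" section, which presumably occurs among the re-added examples:
indexing self.tool["outputs"] raised KeyError for them, while .get() with
a default lets collect_outputs degrade gracefully. A toy illustration
(the tool dict is made up):

    tool = {"id": "echo", "inputs": []}  # hypothetical draft-1 tool, no outputs
    # tool["outputs"] would raise KeyError here
    outputs = tool.get("outputs", {})    # returns {} instead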
--- cwltool/draft1tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cwltool/draft1tool.py b/cwltool/draft1tool.py index 05cacce14..630b4a245 100644 --- a/cwltool/draft1tool.py +++ b/cwltool/draft1tool.py @@ -335,7 +335,7 @@ def job(self, joborder, basedir, use_container=True): j.command_line = flatten(map(lambda a: builder.adapt(a, joborder, d.mapper), adapters)) j.pathmapper = d - j.collect_outputs = functools.partial(self.collect_outputs, self.tool["outputs"], joborder) + j.collect_outputs = functools.partial(self.collect_outputs, self.tool.get("outputs", {}), joborder) return j From 1eb797938ec3ae8f836b8ccfca6d59e62c18a8cd Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 4 Mar 2015 17:08:45 -0500 Subject: [PATCH 041/221] Working on adding testing running actual containers not just command line generation. --- cwltool/__main__.py | 2 +- cwltool/draft2tool.py | 53 ++++---- cwltool/job.py | 31 ++--- cwltool/main.py | 22 ++-- cwltool/tool_new.py | 220 -------------------------------- cwltool/workflow.py | 284 ------------------------------------------ 6 files changed, 54 insertions(+), 558 deletions(-) delete mode 100755 cwltool/tool_new.py delete mode 100644 cwltool/workflow.py diff --git a/cwltool/__main__.py b/cwltool/__main__.py index a952ff500..857f2ac79 100644 --- a/cwltool/__main__.py +++ b/cwltool/__main__.py @@ -1,3 +1,3 @@ import main -main.main() +sys.exit(main.main()) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 894269c99..b828fcf80 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -8,6 +8,9 @@ from pathmapper import PathMapper, DockerPathMapper import sandboxjs from job import Job +import yaml +import glob +import logging TOOL_CONTEXT_URL = "https://raw.githubusercontent.com/common-workflow-language/common-workflow-language/draft-2-pa/schemas/draft-2/context.json" @@ -324,8 +327,8 @@ def job(self, joborder, basedir, use_container=True): builder.bindings.extend(builder.bind_input(self.inputs_record_schema, joborder)) builder.bindings.sort(key=lambda a: a["position"]) - #pprint.pprint(builder.bindings) - #pprint.pprint(builder.files) + logging.debug(pprint.pformat(builder.bindings)) + logging.debug(pprint.pformat(builder.files)) j = Job() j.joborder = joborder @@ -371,31 +374,35 @@ def job(self, joborder, basedir, use_container=True): j.stdin = j.stdin if os.path.isabs(j.stdin) else os.path.join(basedir, j.stdin) j.pathmapper = builder.pathmapper - j.collect_outputs = functools.partial(self.collect_outputs, self.tool["outputs"], joborder) + j.collect_outputs = functools.partial(self.collect_output_ports, self.tool["outputs"], builder) return j + def collect_output_ports(self, ports, builder, outdir): + custom_output = os.path.join(outdir, "output.cwl.json") + if os.path.exists(custom_output): + outputdoc = yaml.load(custom_output) + validate_ex(self.names.get_name("output_record_schema", ""), outputdoc) + return outputdoc + return {port["port"][1:]: self.collect_output(port, builder, outdir) for port in ports} - def collect_outputs(self, schema, joborder, outdir): + def collect_output(self, schema, builder, outdir): r = None - if isinstance(schema, dict): - if "binding" in schema: - binding = schema["binding"] - if "glob" in binding: - r = [{"path": g} for g in glob.glob(os.path.join(outdir, binding["glob"]))] - # if not ("type" in schema and schema["type"] == "array"): - # if r: - # r = r[0] - # else: - # r = None - #if "value" in binding: - # r = draft1tool.resolve_eval(joborder, binding["value"]) - # if not r 
and "properties" in schema: - # r = {} - # for k, v in schema["properties"].items(): - # out = self.collect_outputs(v, joborder, outdir) - # if out: - # r[k] = out - + if "binding" in schema: + binding = schema["binding"] + if ("glob" in binding and + (schema["type"] == "File" or + (schema["type"] == "array" and + schema["items"] == "File"))): + r = [{"path": g} for g in glob.glob(os.path.join(outdir, binding["glob"]))] + if schema["type"] == "File": + r = r[0] if r else None + elif "valueFrom" in binding: + r = builder.do_eval(binding["valueFrom"]) + + if not r and schema["type"] == "record": + r = {} + for f in schema["fields"]: + r[f["name"]] = self.collect_output(f, builder, outdir) return r diff --git a/cwltool/job.py b/cwltool/job.py index 8c9f79183..af5e9fea0 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -4,13 +4,15 @@ import glob import json import yaml +import logging class Job(object): - def run(self, dry_run=False, pull_image=True): - if not dry_run: - outdir = tempfile.mkdtemp() - else: - outdir = "/tmp" + def run(self, dry_run=False, pull_image=True, outdir=None): + if not outdir: + if not dry_run: + outdir = tempfile.mkdtemp() + else: + outdir = "/tmp" with open(os.path.join(outdir, "job.cwl.json"), "w") as fp: json.dump(self.joborder, fp) @@ -20,14 +22,14 @@ def run(self, dry_run=False, pull_image=True): if self.container and self.container.get("type") == "docker": if pull_image: if "pull" in self.container: - subprocess.call(["docker", "pull", self.container["pull"]]) + subprocess.check_call(["docker", "pull", self.container["pull"]]) elif "import" in self.container: - subprocess.call(["docker", "import", self.container["import"]]) + subprocess.check_call(["docker", "import", self.container["import"]]) runtime = ["docker", "run", "-i"] for d in self.pathmapper.dirs: runtime.append("--volume=%s:%s:ro" % (os.path.abspath(d), self.pathmapper.dirs[d])) - runtime.append("--volume=%s:%s:ro" % (outdir, "/tmp/job_output")) + runtime.append("--volume=%s:%s:ro" % (os.path.abspath(outdir), "/tmp/job_output")) runtime.append("--workdir=%s" % ("/tmp/job_output")) runtime.append("--user=%s" % (os.geteuid())) runtime.append(self.container["imageId"]) @@ -35,7 +37,7 @@ def run(self, dry_run=False, pull_image=True): stdin = None stdout = None - print runtime + self.command_line + logging.info(str(runtime + self.command_line)) if not dry_run: if self.stdin: @@ -59,14 +61,7 @@ def run(self, dry_run=False, pull_image=True): if stdout: stdout.close() - print "Output directory is %s" % outdir - - result_path = os.path.join(outdir, "result.cwl.json") - if os.path.isfile(result_path): - print "Result file found." 
- with open(result_path) as fp: - return yaml.load(fp) - else: - return self.collect_outputs(outdir) + logging.info("Output directory is %s", outdir) + return self.collect_outputs(outdir) return None diff --git a/cwltool/main.py b/cwltool/main.py index 4a53c37b1..927c295fe 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -8,6 +8,7 @@ import json import os import sys +import logging def main(): parser = argparse.ArgumentParser() @@ -15,6 +16,7 @@ def main(): parser.add_argument("job_order", type=str) parser.add_argument("--conformance-test", action="store_true") parser.add_argument("--basedir", type=str) + parser.add_argument("--outdir", type=str) parser.add_argument("--no-container", action="store_true", help="Do not execute in a Docker container, even if one is specified in the tool file") parser.add_argument("--no-pull", default=False, action="store_true", help="Do not try to pull the Docker image") parser.add_argument("--dry-run", action="store_true", help="Do not execute") @@ -27,13 +29,8 @@ def main(): t = draft1tool.Tool(u) else: t = draft2tool.Tool(u) - except jsonschema.exceptions.ValidationError as e: - print "Tool definition failed validation" - print e - return 1 - except draft2tool.ValidationException as e: - print "Tool definition failed validation" - print e + except (jsonschema.exceptions.ValidationError, draft2tool.ValidationException): + logging.exception("Tool definition failed validation") return 1 basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(args.job_order)) @@ -50,13 +47,14 @@ def main(): a["generatefiles"] = job.generatefiles print json.dumps(a) else: - print '%s%s%s' % (' '.join(job.command_line), + logging.info('%s%s%s', ' '.join(job.command_line), ' < %s' % (job.stdin) if job.stdin else '', ' > %s' % (job.stdout) if job.stdout else '') - print "Output json is " + json.dumps(job.run(dry_run=args.dry_run, pull_image=(not args.no_pull))) - except jsonschema.exceptions.ValidationError as e: - print "Job order failed validation" - print e + + runjob = job.run(dry_run=args.dry_run, pull_image=(not args.no_pull), outdir=args.outdir) + print json.dumps(runjob) + except jsonschema.exceptions.ValidationError: + logging.exception("Job order failed validation") return 1 return 0 diff --git a/cwltool/tool_new.py b/cwltool/tool_new.py deleted file mode 100755 index 0ca9751b8..000000000 --- a/cwltool/tool_new.py +++ /dev/null @@ -1,220 +0,0 @@ -#!/usr/bin/env python - -import os -import sys -import json -import logging -import tempfile -from collections import namedtuple -from tool import resolve_pointer, flatten -import sandboxjs -import avro.io -import avro.schema - -Args = namedtuple('Args', ['position', 'args']) -merge_args = lambda args: flatten([a.args for a in sorted(args, key=lambda x: x.position)]) - - -def jseval(job, expression, context=None): - if expression.startswith('{'): - exp_tpl = '''{ - return function()%s();} - ''' - else: - exp_tpl = '''{ - return %s;} - ''' - exp = exp_tpl % expression - return sandboxjs.execjs(exp, "var $job = %s, $self = %s;" % (json.dumps(job), json.dumps(context))) - - -def resolve_transform(job, val, context=None): - if not isinstance(val, dict) or val.get('@type') != 'Transform': - return val - lang = val.get('language') - expr = val.get('value') - if lang == 'javascript': - return jseval(job, expr, context) - elif lang == 'jsonpointer': - return resolve_pointer(job, expr) - else: - raise Exception('Unknown language for Transform: %s' % lang) - - -def get_args(job, adapter, value=None, schema=None, 
key=None, tool=None): - if schema and 'adapter' in schema: - adapter = schema['adapter'] - if adapter is None: - return Args(None, []) - - position = adapter.get('position', 0) - prefix = adapter.get('prefix') - sep = adapter.get('separator', ' ') - item_sep = adapter.get('itemSeparator') - arg_val = adapter.get('argValue') - pos = [position, key] - - if isinstance(arg_val, dict) and arg_val.get('@type') == 'Transform': - value = resolve_transform(job, arg_val, value) - elif isinstance(value, dict) and value.get('@type') == 'File': - value = value.get('path') - - if value is None: - return Args(pos, []) - - if isinstance(value, bool): - if not prefix: - raise Exception('Boolean value without prefix in adapter') - return Args(pos, [prefix]) if value else Args(pos, []) - - if isinstance(value, dict): - if not schema: - return Args(pos, []) - args = [] - for k, v in value.iteritems(): - field = filter(lambda x: x['name'] == k, schema['fields']) - if not field: - logging.error('Field not found in schema: "%s". Schema: %s', k, schema) - continue - field = field[0] - field_adapter = field.get('adapter') - field_schema = schema_by_name(field.get('type'), tool) - args.append(get_args(job, field_adapter, v, field_schema, k, tool=tool)) - return Args(pos, merge_args(args)) - - if isinstance(value, list): - items = flatten([get_args(job, {}, i, schema_for_item(i, schema, tool), tool=tool).args for i in value]) - if item_sep: - val = item_sep.join(items) - if not prefix: - return Args(pos, [val]) - return Args(pos, [prefix, val] if sep == ' ' else [sep.join([prefix, val])]) - if not prefix: - return Args(pos, items) - if sep == ' ': - return Args(pos, flatten([prefix, item] for item in items)) - return Args(pos, [sep.join([prefix, item]) for item in items]) - - value = unicode(value) - if not prefix: - return Args(pos, [value]) - if sep == ' ': - return Args(pos, [prefix, value]) - return Args(pos, [sep.join([prefix, value])]) - - -def schema_by_name(type_name, tool): - if isinstance(type_name, dict): - return type_name - tds = filter(lambda x: x['name'] == type_name, tool.get('schemaDefs', [])) - return tds[0] if tds else None - - -def schema_for_item(value, array_schema, tool): - if not array_schema: - return None - opts = array_schema.get('items', []) - if not opts: - return None - if not isinstance(opts, list): - opts = [opts] - opts = [schema_by_name(opt, tool) for opt in opts] - if len(opts) == 1: - return opts[0] - for opt in opts: - sch = avro.schema.parse(json.dumps(opt)) - if avro.io.validate(sch, value): - return opt - return None - - -def get_proc_args_and_redirects(tool, job): - adaptable_inputs = [i for i in tool.get('inputs', []) if 'adapter' in i.get('schema', {})] - input_args = [] - for i in adaptable_inputs: - inp_id = i['@id'][1:] - inp_val = job['inputs'].get(inp_id) - inp_adapter = i['schema']['adapter'] - input_args.append(get_args(job, inp_adapter, inp_val, i['schema'], inp_id, tool=tool)) - cli_adapter = tool['cliAdapter'] - adapter_args = [get_args(job, a, tool=tool) for a in cli_adapter.get('argAdapters', [])] - if isinstance(cli_adapter.get('baseCmd'), basestring): - cli_adapter['baseCmd'] = [cli_adapter['baseCmd']] - base_cmd = [resolve_transform(job, v) for v in cli_adapter['baseCmd']] - argv = base_cmd + merge_args(input_args + adapter_args) - stdin = resolve_transform(job, cli_adapter.get('stdin')) - stdout = resolve_transform(job, cli_adapter.get('stdout')) - return argv, stdin, stdout - - -def test(tool, job): - ex = os.path.join(os.path.dirname(__file__), 
'../../examples/') - with open(os.path.join(ex, tool)) as fp: - tool = json.load(fp) - with open(os.path.join(ex, job)) as fp: - job = json.load(fp) - argv, stdin, stdout = get_proc_args_and_redirects(tool, job) - print ' '.join(argv), '<', stdin, '>', stdout - - -def conformance_test(): - tool, job = filter(lambda x: x[0] != '-', sys.argv[1:]) - assert os.path.isfile(tool) - assert os.path.isfile(job) - base_dir = filter(lambda x: x.startswith('--basedir='), sys.argv[1:]) - if base_dir: - base_dir = base_dir[0][len('--basedir='):] - - with open(tool) as t, open(job) as j: - tool = json.load(t) - job = json.load(j) - - if base_dir: - job['inputs'] = map_paths(job.get('inputs', {}), base_dir) - - argv, stdin, stdout = get_proc_args_and_redirects(tool, job) - print json.dumps({ - 'args': argv, - 'stdin': stdin, - 'stdout': stdout, - }) - - -def map_paths(obj, base_dir): - if isinstance(obj, list): - return [map_paths(i, base_dir) for i in obj] - if not isinstance(obj, dict): - return obj - if obj.get('@type') == 'File': - obj['path'] = os.path.join(base_dir, obj['path']) - return obj - return {k: map_paths(v, base_dir) for k, v in obj.iteritems()} - - -def run(tool_path, job_path): - with open(tool_path) as fpt, open(job_path) as fpj: - tool = json.load(fpt) - job = json.load(fpj) - job = map_paths(job, os.path.join(os.path.dirname(__file__), '../../examples/')) - argv, stdin, stdout = get_proc_args_and_redirects(tool, job) - line = ' '.join(argv) - if stdin: - line += ' < ' + stdin - if stdout: - line += ' > ' + stdout - print line - job_dir = tempfile.mkdtemp() - os.chdir(job_dir) - if os.system(line): - raise Exception('Process failed.') - print os.listdir('.') - - -if __name__ == '__main__': - if '--conformance-test' not in sys.argv: - run(*sys.argv[1:]) - # test('bwa-mem-tool.json', 'bwa-mem-job.json') - # test('cat1-tool.json', 'cat-n-job.json') - # test('tmap-tool.json', 'tmap-job.json') - else: - conformance_test() diff --git a/cwltool/workflow.py b/cwltool/workflow.py deleted file mode 100644 index 0a95c5602..000000000 --- a/cwltool/workflow.py +++ /dev/null @@ -1,284 +0,0 @@ -import os -import logging -import functools -import json -from datetime import datetime -from copy import deepcopy -from collections import defaultdict - -from rdflib import Graph, URIRef, Literal, RDF, XSD -from rdflib.namespace import Namespace, NamespaceManager - -from tool_new import jseval - - -log = logging.getLogger(__file__) - -CWL = Namespace('http://github.com/common-workflow-language/') -WFD = Namespace('http://purl.org/wf4ever/wfdesc#') -PROV = Namespace('http://www.w3.org/ns/prov#') -DCT = Namespace('http://purl.org/dc/terms/') -CNT = Namespace('http://www.w3.org/2011/content#') - - -def get_value(graph, iri): - chars = graph.value(iri, CNT.chars) - if chars: - return json.load(chars.toPython()) - return graph.value(iri).toPython() - - -def set_value(graph, iri, val): - if isinstance(val, (dict, list)): - graph.set([iri, CNT.chars, Literal(json.dumps(val))]) - else: - graph.set([iri, RDF.value, Literal(val)]) - - -class Inputs(object): - def __init__(self, graph, tuples): - self.g = graph - self.d = {} - self.wrapped = [] - for k, v in tuples: - self[k] = v - - def __getitem__(self, item): - return self.d[item] - - def __setitem__(self, key, value): - if key not in self.d: - self.d[key] = get_value(self.g, value) - elif key in self.wrapped: - self.d[key].append(get_value(self.g, value)) - else: - self.d[key] = [self.d[key], get_value(self.g, value)] - self.wrapped.append(key) - 
- def to_dict(self): - return {k[k.rfind('/') + 1:]: v for k, v in self.d.iteritems()} - - -def lazy(func): - attr = '__lazy_' + func.__name__ - - @functools.wraps(func) - def wrapped(self): - if not hasattr(self, attr): - setattr(self, attr, func(self)) - return getattr(self, attr) - return property(wrapped) - - -class Process(object): - def __init__(self, graph, iri): - self.g = graph - self.iri = URIRef(iri) - - activity = lazy(lambda self: self.g.value(None, CWL.activityFor, self.iri)) - inputs = lazy(lambda self: list(self.g.objects(self.iri, WFD.hasInput))) - outputs = lazy(lambda self: list(self.g.objects(self.iri, WFD.hasOutput))) - started = lazy(lambda self: self.g.value(self.activity, PROV.startedAtTime) if self.activity else None) - ended = lazy(lambda self: self.g.value(self.activity, PROV.endedAtTime) if self.activity else None) - has_prereqs = lazy(lambda self: all([None, CWL.producedByPort, src] in self.g for src in self.sources)) - - @lazy - def has_prereqs(self): - return all([None, CWL.producedByPort, src] in self.g for src in self.sources) - - @lazy - def sources(self): - return [x[0] for x in self.g.query(''' - select ?src - where { - <%s> wfd:hasInput ?port . - ?link wfd:hasSink ?port ; - wfd:hasSource ?src . - } - ''' % self.iri)] - - @lazy - def input_values(self): - return self.g.query(''' - select ?port ?val - where { - <%s> wfd:hasInput ?port . - ?link wfd:hasSink ?port ; - wfd:hasSource ?src . - ?val cwl:producedByPort ?src . - } - ''' % self.iri) - - -class WorkflowRunner(object): - def __init__(self, path): - nm = NamespaceManager(Graph()) - nm.bind('cwl', CWL) - nm.bind('wfd', WFD) - nm.bind('prov', PROV) - nm.bind('dct', DCT) - nm.bind('cnt', CNT) - self.g = Graph(namespace_manager=nm) - self.wf_iri = None - self.act_iri = None - self._load(path) - - def _load(self, path): - self.g.parse(path, format='json-ld') - self.wf_iri = URIRef('file://' + path) # TODO: Find a better way to do this - self.g.add([self.wf_iri, RDF.type, WFD.Process]) - for sp in self.g.objects(self.wf_iri, WFD.hasSubProcess): - self.g.add([sp, RDF.type, WFD.Process]) - tool = self.g.value(sp, CWL.hasImplementation) - log.debug('Loading reference %s', tool) - self.g.parse(tool, format='json-ld') - - def start(self, proc_iri=None): - main_act = False - if not proc_iri: - proc_iri = self.wf_iri - main_act = True - proc_iri = URIRef(proc_iri) - iri = self.iri_for_activity(proc_iri) - log.debug('Starting %s', iri) - self.g.add([iri, RDF.type, CWL.Activity]) - self.g.add([iri, CWL.activityFor, proc_iri]) - self.g.add([iri, PROV.startedAtTime, Literal(datetime.now(), datatype=XSD.datetime)]) - if main_act: - self.act_iri = iri - else: - self.g.add([self.act_iri, DCT.hasPart, iri]) - for k, v in Process(self.g, proc_iri).input_values: - val = self.g.value(v) - log.debug('Value on %s is %s', k, val.toPython()) - return iri - - def end(self, act_iri): - act_iri = URIRef(act_iri) - self.g.add([act_iri, PROV.endedAtTime, Literal(datetime.now(), datatype=XSD.datetime)]) - - def iri_for_activity(self, process_iri): - sep = '/' if '#' in process_iri else '#' - return URIRef(process_iri + sep + '__activity__') # TODO: Better IRIs - - def iri_for_value(self, port_iri): - return URIRef(port_iri + '/__value__') # TODO: Better IRIs - - def queued(self): - ps = [Process(self.g, iri) for iri in self.g.subjects(RDF.type, CWL.Process)] - return [p for p in ps if p.has_prereqs and not p.started] - - def set_value(self, port_iri, value, creator_iri=None): - if not port_iri.startswith(self.wf_iri): - port_iri = 
self.wf_iri + '#' + port_iri - port_iri = URIRef(port_iri) - iri = self.iri_for_value(port_iri) - set_value(self.g, iri, value) - self.g.add([iri, RDF.type, CWL.Value]) - self.g.add([iri, CWL.producedByPort, URIRef(port_iri)]) - if creator_iri: - self.g.add([iri, PROV.wasGeneratedBy, URIRef(creator_iri)]) - return iri - - def _depth_mismatch_port(self, proc, inputs): - depth_of = lambda x: 1 if isinstance(x, list) else 0 # TODO: fixme - incoming = {k: depth_of(v) for k, v in inputs.d.iteritems()} - expected = {k: self.g.value(k, CWL.hasDepth).toPython() for k in proc.inputs} - result = None - for k, v in incoming.iteritems(): - if expected[k] != v: - if result: - log.error('\nIncoming: %s\nExpected: %s', incoming, expected) - raise Exception('More than one port has mismatching depth.') - if incoming[k] < expected[k]: - raise Exception('depth(incoming) < depth(expected); Wrapping must be done explicitly.') - if incoming[k] - expected[k] > 1: - raise NotImplementedError('Only handling one nesting level at the moment.') - result = k - return result - - def run_component(self, tool, job): - cmp_type = self.g.value(tool, RDF.type) - if cmp_type == CWL.SimpleTransformTool: - return self.run_script(tool, job) - raise Exception('Unrecognized component type: %s' % cmp_type) - - def run_workflow(self): - self.start() - while self.queued(): - act = self.start(self.queued()[0].iri) - proc = Process(self.g, self.g.value(act, CWL.activityFor)) - tool = self.g.value(proc.iri, CWL.hasImplementation) - inputs = Inputs(self.g, proc.input_values) # TODO: propagate desc<->impl - dmp = self._depth_mismatch_port(proc, inputs) - if not dmp: - job = {'inputs': inputs.to_dict()} - outputs = self.run_component(tool, job) - else: - jobs, outputs = [], defaultdict(list) - for i in inputs[dmp]: - inp_copy = deepcopy(inputs) - inp_copy.d[dmp] = i - jobs.append({'inputs': inp_copy.to_dict()}) - for job in jobs: - outs = self.run_component(tool, job) - for k, v in outs.iteritems(): - outputs[k].append(v) - for k, v in outputs.iteritems(): - self.set_value(proc.iri + '/' + k, v, act) - self.end(act) - self.end(self.act_iri) - outputs = dict(self.g.query(''' - select ?port ?val - where { - <%s> wfd:hasOutput ?port . - ?link wfd:hasSink ?port ; - wfd:hasSource ?src . - ?val cwl:producedByPort ?src . - } - ''' % self.wf_iri)) - return {k: get_value(self.g, v) for k, v in outputs.iteritems()} - - def run_script(self, tool, job): - expr = self.g.value(self.g.value(tool, CWL.hasScript)).toPython() - log.debug('Running expr %s\nJob: %s', expr, job) - result = jseval(job, expr) - logging.debug('Result: %s', result) - return result - - -def aplusbtimesc(wf_name, a, b, c): - print '\n\n--- %s ---\n\n' % wf_name - path = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../examples/' + wf_name)) - rnr = WorkflowRunner(path) - rnr.set_value('a', a) - rnr.set_value('b', b) - rnr.set_value('c', c) - outs = rnr.run_workflow() - assert outs - print '\nDone. 
Workflow outputs:' - for k, v in outs.iteritems(): - print k, v - assert v == (a+b)*c - return rnr - - -def count_lines(): - examples = os.path.abspath(os.path.join(os.path.dirname(__file__), '../../examples')) - wf_path = os.path.join(examples, 'wf-count-lines.json') - job_path = os.path.join(examples, 'wf-count-lines-job.json') - with open(job_path) as fp: - inputs = json.load(fp)['inputs'] - rnr = WorkflowRunner(wf_path) - for k, v in inputs.iteritems(): - rnr.set_value(k, v) - print rnr.run_workflow() - return rnr - - -if __name__ == '__main__': - logging.basicConfig(level=logging.DEBUG) - # aplusbtimesc('wf_simple.json', 2, 3, 4) - # aplusbtimesc('wf_lists.json', 2, 3, 4) - # aplusbtimesc('wf_map.json', 2, 3, 4) - count_lines() From 18d6b2e53abb2e4ca46969d214c9e9e52755e230 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 5 Mar 2015 11:05:18 -0500 Subject: [PATCH 042/221] Can now have tests actually run tools. --- cwltool/draft2tool.py | 14 +++++++++++--- cwltool/job.py | 22 ++++++++++++++++------ cwltool/main.py | 12 ++++++++---- 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index b828fcf80..15071d5c9 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -11,6 +11,9 @@ import yaml import glob import logging +import hashlib + +_logger = logging.getLogger("cwltool") TOOL_CONTEXT_URL = "https://raw.githubusercontent.com/common-workflow-language/common-workflow-language/draft-2-pa/schemas/draft-2/context.json" @@ -327,8 +330,8 @@ def job(self, joborder, basedir, use_container=True): builder.bindings.extend(builder.bind_input(self.inputs_record_schema, joborder)) builder.bindings.sort(key=lambda a: a["position"]) - logging.debug(pprint.pformat(builder.bindings)) - logging.debug(pprint.pformat(builder.files)) + _logger.debug(pprint.pformat(builder.bindings)) + _logger.debug(pprint.pformat(builder.files)) j = Job() j.joborder = joborder @@ -394,7 +397,12 @@ def collect_output(self, schema, builder, outdir): (schema["type"] == "File" or (schema["type"] == "array" and schema["items"] == "File"))): - r = [{"path": g} for g in glob.glob(os.path.join(outdir, binding["glob"]))] + r = [{"path": g} for g in glob.glob(binding["glob"])] + for files in r: + checksum = hashlib.sha1() + with open(files["path"], "rb") as f: + checksum.update(f.read()) + files["checksum"] = "sha1$%s" % checksum.hexdigest() if schema["type"] == "File": r = r[0] if r else None elif "valueFrom" in binding: diff --git a/cwltool/job.py b/cwltool/job.py index af5e9fea0..e8a9b0a16 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -5,6 +5,9 @@ import json import yaml import logging +import sys + +_logger = logging.getLogger("cwltool") class Job(object): def run(self, dry_run=False, pull_image=True, outdir=None): @@ -22,9 +25,9 @@ def run(self, dry_run=False, pull_image=True, outdir=None): if self.container and self.container.get("type") == "docker": if pull_image: if "pull" in self.container: - subprocess.check_call(["docker", "pull", self.container["pull"]]) + subprocess.check_call(["docker", "pull", self.container["pull"]], stdout=sys.stderr) elif "import" in self.container: - subprocess.check_call(["docker", "import", self.container["import"]]) + subprocess.check_call(["docker", "import", self.container["import"]], stdout=sys.stderr) runtime = ["docker", "run", "-i"] for d in self.pathmapper.dirs: @@ -37,31 +40,38 @@ def run(self, dry_run=False, pull_image=True, outdir=None): stdin = None stdout = None - logging.info(str(runtime + 
self.command_line)) + _logger.info(str(runtime + self.command_line)) if not dry_run: if self.stdin: stdin = open(self.stdin, "rb") + else: + stdin = subprocess.PIPE os.chdir(outdir) if self.stdout: stdout = open(self.stdout, "wb") + else: + stdout = sys.stderr for t in self.generatefiles: with open(os.path.join(outdir, t), "w") as f: f.write(self.generatefiles[t]) sp = subprocess.Popen(runtime + self.command_line, shell=False, stdin=stdin, stdout=stdout) + + if stdin == subprocess.PIPE: + sp.stdin.close() + sp.wait() - if stdin: + if stdin != subprocess.PIPE: stdin.close() if stdout: stdout.close() - logging.info("Output directory is %s", outdir) - return self.collect_outputs(outdir) + return (outdir, self.collect_outputs(outdir)) return None diff --git a/cwltool/main.py b/cwltool/main.py index 927c295fe..96d0c3467 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -10,6 +10,9 @@ import sys import logging +_logger = logging.getLogger("cwltool") +_logger.addHandler(logging.StreamHandler()) + def main(): parser = argparse.ArgumentParser() parser.add_argument("tool", type=str) @@ -30,7 +33,7 @@ def main(): else: t = draft2tool.Tool(u) except (jsonschema.exceptions.ValidationError, draft2tool.ValidationException): - logging.exception("Tool definition failed validation") + _logger.exception("Tool definition failed validation") return 1 basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(args.job_order)) @@ -47,14 +50,15 @@ def main(): a["generatefiles"] = job.generatefiles print json.dumps(a) else: - logging.info('%s%s%s', ' '.join(job.command_line), + _logger.info('%s%s%s', ' '.join(job.command_line), ' < %s' % (job.stdin) if job.stdin else '', ' > %s' % (job.stdout) if job.stdout else '') - runjob = job.run(dry_run=args.dry_run, pull_image=(not args.no_pull), outdir=args.outdir) + (outdir, runjob) = job.run(dry_run=args.dry_run, pull_image=(not args.no_pull), outdir=args.outdir) + _logger.info("Output directory is %s", outdir) print json.dumps(runjob) except jsonschema.exceptions.ValidationError: - logging.exception("Job order failed validation") + _logger.exception("Job order failed validation") return 1 return 0 From b7d51e343fecc63485bff9d9781e514900e0b64c Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 5 Mar 2015 17:04:55 -0500 Subject: [PATCH 043/221] Added: ExpressionTool, loadContents flag, more conformance tests. 
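An ExpressionTool produces no command line: its job() returns an ExpressionJob whose run() evaluates the tool's "script" through the builder. Input schemas may also set "loadContents" so that a File value carries the file's bytes for expressions to inspect. A rough sketch of the loadContents effect (the file name is hypothetical):

    datum = {"path": "whale.txt"}  # hypothetical File value from the job order
    # with "loadContents" set on the input schema, bind_input does roughly:
    with open(os.path.join(self.basedir, datum["path"]), "rb") as f:
        datum["contents"] = f.read()  # unbounded here; capped at one megabyte in a later commit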
--- cwltool/draft2tool.py | 96 ++++++++++++++++++++++++++++++++----------- cwltool/main.py | 10 ++--- 2 files changed, 77 insertions(+), 29 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 15071d5c9..c85eb20f5 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -184,17 +184,20 @@ def bind_input(self, schema, datum): if schema["type"] == "array": for n, item in enumerate(datum): - b = self.bind_input({"type": schema["items"], "binding": schema.get("binding")}, item) + b = self.bind_input({"type": schema["items"], "commandLineBinding": schema.get("commandLineBinding")}, item) for bi in b: bi["position"].insert(0, n) bindings.extend(b) if schema["type"] == "File": - self.files.append(datum["path"]) + if schema.get("loadContents"): + with open(os.path.join(self.basedir, datum["path"]), "rb") as f: + datum["contents"] = f.read() + self.files.append(datum) b = None - if "binding" in schema and isinstance(schema["binding"], dict): - b = copy.copy(schema["binding"]) + if "commandLineBinding" in schema and isinstance(schema["commandLineBinding"], dict): + b = copy.copy(schema["commandLineBinding"]) if b.get("position"): b["position"] = [b["position"]] @@ -249,8 +252,14 @@ def generate_arg(self, binding): return [a for a in args if a is not None] +def makeTool(toolpath_object): + if toolpath_object["@type"] == "CommandLineTool": + return CommandLineTool(toolpath_object) + elif toolpath_object["@type"] == "ExpressionTool": + return ExpressionTool(toolpath_object) + class Tool(object): - def __init__(self, toolpath_object): + def __init__(self, toolpath_object, validateAs): self.names = avro.schema.Names() cwl_avsc = os.path.join(module_dir, 'schemas/draft-2/cwl.avsc') with open(cwl_avsc) as f: @@ -263,7 +272,7 @@ def __init__(self, toolpath_object): raise Exception("Missing or invalid '@context' field in tool description document, must be %s" % TOOL_CONTEXT_URL) # Validate tool documument - validate_ex(self.names.get_name("CommandLineTool", ""), self.tool) + validate_ex(self.names.get_name(validateAs, ""), self.tool) # Import schema defs self.schemaDefs = {} @@ -289,18 +298,52 @@ def __init__(self, toolpath_object): self.outputs_record_schema["fields"].append(c) avro.schema.make_avsc_object(self.outputs_record_schema, self.names) - def job(self, joborder, basedir, use_container=True): + def _init_job(self, joborder, basedir): # Validate job order validate_ex(self.names.get_name("input_record_schema", ""), joborder) builder = Builder() - builder.job = joborder + builder.job = copy.deepcopy(joborder) builder.jslib = '' builder.basedir = basedir builder.files = [] builder.bindings = [] builder.schemaDefs = self.schemaDefs + if self.tool.get("expressionDefs"): + for ex in self.tool['expressionDefs']: + builder.jslib += builder.do_eval(ex) + "\n" + + builder.bindings.extend(builder.bind_input(self.inputs_record_schema, builder.job)) + + return builder + + +class ExpressionTool(Tool): + def __init__(self, toolpath_object): + super(ExpressionTool, self).__init__(toolpath_object, "ExpressionTool") + + class ExpressionJob(object): + def run(self, outdir=None, **kwargs): + return (outdir, self.builder.do_eval(self.script)) + + def job(self, joborder, basedir, **kwargs): + builder = self._init_job(joborder, basedir) + + j = ExpressionTool.ExpressionJob() + j.builder = builder + j.script = self.tool["script"] + + return j + + +class CommandLineTool(Tool): + def __init__(self, toolpath_object): + super(CommandLineTool, self).__init__(toolpath_object, "CommandLineTool") + 
+ def job(self, joborder, basedir, use_container=True): + builder = self._init_job(joborder, basedir) + if isinstance(self.tool["baseCommand"], list): for n, b in enumerate(self.tool["baseCommand"]): builder.bindings.append({ @@ -313,10 +356,6 @@ def job(self, joborder, basedir, use_container=True): "valueFrom": self.tool["baseCommand"] }) - if self.tool.get("expressionDefs"): - for ex in self.tool['expressionDefs']: - builder.jslib += builder.do_eval(ex) + "\n" - if self.tool.get("arguments"): for i, a in enumerate(self.tool["arguments"]): a = copy.copy(a) @@ -327,14 +366,15 @@ def job(self, joborder, basedir, use_container=True): a["valueFrom"] = builder.do_eval(a["valueFrom"]) builder.bindings.append(a) - builder.bindings.extend(builder.bind_input(self.inputs_record_schema, joborder)) builder.bindings.sort(key=lambda a: a["position"]) _logger.debug(pprint.pformat(builder.bindings)) _logger.debug(pprint.pformat(builder.files)) + builder.files = [f["path"] for f in builder.files] + j = Job() - j.joborder = joborder + j.joborder = builder.job j.container = None builder.pathmapper = None @@ -391,22 +431,30 @@ def collect_output_ports(self, ports, builder, outdir): def collect_output(self, schema, builder, outdir): r = None - if "binding" in schema: - binding = schema["binding"] - if ("glob" in binding and - (schema["type"] == "File" or - (schema["type"] == "array" and - schema["items"] == "File"))): + if "outputBinding" in schema: + binding = schema["outputBinding"] + if "glob" in binding: r = [{"path": g} for g in glob.glob(binding["glob"])] for files in r: checksum = hashlib.sha1() with open(files["path"], "rb") as f: - checksum.update(f.read()) + contents = f.read() + checksum.update(contents) + if binding.get("loadContents"): + files["contents"] = contents files["checksum"] = "sha1$%s" % checksum.hexdigest() - if schema["type"] == "File": + + if schema["type"] == "array" and schema["items"] == "File": + pass + elif schema["type"] == "File": r = r[0] if r else None - elif "valueFrom" in binding: - r = builder.do_eval(binding["valueFrom"]) + elif binding.get("loadContents"): + r = [v["contents"] for v in r] + else: + r = None + + if "valueFrom" in binding: + r = builder.do_eval(binding["valueFrom"], r) if not r and schema["type"] == "record": r = {} diff --git a/cwltool/main.py b/cwltool/main.py index 96d0c3467..e36c9ec08 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -31,7 +31,7 @@ def main(): if "schema" in u: t = draft1tool.Tool(u) else: - t = draft2tool.Tool(u) + t = draft2tool.makeTool(u) except (jsonschema.exceptions.ValidationError, draft2tool.ValidationException): _logger.exception("Tool definition failed validation") return 1 @@ -50,10 +50,10 @@ def main(): a["generatefiles"] = job.generatefiles print json.dumps(a) else: - _logger.info('%s%s%s', ' '.join(job.command_line), - ' < %s' % (job.stdin) if job.stdin else '', - ' > %s' % (job.stdout) if job.stdout else '') - + if isinstance(job, draft1tool.Tool) or isinstance(job, draft2tool.CommandLineTool): + _logger.info('%s%s%s', ' '.join(job.command_line), + ' < %s' % (job.stdin) if job.stdin else '', + ' > %s' % (job.stdout) if job.stdout else '') (outdir, runjob) = job.run(dry_run=args.dry_run, pull_image=(not args.no_pull), outdir=args.outdir) _logger.info("Output directory is %s", outdir) print json.dumps(runjob) From 3efa0763e8b9859f2cfe729c8de508351a57c162 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 6 Mar 2015 09:56:47 -0500 Subject: [PATCH 044/221] Fix json-ld contexts to point to master branch. 
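Loading checks a tool document's "@context" against TOOL_CONTEXT_URL, so draft-2 documents should now declare the master-branch cwl-context.json instead of context.json on the old draft-2-pa branch. A minimal header that would pass the check (the rest of the document is hypothetical):

    tool = {
        "@context": TOOL_CONTEXT_URL,  # the master-branch cwl-context.json URL in the diff below
        "@type": "CommandLineTool",
    }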
--- cwltool/draft2tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index c85eb20f5..8c3fecdfb 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -15,7 +15,7 @@ _logger = logging.getLogger("cwltool") -TOOL_CONTEXT_URL = "https://raw.githubusercontent.com/common-workflow-language/common-workflow-language/draft-2-pa/schemas/draft-2/context.json" +TOOL_CONTEXT_URL = "https://raw.githubusercontent.com/common-workflow-language/common-workflow-language/master/schemas/draft-2/cwl-context.json" From 4b883fedb9132ff2f1c41950e4d2f9eed0eccf97 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 6 Mar 2015 13:45:31 -0500 Subject: [PATCH 045/221] Change "port" to "id", "requirementType"/"expressionType" to "class". Add file size when capturing outputs. Limit "loadContents" to the first megabyte. stdin can reference a parameter defined in the inputs, and stdout can reference a parameter defined in the outputs. --- cwltool/draft2tool.py | 68 +++++++++++++++++++++++++++++-------------- 1 file changed, 46 insertions(+), 22 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 8c3fecdfb..91473df72 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -12,10 +12,12 @@ import glob import logging import hashlib +import random _logger = logging.getLogger("cwltool") TOOL_CONTEXT_URL = "https://raw.githubusercontent.com/common-workflow-language/common-workflow-language/master/schemas/draft-2/cwl-context.json" +CONTENT_LIMIT = 1024 * 1024 module_dir = os.path.dirname(os.path.abspath(__file__)) @@ -135,11 +137,17 @@ def jseval(self, expression, context): def do_eval(self, ex, context=None): if isinstance(ex, dict): - if ex.get("expressionType") == "javascript": - return self.jseval(ex["value"], context) - elif ex.get("ref"): - with open(os.path.join(self.basedir, ex["ref"]), "r") as f: - return f.read() + if ex.get("class") == "JavascriptExpression": + if "value" in ex: + return self.jseval(ex["value"], context) + elif "invoke" in ex: + return self.jseval(ex["invoke"], context) + elif ex.get("id"): + if ex["id"].startswith("#"): + return self.job[ex["id"][1:]] + else: + with open(os.path.join(self.basedir, ex["id"]), "r") as f: + return f.read() else: return ex @@ -192,7 +200,7 @@ def bind_input(self, schema, datum): if schema["type"] == "File": if schema.get("loadContents"): with open(os.path.join(self.basedir, datum["path"]), "rb") as f: - datum["contents"] = f.read() + datum["contents"] = f.read(CONTENT_LIMIT) self.files.append(datum) b = None @@ -253,9 +261,9 @@ def generate_arg(self, binding): return [a for a in args if a is not None] def makeTool(toolpath_object): - if toolpath_object["@type"] == "CommandLineTool": + if toolpath_object["class"] == "CommandLineTool": return CommandLineTool(toolpath_object) - elif toolpath_object["@type"] == "ExpressionTool": + elif toolpath_object["class"] == "ExpressionTool": return ExpressionTool(toolpath_object) class Tool(object): @@ -285,16 +293,16 @@ def __init__(self, toolpath_object, validateAs): self.inputs_record_schema = {"name": "input_record_schema", "type": "record", "fields": []} for i in self.tool["inputs"]: c = copy.copy(i) - c["name"] = c["port"][1:] - del c["port"] + c["name"] = c["id"][1:] + del c["id"] self.inputs_record_schema["fields"].append(c) avro.schema.make_avsc_object(self.inputs_record_schema, self.names) self.outputs_record_schema = {"name": "outputs_record_schema", "type":
"record", "fields": []} for i in self.tool["outputs"]: c = copy.copy(i) - c["name"] = c["port"][1:] - del c["port"] + c["name"] = c["id"][1:] + del c["id"] self.outputs_record_schema["fields"].append(c) avro.schema.make_avsc_object(self.outputs_record_schema, self.names) @@ -376,27 +384,38 @@ def job(self, joborder, basedir, use_container=True): j = Job() j.joborder = builder.job j.container = None + j.stdin = None + j.stdout = None builder.pathmapper = None if self.tool.get("stdin"): j.stdin = builder.do_eval(self.tool["stdin"]) + if isinstance(j.stdin, dict): + j.stdin = j.stdin["path"] builder.files.append(j.stdin) - else: - j.stdin = None if self.tool.get("stdout"): - j.stdout = builder.do_eval(self.tool["stdout"]) + if isinstance(self.tool["stdout"], dict) and "id" in self.tool["stdout"]: + for out in self.tool.get("outputs", []): + if out["id"] == self.tool["stdout"]["id"]: + filename = self.tool["stdout"]["id"][1:] + j.stdout = filename + out["outputBinding"] = out.get("outputBinding", {}) + out["outputBinding"]["glob"] = filename + if not j.stdout: + raise Exception("stdout refers to invalid output") + else: + j.stdout = builder.do_eval(self.tool["stdout"]) if os.path.isabs(j.stdout): raise Exception("stdout must be a relative path") - else: - j.stdout = None j.generatefiles = {} for t in self.tool.get("fileDefs", []): j.generatefiles[t["filename"]] = builder.do_eval(t["value"]) - for r in self.tool.get("hints", []): - if r["requirementType"] == "DockerImage" and use_container: + reqsAndHints = self.tool.get("requirements", []) + self.tool.get("hints", []) + for r in reqsAndHints: + if r["class"] == "DockerRequirement" and use_container: j.container = {} j.container["type"] = "docker" if "dockerPull" in r: @@ -427,7 +446,7 @@ def collect_output_ports(self, ports, builder, outdir): outputdoc = yaml.load(custom_output) validate_ex(self.names.get_name("output_record_schema", ""), outputdoc) return outputdoc - return {port["port"][1:]: self.collect_output(port, builder, outdir) for port in ports} + return {port["id"][1:]: self.collect_output(port, builder, outdir) for port in ports} def collect_output(self, schema, builder, outdir): r = None @@ -438,11 +457,16 @@ def collect_output(self, schema, builder, outdir): for files in r: checksum = hashlib.sha1() with open(files["path"], "rb") as f: - contents = f.read() - checksum.update(contents) + contents = f.read(CONTENT_LIMIT) if binding.get("loadContents"): files["contents"] = contents + filesize = 0 + while contents != "": + checksum.update(contents) + filesize += len(contents) + contents = f.read(1024*1024) files["checksum"] = "sha1$%s" % checksum.hexdigest() + files["size"] = filesize if schema["type"] == "array" and schema["items"] == "File": pass From 1d0f1ca33f28225daf45ae1533c1b80257959bbb Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 6 Mar 2015 15:50:57 -0500 Subject: [PATCH 046/221] Work-in-progress workflow runner! 
--- cwltool/__main__.py | 1 + cwltool/draft1tool.py | 4 +- cwltool/draft2tool.py | 177 ++++-------------------------------- cwltool/job.py | 63 ++++++++------- cwltool/main.py | 21 ++--- cwltool/process.py | 48 ++++++++++++ cwltool/validate.py | 105 +++++++++++++++++++++++++ cwltool/workflow.py | 80 +++++++++++++++++++ 8 files changed, 298 insertions(+), 201 deletions(-) create mode 100644 cwltool/process.py create mode 100644 cwltool/validate.py create mode 100644 cwltool/workflow.py diff --git a/cwltool/__main__.py b/cwltool/__main__.py index 857f2ac79..ae4ff8a78 100644 --- a/cwltool/__main__.py +++ b/cwltool/__main__.py @@ -1,3 +1,4 @@ import main +import sys sys.exit(main.main()) diff --git a/cwltool/draft1tool.py b/cwltool/draft1tool.py index 630b4a245..fb9ae7357 100644 --- a/cwltool/draft1tool.py +++ b/cwltool/draft1tool.py @@ -10,7 +10,7 @@ import urlparse import functools from pathmapper import PathMapper, DockerPathMapper -from job import Job +from job import CommandLineJob from flatten import flatten from jsonschema.validators import Draft4Validator @@ -290,7 +290,7 @@ def job(self, joborder, basedir, use_container=True): referenced_files = filter(lambda a: a is not None, flatten(map(lambda a: builder.find_files(a, joborder), adapters))) - j = Job() + j = CommandLineJob() j.joborder = joborder j.container = None diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 91473df72..8b30eed2f 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -7,124 +7,21 @@ import os from pathmapper import PathMapper, DockerPathMapper import sandboxjs -from job import Job +from job import CommandLineJob import yaml import glob import logging import hashlib import random +from process import Process +import validate _logger = logging.getLogger("cwltool") -TOOL_CONTEXT_URL = "https://raw.githubusercontent.com/common-workflow-language/common-workflow-language/master/schemas/draft-2/cwl-context.json" CONTENT_LIMIT = 1024 * 1024 module_dir = os.path.dirname(os.path.abspath(__file__)) -class ValidationException(Exception): - pass -def validate(expected_schema, datum): - try: - return validate_ex(expected_schema, datum) - except ValidationException: - return False -INT_MIN_VALUE = -(1 << 31) -INT_MAX_VALUE = (1 << 31) - 1 -LONG_MIN_VALUE = -(1 << 63) -LONG_MAX_VALUE = (1 << 63) - 1 -def validate_ex(expected_schema, datum): - """Determine if a python datum is an instance of a schema.""" - schema_type = expected_schema.type - if schema_type == 'null': - if datum is None: - return True - else: - raise ValidationException("`%s` is not null" % datum) - elif schema_type == 'boolean': - if isinstance(datum, bool): - return True - else: - raise ValidationException("`%s` is not boolean" % datum) - elif schema_type == 'string': - if isinstance(datum, basestring): - return True - else: - raise ValidationException("`%s` is not string" % datum) - elif schema_type == 'bytes': - if isinstance(datum, str): - return True - else: - raise ValidationException("`%s` is not bytes" % datum) - elif schema_type == 'int': - if ((isinstance(datum, int) or isinstance(datum, long)) - and INT_MIN_VALUE <= datum <= INT_MAX_VALUE): - return True - else: - raise ValidationException("`%s` is not int" % datum) - elif schema_type == 'long': - if ((isinstance(datum, int) or isinstance(datum, long)) - and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE): - return True - else: - raise ValidationException("`%s` is not long" % datum) - elif schema_type in ['float', 'double']: - if (isinstance(datum, int) or
isinstance(datum, long) - or isinstance(datum, float)): - return True - else: - raise ValidationException("`%s` is not float or double" % datum) - elif schema_type == 'fixed': - if isinstance(datum, str) and len(datum) == expected_schema.size: - return True - else: - raise ValidationException("`%s` is not fixed" % datum) - elif schema_type == 'enum': - if datum in expected_schema.symbols: - return True - else: - raise ValidationException("`%s`\n is not a valid enum symbol, expected\n %s" % (pprint.pformat(datum), pprint.pformat(expected_schema.symbols))) - elif schema_type == 'array': - if isinstance(datum, list): - for i, d in enumerate(datum): - try: - validate_ex(expected_schema.items, d) - except ValidationException as v: - raise ValidationException("%s\n while validating item at position %i `%s`" % (v, i, d)) - return True - else: - raise ValidationException("`%s`\n is not a list, expected list of\n %s" % (pprint.pformat(datum), expected_schema.items)) - elif schema_type == 'map': - if (isinstance(datum, dict) and - False not in [isinstance(k, basestring) for k in datum.keys()] and - False not in [validate(expected_schema.values, v) for v in datum.values()]): - return True - else: - raise ValidationException("`%s` is not a valid map value, expected\n %s" % (pprint.pformat(datum), pprint.pformat(expected_schema.values))) - elif schema_type in ['union', 'error_union']: - if True in [validate(s, datum) for s in expected_schema.schemas]: - return True - else: - errors = [] - for s in expected_schema.schemas: - try: - validate_ex(s, datum) - except ValidationException as e: - errors.append(str(e)) - raise ValidationException("`%s`\n is not valid, expected one of:\n\n%s\n\n the individual errors are:\n%s" % (pprint.pformat(datum), ",\n\n ".join([str(s) for s in expected_schema.schemas]), ";\n\n".join(errors))) - elif schema_type in ['record', 'error', 'request']: - if not isinstance(datum, dict): - raise ValidationException("`%s`\n is not a dict" % pprint.pformat(datum)) - try: - for f in expected_schema.fields: - validate_ex(f.type, datum.get(f.name)) - return True - except ValidationException as v: - raise ValidationException("%s\n while validating field `%s`" % (v, f.name)) - raise ValidationException("Unrecognized schema_type %s" % schema_type) - class Builder(object): def jseval(self, expression, context): if isinstance(expression, list): @@ -161,7 +58,7 @@ def bind_input(self, schema, datum): if t in self.schemaDefs: t = self.schemaDefs[t] avsc = avro.schema.make_avsc_object(t, None) - if validate(avsc, datum): + if validate.validate(avsc, datum): if isinstance(t, basestring): t = {"type": t} bindings.extend(self.bind_input(t, datum)) @@ -260,55 +157,11 @@ def generate_arg(self, binding): return [a for a in args if a is not None] -def makeTool(toolpath_object): - if toolpath_object["class"] == "CommandLineTool": - return CommandLineTool(toolpath_object) - elif toolpath_object["class"] == "ExpressionTool": - return ExpressionTool(toolpath_object) - -class Tool(object): - def __init__(self, toolpath_object, validateAs): - self.names = avro.schema.Names() - cwl_avsc = os.path.join(module_dir, 'schemas/draft-2/cwl.avsc') - with open(cwl_avsc) as f: - j = json.load(f) - for t in j: - avro.schema.make_avsc_object(t, self.names) - - self.tool = toolpath_object - if self.tool.get("@context") != TOOL_CONTEXT_URL: - raise Exception("Missing or invalid '@context' field in tool description document, must be %s" % TOOL_CONTEXT_URL) - - # Validate tool documument - 
validate_ex(self.names.get_name(validateAs, ""), self.tool) - - # Import schema defs - self.schemaDefs = {} - if self.tool.get("schemaDefs"): - for i in self.tool["schemaDefs"]: - avro.schema.make_avsc_object(i, self.names) - self.schemaDefs[i["name"]] = i - - # Build record schema from inputs - self.inputs_record_schema = {"name": "input_record_schema", "type": "record", "fields": []} - for i in self.tool["inputs"]: - c = copy.copy(i) - c["name"] = c["id"][1:] - del c["id"] - self.inputs_record_schema["fields"].append(c) - avro.schema.make_avsc_object(self.inputs_record_schema, self.names) - - self.outputs_record_schema = {"name": "outputs_record_schema", "type": "record", "fields": []} - for i in self.tool["outputs"]: - c = copy.copy(i) - c["name"] = c["id"][1:] - del c["id"] - self.outputs_record_schema["fields"].append(c) - avro.schema.make_avsc_object(self.outputs_record_schema, self.names) +class Tool(Process): def _init_job(self, joborder, basedir): # Validate job order - validate_ex(self.names.get_name("input_record_schema", ""), joborder) + validate.validate_ex(self.names.get_name("input_record_schema", ""), joborder) builder = Builder() builder.job = copy.deepcopy(joborder) @@ -344,6 +197,11 @@ def job(self, joborder, basedir, **kwargs): return j +def aslist(l): + if isinstance(l, list): + return l + else: + return [l] class CommandLineTool(Tool): def __init__(self, toolpath_object): @@ -352,17 +210,12 @@ def __init__(self, toolpath_object): def job(self, joborder, basedir, use_container=True): builder = self._init_job(joborder, basedir) - if isinstance(self.tool["baseCommand"], list): - for n, b in enumerate(self.tool["baseCommand"]): + if self.tool["baseCommand"]: + for n, b in enumerate(aslist(self.tool["baseCommand"])): builder.bindings.append({ "position": [-1000000, n], "valueFrom": b }) - else: - builder.bindings.append({ - "position": [-1000000], - "valueFrom": self.tool["baseCommand"] - }) if self.tool.get("arguments"): for i, a in enumerate(self.tool["arguments"]): @@ -381,7 +234,7 @@ def job(self, joborder, basedir, use_container=True): builder.files = [f["path"] for f in builder.files] - j = Job() + j = CommandLineJob() j.joborder = builder.job j.container = None j.stdin = None @@ -444,7 +297,7 @@ def collect_output_ports(self, ports, builder, outdir): custom_output = os.path.join(outdir, "output.cwl.json") if os.path.exists(custom_output): outputdoc = yaml.load(custom_output) - validate_ex(self.names.get_name("output_record_schema", ""), outputdoc) + validate.validate_ex(self.names.get_name("output_record_schema", ""), outputdoc) return outputdoc return {port["id"][1:]: self.collect_output(port, builder, outdir) for port in ports} diff --git a/cwltool/job.py b/cwltool/job.py index e8a9b0a16..6a5469265 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -9,7 +9,7 @@ _logger = logging.getLogger("cwltool") -class Job(object): +class CommandLineJob(object): def run(self, dry_run=False, pull_image=True, outdir=None): if not outdir: if not dry_run: @@ -25,9 +25,15 @@ def run(self, dry_run=False, pull_image=True, outdir=None): if self.container and self.container.get("type") == "docker": if pull_image: if "pull" in self.container: - subprocess.check_call(["docker", "pull", self.container["pull"]], stdout=sys.stderr) + cmd = ["docker", "pull", self.container["pull"]] + _logger.info(str(cmd)) + if not dry_run: + subprocess.check_call(["docker", "pull", self.container["pull"]], stdout=sys.stderr) elif "import" in self.container: - subprocess.check_call(["docker", "import", 
self.container["import"]], stdout=sys.stderr) + cmd = ["docker", "import", self.container["import"]] + _logger.info(str(cmd)) + if not dry_run: + subprocess.check_call(["docker", "import", self.container["import"]], stdout=sys.stderr) runtime = ["docker", "run", "-i"] for d in self.pathmapper.dirs: @@ -40,38 +46,41 @@ def run(self, dry_run=False, pull_image=True, outdir=None): stdin = None stdout = None - _logger.info(str(runtime + self.command_line)) + _logger.info("%s%s%s", + " ".join(runtime + self.command_line), + ' < %s' % (self.stdin) if self.stdin else '', + ' > %s' % (self.stdout) if self.stdout else '') - if not dry_run: - if self.stdin: - stdin = open(self.stdin, "rb") - else: - stdin = subprocess.PIPE + if dry_run: + return (outdir, {}) - os.chdir(outdir) + if self.stdin: + stdin = open(self.stdin, "rb") + else: + stdin = subprocess.PIPE - if self.stdout: - stdout = open(self.stdout, "wb") - else: - stdout = sys.stderr + os.chdir(outdir) - for t in self.generatefiles: - with open(os.path.join(outdir, t), "w") as f: - f.write(self.generatefiles[t]) + if self.stdout: + stdout = open(self.stdout, "wb") + else: + stdout = sys.stderr - sp = subprocess.Popen(runtime + self.command_line, shell=False, stdin=stdin, stdout=stdout) + for t in self.generatefiles: + with open(os.path.join(outdir, t), "w") as f: + f.write(self.generatefiles[t]) - if stdin == subprocess.PIPE: - sp.stdin.close() + sp = subprocess.Popen(runtime + self.command_line, shell=False, stdin=stdin, stdout=stdout) - sp.wait() + if stdin == subprocess.PIPE: + sp.stdin.close() - if stdin != subprocess.PIPE: - stdin.close() + sp.wait() - if stdout: - stdout.close() + if stdin != subprocess.PIPE: + stdin.close() - return (outdir, self.collect_outputs(outdir)) + if stdout: + stdout.close() - return None + return (outdir, self.collect_outputs(outdir)) diff --git a/cwltool/main.py b/cwltool/main.py index e36c9ec08..ea6796f39 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -9,6 +9,8 @@ import os import sys import logging +import workflow +import validate _logger = logging.getLogger("cwltool") _logger.addHandler(logging.StreamHandler()) @@ -23,16 +25,19 @@ def main(): parser.add_argument("--no-container", action="store_true", help="Do not execute in a Docker container, even if one is specified in the tool file") parser.add_argument("--no-pull", default=False, action="store_true", help="Do not try to pull the Docker image") parser.add_argument("--dry-run", action="store_true", help="Do not execute") + parser.add_argument("--verbose", action="store_true", help="Print more logging") + parser.add_argument("--debug", action="store_true", help="Print even more logging") args = parser.parse_args() + if args.verbose: + logging.getLogger("cwltool").setLevel(logging.INFO) + if args.debug: + logging.getLogger("cwltool").setLevel(logging.DEBUG) + try: - u = from_url(args.tool) - if "schema" in u: - t = draft1tool.Tool(u) - else: - t = draft2tool.makeTool(u) - except (jsonschema.exceptions.ValidationError, draft2tool.ValidationException): + t = workflow.makeTool(from_url(args.tool)) + except (jsonschema.exceptions.ValidationError, validate.ValidationException): _logger.exception("Tool definition failed validation") return 1 @@ -50,10 +55,6 @@ def main(): a["generatefiles"] = job.generatefiles print json.dumps(a) else: - if isinstance(job, draft1tool.Tool) or isinstance(job, draft2tool.CommandLineTool): - _logger.info('%s%s%s', ' '.join(job.command_line), - ' < %s' % (job.stdin) if job.stdin else '', - ' > %s' % (job.stdout) if 
job.stdout else '') (outdir, runjob) = job.run(dry_run=args.dry_run, pull_image=(not args.no_pull), outdir=args.outdir) _logger.info("Output directory is %s", outdir) print json.dumps(runjob) diff --git a/cwltool/process.py b/cwltool/process.py new file mode 100644 index 000000000..cf05b62ec --- /dev/null +++ b/cwltool/process.py @@ -0,0 +1,48 @@ +import avro.schema +import os +import json +import validate +import copy + +TOOL_CONTEXT_URL = "https://raw.githubusercontent.com/common-workflow-language/common-workflow-language/master/schemas/draft-2/cwl-context.json" +module_dir = os.path.dirname(os.path.abspath(__file__)) + +class Process(object): + def __init__(self, toolpath_object, validateAs): + self.names = avro.schema.Names() + cwl_avsc = os.path.join(module_dir, 'schemas/draft-2/cwl.avsc') + with open(cwl_avsc) as f: + j = json.load(f) + for t in j: + avro.schema.make_avsc_object(t, self.names) + + self.tool = toolpath_object + #if self.tool.get("@context") != TOOL_CONTEXT_URL: + # raise Exception("Missing or invalid '@context' field in tool description document, must be %s" % TOOL_CONTEXT_URL) + + # Validate tool documument + validate.validate_ex(self.names.get_name(validateAs, ""), self.tool) + + # Import schema defs + self.schemaDefs = {} + if self.tool.get("schemaDefs"): + for i in self.tool["schemaDefs"]: + avro.schema.make_avsc_object(i, self.names) + self.schemaDefs[i["name"]] = i + + # Build record schema from inputs + self.inputs_record_schema = {"name": "input_record_schema", "type": "record", "fields": []} + for i in self.tool["inputs"]: + c = copy.copy(i) + c["name"] = c["id"][1:] + del c["id"] + self.inputs_record_schema["fields"].append(c) + avro.schema.make_avsc_object(self.inputs_record_schema, self.names) + + self.outputs_record_schema = {"name": "outputs_record_schema", "type": "record", "fields": []} + for i in self.tool["outputs"]: + c = copy.copy(i) + c["name"] = c["id"][1:] + del c["id"] + self.outputs_record_schema["fields"].append(c) + avro.schema.make_avsc_object(self.outputs_record_schema, self.names) diff --git a/cwltool/validate.py b/cwltool/validate.py new file mode 100644 index 000000000..dbb589733 --- /dev/null +++ b/cwltool/validate.py @@ -0,0 +1,105 @@ +import pprint + +class ValidationException(Exception): + pass + +def validate(expected_schema, datum): + try: + return validate_ex(expected_schema, datum) + except ValidationException: + return False + +INT_MIN_VALUE = -(1 << 31) +INT_MAX_VALUE = (1 << 31) - 1 +LONG_MIN_VALUE = -(1 << 63) +LONG_MAX_VALUE = (1 << 63) - 1 + +def validate_ex(expected_schema, datum): + """Determine if a python datum is an instance of a schema.""" + schema_type = expected_schema.type + if schema_type == 'null': + if datum is None: + return True + else: + raise ValidationException("`%s` is not null" % datum) + elif schema_type == 'boolean': + if isinstance(datum, bool): + return True + else: + raise ValidationException("`%s` is not boolean" % datum) + elif schema_type == 'string': + if isinstance(datum, basestring): + return True + else: + raise ValidationException("`%s` is not string" % datum) + elif schema_type == 'bytes': + if isinstance(datum, str): + return True + else: + raise ValidationException("`%s` is not bytes" % datum) + elif schema_type == 'int': + if ((isinstance(datum, int) or isinstance(datum, long)) + and INT_MIN_VALUE <= datum <= INT_MAX_VALUE): + return True + else: + raise ValidationException("`%s` is not int" % datum) + elif schema_type == 'long': + if ((isinstance(datum, int) or isinstance(datum, long)) 
+ and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE): + return True + else: + raise ValidationException("`%s` is not long" % datum) + elif schema_type in ['float', 'double']: + if (isinstance(datum, int) or isinstance(datum, long) + or isinstance(datum, float)): + return True + else: + raise ValidationException("`%s` is not float or double" % datum) + elif schema_type == 'fixed': + if isinstance(datum, str) and len(datum) == expected_schema.size: + return True + else: + raise ValidationException("`%s` is not fixed" % datum) + elif schema_type == 'enum': + if datum in expected_schema.symbols: + return True + else: + raise ValidationException("`%s`\n is not a valid enum symbol, expected\n %s" % (pprint.pformat(datum), pprint.pformat(expected_schema.symbols))) + elif schema_type == 'array': + if isinstance(datum, list): + for i, d in enumerate(datum): + try: + validate_ex(expected_schema.items, d) + except ValidationException as v: + raise ValidationException("%s\n while validating item at position %i `%s`" % (v, i, d)) + return True + else: + raise ValidationException("`%s`\n is not a list, expected list of\n %s" % (pprint.pformat(datum), expected_schema.items)) + elif schema_type == 'map': + if (isinstance(datum, dict) and + False not in [isinstance(k, basestring) for k in datum.keys()] and + False not in [validate(expected_schema.values, v) for v in datum.values()]): + return True + else: + raise ValidationException("`%s` is not a valid map value, expected\n %s" % (pprint.pformat(datum), pprint.pformat(expected_schema.values))) + elif schema_type in ['union', 'error_union']: + if True in [validate(s, datum) for s in expected_schema.schemas]: + return True + else: + errors = [] + for s in expected_schema.schemas: + try: + validate_ex(s, datum) + except ValidationException as e: + errors.append(str(e)) + raise ValidationException("`%s`\n is not valid, expected one of:\n\n%s\n\n the individual errors are:\n%s" % (pprint.pformat(datum), ",\n\n ".join([str(s) for s in expected_schema.schemas]), ";\n\n".join(errors))) + elif schema_type in ['record', 'error', 'request']: + if not isinstance(datum, dict): + raise ValidationException("`%s`\n is not a dict" % pprint.pformat(datum)) + try: + for f in expected_schema.fields: + validate_ex(f.type, datum.get(f.name)) + return True + except ValidationException as v: + raise ValidationException("%s\n while validating field `%s`" % (v, f.name)) + raise ValidationException("Unrecognized schema_type %s" % schema_type) diff --git a/cwltool/workflow.py b/cwltool/workflow.py new file mode 100644 index 000000000..4ff1ef577 --- /dev/null +++ b/cwltool/workflow.py @@ -0,0 +1,80 @@ +import job +import draft1tool +import draft2tool +from process import Process +import copy +import logging + +_logger = logging.getLogger("cwltool") + +def makeTool(toolpath_object): + if "schema" in toolpath_object: + return draft1tool.Tool(toolpath_object) + elif toolpath_object["class"] == "CommandLineTool": + return draft2tool.CommandLineTool(toolpath_object) + elif toolpath_object["class"] == "ExpressionTool": + return draft2tool.ExpressionTool(toolpath_object) + elif toolpath_object["class"] == "Workflow": + return Workflow(toolpath_object) + elif "impl" in toolpath_object: + return Step(toolpath_object) + + +class WorkflowJob(object): + def try_make_joborder(self, s): + jo = {} + for i in s.tool["inputs"]: + _logger.debug(i) + if "connect" in i: + src = i["connect"]["source"][1:] + if self.state.get(src): + jo[i["id"][1:]] = self.state.get(src) + else: + return None + return jo + + 
def run(self, outdir=None, **kwargs): + for s in self.steps: + s.completed = False + + run_all = len(self.steps) + while run_all: + made_progress = False + for s in self.steps: + if not s.completed: + joborder = self.try_make_joborder(s) + if joborder: + output = s.job(joborder).run() + for i in s.tool["outputs"]: + if "id" in i: + self.state[i["id"][1:]] = output[i["id"][1:]] + s.completed = True + made_progress = True + run_all -= 1 + if not made_progress: + raise Exception("Deadlocked") + + wo = {} + for i in self.tool["outputs"]: + if "connect" in i: + src = i["source"][1:] + wo[i["id"][1:]] = self.state[src] + + return wo + + +class Workflow(Process): + def __init__(self, toolpath_object): + super(Workflow, self).__init__(toolpath_object, "Workflow") + + def job(self, joborder, basedir, use_container=True): + wj = WorkflowJob() + wj.basedir = basedir + wj.steps = [makeTool(s) for s in self.tool.get("steps", [])] + wj.state = copy.deepcopy(joborder) + return wj + +class Step(Process): + def job(self, joborder, basedir, use_container=True): + # load the impl and instantiate that. + pass From d994fb9bddc0c9c7e4c265a9ee093547b953703c Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 6 Mar 2015 21:54:10 -0500 Subject: [PATCH 047/221] Can now run count-lines2-wf.json --- cwltool/draft2tool.py | 2 +- cwltool/workflow.py | 21 +++++++++++++-------- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 8b30eed2f..31a0f73bb 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -306,7 +306,7 @@ def collect_output(self, schema, builder, outdir): if "outputBinding" in schema: binding = schema["outputBinding"] if "glob" in binding: - r = [{"path": g} for g in glob.glob(binding["glob"])] + r = [{"path": g} for g in glob.glob(os.path.join(outdir, binding["glob"]))] for files in r: checksum = hashlib.sha1() with open(files["path"], "rb") as f: diff --git a/cwltool/workflow.py b/cwltool/workflow.py index 4ff1ef577..182cf91c3 100644 --- a/cwltool/workflow.py +++ b/cwltool/workflow.py @@ -4,6 +4,7 @@ from process import Process import copy import logging +import random _logger = logging.getLogger("cwltool") @@ -21,7 +22,7 @@ def makeTool(toolpath_object): class WorkflowJob(object): - def try_make_joborder(self, s): + def try_make_job(self, s): jo = {} for i in s.tool["inputs"]: _logger.debug(i) @@ -31,7 +32,8 @@ def try_make_joborder(self, s): jo[i["id"][1:]] = self.state.get(src) else: return None - return jo + _logger.info("Creating job with input: %s", jo) + return s.job(jo, self.basedir) def run(self, outdir=None, **kwargs): for s in self.steps: @@ -42,10 +44,11 @@ def run(self, outdir=None, **kwargs): made_progress = False for s in self.steps: if not s.completed: - joborder = self.try_make_joborder(s) - if joborder: - output = s.job(joborder).run() + job = self.try_make_job(s) + if job: + (joutdir, output) = job.run(outdir=outdir) for i in s.tool["outputs"]: + _logger.info("Job got output: %s", output) if "id" in i: self.state[i["id"][1:]] = output[i["id"][1:]] s.completed = True @@ -55,12 +58,12 @@ def run(self, outdir=None, **kwargs): raise Exception("Deadlocked") wo = {} - for i in self.tool["outputs"]: + for i in self.outputs: if "connect" in i: - src = i["source"][1:] + src = i["connect"]["source"][1:] wo[i["id"][1:]] = self.state[src] - return wo + return (outdir, wo) class Workflow(Process): @@ -71,7 +74,9 @@ def job(self, joborder, basedir, use_container=True): wj = WorkflowJob() wj.basedir = basedir wj.steps = 
[makeTool(s) for s in self.tool.get("steps", [])] + random.shuffle(wj.steps) wj.state = copy.deepcopy(joborder) + wj.outputs = self.tool["outputs"] return wj class Step(Process): From 6bbda41d18ea0a27c80a2bb66af540ed5dcee156 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 9 Mar 2015 15:12:46 -0400 Subject: [PATCH 048/221] Renamed files to .cwl and added #!/usr/bin/env cwl-runner so that cwl files are now directly executable. --- cwltool/cwl-runner | 1 + 1 file changed, 1 insertion(+) create mode 120000 cwltool/cwl-runner diff --git a/cwltool/cwl-runner b/cwltool/cwl-runner new file mode 120000 index 000000000..11a5d8e18 --- /dev/null +++ b/cwltool/cwl-runner @@ -0,0 +1 @@ +main.py \ No newline at end of file From 765a479e8d1b1105e249d279c0e5b1678c783f78 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 9 Mar 2015 16:17:42 -0400 Subject: [PATCH 049/221] Support external process definitions. --- cwltool/workflow.py | 65 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 57 insertions(+), 8 deletions(-) diff --git a/cwltool/workflow.py b/cwltool/workflow.py index 182cf91c3..828be8077 100644 --- a/cwltool/workflow.py +++ b/cwltool/workflow.py @@ -5,21 +5,21 @@ import copy import logging import random +from ref_resolver import from_url _logger = logging.getLogger("cwltool") def makeTool(toolpath_object): if "schema" in toolpath_object: return draft1tool.Tool(toolpath_object) + elif "impl" in toolpath_object and toolpath_object.get("class", "External") == "External": + return External(toolpath_object) elif toolpath_object["class"] == "CommandLineTool": return draft2tool.CommandLineTool(toolpath_object) elif toolpath_object["class"] == "ExpressionTool": return draft2tool.ExpressionTool(toolpath_object) elif toolpath_object["class"] == "Workflow": return Workflow(toolpath_object) - elif "impl" in toolpath_object: - return Step(toolpath_object) - class WorkflowJob(object): def try_make_job(self, s): @@ -50,7 +50,10 @@ def run(self, outdir=None, **kwargs): for i in s.tool["outputs"]: _logger.info("Job got output: %s", output) if "id" in i: - self.state[i["id"][1:]] = output[i["id"][1:]] + if i["id"][1:] in output: + self.state[i["id"][1:]] = output[i["id"][1:]] + else: + raise Exception("Output is missing expected field %s" % i["id"][1:]) s.completed = True made_progress = True run_all -= 1 @@ -79,7 +82,53 @@ def job(self, joborder, basedir, use_container=True): wj.outputs = self.tool["outputs"] return wj -class Step(Process): - def job(self, joborder, basedir, use_container=True): - # load the impl and instantiate that. - pass +class ExternalJob(object): + def __init__(self, tool, innerjob): + self.tool = tool + self.innerjob = innerjob + + def run(self, **kwargs): + self.impl = self.tool["impl"] + (outdir, output) = self.innerjob.run(**kwargs) + for i in self.tool["outputs"]: + d = i["def"][len(self.impl)+1:] + output[i["id"][1:]] = output[d] + del output[d] + + return (outdir, output) + +class External(Process): + def __init__(self, toolpath_object): + self.impl = toolpath_object["impl"] + self.embedded_tool = makeTool(from_url(self.impl)) + + if "id" in toolpath_object: + self.id = toolpath_object["id"] + else: + self.id = "#step_" + str(random.randint(1, 1000000000)) + + for i in toolpath_object["inputs"]: + d = i["def"][len(self.impl):] + toolid = i.get("id", self.id + "." 
+ d[1:]) + for a in self.embedded_tool.tool["inputs"]: + if a["id"] == d: + i.update(a) + i["id"] = toolid + + for i in toolpath_object["outputs"]: + d = i["def"][len(self.impl):] + toolid = i["id"] + for a in self.embedded_tool.tool["outputs"]: + if a["id"] == d: + i.update(a) + i["id"] = toolid + + super(External, self).__init__(toolpath_object, "Process") + + def job(self, joborder, basedir, **kwargs): + for i in self.tool["inputs"]: + d = i["def"][len(self.impl)+1:] + joborder[d] = joborder[i["id"][1:]] + del joborder[i["id"][1:]] + + return ExternalJob(self.tool, self.embedded_tool.job(joborder, basedir, **kwargs)) From fa97a41751470e836880c24a6408ff122ed60403 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 9 Mar 2015 17:13:07 -0400 Subject: [PATCH 050/221] Better error reporting, allow plain strings in arguments --- cwltool/draft2tool.py | 18 ++++++++++++------ cwltool/workflow.py | 15 +++++++++------ 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 31a0f73bb..834422e5b 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -219,13 +219,19 @@ def job(self, joborder, basedir, use_container=True): if self.tool.get("arguments"): for i, a in enumerate(self.tool["arguments"]): - a = copy.copy(a) - if a.get("position"): - a["position"] = [a["position"], i] + if isinstance(a, dict): + a = copy.copy(a) + if a.get("position"): + a["position"] = [a["position"], i] + else: + a["position"] = [0, i] + a["valueFrom"] = builder.do_eval(a["valueFrom"]) + builder.bindings.append(a) else: - a["position"] = [0, i] - a["valueFrom"] = builder.do_eval(a["valueFrom"]) - builder.bindings.append(a) + builder.bindings.append({ + "position": [0, i], + "valueFrom": a + }) builder.bindings.sort(key=lambda a: a["position"]) diff --git a/cwltool/workflow.py b/cwltool/workflow.py index 828be8077..d1ed4d975 100644 --- a/cwltool/workflow.py +++ b/cwltool/workflow.py @@ -14,12 +14,15 @@ def makeTool(toolpath_object): return draft1tool.Tool(toolpath_object) elif "impl" in toolpath_object and toolpath_object.get("class", "External") == "External": return External(toolpath_object) - elif toolpath_object["class"] == "CommandLineTool": - return draft2tool.CommandLineTool(toolpath_object) - elif toolpath_object["class"] == "ExpressionTool": - return draft2tool.ExpressionTool(toolpath_object) - elif toolpath_object["class"] == "Workflow": - return Workflow(toolpath_object) + if "class" in toolpath_object: + if toolpath_object["class"] == "CommandLineTool": + return draft2tool.CommandLineTool(toolpath_object) + elif toolpath_object["class"] == "ExpressionTool": + return draft2tool.ExpressionTool(toolpath_object) + elif toolpath_object["class"] == "Workflow": + return Workflow(toolpath_object) + else: + raise Exception("Missing 'class' field, expecting one of: Workflow, CommandLineTool, ExpressionTool, External") class WorkflowJob(object): def try_make_job(self, s): From e0972abc41e140c95fc4d2c28fa7de9ceeef2edc Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 9 Mar 2015 22:06:19 -0400 Subject: [PATCH 051/221] Add basic type checking for data links. 
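As a sketch of the check this adds (hypothetical state contents): the workflow
state now maps each source id to a (parameter, value) pair, so both ends of a
data link can be compared by their declared types before a value is copied:

    state = {"step1.output": ({"id": "#step1.output", "type": "File"},
                              {"path": "/tmp/step1/out.txt"})}
    dest = {"id": "#step2.file1", "type": "File",
            "connect": {"source": "#step1.output"}}

    src = dest["connect"]["source"][1:]
    if src in state:
        if state[src][0]["type"] == dest["type"]:
            jo = {dest["id"][1:]: state[src][1]}
        else:
            raise Exception("Type mismatch '%s' and '%s'" % (src, dest["id"][1:]))
    # a source missing from the state just means the step cannot run yet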
--- cwltool/workflow.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/cwltool/workflow.py b/cwltool/workflow.py index d1ed4d975..195a7021c 100644 --- a/cwltool/workflow.py +++ b/cwltool/workflow.py @@ -1,6 +1,7 @@ import job import draft1tool import draft2tool +from draft2tool import aslist from process import Process import copy import logging @@ -24,17 +25,28 @@ def makeTool(toolpath_object): else: raise Exception("Missing 'class' field, expecting one of: Workflow, CommandLineTool, ExpressionTool, External") +def check_types(src, dest): + return src["type"] == dest["type"] + class WorkflowJob(object): def try_make_job(self, s): jo = {} for i in s.tool["inputs"]: _logger.debug(i) if "connect" in i: - src = i["connect"]["source"][1:] - if self.state.get(src): - jo[i["id"][1:]] = self.state.get(src) + connect = i["connect"] + if isinstance(connect, list): + # Handle multiple inputs + pass else: - return None + src = connect["source"][1:] + if src in self.state: + if check_types(self.state[src][0], i): + jo[i["id"][1:]] = self.state[src][1] + else: + raise Exception("Type mismatch '%s' and '%s'" % (src, i["id"][1:])) + else: + return None _logger.info("Creating job with input: %s", jo) return s.job(jo, self.basedir) @@ -54,7 +66,7 @@ def run(self, outdir=None, **kwargs): _logger.info("Job got output: %s", output) if "id" in i: if i["id"][1:] in output: - self.state[i["id"][1:]] = output[i["id"][1:]] + self.state[i["id"][1:]] = (i, output[i["id"][1:]]) else: raise Exception("Output is missing expected field %s" % i["id"][1:]) s.completed = True @@ -67,7 +79,7 @@ def run(self, outdir=None, **kwargs): for i in self.outputs: if "connect" in i: src = i["connect"]["source"][1:] - wo[i["id"][1:]] = self.state[src] + wo[i["id"][1:]] = self.state[src][1] return (outdir, wo) @@ -81,7 +93,12 @@ def job(self, joborder, basedir, use_container=True): wj.basedir = basedir wj.steps = [makeTool(s) for s in self.tool.get("steps", [])] random.shuffle(wj.steps) - wj.state = copy.deepcopy(joborder) + + wj.state = {} + for i in self.tool["inputs"]: + iid = i["id"][1:] + wj.state[iid] = (i, copy.deepcopy(joborder[iid])) + print wj.state wj.outputs = self.tool["outputs"] return wj From 633a340146ee5c6087f506bdec43f399856f4811 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 10 Mar 2015 11:41:35 -0400 Subject: [PATCH 052/221] Support fanout workflow steps and multiple incoming connections on a port. Support default values. 
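The fanout rule added below fits in a few lines (a minimal sketch; "fanout"
means an array source feeding a port declared with the array's item type,
which implicitly scatters the step over the array):

    def should_fanout(src_type, dest_type):
        return (isinstance(src_type, dict)
                and src_type["type"] == "array"
                and src_type["items"] == dest_type)

    print should_fanout({"type": "array", "items": "File"}, "File")  # True
    print should_fanout("File", "File")                              # False

When a port fans out, the step is wrapped in Fanout, which runs one inner job
per array element and collects each output port into an array.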
--- cwltool/draft2tool.py | 17 ++++---- cwltool/main.py | 6 +-- cwltool/workflow.py | 91 ++++++++++++++++++++++++++++++++++++------- 3 files changed, 90 insertions(+), 24 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 834422e5b..fe5f81cde 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -115,6 +115,7 @@ def bind_input(self, schema, datum): if "valueFrom" in b: b["valueFrom"] = self.do_eval(b["valueFrom"], datum) + b["is_eval"] = True else: b["valueFrom"] = datum @@ -133,18 +134,20 @@ def generate_arg(self, binding): if isinstance(value, list): if binding.get("itemSeparator"): l = [binding["itemSeparator"].join([str(v) for v in value])] + elif binding.get("is_eval"): + return ([prefix] if prefix else []) + value elif prefix: return [prefix] + else: + return [] elif binding.get("is_file"): l = [self.pathmapper.mapper(value["path"])] elif isinstance(value, dict): - if prefix: - return [prefix] - elif isinstance(value, bool): - if value and prefix: - return [prefix] - else: - return [] + return [prefix] if prefix else [] + elif value is True and prefix: + return [prefix] + elif value is False or value is None: + return [] else: l = [value] diff --git a/cwltool/main.py b/cwltool/main.py index ea6796f39..cfaef081b 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -35,14 +35,14 @@ def main(): if args.debug: logging.getLogger("cwltool").setLevel(logging.DEBUG) + basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(args.job_order)) + try: - t = workflow.makeTool(from_url(args.tool)) + t = workflow.makeTool(from_url(args.tool), basedir) except (jsonschema.exceptions.ValidationError, validate.ValidationException): _logger.exception("Tool definition failed validation") return 1 - basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(args.job_order)) - try: job = t.job(from_url(args.job_order), basedir, use_container=(not args.no_container)) if args.conformance_test: diff --git a/cwltool/workflow.py b/cwltool/workflow.py index 195a7021c..404bebff9 100644 --- a/cwltool/workflow.py +++ b/cwltool/workflow.py @@ -7,14 +7,15 @@ import logging import random from ref_resolver import from_url +import os _logger = logging.getLogger("cwltool") -def makeTool(toolpath_object): +def makeTool(toolpath_object, basedir): if "schema" in toolpath_object: return draft1tool.Tool(toolpath_object) elif "impl" in toolpath_object and toolpath_object.get("class", "External") == "External": - return External(toolpath_object) + return External(toolpath_object, basedir) if "class" in toolpath_object: if toolpath_object["class"] == "CommandLineTool": return draft2tool.CommandLineTool(toolpath_object) @@ -25,29 +26,55 @@ def makeTool(toolpath_object): else: raise Exception("Missing 'class' field, expecting one of: Workflow, CommandLineTool, ExpressionTool, External") -def check_types(src, dest): - return src["type"] == dest["type"] + +def should_fanout(src_type, dest_type): + if isinstance(src_type, dict): + if src_type["type"] == "array" and src_type["items"] == dest_type: + return True + return False class WorkflowJob(object): def try_make_job(self, s): jo = {} + fanout = None for i in s.tool["inputs"]: _logger.debug(i) if "connect" in i: connect = i["connect"] if isinstance(connect, list): # Handle multiple inputs - pass - else: - src = connect["source"][1:] + if not fanout: + fanout = i["id"][1:] + jo[i["id"][1:]] = [] + else: + raise Exception("Can only fanout on one port") + for c in aslist(connect): + src = c["source"][1:] if src 
in self.state: - if check_types(self.state[src][0], i): - jo[i["id"][1:]] = self.state[src][1] + if self.state[src][0]["type"] == i["type"]: + if fanout: + jo[i["id"][1:]].append(self.state[src][1]) + else: + jo[i["id"][1:]] = self.state[src][1] + elif should_fanout(self.state[src][0]["type"], i["type"]): + if fanout: + if fanout == i["id"][1:]: + jo[i["id"][1:]].extend(self.state[src][1]) + else: + raise Exception("Can only fanout on one port") + else: + fanout = i["id"][1:] + jo[i["id"][1:]] = self.state[src][1] else: raise Exception("Type mismatch '%s' and '%s'" % (src, i["id"][1:])) else: return None + elif "default" in i: + jo[i["id"][1:]] = i["default"] + _logger.info("Creating job with input: %s", jo) + if fanout: + s = Fanout(s, fanout) return s.job(jo, self.basedir) def run(self, outdir=None, **kwargs): @@ -91,14 +118,16 @@ def __init__(self, toolpath_object): def job(self, joborder, basedir, use_container=True): wj = WorkflowJob() wj.basedir = basedir - wj.steps = [makeTool(s) for s in self.tool.get("steps", [])] + wj.steps = [makeTool(s, basedir) for s in self.tool.get("steps", [])] random.shuffle(wj.steps) wj.state = {} for i in self.tool["inputs"]: iid = i["id"][1:] - wj.state[iid] = (i, copy.deepcopy(joborder[iid])) - print wj.state + if iid in joborder: + wj.state[iid] = (i, copy.deepcopy(joborder[iid])) + elif "default" in i: + wj.state[iid] = (i, copy.deepcopy(i["default"])) wj.outputs = self.tool["outputs"] return wj @@ -118,9 +147,9 @@ def run(self, **kwargs): return (outdir, output) class External(Process): - def __init__(self, toolpath_object): + def __init__(self, toolpath_object, basedir): self.impl = toolpath_object["impl"] - self.embedded_tool = makeTool(from_url(self.impl)) + self.embedded_tool = makeTool(from_url(os.path.join(basedir, self.impl)), basedir) if "id" in toolpath_object: self.id = toolpath_object["id"] @@ -152,3 +181,37 @@ def job(self, joborder, basedir, **kwargs): del joborder[i["id"][1:]] return ExternalJob(self.tool, self.embedded_tool.job(joborder, basedir, **kwargs)) + +class FanoutJob(object): + def __init__(self, outputports, jobs): + self.outputports = outputports + self.jobs = jobs + + def run(self, **kwargs): + outputs = {} + for outschema in self.outputports: + outputs[outschema["id"][1:]] = [] + for j in self.jobs: + (_, out) = j.run(**kwargs) + for outschema in self.outputports: + outputs[outschema["id"][1:]].append(out[outschema["id"][1:]]) + return (None, outputs) + +class Fanout(object): + def __init__(self, process, fanout_key): + self.process = process + self.fanout_key = fanout_key + self.outputports = [] + for out in self.process.tool["outputs"]: + newout = copy.deepcopy(out) + newout["type"] = {"type": "array", "items": out["type"]} + self.outputports.append(newout) + self.tool = {"outputs": self.outputports} + + def job(self, joborder, basedir, **kwargs): + jobs = [] + for fn in joborder[self.fanout_key]: + jo = copy.copy(joborder) + jo[self.fanout_key] = fn + jobs.append(self.process.job(jo, basedir, **kwargs)) + return FanoutJob(self.outputports, jobs) From 09ca5b79d0704e1eae31ac3dd27e8fcee922609b Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 10 Mar 2015 13:04:17 -0400 Subject: [PATCH 053/221] Example workflow with externally defined sub-workflow. 
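As a sketch of the step document this exercises (hypothetical ids and file
names, written as the Python dict the loader produces): "impl" points at the
external process document and each "def" names a parameter inside it, which
External resolves by copying the parameter's schema onto the step, now raising
if the reference cannot be found:

    step = {
        "id": "#count",
        "impl": "subworkflow.cwl",
        "inputs": [{"id": "#count.file1",
                    "def": "subworkflow.cwl#file1",
                    "connect": {"source": "#main_input"}}],
        "outputs": [{"id": "#count.output",
                     "def": "subworkflow.cwl#output"}]
    }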
--- cwltool/workflow.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/cwltool/workflow.py b/cwltool/workflow.py index 404bebff9..8cf11bef4 100644 --- a/cwltool/workflow.py +++ b/cwltool/workflow.py @@ -159,17 +159,27 @@ def __init__(self, toolpath_object, basedir): for i in toolpath_object["inputs"]: d = i["def"][len(self.impl):] toolid = i.get("id", self.id + "." + d[1:]) + found = False for a in self.embedded_tool.tool["inputs"]: if a["id"] == d: i.update(a) + found = True + if not found: + raise Exception("Did not find input '%s' in external process" % (i["def"])) + i["id"] = toolid for i in toolpath_object["outputs"]: d = i["def"][len(self.impl):] toolid = i["id"] + found = False for a in self.embedded_tool.tool["outputs"]: if a["id"] == d: i.update(a) + found = True + if not found: + raise Exception("Did not find output '%s' in external process" % (i["def"])) + i["id"] = toolid super(External, self).__init__(toolpath_object, "Process") From ac5923726bd3fb4f0e648f54ea325fa014f5899b Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 11 Mar 2015 11:09:58 -0400 Subject: [PATCH 054/221] Add contributers section. Rename dockerImport to dockerLoad and fix implementation to load tar from local filesystem or http. Move expression evaluation to occur after file paths are mapped. Add secondaryFiles to File schema. Fix bugs. --- cwltool/draft2tool.py | 36 +++++++++++++++++---------- cwltool/job.py | 57 ++++++++++++++++++++++++++++++++----------- cwltool/main.py | 2 +- 3 files changed, 67 insertions(+), 28 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index fe5f81cde..c50851a29 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -114,10 +114,8 @@ def bind_input(self, schema, datum): bi["position"] = b["position"] + bi["position"] if "valueFrom" in b: - b["valueFrom"] = self.do_eval(b["valueFrom"], datum) - b["is_eval"] = True - else: - b["valueFrom"] = datum + b["do_eval"] = b["valueFrom"] + b["valueFrom"] = datum if schema["type"] == "File": b["is_file"] = True @@ -127,6 +125,9 @@ def bind_input(self, schema, datum): def generate_arg(self, binding): value = binding["valueFrom"] + if "do_eval" in binding: + value = self.do_eval(binding["do_eval"], value) + prefix = binding.get("prefix") sep = binding.get("separator") @@ -134,7 +135,7 @@ def generate_arg(self, binding): if isinstance(value, list): if binding.get("itemSeparator"): l = [binding["itemSeparator"].join([str(v) for v in value])] - elif binding.get("is_eval"): + elif binding.get("do_eval"): return ([prefix] if prefix else []) + value elif prefix: return [prefix] @@ -228,7 +229,8 @@ def job(self, joborder, basedir, use_container=True): a["position"] = [a["position"], i] else: a["position"] = [0, i] - a["valueFrom"] = builder.do_eval(a["valueFrom"]) + a["do_eval"] = a["valueFrom"] + a["valueFrom"] = None builder.bindings.append(a) else: builder.bindings.append({ @@ -241,7 +243,7 @@ def job(self, joborder, basedir, use_container=True): _logger.debug(pprint.pformat(builder.bindings)) _logger.debug(pprint.pformat(builder.files)) - builder.files = [f["path"] for f in builder.files] + reffiles = [f["path"] for f in builder.files] j = CommandLineJob() j.joborder = builder.job @@ -254,7 +256,7 @@ def job(self, joborder, basedir, use_container=True): j.stdin = builder.do_eval(self.tool["stdin"]) if isinstance(j.stdin, dict): j.stdin = j.stdin["path"] - builder.files.append(j.stdin) + reffiles.append(j.stdin) if self.tool.get("stdout"): if isinstance(self.tool["stdout"], dict) and "id" in 
self.tool["stdout"]: @@ -282,16 +284,23 @@ def job(self, joborder, basedir, use_container=True): j.container["type"] = "docker" if "dockerPull" in r: j.container["pull"] = r["dockerPull"] - if "dockerImport" in r: - j.container["import"] = r["dockerImport"] + if "dockerLoad" in r: + if r["dockerLoad"].startswith("http"): + j.container["load"] = r["dockerLoad"] + else: + j.container["load"] = os.path.join(basedir, r["dockerLoad"]) if "dockerImageId" in r: j.container["imageId"] = r["dockerImageId"] else: j.container["imageId"] = r["dockerPull"] - builder.pathmapper = DockerPathMapper(builder.files, basedir) + builder.pathmapper = DockerPathMapper(reffiles, basedir) if builder.pathmapper is None: - builder.pathmapper = PathMapper(builder.files, basedir) + builder.pathmapper = PathMapper(reffiles, basedir) + + for f in builder.files: + f["path"] = builder.pathmapper.mapper(f["path"]) + j.command_line = flatten(map(builder.generate_arg, builder.bindings)) if j.stdin: @@ -308,7 +317,8 @@ def collect_output_ports(self, ports, builder, outdir): outputdoc = yaml.load(custom_output) validate.validate_ex(self.names.get_name("output_record_schema", ""), outputdoc) return outputdoc - return {port["id"][1:]: self.collect_output(port, builder, outdir) for port in ports} + ret = {port["id"][1:]: self.collect_output(port, builder, outdir) for port in ports} + return ret if ret is not None else {} def collect_output(self, schema, builder, outdir): r = None diff --git a/cwltool/job.py b/cwltool/job.py index 6a5469265..d183d7216 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -6,6 +6,7 @@ import yaml import logging import sys +import requests _logger = logging.getLogger("cwltool") @@ -23,25 +24,53 @@ def run(self, dry_run=False, pull_image=True, outdir=None): runtime = [] if self.container and self.container.get("type") == "docker": - if pull_image: + found = False + for ln in subprocess.check_output(["docker", "images", "--no-trunc"]).splitlines(): + try: + ln.index(self.container["imageId"]) + found = True + except ValueError: + pass + + if not found and pull_image: if "pull" in self.container: cmd = ["docker", "pull", self.container["pull"]] _logger.info(str(cmd)) if not dry_run: - subprocess.check_call(["docker", "pull", self.container["pull"]], stdout=sys.stderr) - elif "import" in self.container: - cmd = ["docker", "import", self.container["import"]] + subprocess.check_call(cmd, stdout=sys.stderr) + found = True + elif "load" in self.container: + cmd = ["docker", "load"] _logger.info(str(cmd)) if not dry_run: - subprocess.check_call(["docker", "import", self.container["import"]], stdout=sys.stderr) - - runtime = ["docker", "run", "-i"] - for d in self.pathmapper.dirs: - runtime.append("--volume=%s:%s:ro" % (os.path.abspath(d), self.pathmapper.dirs[d])) - runtime.append("--volume=%s:%s:ro" % (os.path.abspath(outdir), "/tmp/job_output")) - runtime.append("--workdir=%s" % ("/tmp/job_output")) - runtime.append("--user=%s" % (os.geteuid())) - runtime.append(self.container["imageId"]) + if os.path.exists(self.container["load"]): + _logger.info("Loading docker image from %s", self.container["load"]) + with open(self.container["load"], "rb") as f: + loadproc = subprocess.Popen(cmd, stdin=f, stdout=sys.stderr) + else: + _logger.info("Sending GET request to %s", self.container["load"]) + req = requests.get(self.container["load"], stream=True) + n = 0 + for chunk in req.iter_content(1024*1024): + n += len(chunk) + _logger.info(str(n)) + loadproc.stdin.write(chunk) + loadproc.stdin.close() + rcode = 
loadproc.wait() + if rcode != 0: + raise Exception("Docker load returned non-zero exit status %i" % (rcode)) + found = True + + if found: + runtime = ["docker", "run", "-i"] + for d in self.pathmapper.dirs: + runtime.append("--volume=%s:%s:ro" % (os.path.abspath(d), self.pathmapper.dirs[d])) + runtime.append("--volume=%s:%s:rw" % (os.path.abspath(outdir), "/tmp/job_output")) + runtime.append("--workdir=%s" % ("/tmp/job_output")) + runtime.append("--user=%s" % (os.geteuid())) + runtime.append(self.container["imageId"]) + else: + raise Exception("Docker image %s not found" % (self.container["imageId"])) stdin = None stdout = None @@ -80,7 +109,7 @@ def run(self, dry_run=False, pull_image=True, outdir=None): if stdin != subprocess.PIPE: stdin.close() - if stdout: + if stdout != sys.stderr: stdout.close() return (outdir, self.collect_outputs(outdir)) diff --git a/cwltool/main.py b/cwltool/main.py index cfaef081b..93048e4ff 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -58,7 +58,7 @@ def main(): (outdir, runjob) = job.run(dry_run=args.dry_run, pull_image=(not args.no_pull), outdir=args.outdir) _logger.info("Output directory is %s", outdir) print json.dumps(runjob) - except jsonschema.exceptions.ValidationError: + except (jsonschema.exceptions.ValidationError, validate.ValidationException): _logger.exception("Job order failed validation") return 1 From 71056452381ad70361718addb6c637e43387c4be Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 16 Mar 2015 21:53:50 -0400 Subject: [PATCH 055/221] Fix Python package generation. Fix path mapping. --- cwltool/draft2tool.py | 2 +- cwltool/job.py | 2 +- gittaggers.py | 20 ++++++++++++++++++++ setup.py | 26 ++++++++++---------------- 4 files changed, 32 insertions(+), 18 deletions(-) create mode 100644 gittaggers.py diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index c50851a29..01ed157b4 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -142,7 +142,7 @@ def generate_arg(self, binding): else: return [] elif binding.get("is_file"): - l = [self.pathmapper.mapper(value["path"])] + l = [value["path"]] elif isinstance(value, dict): return [prefix] if prefix else [] elif value is True and prefix: diff --git a/cwltool/job.py b/cwltool/job.py index d183d7216..d84687f37 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -25,7 +25,7 @@ def run(self, dry_run=False, pull_image=True, outdir=None): if self.container and self.container.get("type") == "docker": found = False - for ln in subprocess.check_output(["docker", "images", "--no-trunc"]).splitlines(): + for ln in subprocess.check_output(["docker", "images", "--no-trunc", "--all"]).splitlines(): try: ln.index(self.container["imageId"]) found = True diff --git a/gittaggers.py b/gittaggers.py new file mode 100644 index 000000000..922344381 --- /dev/null +++ b/gittaggers.py @@ -0,0 +1,20 @@ +from setuptools.command.egg_info import egg_info +import subprocess +import time + +class EggInfoFromGit(egg_info): + """Tag the build with git commit timestamp. + + If a build tag has already been set (e.g., "egg_info -b", building + from source package), leave it alone. 
+ """ + def git_timestamp_tag(self): + gitinfo = subprocess.check_output( + ['git', 'log', '--first-parent', '--max-count=1', + '--format=format:%ct', '.']).strip() + return time.strftime('.%Y%m%d%H%M%S', time.gmtime(int(gitinfo))) + + def tags(self): + if self.tag_build is None: + self.tag_build = self.git_timestamp_tag() + return egg_info.tags(self) diff --git a/setup.py b/setup.py index cf80929bd..10915c201 100644 --- a/setup.py +++ b/setup.py @@ -1,29 +1,22 @@ #!/usr/bin/env python import os -import subprocess -import time +import sys +import setuptools.command.egg_info as egg_info_cmd from setuptools import setup, find_packages SETUP_DIR = os.path.dirname(__file__) README = os.path.join(SETUP_DIR, 'README.rst') -cmd_opts = {'egg_info': {}} try: - git_tags = subprocess.check_output( - ['git', 'log', '--first-parent', '--max-count=1', - '--format=format:%ct %h', SETUP_DIR]).split() - assert len(git_tags) == 2 -except (AssertionError, OSError, subprocess.CalledProcessError): - pass -else: - git_tags[0] = time.strftime('%Y%m%d%H%M%S', time.gmtime(int(git_tags[0]))) - cmd_opts['egg_info']['tag_build'] = '.{}.{}'.format(*git_tags) - + import gittaggers + tagger = gittaggers.EggInfoFromGit +except ImportError: + tagger = egg_info_cmd.egg_info setup(name='cwltool', - version='0.1', + version='1.0', description='Common workflow language reference implementation', long_description=open(README).read(), author='Common workflow language working group', @@ -32,7 +25,7 @@ download_url="https://github.com/common-workflow-language/common-workflow-language", license='Apache 2.0', packages=["cwltool"], - package_data={'cwltool': ['schemas/*.json']}, + package_data={'cwltool': ['schemas/draft-1/*', 'schemas/draft-2/*']}, include_package_data=True, install_requires=[ 'jsonschema >= 2.4.0', @@ -45,5 +38,6 @@ entry_points={ 'console_scripts': [ "cwltool=cwltool.main:main" ] }, - options=cmd_opts, + zip_safe=False, + cmdclass={'egg_info': tagger}, ) From 332870c0ac479c5ec20c9f01a215e40bfd866c81 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 17 Mar 2015 21:08:28 -0400 Subject: [PATCH 056/221] Add "environmentDefs" to specify environment variables. 
--- cwltool/draft2tool.py | 4 ++++ cwltool/job.py | 13 ++++++++++++- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 01ed157b4..4ab22aa90 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -277,6 +277,10 @@ def job(self, joborder, basedir, use_container=True): for t in self.tool.get("fileDefs", []): j.generatefiles[t["filename"]] = builder.do_eval(t["value"]) + j.environment = {} + for t in self.tool.get("environmentDefs", []): + j.environment[t["env"]] = builder.do_eval(t["value"]) + reqsAndHints = self.tool.get("requirements", []) + self.tool.get("hints", []) for r in reqsAndHints: if r["class"] == "DockerRequirement" and use_container: diff --git a/cwltool/job.py b/cwltool/job.py index d84687f37..98b0652d9 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -22,6 +22,7 @@ def run(self, dry_run=False, pull_image=True, outdir=None): json.dump(self.joborder, fp) runtime = [] + env = {} if self.container and self.container.get("type") == "docker": found = False @@ -68,9 +69,13 @@ def run(self, dry_run=False, pull_image=True, outdir=None): runtime.append("--volume=%s:%s:rw" % (os.path.abspath(outdir), "/tmp/job_output")) runtime.append("--workdir=%s" % ("/tmp/job_output")) runtime.append("--user=%s" % (os.geteuid())) + for t,v in self.environment.items(): + runtime.append("--env=%s=%s" % (t, v)) runtime.append(self.container["imageId"]) else: raise Exception("Docker image %s not found" % (self.container["imageId"])) + else: + env = self.environment stdin = None stdout = None @@ -99,7 +104,13 @@ def run(self, dry_run=False, pull_image=True, outdir=None): with open(os.path.join(outdir, t), "w") as f: f.write(self.generatefiles[t]) - sp = subprocess.Popen(runtime + self.command_line, shell=False, stdin=stdin, stdout=stdout) + sp = subprocess.Popen(runtime + self.command_line, + shell=False, + close_fds=True, + stdin=stdin, + stdout=stdout, + env=env, + cwd=outdir) if stdin == subprocess.PIPE: sp.stdin.close() From 41ca2e64e45a9ce13dde6a44d6be702dab76e047 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 17 Mar 2015 21:56:54 -0400 Subject: [PATCH 057/221] Add --print-rdf to cwltool to print rdf graph corresponding to a workflow or tool. 
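The conversion itself is small; a minimal sketch assuming rdflib plus the
rdflib-jsonld plugin are installed (mirroring the imports in the patch below)
and that the resolved document carries a JSON-LD "@context"; without one the
parse yields an empty graph:

    import json
    from rdflib import Graph, plugin
    from rdflib.serializer import Serializer
    from cwltool.ref_resolver import from_url

    wf = from_url("wc-tool.cwl")   # hypothetical document with an "@context"
    g = Graph().parse(data=json.dumps(wf), format='json-ld')
    print g.serialize(format='turtle')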
--- cwltool/main.py | 22 ++++++++++++++++++---- setup.py | 4 +++- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/cwltool/main.py b/cwltool/main.py index 93048e4ff..1ab182c13 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -15,18 +15,28 @@ _logger = logging.getLogger("cwltool") _logger.addHandler(logging.StreamHandler()) + +def printrdf(workflow, sr): + from rdflib import Graph, plugin + from rdflib.serializer import Serializer + wf = from_url(workflow) + g = Graph().parse(data=json.dumps(wf), format='json-ld', location=workflow) + print(g.serialize(format=sr)) + def main(): parser = argparse.ArgumentParser() - parser.add_argument("tool", type=str) - parser.add_argument("job_order", type=str) + parser.add_argument("workflow", type=str) + parser.add_argument("job_order", type=str, nargs="?", default=None) parser.add_argument("--conformance-test", action="store_true") parser.add_argument("--basedir", type=str) parser.add_argument("--outdir", type=str) - parser.add_argument("--no-container", action="store_true", help="Do not execute in a Docker container, even if one is specified in the tool file") + parser.add_argument("--no-container", action="store_true", help="Do not execute jobs in a Docker container, even when specified by the CommandLineTool") parser.add_argument("--no-pull", default=False, action="store_true", help="Do not try to pull the Docker image") parser.add_argument("--dry-run", action="store_true", help="Do not execute") parser.add_argument("--verbose", action="store_true", help="Print more logging") parser.add_argument("--debug", action="store_true", help="Print even more logging") + parser.add_argument("--print-rdf", action="store_true", help="Print corresponding RDF graph for workflow") + parser.add_argument("--rdf-serializer", help="Output RDF serialization format (one of turtle (default), n3, nt, xml)", default="turtle") args = parser.parse_args() @@ -35,10 +45,14 @@ def main(): if args.debug: logging.getLogger("cwltool").setLevel(logging.DEBUG) + if args.print_rdf: + printrdf(args.workflow, args.rdf_serializer) + return 0 + basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(args.job_order)) try: - t = workflow.makeTool(from_url(args.tool), basedir) + t = workflow.makeTool(from_url(args.workflow), basedir) except (jsonschema.exceptions.ValidationError, validate.ValidationException): _logger.exception("Tool definition failed validation") return 1 diff --git a/setup.py b/setup.py index 10915c201..6cfea5762 100644 --- a/setup.py +++ b/setup.py @@ -31,7 +31,9 @@ 'jsonschema >= 2.4.0', 'requests', 'PyYAML', - 'avro' + 'avro', + 'rdflib', + 'rdflib-jsonld' ], test_suite='tests', tests_require=[], From 3d6577432444c46f0b35bd4b39e0a05f39c37e7f Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 23 Mar 2015 21:02:51 -0400 Subject: [PATCH 058/221] Fix Python packaging to use correct git log for package time/version stamps. 
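What the tagger computes, shown standalone (run inside a git checkout; the
trailing path argument is exactly what this patch and the next adjust, since
git log restricts itself to commits touching that path):

    import subprocess, time

    ts = subprocess.check_output(
        ['git', 'log', '--first-parent', '--max-count=1',
         '--format=format:%ct', '.']).strip()
    print time.strftime('.%Y%m%d%H%M%S', time.gmtime(int(ts)))
    # e.g. ".20150323210251", appended to the package version as a build tag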
--- gittaggers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gittaggers.py b/gittaggers.py index 922344381..65ad8242b 100644 --- a/gittaggers.py +++ b/gittaggers.py @@ -11,7 +11,7 @@ class EggInfoFromGit(egg_info): def git_timestamp_tag(self): gitinfo = subprocess.check_output( ['git', 'log', '--first-parent', '--max-count=1', - '--format=format:%ct', '.']).strip() + '--format=format:%ct', '..']).strip() return time.strftime('.%Y%m%d%H%M%S', time.gmtime(int(gitinfo))) def tags(self): From fbb93749870b1ba9a228afa7c1bc1791ca449dca Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 23 Mar 2015 21:10:20 -0400 Subject: [PATCH 059/221] Fix Python packaging to use correct git log for package time/version stamps (2nd try) --- gittaggers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gittaggers.py b/gittaggers.py index 65ad8242b..55c3c2af3 100644 --- a/gittaggers.py +++ b/gittaggers.py @@ -11,7 +11,7 @@ class EggInfoFromGit(egg_info): def git_timestamp_tag(self): gitinfo = subprocess.check_output( ['git', 'log', '--first-parent', '--max-count=1', - '--format=format:%ct', '..']).strip() + '--format=format:%ct']).strip() return time.strftime('.%Y%m%d%H%M%S', time.gmtime(int(gitinfo))) def tags(self): From d3d1fde2533bc256b17b71f2e8bfad37273c4984 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 23 Mar 2015 21:27:06 -0400 Subject: [PATCH 060/221] Removed very misleading 'include_package_data' option from setup.py --- setup.py | 1 - 1 file changed, 1 deletion(-) diff --git a/setup.py b/setup.py index 6cfea5762..ff52d0290 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,6 @@ license='Apache 2.0', packages=["cwltool"], package_data={'cwltool': ['schemas/draft-1/*', 'schemas/draft-2/*']}, - include_package_data=True, install_requires=[ 'jsonschema >= 2.4.0', 'requests', From cf63d3265a5534eb286c776f1261a2fd1f2e0f0c Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 24 Mar 2015 17:26:33 +0000 Subject: [PATCH 061/221] setup.py workaround to ensure that schema files are included in sdist --- setup.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ff52d0290..7b27b8d12 100644 --- a/setup.py +++ b/setup.py @@ -3,6 +3,7 @@ import os import sys import setuptools.command.egg_info as egg_info_cmd +import shutil from setuptools import setup, find_packages @@ -15,6 +16,13 @@ except ImportError: tagger = egg_info_cmd.egg_info +# Remove the symlink and copy the schemas directory. +# This is a total hack, but older versions of setuptools +# won't follow symlinks or follow relative paths outside the +# source directory (ugh!) 
+os.unlink("cwltool/schemas") +shutil.copytree("../schemas", "cwltool/schemas") + setup(name='cwltool', version='1.0', description='Common workflow language reference implementation', @@ -25,7 +33,6 @@ download_url="https://github.com/common-workflow-language/common-workflow-language", license='Apache 2.0', packages=["cwltool"], - package_data={'cwltool': ['schemas/draft-1/*', 'schemas/draft-2/*']}, install_requires=[ 'jsonschema >= 2.4.0', 'requests', @@ -42,3 +49,7 @@ zip_safe=False, cmdclass={'egg_info': tagger}, ) + +# Restore the symlink +shutil.rmtree("cwltool/schemas") +os.symlink("../../schemas", "cwltool/schemas") From af0c00e3f6941aa31fcf5c9f75f91dcf531a7fc0 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 24 Mar 2015 20:00:38 +0000 Subject: [PATCH 062/221] revert to copy workaround --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 7b27b8d12..fe7562bdd 100644 --- a/setup.py +++ b/setup.py @@ -33,6 +33,7 @@ download_url="https://github.com/common-workflow-language/common-workflow-language", license='Apache 2.0', packages=["cwltool"], + package_data={'cwltool': ['schemas/draft-1/*', 'schemas/draft-2/*']}, install_requires=[ 'jsonschema >= 2.4.0', 'requests', From 70f4b89bbe7a46d78ed9ac8cdf17847fb3f18c3c Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 24 Mar 2015 16:32:44 -0400 Subject: [PATCH 063/221] Check for source tree or install. --- setup.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index fe7562bdd..d5a4720b0 100644 --- a/setup.py +++ b/setup.py @@ -20,8 +20,11 @@ # This is a total hack, but older versions of setuptools # won't follow symlinks or follow relative paths outside the # source directory (ugh!) -os.unlink("cwltool/schemas") -shutil.copytree("../schemas", "cwltool/schemas") +restore = False +if os.path.islink("cwltool/schemas") and os.path.exists("../schemas"): + os.unlink("cwltool/schemas") + shutil.copytree("../schemas", "cwltool/schemas") + restore = True setup(name='cwltool', version='1.0', @@ -51,6 +54,7 @@ cmdclass={'egg_info': tagger}, ) -# Restore the symlink -shutil.rmtree("cwltool/schemas") -os.symlink("../../schemas", "cwltool/schemas") +if restore: + # Restore the symlink + shutil.rmtree("cwltool/schemas") + os.symlink("../../schemas", "cwltool/schemas") From 708635aa8c7ca3902b74af9a304939bd30720486 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 26 Mar 2015 12:30:35 -0400 Subject: [PATCH 064/221] Tools for generating specification documentation. 
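A sketch of what the extend_avro pass added below does (hypothetical
two-record schema list): a record that "extends" another inherits its fields,
with "specialize" optionally rewriting inherited type names:

    from cwltool.process import extend_avro   # the function added below

    base = {"name": "Process", "type": "record",
            "fields": [{"name": "id", "type": "string"}]}
    clt = {"name": "CommandLineTool", "type": "record", "extends": "Process",
           "fields": [{"name": "baseCommand", "type": "string"}]}

    flat = extend_avro([base, clt])
    # flat[1]["fields"] is the inherited "id" field followed by "baseCommand"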
--- cwltool/process.py | 59 +++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 56 insertions(+), 3 deletions(-) diff --git a/cwltool/process.py b/cwltool/process.py index cf05b62ec..2524cbdb0 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -3,16 +3,55 @@ import json import validate import copy +import yaml +import copy +import logging +import pprint TOOL_CONTEXT_URL = "https://raw.githubusercontent.com/common-workflow-language/common-workflow-language/master/schemas/draft-2/cwl-context.json" module_dir = os.path.dirname(os.path.abspath(__file__)) +_logger = logging.getLogger("cwltool") + +def specialize(items, spec): + if isinstance(items, dict): + for n in ("type", "items", "values"): + if n in items: + items[n] = specialize(items[n], spec) + return items + if isinstance(items, list): + n = [] + for i in items: + n.append(specialize(i, spec)) + return n + if isinstance(items, basestring): + if items in spec: + return spec[items] + return items + +def extend_avro(items): + types = {t["name"]: t for t in items} + n = [] + for t in items: + if "extends" in t: + r = copy.deepcopy(types[t["extends"]]) + r["name"] = t["name"] + if "specialize" in t: + r["fields"] = specialize(r["fields"], t["specialize"]) + r["fields"].extend(t["fields"]) + r["extends"] = t["extends"] + types[t["name"]] = r + t = r + n.append(t) + return n + class Process(object): def __init__(self, toolpath_object, validateAs): self.names = avro.schema.Names() - cwl_avsc = os.path.join(module_dir, 'schemas/draft-2/cwl.avsc') + cwl_avsc = os.path.join(module_dir, 'schemas/draft-2/cwl-avro.yml') with open(cwl_avsc) as f: - j = json.load(f) + j = yaml.load(f) + j = extend_avro(j) for t in j: avro.schema.make_avsc_object(t, self.names) @@ -24,7 +63,21 @@ def __init__(self, toolpath_object, validateAs): validate.validate_ex(self.names.get_name(validateAs, ""), self.tool) # Import schema defs - self.schemaDefs = {} + self.schemaDefs = { + "Any": [ + "null", + "boolean", + "int", + "long", + "float", + "double", + "bytes", + "string", + "File", + {"type": "array", "items": "Any"}, + {"type": "map", "values": "Any"} + ]} + if self.tool.get("schemaDefs"): for i in self.tool["schemaDefs"]: avro.schema.make_avsc_object(i, self.names) From dcf636db6839a5df1b65be60f90c60ade98cd83a Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 26 Mar 2015 17:15:08 -0400 Subject: [PATCH 065/221] Now embedding major documenation directly in schema. Lots of progress, but still haven't gotten to describing CommandLineTool and Workflow. --- cwltool/process.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cwltool/process.py b/cwltool/process.py index 2524cbdb0..9e9da8b03 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -40,6 +40,7 @@ def extend_avro(items): r["fields"] = specialize(r["fields"], t["specialize"]) r["fields"].extend(t["fields"]) r["extends"] = t["extends"] + r["doc"] = t.get("doc", "") types[t["name"]] = r t = r n.append(t) From 4acb2e89cd20f03216a8b611171a8775e65dfc32 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 27 Mar 2015 13:42:19 -0400 Subject: [PATCH 066/221] Run document generation as a workflow. 
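In dict form, a documented schema entry now looks roughly like this
(hypothetical wording; the one-line change below also makes extend_avro copy
the extending record's own "doc" onto the flattened result):

    {"name": "File", "type": "record",
     "doc": "Represents a file on disk.",
     "fields": [{"name": "path", "type": "string",
                 "doc": "The local path to the file."}]}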
--- cwltool/draft2tool.py | 20 +++++++++++++------- cwltool/job.py | 13 ++++++++++++- cwltool/main.py | 7 ++++++- cwltool/pathmapper.py | 4 ++-- cwltool/workflow.py | 2 +- 5 files changed, 34 insertions(+), 12 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 4ab22aa90..6b332a2fd 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -39,11 +39,11 @@ def do_eval(self, ex, context=None): return self.jseval(ex["value"], context) elif "invoke" in ex: return self.jseval(ex["invoke"], context) - elif ex.get("id"): - if ex["id"].startswith("#"): - return self.job[ex["id"][1:]] + elif ex.get("ref"): + if ex["ref"].startswith("#"): + return self.job[ex["ref"][1:]] else: - with open(os.path.join(self.basedir, ex["id"]), "r") as f: + with open(os.path.join(self.basedir, ex["ref"]), "r") as f: return f.read() else: return ex @@ -259,10 +259,10 @@ def job(self, joborder, basedir, use_container=True): reffiles.append(j.stdin) if self.tool.get("stdout"): - if isinstance(self.tool["stdout"], dict) and "id" in self.tool["stdout"]: + if isinstance(self.tool["stdout"], dict) and "ref" in self.tool["stdout"]: for out in self.tool.get("outputs", []): - if out["id"] == self.tool["stdout"]["id"]: - filename = self.tool["stdout"]["id"][1:] + if out["id"] == self.tool["stdout"]["ref"]: + filename = self.tool["stdout"]["ref"][1:] j.stdout = filename out["outputBinding"] = out.get("outputBinding", {}) out["outputBinding"]["glob"] = filename @@ -281,6 +281,10 @@ def job(self, joborder, basedir, use_container=True): for t in self.tool.get("environmentDefs", []): j.environment[t["env"]] = builder.do_eval(t["value"]) + for r in self.tool.get("requirements", []): + if r["class"] not in ("DockerRequirement", "MemoryRequirement"): + raise Exception("Unknown requirement %s" % (r["class"])) + reqsAndHints = self.tool.get("requirements", []) + self.tool.get("hints", []) for r in reqsAndHints: if r["class"] == "DockerRequirement" and use_container: @@ -288,6 +292,8 @@ def job(self, joborder, basedir, use_container=True): j.container["type"] = "docker" if "dockerPull" in r: j.container["pull"] = r["dockerPull"] + if "dockerFile" in r: + j.container["file"] = r["dockerFile"] if "dockerLoad" in r: if r["dockerLoad"].startswith("http"): j.container["load"] = r["dockerLoad"] diff --git a/cwltool/job.py b/cwltool/job.py index 98b0652d9..76895e378 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -11,7 +11,7 @@ _logger = logging.getLogger("cwltool") class CommandLineJob(object): - def run(self, dry_run=False, pull_image=True, outdir=None): + def run(self, dry_run=False, pull_image=True, outdir=None, rm_container=True): if not outdir: if not dry_run: outdir = tempfile.mkdtemp() @@ -34,6 +34,15 @@ def run(self, dry_run=False, pull_image=True, outdir=None): pass if not found and pull_image: + if "file" in self.container: + dockerfile_dir = tempfile.mkdtemp() + with open(os.path.join(dockerfile_dir, "Dockerfile"), "w") as df: + df.write(self.container["file"]) + cmd = ["docker", "build", "--tag=%s" % self.container["imageId"], dockerfile_dir] + _logger.info(str(cmd)) + if not dry_run: + subprocess.check_call(cmd, stdout=sys.stderr) + found = True if "pull" in self.container: cmd = ["docker", "pull", self.container["pull"]] _logger.info(str(cmd)) @@ -69,6 +78,8 @@ def run(self, dry_run=False, pull_image=True, outdir=None): runtime.append("--volume=%s:%s:rw" % (os.path.abspath(outdir), "/tmp/job_output")) runtime.append("--workdir=%s" % ("/tmp/job_output")) runtime.append("--user=%s" % 
(os.geteuid())) + if rm_container: + runtime.append("--rm") for t,v in self.environment.items(): runtime.append("--env=%s=%s" % (t, v)) runtime.append(self.container["imageId"]) diff --git a/cwltool/main.py b/cwltool/main.py index 1ab182c13..50f921cb2 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -31,6 +31,7 @@ def main(): parser.add_argument("--basedir", type=str) parser.add_argument("--outdir", type=str) parser.add_argument("--no-container", action="store_true", help="Do not execute jobs in a Docker container, even when specified by the CommandLineTool") + parser.add_argument("--leave-container", action="store_true", help="Do not delete Docker container after it exits") parser.add_argument("--no-pull", default=False, action="store_true", help="Do not try to pull the Docker image") parser.add_argument("--dry-run", action="store_true", help="Do not execute") parser.add_argument("--verbose", action="store_true", help="Print more logging") @@ -49,6 +50,10 @@ def main(): printrdf(args.workflow, args.rdf_serializer) return 0 + if not args.job_order: + _logger.error("Input object required") + return 1 + basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(args.job_order)) try: @@ -69,7 +74,7 @@ def main(): a["generatefiles"] = job.generatefiles print json.dumps(a) else: - (outdir, runjob) = job.run(dry_run=args.dry_run, pull_image=(not args.no_pull), outdir=args.outdir) + (outdir, runjob) = job.run(dry_run=args.dry_run, pull_image=(not args.no_pull), outdir=args.outdir, rm_container=(not args.leave_container)) _logger.info("Output directory is %s", outdir) print json.dumps(runjob) except (jsonschema.exceptions.ValidationError, validate.ValidationException): diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py index fcb783924..45310f65e 100644 --- a/cwltool/pathmapper.py +++ b/cwltool/pathmapper.py @@ -18,7 +18,7 @@ def __init__(self, referenced_files, basedir): self._pathmap = {} self.dirs = {} for src in referenced_files: - abs = src if os.path.isabs(src) else os.path.join(basedir, src) + abs = src if os.path.isabs(src) else os.path.abspath(os.path.join(basedir, src)) dir, fn = os.path.split(abs) subdir = False @@ -47,7 +47,7 @@ def __init__(self, referenced_files, basedir): self.dirs[d] = name for src in referenced_files: - abs = src if os.path.isabs(src) else os.path.join(basedir, src) + abs = src if os.path.isabs(src) else os.path.abspath(os.path.join(basedir, src)) for d in self.dirs: if abs.startswith(d): self._pathmap[src] = os.path.join(self.dirs[d], abs[len(d)+1:]) diff --git a/cwltool/workflow.py b/cwltool/workflow.py index 8cf11bef4..714131f75 100644 --- a/cwltool/workflow.py +++ b/cwltool/workflow.py @@ -88,7 +88,7 @@ def run(self, outdir=None, **kwargs): if not s.completed: job = self.try_make_job(s) if job: - (joutdir, output) = job.run(outdir=outdir) + (joutdir, output) = job.run(outdir=outdir, **kwargs) for i in s.tool["outputs"]: _logger.info("Job got output: %s", output) if "id" in i: From a83dc12cff30679ab83847d2471afdcfcbad8aa7 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 30 Mar 2015 17:13:00 -0400 Subject: [PATCH 067/221] Finished documentation for command line tool, added documentation for workflow, updated examples based on schema changes, initial commit of the actual specification document! 
--- cwltool/draft2tool.py | 8 +++----- cwltool/job.py | 2 +- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 6b332a2fd..274395ff0 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -35,10 +35,8 @@ def jseval(self, expression, context): def do_eval(self, ex, context=None): if isinstance(ex, dict): if ex.get("class") == "JavascriptExpression": - if "value" in ex: - return self.jseval(ex["value"], context) - elif "invoke" in ex: - return self.jseval(ex["invoke"], context) + if "script" in ex: + return self.jseval(ex["script"], context) elif ex.get("ref"): if ex["ref"].startswith("#"): return self.job[ex["ref"][1:]] @@ -322,7 +320,7 @@ def job(self, joborder, basedir, use_container=True): return j def collect_output_ports(self, ports, builder, outdir): - custom_output = os.path.join(outdir, "output.cwl.json") + custom_output = os.path.join(outdir, "cwl.output.json") if os.path.exists(custom_output): outputdoc = yaml.load(custom_output) validate.validate_ex(self.names.get_name("output_record_schema", ""), outputdoc) diff --git a/cwltool/job.py b/cwltool/job.py index 76895e378..724559f1c 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -18,7 +18,7 @@ def run(self, dry_run=False, pull_image=True, outdir=None, rm_container=True): else: outdir = "/tmp" - with open(os.path.join(outdir, "job.cwl.json"), "w") as fp: + with open(os.path.join(outdir, "cwl.input.json"), "w") as fp: json.dump(self.joborder, fp) runtime = [] From c963b3ef5c7bf922859f6209d256eca0c522bd54 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 7 Apr 2015 17:13:42 -0400 Subject: [PATCH 068/221] New explicit scatter. Incomplete, only "dotproduct" method implemented, still need to add "nested_crossproduct" and "flat_crossproduct". 
--- cwltool/draft2tool.py | 6 +- cwltool/workflow.py | 142 +++++++++++++++++++++++++----------------- 2 files changed, 90 insertions(+), 58 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 274395ff0..3d16dac8f 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -163,7 +163,11 @@ def generate_arg(self, binding): class Tool(Process): def _init_job(self, joborder, basedir): # Validate job order - validate.validate_ex(self.names.get_name("input_record_schema", ""), joborder) + try: + validate.validate_ex(self.names.get_name("input_record_schema", ""), joborder) + except validate.ValidationException as v: + _logger.error("Failed to validate %s\n%s" % (pprint.pformat(joborder), v)) + raise builder = Builder() builder.job = copy.deepcopy(joborder) diff --git a/cwltool/workflow.py b/cwltool/workflow.py index 714131f75..1126cbcfe 100644 --- a/cwltool/workflow.py +++ b/cwltool/workflow.py @@ -8,9 +8,20 @@ import random from ref_resolver import from_url import os +from collections import namedtuple +import pprint _logger = logging.getLogger("cwltool") +WorkflowStateItem = namedtuple('WorkflowStateItem', ['parameter', 'value']) + +def idk(key): + if len(key) <= 1: + raise Exception("Identifier is too short") + if key[0] != '#': + raise Exception("Must start with #") + return key[1:] + def makeTool(toolpath_object, basedir): if "schema" in toolpath_object: return draft1tool.Tool(toolpath_object) @@ -27,55 +38,61 @@ def makeTool(toolpath_object, basedir): raise Exception("Missing 'class' field, expecting one of: Workflow, CommandLineTool, ExpressionTool, External") -def should_fanout(src_type, dest_type): - if isinstance(src_type, dict): - if src_type["type"] == "array" and src_type["items"] == dest_type: - return True - return False - class WorkflowJob(object): - def try_make_job(self, s): - jo = {} - fanout = None - for i in s.tool["inputs"]: + def try_make_job(self, step): + inputobj = {} + + if "scatter" in step.tool: + inputparms = copy.deepcopy(step.tool["inputs"]) + scatter = aslist(step.tool["scatter"]) + for i in inputparms: + if i["id"] in scatter: + i["type"] = {"type": "array", "items": i["type"]} + else: + inputparms = step.tool["inputs"] + + for i in inputparms: _logger.debug(i) if "connect" in i: connect = i["connect"] - if isinstance(connect, list): - # Handle multiple inputs - if not fanout: - fanout = i["id"][1:] - jo[i["id"][1:]] = [] - else: - raise Exception("Can only fanout on one port") + is_array = isinstance(i["type"], dict) and i["type"]["type"] == "array" + for c in aslist(connect): - src = c["source"][1:] + src = idk(c["source"]) if src in self.state: - if self.state[src][0]["type"] == i["type"]: - if fanout: - jo[i["id"][1:]].append(self.state[src][1]) + if self.state[src].parameter["type"] == i["type"]: + # source and input types are the same + if is_array and idk(i["id"]) in inputobj: + # concatenate arrays + inputobj[idk(i["id"])].extend(self.state[src].value) else: - jo[i["id"][1:]] = self.state[src][1] - elif should_fanout(self.state[src][0]["type"], i["type"]): - if fanout: - if fanout == i["id"][1:]: - jo[i["id"][1:]].extend(self.state[src][1]) - else: - raise Exception("Can only fanout on one port") + # just assign the value from state to input + inputobj[idk(i["id"])] = copy.deepcopy(self.state[src].value) + elif is_array and self.state[src].parameter["type"] == i["type"]["items"]: + # source type is the item type on the input array + # promote single item to array entry + if idk(i["id"]) in inputobj: + 
inputobj[idk(i["id"])].append(self.state[src][1]) else: - fanout = i["id"][1:] - jo[i["id"][1:]] = self.state[src][1] + inputobj[idk(i["id"])] = [self.state[src][1]] else: raise Exception("Type mismatch '%s' and '%s'" % (src, i["id"][1:])) else: return None elif "default" in i: - jo[i["id"][1:]] = i["default"] - - _logger.info("Creating job with input: %s", jo) - if fanout: - s = Fanout(s, fanout) - return s.job(jo, self.basedir) + inputobj[idk(i["id"])] = i["default"] + else: + raise Exception("Value for %s not specified" % (i["id"])) + + _logger.info("Creating job with input: %s", inputobj) + if "scatter" in step.tool: + if step.tool.get("scatterType") == "dotproduct" or step.tool.get("scatterType") is None: + step = DotProductScatter(step, aslist(step.tool["scatter"])) + elif step.tool.get("scatterType") == "nested_crossproduct": + step = NestedCrossProductScatter(step, aslist(step.tool["scatter"])) + elif step.tool.get("scatterType") == "flat_crossproduct": + step = FlatCrossProductScatter(step, aslist(step.tool["scatter"])) + return step.job(inputobj, self.basedir) def run(self, outdir=None, **kwargs): for s in self.steps: @@ -92,10 +109,10 @@ def run(self, outdir=None, **kwargs): for i in s.tool["outputs"]: _logger.info("Job got output: %s", output) if "id" in i: - if i["id"][1:] in output: - self.state[i["id"][1:]] = (i, output[i["id"][1:]]) + if idk(i["id"]) in output: + self.state[idk(i["id"])] = WorkflowStateItem(i, output[idk(i["id"])]) else: - raise Exception("Output is missing expected field %s" % i["id"][1:]) + raise Exception("Output is missing expected field %s" % idk(i["id"])) s.completed = True made_progress = True run_all -= 1 @@ -105,8 +122,8 @@ def run(self, outdir=None, **kwargs): wo = {} for i in self.outputs: if "connect" in i: - src = i["connect"]["source"][1:] - wo[i["id"][1:]] = self.state[src][1] + src = idk(i["connect"]["source"]) + wo[idk(i["id"])] = self.state[src][1] return (outdir, wo) @@ -123,11 +140,11 @@ def job(self, joborder, basedir, use_container=True): wj.state = {} for i in self.tool["inputs"]: - iid = i["id"][1:] + iid = idk(i["id"]) if iid in joborder: - wj.state[iid] = (i, copy.deepcopy(joborder[iid])) + wj.state[iid] = WorkflowStateItem(i, copy.deepcopy(joborder[iid])) elif "default" in i: - wj.state[iid] = (i, copy.deepcopy(i["default"])) + wj.state[iid] = WorkflowStateItem(i, copy.deepcopy(i["default"])) wj.outputs = self.tool["outputs"] return wj @@ -141,7 +158,7 @@ def run(self, **kwargs): (outdir, output) = self.innerjob.run(**kwargs) for i in self.tool["outputs"]: d = i["def"][len(self.impl)+1:] - output[i["id"][1:]] = output[d] + output[idk(i["id"])] = output[d] del output[d] return (outdir, output) @@ -158,7 +175,7 @@ def __init__(self, toolpath_object, basedir): for i in toolpath_object["inputs"]: d = i["def"][len(self.impl):] - toolid = i.get("id", self.id + "." + d[1:]) + toolid = i.get("id", self.id + "." 
+ idk(d)) found = False for a in self.embedded_tool.tool["inputs"]: if a["id"] == d: @@ -187,12 +204,12 @@ def __init__(self, toolpath_object, basedir): def job(self, joborder, basedir, **kwargs): for i in self.tool["inputs"]: d = i["def"][len(self.impl)+1:] - joborder[d] = joborder[i["id"][1:]] - del joborder[i["id"][1:]] + joborder[d] = joborder[idk(i["id"])] + del joborder[idk(i["id"])] return ExternalJob(self.tool, self.embedded_tool.job(joborder, basedir, **kwargs)) -class FanoutJob(object): +class ScatterJob(object): def __init__(self, outputports, jobs): self.outputports = outputports self.jobs = jobs @@ -200,17 +217,18 @@ def __init__(self, outputports, jobs): def run(self, **kwargs): outputs = {} for outschema in self.outputports: - outputs[outschema["id"][1:]] = [] + outputs[idk(outschema["id"])] = [] for j in self.jobs: (_, out) = j.run(**kwargs) for outschema in self.outputports: - outputs[outschema["id"][1:]].append(out[outschema["id"][1:]]) + outputs[idk(outschema["id"])].append(out[idk(outschema["id"])]) return (None, outputs) -class Fanout(object): - def __init__(self, process, fanout_key): +class DotProductScatter(object): + def __init__(self, process, scatter_keys): self.process = process - self.fanout_key = fanout_key + self.scatter_keys = scatter_keys + self.outputports = [] for out in self.process.tool["outputs"]: newout = copy.deepcopy(out) @@ -220,8 +238,18 @@ def __init__(self, process, fanout_key): def job(self, joborder, basedir, **kwargs): jobs = [] - for fn in joborder[self.fanout_key]: + + l = None + for s in self.scatter_keys: + if l is None: + l = len(joborder[idk(s)]) + elif l != len(joborder[idk(s)]): + raise Exception("Length of input arrays must be equal when performing dotproduct scatter.") + + for i in range(0, l): jo = copy.copy(joborder) - jo[self.fanout_key] = fn + for s in self.scatter_keys: + jo[idk(s)] = joborder[idk(s)][i] jobs.append(self.process.job(jo, basedir, **kwargs)) - return FanoutJob(self.outputports, jobs) + + return ScatterJob(self.outputports, jobs) From a231fb9e124d356d134a9d0a9804b4fa42c1ee43 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 8 Apr 2015 16:25:35 -0400 Subject: [PATCH 069/221] Use iterator-based inversion of control to separate job generation from job execution. 
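Concretely, job() becomes a generator: it yields runnable job objects (or None when nothing can run yet) and delivers results through an output callback, so the caller decides when and where each job executes. A stripped-down, self-contained sketch of the control flow (EchoJob is a hypothetical stand-in for a real command-line job):

    class EchoJob(object):
        def run(self, outdir):
            # A real job would build and run a command line here.
            self.output_callback({"out": "done"})

    def job(joborder, output_callback):
        # Generate work instead of executing it.
        j = EchoJob()
        j.output_callback = output_callback
        yield j

    final_output = []
    for r in job({"in": 1}, lambda out: final_output.append(out)):
        if r is None:
            continue       # blocked; a scheduler could wait here instead
        r.run("/tmp")      # the caller picks the output directory
    print final_output     # -> [{'out': 'done'}]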
--- cwltool/draft1tool.py | 5 +- cwltool/draft2tool.py | 13 +- cwltool/job.py | 10 +- cwltool/main.py | 21 ++- cwltool/workflow.py | 349 ++++++++++++++++++++++++++---------------- 5 files changed, 244 insertions(+), 154 deletions(-) diff --git a/cwltool/draft1tool.py b/cwltool/draft1tool.py index fb9ae7357..06daf093f 100644 --- a/cwltool/draft1tool.py +++ b/cwltool/draft1tool.py @@ -254,7 +254,7 @@ def __init__(self, toolpath_object): raise Exception("Missing or invalid 'schema' field in tool description document, must be %s" % TOOL_SCHEMA_URL) tool_schema.validate(self.tool) - def job(self, joborder, basedir, use_container=True): + def job(self, joborder, basedir, output_callback, use_container=True): inputs = joborder['inputs'] Draft4Validator(self.tool['inputs']).validate(inputs) @@ -336,8 +336,9 @@ def job(self, joborder, basedir, use_container=True): j.pathmapper = d j.collect_outputs = functools.partial(self.collect_outputs, self.tool.get("outputs", {}), joborder) + j.output_callback = output_callback - return j + yield j def collect_outputs(self, schema, joborder, outdir): result_path = os.path.join(outdir, "result.cwl.json") diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 3d16dac8f..21507ca58 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -192,16 +192,16 @@ def __init__(self, toolpath_object): class ExpressionJob(object): def run(self, outdir=None, **kwargs): - return (outdir, self.builder.do_eval(self.script)) + self.output_callback(self.builder.do_eval(self.script)) - def job(self, joborder, basedir, **kwargs): + def job(self, joborder, basedir, output_callback, **kwargs): builder = self._init_job(joborder, basedir) j = ExpressionTool.ExpressionJob() j.builder = builder j.script = self.tool["script"] - - return j + j.output_callback = output_callback + yield j def aslist(l): if isinstance(l, list): @@ -213,7 +213,7 @@ class CommandLineTool(Tool): def __init__(self, toolpath_object): super(CommandLineTool, self).__init__(toolpath_object, "CommandLineTool") - def job(self, joborder, basedir, use_container=True): + def job(self, joborder, basedir, output_callback, use_container=True, **kwargs): builder = self._init_job(joborder, basedir) if self.tool["baseCommand"]: @@ -320,8 +320,9 @@ def job(self, joborder, basedir, use_container=True): j.pathmapper = builder.pathmapper j.collect_outputs = functools.partial(self.collect_output_ports, self.tool["outputs"], builder) + j.output_callback = output_callback - return j + yield j def collect_output_ports(self, ports, builder, outdir): custom_output = os.path.join(outdir, "cwl.output.json") diff --git a/cwltool/job.py b/cwltool/job.py index 724559f1c..ba5f93292 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -11,13 +11,7 @@ _logger = logging.getLogger("cwltool") class CommandLineJob(object): - def run(self, dry_run=False, pull_image=True, outdir=None, rm_container=True): - if not outdir: - if not dry_run: - outdir = tempfile.mkdtemp() - else: - outdir = "/tmp" - + def run(self, outdir, dry_run=False, pull_image=True, rm_container=True): with open(os.path.join(outdir, "cwl.input.json"), "w") as fp: json.dump(self.joborder, fp) @@ -134,4 +128,4 @@ def run(self, dry_run=False, pull_image=True, outdir=None, rm_container=True): if stdout != sys.stderr: stdout.close() - return (outdir, self.collect_outputs(outdir)) + self.output_callback(self.collect_outputs(outdir)) diff --git a/cwltool/main.py b/cwltool/main.py index 50f921cb2..30600d54f 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -11,6 +11,7 
@@ import logging import workflow import validate +import tempfile _logger = logging.getLogger("cwltool") _logger.addHandler(logging.StreamHandler()) @@ -63,8 +64,13 @@ def main(): return 1 try: - job = t.job(from_url(args.job_order), basedir, use_container=(not args.no_container)) + final_output = [] + def output_callback(out): + final_output.append(out) + + jobiter = t.job(from_url(args.job_order), basedir, output_callback, use_container=(not args.no_container)) if args.conformance_test: + job = jobiter.next() a = {"args": job.command_line} if job.stdin: a["stdin"] = job.stdin @@ -74,9 +80,18 @@ def main(): a["generatefiles"] = job.generatefiles print json.dumps(a) else: - (outdir, runjob) = job.run(dry_run=args.dry_run, pull_image=(not args.no_pull), outdir=args.outdir, rm_container=(not args.leave_container)) + for r in jobiter: + if r: + if args.dry_run: + outdir = "/tmp" + elif args.outdir: + outdir = args.outdir + else: + outdir = tempfile.mkdtemp() + r.run(outdir, dry_run=args.dry_run, pull_image=(not args.no_pull), rm_container=(not args.leave_container)) + _logger.info("Output directory is %s", outdir) - print json.dumps(runjob) + print json.dumps(final_output[0]) except (jsonschema.exceptions.ValidationError, validate.ValidationException): _logger.exception("Job order failed validation") return 1 diff --git a/cwltool/workflow.py b/cwltool/workflow.py index 1126cbcfe..2603015b8 100644 --- a/cwltool/workflow.py +++ b/cwltool/workflow.py @@ -10,6 +10,7 @@ import os from collections import namedtuple import pprint +import functools _logger = logging.getLogger("cwltool") @@ -38,130 +39,130 @@ def makeTool(toolpath_object, basedir): raise Exception("Missing 'class' field, expecting one of: Workflow, CommandLineTool, ExpressionTool, External") -class WorkflowJob(object): - def try_make_job(self, step): +class Workflow(Process): + def __init__(self, toolpath_object): + super(Workflow, self).__init__(toolpath_object, "Workflow") + + def receive_output(self, step, outputparms, jobout): + _logger.info("Job got output: %s", jobout) + for i in outputparms: + if "id" in i: + if idk(i["id"]) in jobout: + self.state[idk(i["id"])] = WorkflowStateItem(i, jobout[idk(i["id"])]) + else: + raise Exception("Output is missing expected field %s" % idk(i["id"])) + step.completed = True + + def try_make_job(self, step, basedir, **kwargs): inputobj = {} if "scatter" in step.tool: inputparms = copy.deepcopy(step.tool["inputs"]) + outputparms = copy.deepcopy(step.tool["outputs"]) scatter = aslist(step.tool["scatter"]) for i in inputparms: if i["id"] in scatter: i["type"] = {"type": "array", "items": i["type"]} - else: - inputparms = step.tool["inputs"] - for i in inputparms: - _logger.debug(i) - if "connect" in i: - connect = i["connect"] - is_array = isinstance(i["type"], dict) and i["type"]["type"] == "array" + if step.tool.get("scatterType") == "nested_crossproduct": + nesting = len(aslist(step.tool["scatter"])) + else: + nesting = 1 - for c in aslist(connect): - src = idk(c["source"]) + for r in xrange(0, nesting): + for i in outputparms: + i["type"] = {"type": "array", "items": i["type"]} + else: + inputparms = step.tool["inputs"] + outputparms = step.tool["outputs"] + + for inp in inputparms: + _logger.debug(inp) + iid = idk(inp["id"]) + if "connect" in inp: + connections = inp["connect"] + is_array = isinstance(inp["type"], dict) and inp["type"]["type"] == "array" + for connection in aslist(connections): + src = idk(connection["source"]) if src in self.state: - if self.state[src].parameter["type"] == 
i["type"]: + if self.state[src].parameter["type"] == inp["type"]: # source and input types are the same - if is_array and idk(i["id"]) in inputobj: - # concatenate arrays - inputobj[idk(i["id"])].extend(self.state[src].value) + if is_array and iid in inputobj: + # there's already a value in the input object, so extend the existing array + inputobj[iid].extend(self.state[src].value) else: - # just assign the value from state to input - inputobj[idk(i["id"])] = copy.deepcopy(self.state[src].value) - elif is_array and self.state[src].parameter["type"] == i["type"]["items"]: + # simply assign the value from state to input + inputobj[iid] = copy.deepcopy(self.state[src].value) + elif is_array and self.state[src].parameter["type"] == inp["type"]["items"]: # source type is the item type on the input array # promote single item to array entry - if idk(i["id"]) in inputobj: - inputobj[idk(i["id"])].append(self.state[src][1]) + if iid in inputobj: + inputobj[iid].append(self.state[src].value) else: - inputobj[idk(i["id"])] = [self.state[src][1]] + inputobj[iid] = [self.state[src].value] else: - raise Exception("Type mismatch '%s' and '%s'" % (src, i["id"][1:])) + raise Exception("Type mismatch '%s' and '%s'" % (src, inp["id"][1:])) else: - return None - elif "default" in i: - inputobj[idk(i["id"])] = i["default"] + return + elif "default" in inp: + inputobj[iid] = inp["default"] else: - raise Exception("Value for %s not specified" % (i["id"])) + raise Exception("Value for %s not specified" % (inp["id"])) _logger.info("Creating job with input: %s", inputobj) - if "scatter" in step.tool: + + callback = functools.partial(self.receive_output, step, outputparms) + + if step.tool.get("scatter"): if step.tool.get("scatterType") == "dotproduct" or step.tool.get("scatterType") is None: - step = DotProductScatter(step, aslist(step.tool["scatter"])) + jobs = dotproduct_scatter(step, inputobj, basedir, aslist(step.tool["scatter"]), callback, **kwargs) elif step.tool.get("scatterType") == "nested_crossproduct": - step = NestedCrossProductScatter(step, aslist(step.tool["scatter"])) + jobs = nested_rossproduct_scatter(step, inputobj, basedir, aslist(step.tool["scatter"]), callback, **kwargs) elif step.tool.get("scatterType") == "flat_crossproduct": - step = FlatCrossProductScatter(step, aslist(step.tool["scatter"])) - return step.job(inputobj, self.basedir) + jobs = flat_crossproduct_scatter(step, inputobj, basedir, aslist(step.tool["scatter"]), callback, 0, **kwargs) + else: + jobs = step.job(inputobj, basedir, callback, **kwargs) + + for j in jobs: + yield j - def run(self, outdir=None, **kwargs): - for s in self.steps: + def job(self, joborder, basedir, output_callback, **kwargs): + steps = [makeTool(step, basedir) for step in self.tool.get("steps", [])] + random.shuffle(steps) + + self.state = {} + for i in self.tool["inputs"]: + iid = idk(i["id"]) + if iid in joborder: + self.state[iid] = WorkflowStateItem(i, copy.deepcopy(joborder[iid])) + elif "default" in i: + self.state[iid] = WorkflowStateItem(i, copy.deepcopy(i["default"])) + + for s in steps: s.completed = False - run_all = len(self.steps) - while run_all: + completed = 0 + while completed < len(steps): made_progress = False - for s in self.steps: - if not s.completed: - job = self.try_make_job(s) - if job: - (joutdir, output) = job.run(outdir=outdir, **kwargs) - for i in s.tool["outputs"]: - _logger.info("Job got output: %s", output) - if "id" in i: - if idk(i["id"]) in output: - self.state[idk(i["id"])] = WorkflowStateItem(i, output[idk(i["id"])]) - 
else: - raise Exception("Output is missing expected field %s" % idk(i["id"])) - s.completed = True - made_progress = True - run_all -= 1 + completed = 0 + for step in steps: + if step.completed: + completed += 1 + else: + for newjob in self.try_make_job(step, basedir, **kwargs): + if newjob: + made_progress = True + yield newjob if not made_progress: - raise Exception("Deadlocked") + yield None wo = {} - for i in self.outputs: + for i in self.tool["outputs"]: if "connect" in i: src = idk(i["connect"]["source"]) - wo[idk(i["id"])] = self.state[src][1] + wo[idk(i["id"])] = self.state[src].value - return (outdir, wo) - - -class Workflow(Process): - def __init__(self, toolpath_object): - super(Workflow, self).__init__(toolpath_object, "Workflow") - - def job(self, joborder, basedir, use_container=True): - wj = WorkflowJob() - wj.basedir = basedir - wj.steps = [makeTool(s, basedir) for s in self.tool.get("steps", [])] - random.shuffle(wj.steps) - - wj.state = {} - for i in self.tool["inputs"]: - iid = idk(i["id"]) - if iid in joborder: - wj.state[iid] = WorkflowStateItem(i, copy.deepcopy(joborder[iid])) - elif "default" in i: - wj.state[iid] = WorkflowStateItem(i, copy.deepcopy(i["default"])) - wj.outputs = self.tool["outputs"] - return wj - -class ExternalJob(object): - def __init__(self, tool, innerjob): - self.tool = tool - self.innerjob = innerjob - - def run(self, **kwargs): - self.impl = self.tool["impl"] - (outdir, output) = self.innerjob.run(**kwargs) - for i in self.tool["outputs"]: - d = i["def"][len(self.impl)+1:] - output[idk(i["id"])] = output[d] - del output[d] - - return (outdir, output) + output_callback(wo) class External(Process): def __init__(self, toolpath_object, basedir): @@ -201,55 +202,133 @@ def __init__(self, toolpath_object, basedir): super(External, self).__init__(toolpath_object, "Process") - def job(self, joborder, basedir, **kwargs): + def receive_output(self, jobout): + self.output = {} + for i in self.tool["outputs"]: + if i["def"][:len(self.impl)] != self.impl: + raise Exception("'def' is '%s' but must refer to fragment of resource '%s' listed in 'impl'" % (i["def"], self.impl)) + d = idk(i["def"][len(self.impl):]) + self.output[idk(i["id"])] = jobout[d] + + def job(self, joborder, basedir, output_callback, **kwargs): for i in self.tool["inputs"]: d = i["def"][len(self.impl)+1:] joborder[d] = joborder[idk(i["id"])] del joborder[idk(i["id"])] - return ExternalJob(self.tool, self.embedded_tool.job(joborder, basedir, **kwargs)) - -class ScatterJob(object): - def __init__(self, outputports, jobs): - self.outputports = outputports - self.jobs = jobs - - def run(self, **kwargs): - outputs = {} - for outschema in self.outputports: - outputs[idk(outschema["id"])] = [] - for j in self.jobs: - (_, out) = j.run(**kwargs) - for outschema in self.outputports: - outputs[idk(outschema["id"])].append(out[idk(outschema["id"])]) - return (None, outputs) - -class DotProductScatter(object): - def __init__(self, process, scatter_keys): - self.process = process - self.scatter_keys = scatter_keys - - self.outputports = [] - for out in self.process.tool["outputs"]: - newout = copy.deepcopy(out) - newout["type"] = {"type": "array", "items": out["type"]} - self.outputports.append(newout) - self.tool = {"outputs": self.outputports} - - def job(self, joborder, basedir, **kwargs): - jobs = [] - - l = None - for s in self.scatter_keys: - if l is None: - l = len(joborder[idk(s)]) - elif l != len(joborder[idk(s)]): - raise Exception("Length of input arrays must be equal when performing 
dotproduct scatter.") - - for i in range(0, l): + self.output = None + for t in self.embedded_tool.job(joborder, basedir, self.receive_output, **kwargs): + yield t + + while self.output is None: + yield None + + output_callback(self.output) + + +class ReceiveScatterOutput(object): + def __init__(self, dest): + self.dest = dest + self.completed = 0 + + def receive_scatter_output(self, index, jobout): + for k,v in jobout.items(): + self.dest[k][index] = v + self.completed += 1 + +def dotproduct_scatter(process, joborder, basedir, scatter_keys, output_callback, **kwargs): + l = None + for s in scatter_keys: + if l is None: + l = len(joborder[idk(s)]) + elif l != len(joborder[idk(s)]): + raise Exception("Length of input arrays must be equal when performing dotproduct scatter.") + + output = {} + for i in process.tool["outputs"]: + output[idk(i["id"])] = [None] * l + + rc = ReceiveScatterOutput(output) + + for n in range(0, l): + jo = copy.copy(joborder) + for s in scatter_keys: + jo[idk(s)] = joborder[idk(s)][n] + + for j in process.job(jo, basedir, functools.partial(rc.receive_scatter_output, n), **kwargs): + yield j + + while rc.completed < l: + yield None + + output_callback(output) + + +def nested_crossproduct_scatter(process, joborder, basedir, scatter_keys, output_callback, **kwargs): + scatter_key = idk(scatter_keys[0]) + l = len(joborder[scatter_key]) + output = {} + for i in process["outputs"]: + output[idk(i["id"])] = [None] * l + + rc = ReceiveScatterOutput(output) + + for n in range(0, l): + jo = copy.copy(joborder) + jo[scatter_key] = joborder[scatter_key][n] + + if len(scatter_keys) == 1: + for j in process.job(jo, basedir, functools.partial(rc.receive_scatter_output, n), **kwargs): + yield j + else: + for j in nested_crossproduct_scatter(process, jo, basedir, scatter_keys[1:], functools.partial(rc.receive_scatter_output, n)): + yield j + + while rc.completed < l: + yield None + + output_callback(output) + +def crossproduct_size(joborder, scatter_keys): + scatter_key = idk(scatter_keys[0]) + if len(scatter_keys) == 1: + sum = len(joborder[scatter_key]) + else: + sum = 0 + for n in range(0, l): jo = copy.copy(joborder) - for s in self.scatter_keys: - jo[idk(s)] = joborder[idk(s)][i] - jobs.append(self.process.job(jo, basedir, **kwargs)) + jo[scatter_key] = joborder[scatter_key][n] + sum += crossproduct_size(joborder, scatter_keys[1:]) + return sum + +def flat_crossproduct_scatter(process, joborder, basedir, scatter_keys, output_callback, startindex, **kwargs): + scatter_key = idk(scatter_keys[0]) + l = len(joborder[scatter_key]) + + if startindex == 0: + output = {} + for i in process["outputs"]: + output[idk(i["id"])] = [None] * crossproduct_size(joborder, scatter_keys) + rc = ReceiveScatterOutput(output) + else: + rc = output_callback + + put = startindex + for n in range(0, l): + jo = copy.copy(joborder) + jo[scatter_key] = joborder[scatter_key][n] + + if len(scatter_keys) == 1: + for j in process.job(jo, basedir, functools.partial(rc.receive_scatter_output, put), **kwargs): + yield j + put += 1 + else: + for j in flat_crossproduct_scatter(process, jo, basedir, scatter_keys[1:], functools.partial(rc.receive_scatter_output, put)): + put += 1 + yield j + + if startindex == 0: + while rc.completed < put: + yield None - return ScatterJob(self.outputports, jobs) + output_callback(output) From 1b9e6f580a1dd8ba92bf9f9fbed41faa13c1536c Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 9 Apr 2015 15:26:51 -0400 Subject: [PATCH 070/221] Move "examples" to "conformance" since they 
are really test cases. Add tests for scatter/gather methods. --- cwltool/draft2tool.py | 2 ++ cwltool/main.py | 27 +++++++++++++--- cwltool/validate.py | 16 ++++++++-- cwltool/workflow.py | 73 +++++++++++++++++++++++++++---------------- 4 files changed, 84 insertions(+), 34 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 21507ca58..41a0d8bb7 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -359,6 +359,8 @@ def collect_output(self, schema, builder, outdir): r = r[0] if r else None elif binding.get("loadContents"): r = [v["contents"] for v in r] + if len(r) == 1: + r = r[0] else: r = None diff --git a/cwltool/main.py b/cwltool/main.py index 30600d54f..a35ee5139 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -59,8 +59,15 @@ def main(): try: t = workflow.makeTool(from_url(args.workflow), basedir) - except (jsonschema.exceptions.ValidationError, validate.ValidationException): - _logger.exception("Tool definition failed validation") + except (jsonschema.exceptions.ValidationError, validate.ValidationException) as e: + _logger.error("Tool definition failed validation:\n%s" % e) + if args.debug: + _logger.exception(e) + return 1 + except RuntimeError as e: + _logger.error(e) + if args.debug: + _logger.exception(e) return 1 try: @@ -80,6 +87,7 @@ def output_callback(out): a["generatefiles"] = job.generatefiles print json.dumps(a) else: + last = None for r in jobiter: if r: if args.dry_run: @@ -89,11 +97,22 @@ def output_callback(out): else: outdir = tempfile.mkdtemp() r.run(outdir, dry_run=args.dry_run, pull_image=(not args.no_pull), rm_container=(not args.leave_container)) + else: + print "Workflow deadlocked." + return 1 + last = r _logger.info("Output directory is %s", outdir) print json.dumps(final_output[0]) - except (jsonschema.exceptions.ValidationError, validate.ValidationException): - _logger.exception("Job order failed validation") + except (jsonschema.exceptions.ValidationError, validate.ValidationException) as e: + _logger.error("Input object failed validation:\n%s" % e) + if args.debug: + _logger.exception(e) + return 1 + except workflow.WorkflowException as e: + _logger.error("Workflow error:\n%s" % e) + if args.debug: + _logger.exception(e) return 1 return 0 diff --git a/cwltool/validate.py b/cwltool/validate.py index dbb589733..91a8c72a3 100644 --- a/cwltool/validate.py +++ b/cwltool/validate.py @@ -14,9 +14,13 @@ def validate(expected_schema, datum): LONG_MIN_VALUE = -(1 << 63) LONG_MAX_VALUE = (1 << 63) - 1 +def indent(v): + return "\n".join([" " + l for l in v.splitlines()]) + def validate_ex(expected_schema, datum): """Determine if a python datum is an instance of a schema.""" schema_type = expected_schema.type + if schema_type == 'null': if datum is None: return True @@ -74,7 +78,7 @@ def validate_ex(expected_schema, datum): raise ValidationException("%s\n while validating item at position %i `%s`" % (v, i, d)) return True else: - raise ValidationException("`%s`\n is not a list, expected list of\n %s" % (pprint.pformat(datum), expected_schema.items)) + raise ValidationException("`%s` is not a list, expected list of %s" % (pprint.pformat(datum), expected_schema.items)) elif schema_type == 'map': if (isinstance(datum, dict) and False not in [isinstance(k, basestring) for k in datum.keys()] and @@ -98,8 +102,14 @@ def validate_ex(expected_schema, datum): raise ValidationException("`%s`\n is not a dict" % pprint.pformat(datum)) try: for f in expected_schema.fields: + try: +
validate_ex(f.type, datum.get(f.name)) + except ValidationException as v: + if f.name not in datum: + raise ValidationException("Missing required field `%s`" % f.name) + else: + raise return True except ValidationException as v: - raise ValidationException("%s\n while validating field `%s`" % (v, f.name)) + raise ValidationException("Validating field `%s`:\n%s" % (f.name, indent(str(v)))) raise ValidationException("Unrecognized schema_type %s" % schema_type) diff --git a/cwltool/workflow.py b/cwltool/workflow.py index 2603015b8..c795dcb57 100644 --- a/cwltool/workflow.py +++ b/cwltool/workflow.py @@ -11,16 +11,20 @@ from collections import namedtuple import pprint import functools +import validate _logger = logging.getLogger("cwltool") WorkflowStateItem = namedtuple('WorkflowStateItem', ['parameter', 'value']) +class WorkflowException(Exception): + pass + def idk(key): if len(key) <= 1: - raise Exception("Identifier is too short") + raise WorkflowException("Identifier is too short") if key[0] != '#': - raise Exception("Must start with #") + raise WorkflowException("Must start with #") return key[1:] def makeTool(toolpath_object, basedir): @@ -36,7 +40,7 @@ def makeTool(toolpath_object, basedir): elif toolpath_object["class"] == "Workflow": return Workflow(toolpath_object) else: - raise Exception("Missing 'class' field, expecting one of: Workflow, CommandLineTool, ExpressionTool, External") + raise WorkflowException("Missing 'class' field, expecting one of: Workflow, CommandLineTool, ExpressionTool, External") class Workflow(Process): @@ -50,7 +54,7 @@ def receive_output(self, step, outputparms, jobout): if idk(i["id"]) in jobout: self.state[idk(i["id"])] = WorkflowStateItem(i, jobout[idk(i["id"])]) else: - raise Exception("Output is missing expected field %s" % idk(i["id"])) + raise WorkflowException("Output is missing expected field %s" % idk(i["id"])) step.completed = True def try_make_job(self, step, basedir, **kwargs): @@ -60,11 +64,15 @@ def try_make_job(self, step, basedir, **kwargs): inputparms = copy.deepcopy(step.tool["inputs"]) outputparms = copy.deepcopy(step.tool["outputs"]) scatter = aslist(step.tool["scatter"]) - for i in inputparms: - if i["id"] in scatter: - i["type"] = {"type": "array", "items": i["type"]} - if step.tool.get("scatterType") == "nested_crossproduct": + inp_map = {i["id"]: i for i in inputparms} + for s in aslist(step.tool["scatter"]): + if s not in inp_map: + raise WorkflowException("Invalid Scatter parameter '%s'" % s) + + inp_map[s]["type"] = {"type": "array", "items": inp_map[s]["type"]} + + if step.tool.get("scatterMethod") == "nested_crossproduct": nesting = len(aslist(step.tool["scatter"])) else: nesting = 1 @@ -101,24 +109,28 @@ def try_make_job(self, step, basedir, **kwargs): else: - raise Exception("Type mismatch '%s' and '%s'" % (src, inp["id"][1:])) + raise WorkflowException("Type mismatch between '%s' (%s) and '%s' (%s)" % (src, self.state[src].parameter["type"], idk(inp["id"]), inp["type"])) else: - return + raise WorkflowException("Connect source '%s' on parameter '%s' does not exist" % (src, inp["id"])) elif "default" in inp: inputobj[iid] = inp["default"] else: - raise Exception("Value for %s not specified" % (inp["id"])) + raise WorkflowException("Value for %s not specified" % (inp["id"])) _logger.info("Creating job with input: %s", inputobj) callback = functools.partial(self.receive_output, step, outputparms) if step.tool.get("scatter"): - if step.tool.get("scatterType") == "dotproduct" or
step.tool.get("scatterType") is None: + method = step.tool.get("scatterMethod") + if method is None and len(aslist(step.tool["scatter"])) != 1: + raise WorkflowException("Must specify scatterMethod when scattering over multiple inputs") + + if method == "dotproduct" or method is None: jobs = dotproduct_scatter(step, inputobj, basedir, aslist(step.tool["scatter"]), callback, **kwargs) - elif step.tool.get("scatterType") == "nested_crossproduct": - jobs = nested_rossproduct_scatter(step, inputobj, basedir, aslist(step.tool["scatter"]), callback, **kwargs) - elif step.tool.get("scatterType") == "flat_crossproduct": + elif method == "nested_crossproduct": + jobs = nested_crossproduct_scatter(step, inputobj, basedir, aslist(step.tool["scatter"]), callback, **kwargs) + elif method == "flat_crossproduct": jobs = flat_crossproduct_scatter(step, inputobj, basedir, aslist(step.tool["scatter"]), callback, 0, **kwargs) else: jobs = step.job(inputobj, basedir, callback, **kwargs) @@ -127,6 +139,9 @@ def try_make_job(self, step, basedir, **kwargs): yield j def job(self, joborder, basedir, output_callback, **kwargs): + # Validate job order + validate.validate_ex(self.names.get_name("input_record_schema", ""), joborder) + steps = [makeTool(step, basedir) for step in self.tool.get("steps", [])] random.shuffle(steps) @@ -137,8 +152,12 @@ def job(self, joborder, basedir, output_callback, **kwargs): self.state[iid] = WorkflowStateItem(i, copy.deepcopy(joborder[iid])) elif "default" in i: self.state[iid] = WorkflowStateItem(i, copy.deepcopy(i["default"])) + else: + raise WorkflowException("Input '%s' not in input object and does not have a default value." % (i["id"])) for s in steps: + for out in s.tool["outputs"]: + self.state[idk(out["id"])] = None s.completed = False completed = 0 @@ -153,7 +172,7 @@ def job(self, joborder, basedir, output_callback, **kwargs): if newjob: made_progress = True yield newjob - if not made_progress: + if not made_progress and completed < len(steps): yield None wo = {} @@ -183,7 +202,7 @@ def __init__(self, toolpath_object, basedir): i.update(a) found = True if not found: - raise Exception("Did not find input '%s' in external process" % (i["def"])) + raise WorkflowException("Did not find input '%s' in external process" % (i["def"])) i["id"] = toolid @@ -196,7 +215,7 @@ def __init__(self, toolpath_object, basedir): i.update(a) found = True if not found: - raise Exception("Did not find output '%s' in external process" % (i["def"])) + raise WorkflowException("Did not find output '%s' in external process" % (i["def"])) i["id"] = toolid @@ -206,7 +225,7 @@ def receive_output(self, jobout): self.output = {} for i in self.tool["outputs"]: if i["def"][:len(self.impl)] != self.impl: - raise Exception("'def' is '%s' but must refer to fragment of resource '%s' listed in 'impl'" % (i["def"], self.impl)) + raise WorkflowException("'def' is '%s' but must refer to fragment of resource '%s' listed in 'impl'" % (i["def"], self.impl)) d = idk(i["def"][len(self.impl):]) self.output[idk(i["id"])] = jobout[d] @@ -242,7 +261,7 @@ def dotproduct_scatter(process, joborder, basedir, scatter_keys, output_callback if l is None: l = len(joborder[idk(s)]) elif l != len(joborder[idk(s)]): - raise Exception("Length of input arrays must be equal when performing dotproduct scatter.") + raise WorkflowException("Length of input arrays must be equal when performing dotproduct scatter.") output = {} for i in process.tool["outputs"]: @@ -268,7 +287,7 @@ def nested_crossproduct_scatter(process, joborder, basedir, 
scatter_keys, output scatter_key = idk(scatter_keys[0]) l = len(joborder[scatter_key]) output = {} - for i in process["outputs"]: + for i in process.tool["outputs"]: output[idk(i["id"])] = [None] * l rc = ReceiveScatterOutput(output) @@ -281,7 +300,7 @@ def nested_crossproduct_scatter(process, joborder, basedir, scatter_keys, output for j in process.job(jo, basedir, functools.partial(rc.receive_scatter_output, n), **kwargs): yield j else: - for j in nested_crossproduct_scatter(process, jo, basedir, scatter_keys[1:], functools.partial(rc.receive_scatter_output, n)): + for j in nested_crossproduct_scatter(process, jo, basedir, scatter_keys[1:], functools.partial(rc.receive_scatter_output, n), **kwargs): yield j while rc.completed < l: @@ -295,7 +314,7 @@ def crossproduct_size(joborder, scatter_keys): sum = len(joborder[scatter_key]) else: sum = 0 - for n in range(0, l): + for n in range(0, len(joborder[scatter_key])): jo = copy.copy(joborder) jo[scatter_key] = joborder[scatter_key][n] sum += crossproduct_size(joborder, scatter_keys[1:]) @@ -305,9 +324,9 @@ def flat_crossproduct_scatter(process, joborder, basedir, scatter_keys, output_c scatter_key = idk(scatter_keys[0]) l = len(joborder[scatter_key]) - if startindex == 0: + if startindex == 0 and not isinstance(output_callback, ReceiveScatterOutput): output = {} - for i in process["outputs"]: + for i in process.tool["outputs"]: output[idk(i["id"])] = [None] * crossproduct_size(joborder, scatter_keys) rc = ReceiveScatterOutput(output) else: @@ -323,11 +342,11 @@ def flat_crossproduct_scatter(process, joborder, basedir, scatter_keys, output_c yield j put += 1 else: - for j in flat_crossproduct_scatter(process, jo, basedir, scatter_keys[1:], functools.partial(rc.receive_scatter_output, put)): + for j in flat_crossproduct_scatter(process, jo, basedir, scatter_keys[1:], rc, put, **kwargs): put += 1 yield j - if startindex == 0: + if startindex == 0 and not isinstance(output_callback, ReceiveScatterOutput): while rc.completed < put: yield None From 65f3232387646d82c0e040dfd74cc6577c1335b1 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 9 Apr 2015 17:17:51 -0400 Subject: [PATCH 071/221] Propagate requirements and hints. Greatly improve validation error messages. 
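Propagation is plain list concatenation: requirements inherited from the enclosing process come first and the tool's own declarations are appended, so a consumer that scans the combined list with reversed() (as the docker lookup introduced in the next commit does) lets the most local declaration win. A small sketch with made-up image names:

    inherited = [{"class": "DockerRequirement", "dockerPull": "ubuntu:14.04"}]
    local = [{"class": "DockerRequirement", "dockerPull": "python:2.7"}]

    # Mirrors kwargs.get("requirements", []) + self.tool.get("requirements", [])
    combined = inherited + local

    def pick_image(reqs):
        # Scanning in reverse finds the innermost declaration first.
        for r in reversed(reqs):
            if r["class"] == "DockerRequirement":
                return r.get("dockerImageId", r.get("dockerPull"))

    print pick_image(combined)    # -> "python:2.7"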
--- cwltool/draft2tool.py | 30 ++++++++--------------- cwltool/job.py | 1 + cwltool/process.py | 3 +++ cwltool/validate.py | 56 ++++++++++++++++++++++++++++++------------- cwltool/workflow.py | 14 +++++++---- 5 files changed, 63 insertions(+), 41 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 41a0d8bb7..37c1d58d5 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -14,6 +14,7 @@ import hashlib import random from process import Process +from process import WorkflowException import validate _logger = logging.getLogger("cwltool") @@ -22,6 +23,8 @@ module_dir = os.path.dirname(os.path.abspath(__file__)) +supportedProcessRequirements = ("DockerRequirement", "MemoryRequirement", "ExpressionEngineRequirement") + class Builder(object): def jseval(self, expression, context): if isinstance(expression, list): @@ -283,28 +286,15 @@ def job(self, joborder, basedir, output_callback, use_container=True, **kwargs): for t in self.tool.get("environmentDefs", []): j.environment[t["env"]] = builder.do_eval(t["value"]) - for r in self.tool.get("requirements", []): - if r["class"] not in ("DockerRequirement", "MemoryRequirement"): - raise Exception("Unknown requirement %s" % (r["class"])) + j.requirements = kwargs.get("requirements", []) + self.tool.get("requirements", []) + j.hints = kwargs.get("hints", []) + self.tool.get("hints", []) + + for r in j.requirements: + if r["class"] not in supportedProcessRequirements: + raise WorkflowException("Unsupported process requirement %s" % (r["class"])) - reqsAndHints = self.tool.get("requirements", []) + self.tool.get("hints", []) - for r in reqsAndHints: + for r in (j.requirements + j.hints): if r["class"] == "DockerRequirement" and use_container: - j.container = {} - j.container["type"] = "docker" - if "dockerPull" in r: - j.container["pull"] = r["dockerPull"] - if "dockerFile" in r: - j.container["file"] = r["dockerFile"] - if "dockerLoad" in r: - if r["dockerLoad"].startswith("http"): - j.container["load"] = r["dockerLoad"] - else: - j.container["load"] = os.path.join(basedir, r["dockerLoad"]) - if "dockerImageId" in r: - j.container["imageId"] = r["dockerImageId"] - else: - j.container["imageId"] = r["dockerPull"] builder.pathmapper = DockerPathMapper(reffiles, basedir) if builder.pathmapper is None: diff --git a/cwltool/job.py b/cwltool/job.py index ba5f93292..17ac16756 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -12,6 +12,7 @@ class CommandLineJob(object): def run(self, outdir, dry_run=False, pull_image=True, rm_container=True): + with open(os.path.join(outdir, "cwl.input.json"), "w") as fp: json.dump(self.joborder, fp) diff --git a/cwltool/process.py b/cwltool/process.py index 9e9da8b03..cacda56af 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -13,6 +13,9 @@ _logger = logging.getLogger("cwltool") +class WorkflowException(Exception): + pass + def specialize(items, spec): if isinstance(items, dict): for n in ("type", "items", "values"): diff --git a/cwltool/validate.py b/cwltool/validate.py index 91a8c72a3..3a1f6266a 100644 --- a/cwltool/validate.py +++ b/cwltool/validate.py @@ -1,4 +1,5 @@ import pprint +import avro.schema class ValidationException(Exception): pass @@ -14,8 +15,29 @@ def validate(expected_schema, datum): LONG_MIN_VALUE = -(1 << 63) LONG_MAX_VALUE = (1 << 63) - 1 -def indent(v): - return "\n".join([" " + l for l in v.splitlines()]) +def indent(v, nolead=False): + if nolead: + return v.splitlines()[0] + "\n".join([" " + l for l in v.splitlines()[1:]]) + else: + return "\n".join([" " 
+ l for l in v.splitlines()]) + +def friendly(v): + if isinstance(v, avro.schema.NamedSchema): + return v.name + if isinstance(v, avro.schema.ArraySchema): + return "array of <%s>" % friendly(v.items) + elif isinstance(v, avro.schema.PrimitiveSchema): + return v.type + elif isinstance(v, avro.schema.UnionSchema): + return " or ".join([friendly(s) for s in v.schemas]) + else: + return v + +def multi(v, q=""): + if '\n' in v: + return "%s%s%s\n" % (q, v, q) + else: + return "%s%s%s" % (q, v, q) def validate_ex(expected_schema, datum): """Determine if a python datum is an instance of a schema.""" @@ -25,60 +47,60 @@ def validate_ex(expected_schema, datum): if datum is None: return True else: - raise ValidationException("`%s` is not null" % datum) + raise ValidationException("the value `%s` is not null" % pprint.pformat(datum)) elif schema_type == 'boolean': if isinstance(datum, bool): return True else: - raise ValidationException("`%s` is not boolean" % datum) + raise ValidationException("the value `%s` is not boolean" % pprint.pformat(datum)) elif schema_type == 'string': if isinstance(datum, basestring): return True else: - raise ValidationException("`%s` is not string" % datum) + raise ValidationException("the value `%s` is not string" % pprint.pformat(datum)) elif schema_type == 'bytes': if isinstance(datum, str): return True else: - raise ValidationException("`%s` is not bytes" % datum) + raise ValidationException("the value `%s` is not bytes" % pprint.pformat(datum)) elif schema_type == 'int': if ((isinstance(datum, int) or isinstance(datum, long)) and INT_MIN_VALUE <= datum <= INT_MAX_VALUE): return True else: - raise ValidationException("`%s` is not int" % datum) + raise ValidationException("`%s` is not int" % pprint.pformat(datum)) elif schema_type == 'long': if ((isinstance(datum, int) or isinstance(datum, long)) and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE): return True else: - raise ValidationException("`%s` is not long" % datum) + raise ValidationException("the value `%s` is not long" % pprint.pformat(datum)) elif schema_type in ['float', 'double']: if (isinstance(datum, int) or isinstance(datum, long) or isinstance(datum, float)): return True else: - raise ValidationException("`%s` is not float or double" % datum) + raise ValidationException("the value `%s` is not float or double" % pprint.pformat(datum)) elif schema_type == 'fixed': if isinstance(datum, str) and len(datum) == expected_schema.size: return True else: - raise ValidationException("`%s` is not fixed" % datum) + raise ValidationException("the value `%s` is not fixed" % pprint.pformat(datum)) elif schema_type == 'enum': if datum in expected_schema.symbols: return True else: - raise ValidationException("`%s`\n is not a valid enum symbol, expected\n %s" % (pprint.pformat(datum), pprint.pformat(expected_schema.symbols))) + raise ValidationException("the value `%s`\n is not a valid enum symbol, expected\n %s" % (pprint.pformat(datum), pprint.pformat(expected_schema.symbols))) elif schema_type == 'array': if isinstance(datum, list): for i, d in enumerate(datum): try: validate_ex(expected_schema.items, d) except ValidationException as v: - raise ValidationException("%s\n while validating item at position %i `%s`" % (v, i, d)) + raise ValidationException("At position %i\n%s" % (i, indent(str(v)))) return True else: - raise ValidationException("`%s` is not a list, expected list of %s" % (pprint.pformat(datum), expected_schema.items)) + raise ValidationException("the value `%s` is not a list, expected list of %s" % 
(pprint.pformat(datum), expected_schema.items)) elif schema_type == 'map': if (isinstance(datum, dict) and False not in [isinstance(k, basestring) for k in datum.keys()] and @@ -96,7 +118,9 @@ def validate_ex(expected_schema, datum): validate_ex(s, datum) except ValidationException as e: errors.append(str(e)) - raise ValidationException("`%s`\n is not valid, expected one of:\n\n%s\n\n the individual errors are:\n%s" % (pprint.pformat(datum), ",\n\n ".join([str(s) for s in expected_schema.schemas]), ";\n\n".join(errors))) + raise ValidationException("the value %s is not a valid type in the union, expected one of:\n%s" % (multi(pprint.pformat(datum), '`'), + "\n".join(["- %s, but\n %s" % (friendly(expected_schema.schemas[i]), indent(multi(errors[i]))) for i in range(0, len(expected_schema.schemas))]))) + elif schema_type in ['record', 'error', 'request']: if not isinstance(datum, dict): raise ValidationException("`%s`\n is not a dict" % pprint.pformat(datum)) @@ -106,10 +130,10 @@ def validate_ex(expected_schema, datum): validate_ex(f.type, datum.get(f.name)) except ValidationException as v: if f.name not in datum: - raise ValidationException("Missing required field `%s`" % f.name) + raise ValidationException("missing required field `%s`" % f.name) else: raise return True except ValidationException as v: - raise ValidationException("Validating field `%s`:\n%s" % (f.name, indent(str(v)))) + raise ValidationException("could not validate field `%s` because\n%s" % (f.name, multi(indent(str(v))))) raise ValidationException("Unrecognized schema_type %s" % schema_type) diff --git a/cwltool/workflow.py b/cwltool/workflow.py index c795dcb57..42d215dfc 100644 --- a/cwltool/workflow.py +++ b/cwltool/workflow.py @@ -3,6 +3,7 @@ import draft2tool from draft2tool import aslist from process import Process +from process import WorkflowException import copy import logging import random @@ -17,9 +18,6 @@ WorkflowStateItem = namedtuple('WorkflowStateItem', ['parameter', 'value']) -class WorkflowException(Exception): - pass - def idk(key): if len(key) <= 1: raise WorkflowException("Identifier is too short") @@ -142,6 +140,9 @@ def job(self, joborder, basedir, output_callback, **kwargs): # Validate job order validate.validate_ex(self.names.get_name("input_record_schema", ""), joborder) + requirements = kwargs.get("requirements", []) + self.tool.get("requirements", []) + hints = kwargs.get("hints", []) + self.tool.get("hints", []) + steps = [makeTool(step, basedir) for step in self.tool.get("steps", [])] random.shuffle(steps) @@ -168,7 +169,7 @@ def job(self, joborder, basedir, output_callback, **kwargs): if step.completed: completed += 1 else: - for newjob in self.try_make_job(step, basedir, **kwargs): + for newjob in self.try_make_job(step, basedir, requirements=requirements, hints=hints, **kwargs): if newjob: made_progress = True yield newjob @@ -235,8 +236,11 @@ def job(self, joborder, basedir, output_callback, **kwargs): joborder[d] = joborder[idk(i["id"])] del joborder[idk(i["id"])] + requirements = kwargs.get("requirements", []) + self.tool.get("requirements", []) + hints = kwargs.get("hints", []) + self.tool.get("hints", []) + self.output = None - for t in self.embedded_tool.job(joborder, basedir, self.receive_output, **kwargs): + for t in self.embedded_tool.job(joborder, basedir, self.receive_output, requirements=requirements, hints=hints, **kwargs): yield t while self.output is None: From e73f3c3a3cce00c6f07bdf4f81118bde80a44f64 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Sat, 11 Apr 2015 
23:00:10 -0400 Subject: [PATCH 072/221] Pluggable expression engine work in progress. --- cwltool/aslist.py | 5 +++ cwltool/docker.py | 67 ++++++++++++++++++++++++++++++++++++++ cwltool/draft2tool.py | 8 +---- cwltool/expression.py | 34 +++++++++++++++++++ cwltool/job.py | 76 +++++++++---------------------------------- cwltool/workflow.py | 2 +- 6 files changed, 123 insertions(+), 69 deletions(-) create mode 100644 cwltool/aslist.py create mode 100644 cwltool/docker.py create mode 100644 cwltool/expression.py diff --git a/cwltool/aslist.py b/cwltool/aslist.py new file mode 100644 index 000000000..f34a0485b --- /dev/null +++ b/cwltool/aslist.py @@ -0,0 +1,5 @@ +def aslist(l): + if isinstance(l, list): + return l + else: + return [l] diff --git a/cwltool/docker.py b/cwltool/docker.py new file mode 100644 index 000000000..7d6bcbc9b --- /dev/null +++ b/cwltool/docker.py @@ -0,0 +1,67 @@ +import subprocess + +def get_image(dockerRequirement, pull_image): + found = False + for ln in subprocess.check_output(["docker", "images", "--no-trunc", "--all"]).splitlines(): + try: + ln.index(dockerRequirement["dockerImageId"]) + found = True + except ValueError: + pass + + if not found and pull_image: + if "dockerPull" in dockerRequirement: + cmd = ["docker", "pull", dockerRequirement["dockerPull"]] + _logger.info(str(cmd)) + if not dry_run: + subprocess.check_call(cmd, stdout=sys.stderr) + found = True + elif "dockerFile" in dockerRequirement: + dockerfile_dir = tempfile.mkdtemp() + with open(os.path.join(dockerfile_dir, "Dockerfile"), "w") as df: + df.write(dockerRequirement["dockerFile"]) + cmd = ["docker", "build", "--tag=%s" % dockerRequirement["dockerImageId"], dockerfile_dir] + _logger.info(str(cmd)) + if not dry_run: + subprocess.check_call(cmd, stdout=sys.stderr) + found = True + elif "dockerLoad" in dockerRequirement: + cmd = ["docker", "load"] + _logger.info(str(cmd)) + if not dry_run: + if os.path.exists(dockerRequirement["dockerLoad"]): + _logger.info("Loading docker image from %s", dockerRequirement["dockerLoad"]) + with open(dockerRequirement["dockerLoad"], "rb") as f: + loadproc = subprocess.Popen(cmd, stdin=f, stdout=sys.stderr) + else: + _logger.info("Sending GET request to %s", dockerRequirement["dockerLoad"]) + req = requests.get(dockerRequirement["dockerLoad"], stream=True) + n = 0 + for chunk in req.iter_content(1024*1024): + n += len(chunk) + _logger.info(str(n)) + loadproc.stdin.write(chunk) + loadproc.stdin.close() + rcode = loadproc.wait() + if rcode != 0: + raise Exception("Docker load returned non-zero exit status %i" % (rcode)) + found = True + + return found + + +def get_from_requirements(requirements, hints, pull_image): + if requirements: + for r in reversed(requirements): + if r["class"] == "DockerRequirement": + if docker.get_image(r, pull_image): + return r["dockerImageId"] + else: + raise Exception("Docker image %s not found" % (self.container["imageId"])) + if hints: + for r in reversed(hints): + if r["class"] == "DockerRequirement": + if docker.get_image(r, pull_image): + return r["dockerImageId"] + + return None diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 37c1d58d5..2ad3d70cc 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -16,6 +16,7 @@ from process import Process from process import WorkflowException import validate +from aslist import aslist _logger = logging.getLogger("cwltool") @@ -206,12 +207,6 @@ def job(self, joborder, basedir, output_callback, **kwargs): j.output_callback = output_callback yield j -def aslist(l): - if 
isinstance(l, list): - return l - else: - return [l] - class CommandLineTool(Tool): def __init__(self, toolpath_object): super(CommandLineTool, self).__init__(toolpath_object, "CommandLineTool") @@ -252,7 +247,6 @@ def job(self, joborder, basedir, output_callback, use_container=True, **kwargs): j = CommandLineJob() j.joborder = builder.job - j.container = None j.stdin = None j.stdout = None builder.pathmapper = None diff --git a/cwltool/expression.py b/cwltool/expression.py new file mode 100644 index 000000000..510e31c11 --- /dev/null +++ b/cwltool/expression.py @@ -0,0 +1,34 @@ +import docker +import subprocess +import json + +def exeval(ex, jobinput, requirements, context, pull_image): + for r in reversed(requirements): + if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex["engine"]: + runtime = [] + img_id = docker.get_from_requirements(r.get("requirements"), r.get("hints"), pull_image) + if img_id: + runtime = ["docker", "run", "-i", "--rm", img_id] + + sp = subprocess.Popen(runtime + aslist(r["engineCommand"]), + shell=False, + close_fds=True, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE) + + inp = { + "script": ex["script"], + "expressionDefs": r.get("expressionDefs"), + "job": jobinput, + "context": context + } + + (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp)) + + raise WorkflowException("Unknown expression engine '%s'" % ex["engine"]) + +def do_eval(self, ex, jobinput, requirements, context=None, pull_image=True): + if isinstance(ex, dict) and "engine" in ex and "script" in ex: + return exeval(ex, jobinput, requirements, context) + else: + return ex diff --git a/cwltool/job.py b/cwltool/job.py index 17ac16756..751d8fa50 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -7,6 +7,7 @@ import logging import sys import requests +import docker _logger = logging.getLogger("cwltool") @@ -19,67 +20,20 @@ def run(self, outdir, dry_run=False, pull_image=True, rm_container=True): runtime = [] env = {} - if self.container and self.container.get("type") == "docker": - found = False - for ln in subprocess.check_output(["docker", "images", "--no-trunc", "--all"]).splitlines(): - try: - ln.index(self.container["imageId"]) - found = True - except ValueError: - pass - - if not found and pull_image: - if "file" in self.container: - dockerfile_dir = tempfile.mkdtemp() - with open(os.path.join(dockerfile_dir, "Dockerfile"), "w") as df: - df.write(self.container["file"]) - cmd = ["docker", "build", "--tag=%s" % self.container["imageId"], dockerfile_dir] - _logger.info(str(cmd)) - if not dry_run: - subprocess.check_call(cmd, stdout=sys.stderr) - found = True - if "pull" in self.container: - cmd = ["docker", "pull", self.container["pull"]] - _logger.info(str(cmd)) - if not dry_run: - subprocess.check_call(cmd, stdout=sys.stderr) - found = True - elif "load" in self.container: - cmd = ["docker", "load"] - _logger.info(str(cmd)) - if not dry_run: - if os.path.exists(self.container["load"]): - _logger.info("Loading docker image from %s", self.container["load"]) - with open(self.container["load"], "rb") as f: - loadproc = subprocess.Popen(cmd, stdin=f, stdout=sys.stderr) - else: - _logger.info("Sending GET request to %s", self.container["load"]) - req = requests.get(self.container["load"], stream=True) - n = 0 - for chunk in req.iter_content(1024*1024): - n += len(chunk) - _logger.info(str(n)) - loadproc.stdin.write(chunk) - loadproc.stdin.close() - rcode = loadproc.wait() - if rcode != 0: - raise Exception("Docker load returned non-zero exit status %i" % (rcode)) - found = 
True - - if found: - runtime = ["docker", "run", "-i"] - for d in self.pathmapper.dirs: - runtime.append("--volume=%s:%s:ro" % (os.path.abspath(d), self.pathmapper.dirs[d])) - runtime.append("--volume=%s:%s:rw" % (os.path.abspath(outdir), "/tmp/job_output")) - runtime.append("--workdir=%s" % ("/tmp/job_output")) - runtime.append("--user=%s" % (os.geteuid())) - if rm_container: - runtime.append("--rm") - for t,v in self.environment.items(): - runtime.append("--env=%s=%s" % (t, v)) - runtime.append(self.container["imageId"]) - else: - raise Exception("Docker image %s not found" % (self.container["imageId"])) + img_id = docker.get_from_requirements(self.requirements, self.hints, pull_image) + + if img_id: + runtime = ["docker", "run", "-i"] + for d in self.pathmapper.dirs: + runtime.append("--volume=%s:%s:ro" % (os.path.abspath(d), self.pathmapper.dirs[d])) + runtime.append("--volume=%s:%s:rw" % (os.path.abspath(outdir), "/tmp/job_output")) + runtime.append("--workdir=%s" % ("/tmp/job_output")) + runtime.append("--user=%s" % (os.geteuid())) + if rm_container: + runtime.append("--rm") + for t,v in self.environment.items(): + runtime.append("--env=%s=%s" % (t, v)) + runtime.append(img_id) else: env = self.environment diff --git a/cwltool/workflow.py b/cwltool/workflow.py index 42d215dfc..f7d5e37e9 100644 --- a/cwltool/workflow.py +++ b/cwltool/workflow.py @@ -1,7 +1,7 @@ import job import draft1tool import draft2tool -from draft2tool import aslist +from aslist import aslist from process import Process from process import WorkflowException import copy From 9e4d5bc4f20fa06752c5fb716565efc52abe04de Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 15 Apr 2015 11:22:06 -0400 Subject: [PATCH 073/221] Pluggable expression engine works. Test cases are updated. Also added feature gating for "scatter". 
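The engine protocol this patch settles on is small enough to sketch standalone: cwltool writes one JSON object to the engine's stdin and reads one JSON result back from its stdout. The following is a rough illustration only (engine_command is a placeholder, e.g. ["nodejs", "cwlNodeEngine.js"]); the real engine lookup and Docker handling live in expression.exeval() below.

import json
import subprocess

# Send {script, expressionDefs, job, context} to the engine and parse the reply.
def eval_expression(engine_command, script, job, context=None, expression_defs=None):
    inp = {"script": script,
           "expressionDefs": expression_defs or [],
           "job": job,
           "context": context}
    sp = subprocess.Popen(engine_command, shell=False, close_fds=True,
                          stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n")
    if sp.returncode != 0:
        raise RuntimeError("Expression engine returned non-zero exit code")
    return json.loads(stdoutdata)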
--- cwltool/docker.py | 18 +++-- cwltool/draft2tool.py | 107 +++++++++++++--------------- cwltool/expression.py | 50 ++++++++++--- cwltool/process.py | 62 +++++++++++++--- cwltool/validate.py | 2 +- cwltool/workflow.py | 38 ++++++---- docker-node-engine.sh | 4 ++ docker-node-engine/Dockerfile | 3 + docker-node-engine/cwlNodeEngine.js | 41 +++++++++++ setup.py | 4 +- 10 files changed, 233 insertions(+), 96 deletions(-) create mode 100755 docker-node-engine.sh create mode 100644 docker-node-engine/Dockerfile create mode 100755 docker-node-engine/cwlNodeEngine.js diff --git a/cwltool/docker.py b/cwltool/docker.py index 7d6bcbc9b..17318a364 100644 --- a/cwltool/docker.py +++ b/cwltool/docker.py @@ -1,7 +1,17 @@ import subprocess +import logging +import sys +import requests +import os -def get_image(dockerRequirement, pull_image): +_logger = logging.getLogger("cwltool") + +def get_image(dockerRequirement, pull_image, dry_run=False): found = False + + if "dockerImageId" not in dockerRequirement and "dockerPull" in dockerRequirement: + dockerRequirement["dockerImageId"] = dockerRequirement["dockerPull"] + for ln in subprocess.check_output(["docker", "images", "--no-trunc", "--all"]).splitlines(): try: ln.index(dockerRequirement["dockerImageId"]) @@ -50,18 +60,18 @@ def get_image(dockerRequirement, pull_image): return found -def get_from_requirements(requirements, hints, pull_image): +def get_from_requirements(requirements, hints, pull_image, dry_run=False): if requirements: for r in reversed(requirements): if r["class"] == "DockerRequirement": - if docker.get_image(r, pull_image): + if get_image(r, pull_image, dry_run): return r["dockerImageId"] else: raise Exception("Docker image %s not found" % (self.container["imageId"])) if hints: for r in reversed(hints): if r["class"] == "DockerRequirement": - if docker.get_image(r, pull_image): + if get_image(r, pull_image, dry_run): return r["dockerImageId"] return None diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 2ad3d70cc..9423a87d1 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -17,6 +17,7 @@ from process import WorkflowException import validate from aslist import aslist +import expression _logger = logging.getLogger("cwltool") @@ -24,31 +25,20 @@ module_dir = os.path.dirname(os.path.abspath(__file__)) -supportedProcessRequirements = ("DockerRequirement", "MemoryRequirement", "ExpressionEngineRequirement") +supportedProcessRequirements = ("DockerRequirement", + "MemoryRequirement", + "ExpressionEngineRequirement", + "ScatterFeature") class Builder(object): - def jseval(self, expression, context): - if isinstance(expression, list): - exp = "{return %s(%s);}" % (expression[0], ",".join([json.dumps(self.do_eval(e)) for e in expression[1:]])) - elif expression.startswith('{'): - exp = '{return function()%s();}' % (expression) - else: - exp = '{return %s;}' % (expression) - return sandboxjs.execjs(exp, "var $job = %s; var $self = %s; %s" % (json.dumps(self.job), json.dumps(context), self.jslib)) - - def do_eval(self, ex, context=None): - if isinstance(ex, dict): - if ex.get("class") == "JavascriptExpression": - if "script" in ex: - return self.jseval(ex["script"], context) - elif ex.get("ref"): - if ex["ref"].startswith("#"): - return self.job[ex["ref"][1:]] - else: - with open(os.path.join(self.basedir, ex["ref"]), "r") as f: - return f.read() - else: - return ex + # def jseval(self, expression, context): + # if isinstance(expression, list): + # exp = "{return %s(%s);}" % (expression[0], 
",".join([json.dumps(self.do_eval(e)) for e in expression[1:]])) + # elif expression.startswith('{'): + # exp = '{return function()%s();}' % (expression) + # else: + # exp = '{return %s;}' % (expression) + # return sandboxjs.execjs(exp, "var $job = %s; var $self = %s; %s" % (json.dumps(self.job), json.dumps(context), self.jslib)) def bind_input(self, schema, datum): bindings = [] @@ -128,7 +118,7 @@ def bind_input(self, schema, datum): def generate_arg(self, binding): value = binding["valueFrom"] if "do_eval" in binding: - value = self.do_eval(binding["do_eval"], value) + value = expression.do_eval(binding["do_eval"], self.job, self.requirements, self.docpath, value) prefix = binding.get("prefix") sep = binding.get("separator") @@ -165,7 +155,7 @@ def generate_arg(self, binding): class Tool(Process): - def _init_job(self, joborder, basedir): + def _init_job(self, joborder, basedir, **kwargs): # Validate job order try: validate.validate_ex(self.names.get_name("input_record_schema", ""), joborder) @@ -173,6 +163,13 @@ def _init_job(self, joborder, basedir): _logger.error("Failed to validate %s\n%s" % (pprint.pformat(joborder), v)) raise + for r in self.tool.get("requirements", []): + if r["class"] not in supportedProcessRequirements: + raise WorkflowException("Unsupported process requirement %s" % (r["class"])) + + self.requirements = kwargs.get("requirements", []) + self.tool.get("requirements", []) + self.hints = kwargs.get("hints", []) + self.tool.get("hints", []) + builder = Builder() builder.job = copy.deepcopy(joborder) builder.jslib = '' @@ -180,10 +177,7 @@ def _init_job(self, joborder, basedir): builder.files = [] builder.bindings = [] builder.schemaDefs = self.schemaDefs - - if self.tool.get("expressionDefs"): - for ex in self.tool['expressionDefs']: - builder.jslib += builder.do_eval(ex) + "\n" + builder.docpath = self.docpath builder.bindings.extend(builder.bind_input(self.inputs_record_schema, builder.job)) @@ -191,28 +185,31 @@ def _init_job(self, joborder, basedir): class ExpressionTool(Tool): - def __init__(self, toolpath_object): - super(ExpressionTool, self).__init__(toolpath_object, "ExpressionTool") + def __init__(self, toolpath_object, docpath): + super(ExpressionTool, self).__init__(toolpath_object, "ExpressionTool", docpath) class ExpressionJob(object): def run(self, outdir=None, **kwargs): - self.output_callback(self.builder.do_eval(self.script)) + self.output_callback(expression.do_eval(self.script, self.builder.job, self.requirements, self.builder.docpath)) def job(self, joborder, basedir, output_callback, **kwargs): - builder = self._init_job(joborder, basedir) + builder = self._init_job(joborder, basedir, **kwargs) j = ExpressionTool.ExpressionJob() j.builder = builder - j.script = self.tool["script"] + j.script = self.tool["expression"] j.output_callback = output_callback + j.requirements = kwargs.get("requirements", []) + self.tool.get("requirements", []) + j.hints = kwargs.get("hints", []) + self.tool.get("hints", []) + yield j class CommandLineTool(Tool): - def __init__(self, toolpath_object): - super(CommandLineTool, self).__init__(toolpath_object, "CommandLineTool") + def __init__(self, toolpath_object, docpath): + super(CommandLineTool, self).__init__(toolpath_object, "CommandLineTool", docpath) def job(self, joborder, basedir, output_callback, use_container=True, **kwargs): - builder = self._init_job(joborder, basedir) + builder = self._init_job(joborder, basedir, **kwargs) if self.tool["baseCommand"]: for n, b in 
enumerate(aslist(self.tool["baseCommand"])): @@ -252,9 +249,9 @@ def job(self, joborder, basedir, output_callback, use_container=True, **kwargs): builder.pathmapper = None if self.tool.get("stdin"): - j.stdin = builder.do_eval(self.tool["stdin"]) - if isinstance(j.stdin, dict): - j.stdin = j.stdin["path"] + j.stdin = self.tool["stdin"] + if isinstance(j.stdin, dict) and "ref" in j.stdin: + j.stdin = builder.job[j.stdin["ref"][1:]]["path"] reffiles.append(j.stdin) if self.tool.get("stdout"): @@ -268,24 +265,12 @@ def job(self, joborder, basedir, output_callback, use_container=True, **kwargs): if not j.stdout: raise Exception("stdout refers to invalid output") else: - j.stdout = builder.do_eval(self.tool["stdout"]) + j.stdout = self.tool["stdout"] if os.path.isabs(j.stdout): raise Exception("stdout must be a relative path") - j.generatefiles = {} - for t in self.tool.get("fileDefs", []): - j.generatefiles[t["filename"]] = builder.do_eval(t["value"]) - - j.environment = {} - for t in self.tool.get("environmentDefs", []): - j.environment[t["env"]] = builder.do_eval(t["value"]) - - j.requirements = kwargs.get("requirements", []) + self.tool.get("requirements", []) - j.hints = kwargs.get("hints", []) + self.tool.get("hints", []) - - for r in j.requirements: - if r["class"] not in supportedProcessRequirements: - raise WorkflowException("Unsupported process requirement %s" % (r["class"])) + j.requirements = self.requirements + j.hints = self.hints for r in (j.requirements + j.hints): if r["class"] == "DockerRequirement" and use_container: @@ -297,6 +282,16 @@ def job(self, joborder, basedir, output_callback, use_container=True, **kwargs): for f in builder.files: f["path"] = builder.pathmapper.mapper(f["path"]) + builder.requirements = j.requirements + + j.generatefiles = {} + for t in self.tool.get("fileDefs", []): + j.generatefiles[t["filename"]] = expression.do_eval(t["value"], builder.job, j.requirements, self.docpath) + + j.environment = {} + for t in self.tool.get("environmentDefs", []): + j.environment[t["env"]] = expression.do_eval(t["value"], builder.job, j.requirements, self.docpath) + j.command_line = flatten(map(builder.generate_arg, builder.bindings)) if j.stdin: @@ -349,7 +344,7 @@ def collect_output(self, schema, builder, outdir): r = None if "valueFrom" in binding: - r = builder.do_eval(binding["valueFrom"], r) + r = expression.do_eval(binding["valueFrom"], builder.job, self.requirements, self.docpath, r) if not r and schema["type"] == "record": r = {} diff --git a/cwltool/expression.py b/cwltool/expression.py index 510e31c11..967755ae6 100644 --- a/cwltool/expression.py +++ b/cwltool/expression.py @@ -1,34 +1,64 @@ import docker import subprocess import json +from aslist import aslist +import logging +import os +from process import WorkflowException +import process +import yaml +import validate -def exeval(ex, jobinput, requirements, context, pull_image): +_logger = logging.getLogger("cwltool") + +def exeval(ex, jobinput, requirements, docpath, context, pull_image): for r in reversed(requirements): if r["class"] == "ExpressionEngineRequirement" and r["id"] == ex["engine"]: + if r["id"][0] != "#": + with open(os.path.join(docpath, r["id"])) as f: + ex_obj = yaml.load(f) + sch = process.get_schema() + validate.validate_ex(sch.get_name("ExpressionEngineRequirement", ""), ex_obj) + r = ex_obj + runtime = [] img_id = docker.get_from_requirements(r.get("requirements"), r.get("hints"), pull_image) if img_id: runtime = ["docker", "run", "-i", "--rm", img_id] - sp = subprocess.Popen(runtime 
+ aslist(r["engineCommand"]), - shell=False, - close_fds=True, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE) + exdefs = [] + for exdef in r.get("expressionDefs", []): + if isinstance(exdef, dict) and "ref" in exdef: + with open(os.path.join(r["_docpath"], exdef["ref"])) as f: + exdefs.append(f.read()) + elif isinstance(exdef, basestring): + exdefs.append(exdef) inp = { "script": ex["script"], - "expressionDefs": r.get("expressionDefs"), + "expressionDefs": exdefs, "job": jobinput, "context": context } - (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp)) + _logger.debug(json.dumps(inp)) + + sp = subprocess.Popen(runtime + aslist(r["engineCommand"]), + shell=False, + close_fds=True, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE) + + (stdoutdata, stderrdata) = sp.communicate(json.dumps(inp) + "\n\n") + if sp.returncode != 0: + raise WorkflowException("Expression engine returned non-zero exit code.") + + return json.loads(stdoutdata) raise WorkflowException("Unknown expression engine '%s'" % ex["engine"]) -def do_eval(self, ex, jobinput, requirements, context=None, pull_image=True): +def do_eval(ex, jobinput, requirements, docpath, context=None, pull_image=True): if isinstance(ex, dict) and "engine" in ex and "script" in ex: - return exeval(ex, jobinput, requirements, context) + return exeval(ex, jobinput, requirements, docpath, context, pull_image) else: return ex diff --git a/cwltool/process.py b/cwltool/process.py index cacda56af..d712cd344 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -7,6 +7,7 @@ import copy import logging import pprint +from aslist import aslist TOOL_CONTEXT_URL = "https://raw.githubusercontent.com/common-workflow-language/common-workflow-language/master/schemas/draft-2/cwl-context.json" module_dir = os.path.dirname(os.path.abspath(__file__)) @@ -49,23 +50,47 @@ def extend_avro(items): n.append(t) return n +def get_schema(): + names = avro.schema.Names() + cwl_avsc = os.path.join(module_dir, 'schemas/draft-2/cwl-avro.yml') + with open(cwl_avsc) as f: + j = yaml.load(f) + j = extend_avro(j) + for t in j: + avro.schema.make_avsc_object(t, names) + return names + class Process(object): - def __init__(self, toolpath_object, validateAs): - self.names = avro.schema.Names() - cwl_avsc = os.path.join(module_dir, 'schemas/draft-2/cwl-avro.yml') - with open(cwl_avsc) as f: - j = yaml.load(f) - j = extend_avro(j) - for t in j: - avro.schema.make_avsc_object(t, self.names) + def check_feature(self, feature, kwargs): + for t in kwargs.get("requirements", []): + if t["class"] == feature: + return True + for t in kwargs.get("hints", []): + if t["class"] == feature: + return True + return False + + def __init__(self, toolpath_object, validateAs, docpath): + self.names = get_schema() + self.docpath = docpath self.tool = toolpath_object + #if self.tool.get("@context") != TOOL_CONTEXT_URL: # raise Exception("Missing or invalid '@context' field in tool description document, must be %s" % TOOL_CONTEXT_URL) # Validate tool documument validate.validate_ex(self.names.get_name(validateAs, ""), self.tool) + self.validate_requirements(self.tool, "requirements") + self.validate_requirements(self.tool, "hints") + + for t in self.tool.get("requirements", []): + t["_docpath"] = docpath + + for t in self.tool.get("hints", []): + t["_docpath"] = docpath + # Import schema defs self.schemaDefs = { "Any": [ @@ -93,6 +118,8 @@ def __init__(self, toolpath_object, validateAs): c = copy.copy(i) c["name"] = c["id"][1:] del c["id"] + if "default" in c: + c["type"] = ["null"] + 
aslist(c["type"]) self.inputs_record_schema["fields"].append(c) avro.schema.make_avsc_object(self.inputs_record_schema, self.names) @@ -101,5 +128,24 @@ def __init__(self, toolpath_object, validateAs): c = copy.copy(i) c["name"] = c["id"][1:] del c["id"] + if "default" in c: + c["type"] = ["null"] + aslist(c["type"]) self.outputs_record_schema["fields"].append(c) avro.schema.make_avsc_object(self.outputs_record_schema, self.names) + + def validate_requirements(self, tool, field): + for r in tool.get(field, []): + try: + if self.names.get_name(r["class"], "") is None: + raise validate.ValidationException("Unknown requirement %s" % (r["class"])) + validate.validate_ex(self.names.get_name(r["class"], ""), r) + if "requirements" in r: + self.validate_requirements(r, "requirements") + if "hints" in r: + self.validate_requirements(r, "hints") + except validate.ValidationException as v: + err = "While validating %s %s\n%s" % (field, r["class"], validate.indent(str(v))) + if field == "hints": + _logger.warn(err) + else: + raise validate.ValidationException(err) diff --git a/cwltool/validate.py b/cwltool/validate.py index 3a1f6266a..34a5a4b83 100644 --- a/cwltool/validate.py +++ b/cwltool/validate.py @@ -100,7 +100,7 @@ def validate_ex(expected_schema, datum): raise ValidationException("At position %i\n%s" % (i, indent(str(v)))) return True else: - raise ValidationException("the value `%s` is not a list, expected list of %s" % (pprint.pformat(datum), expected_schema.items)) + raise ValidationException("the value `%s` is not a list, expected list of %s" % (pprint.pformat(datum), friendly(expected_schema.items))) elif schema_type == 'map': if (isinstance(datum, dict) and False not in [isinstance(k, basestring) for k in datum.keys()] and diff --git a/cwltool/workflow.py b/cwltool/workflow.py index f7d5e37e9..9581c9763 100644 --- a/cwltool/workflow.py +++ b/cwltool/workflow.py @@ -25,25 +25,26 @@ def idk(key): raise WorkflowException("Must start with #") return key[1:] -def makeTool(toolpath_object, basedir): +def makeTool(toolpath_object, docpath): + """docpath is the directory the tool file is located.""" if "schema" in toolpath_object: return draft1tool.Tool(toolpath_object) elif "impl" in toolpath_object and toolpath_object.get("class", "External") == "External": - return External(toolpath_object, basedir) + return External(toolpath_object, docpath) if "class" in toolpath_object: if toolpath_object["class"] == "CommandLineTool": - return draft2tool.CommandLineTool(toolpath_object) + return draft2tool.CommandLineTool(toolpath_object, docpath) elif toolpath_object["class"] == "ExpressionTool": - return draft2tool.ExpressionTool(toolpath_object) + return draft2tool.ExpressionTool(toolpath_object, docpath) elif toolpath_object["class"] == "Workflow": - return Workflow(toolpath_object) + return Workflow(toolpath_object, docpath) else: raise WorkflowException("Missing 'class' field, expecting one of: Workflow, CommandLineTool, ExpressionTool, External") class Workflow(Process): - def __init__(self, toolpath_object): - super(Workflow, self).__init__(toolpath_object, "Workflow") + def __init__(self, toolpath_object, docpath): + super(Workflow, self).__init__(toolpath_object, "Workflow", docpath) def receive_output(self, step, outputparms, jobout): _logger.info("Job got output: %s", jobout) @@ -59,6 +60,8 @@ def try_make_job(self, step, basedir, **kwargs): inputobj = {} if "scatter" in step.tool: + if not self.check_feature("ScatterFeature", kwargs): + raise WorkflowException("Must include ScatterFeature in 
requirements.") inputparms = copy.deepcopy(step.tool["inputs"]) outputparms = copy.deepcopy(step.tool["outputs"]) scatter = aslist(step.tool["scatter"]) @@ -90,7 +93,7 @@ def try_make_job(self, step, basedir, **kwargs): is_array = isinstance(inp["type"], dict) and inp["type"]["type"] == "array" for connection in aslist(connections): src = idk(connection["source"]) - if src in self.state: + if src in self.state and self.state[src] is not None: if self.state[src].parameter["type"] == inp["type"]: # source and input types are the same if is_array and iid in inputobj: @@ -108,8 +111,10 @@ def try_make_job(self, step, basedir, **kwargs): inputobj[iid] = [self.state[src].value] else: raise WorkflowException("Type mismatch between '%s' (%s) and '%s' (%s)" % (src, self.state[src].parameter["type"], idk(inp["id"]), inp["type"])) + elif src not in self.state: + raise WorkflowException("Connect source '%s' on parameter '%s' does not exist" % (src, inp["id"])) else: - raise WorkflowException("Connect source '%s' on parameter '%s' does not exist" % ()) + return elif "default" in inp: inputobj[iid] = inp["default"] else: @@ -185,9 +190,12 @@ def job(self, joborder, basedir, output_callback, **kwargs): output_callback(wo) class External(Process): - def __init__(self, toolpath_object, basedir): + def __init__(self, toolpath_object, docpath): self.impl = toolpath_object["impl"] - self.embedded_tool = makeTool(from_url(os.path.join(basedir, self.impl)), basedir) + try: + self.embedded_tool = makeTool(from_url(os.path.join(docpath, self.impl)), docpath) + except validate.ValidationException as v: + raise WorkflowException("Tool definition %s failed validation:\n%s" % (os.path.join(docpath, self.impl), validate.indent(str(v)))) if "id" in toolpath_object: self.id = toolpath_object["id"] @@ -220,7 +228,7 @@ def __init__(self, toolpath_object, basedir): i["id"] = toolid - super(External, self).__init__(toolpath_object, "Process") + super(External, self).__init__(toolpath_object, "Process", docpath) def receive_output(self, jobout): self.output = {} @@ -236,11 +244,11 @@ def job(self, joborder, basedir, output_callback, **kwargs): joborder[d] = joborder[idk(i["id"])] del joborder[idk(i["id"])] - requirements = kwargs.get("requirements", []) + self.tool.get("requirements", []) - hints = kwargs.get("hints", []) + self.tool.get("hints", []) + kwargs["requirements"] = kwargs.get("requirements", []) + self.tool.get("requirements", []) + kwargs["hints"] = kwargs.get("hints", []) + self.tool.get("hints", []) self.output = None - for t in self.embedded_tool.job(joborder, basedir, self.receive_output, requirements=requirements, hints=hints, **kwargs): + for t in self.embedded_tool.job(joborder, basedir, self.receive_output, **kwargs): yield t while self.output is None: diff --git a/docker-node-engine.sh b/docker-node-engine.sh new file mode 100755 index 000000000..10957a3bb --- /dev/null +++ b/docker-node-engine.sh @@ -0,0 +1,4 @@ +#!/bin/sh + +cd docker-node-engine +docker build --tag=cwl-nodejs-engine . 
diff --git a/docker-node-engine/Dockerfile b/docker-node-engine/Dockerfile new file mode 100644 index 000000000..41d162897 --- /dev/null +++ b/docker-node-engine/Dockerfile @@ -0,0 +1,3 @@ +FROM debian:8 +RUN apt-get update && apt-get install -qq nodejs +ADD cwlNodeEngine.js /usr/local/bin/ diff --git a/docker-node-engine/cwlNodeEngine.js b/docker-node-engine/cwlNodeEngine.js new file mode 100755 index 000000000..a98503f1a --- /dev/null +++ b/docker-node-engine/cwlNodeEngine.js @@ -0,0 +1,41 @@ +#!/usr/bin/env nodejs + +"use strict"; + +process.stdin.setEncoding('utf8'); + +var incoming = ""; + +process.stdin.on('readable', function() { + var chunk = process.stdin.read(); + if (chunk !== null) { + incoming += chunk; + } +}); + +process.stdin.on('end', function() { + var j = JSON.parse(incoming); + var exp = "" + + if (j.script[0] == "{") { + exp = "{return function()" + j.script + "();}"; + } + else { + exp = "{return " + j.script + ";}"; + } + + var fn = '"use strict";\n'; + + if (j.expressionDefs) { + for (var index = 0; index < j.expressionDefs.length; ++index) { + fn += j.expressionDefs[index] + "\n"; + } + } + + fn += "var $job = " + JSON.stringify(j.job) + ";\n"; + fn += "var $self = " + JSON.stringify(j.context) + ";\n" + + fn += "(function()" + exp + ")()"; + + process.stdout.write(JSON.stringify(require("vm").runInNewContext(fn, {}))); +}); diff --git a/setup.py b/setup.py index d5a4720b0..dca391b03 100644 --- a/setup.py +++ b/setup.py @@ -42,8 +42,8 @@ 'requests', 'PyYAML', 'avro', - 'rdflib', - 'rdflib-jsonld' + 'rdflib >= 4.2.0', + 'rdflib-jsonld >= 0.3.0' ], test_suite='tests', tests_require=[], From 960f29ed100ab2f75afaa1900960a7a53250d547 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Sun, 3 May 2015 22:59:49 -0400 Subject: [PATCH 074/221] improvements to doc organization --- cwltool/process.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cwltool/process.py b/cwltool/process.py index d712cd344..88add6e92 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -44,6 +44,7 @@ def extend_avro(items): r["fields"] = specialize(r["fields"], t["specialize"]) r["fields"].extend(t["fields"]) r["extends"] = t["extends"] + r["abstract"] = t.get("abstract", False) r["doc"] = t.get("doc", "") types[t["name"]] = r t = r @@ -57,7 +58,8 @@ def get_schema(): j = yaml.load(f) j = extend_avro(j) for t in j: - avro.schema.make_avsc_object(t, names) + if not t.get("abstract"): + avro.schema.make_avsc_object(t, names) return names class Process(object): From 782f28a31c9aaa5f402a7cb6b07a80121318ca0c Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 4 May 2015 23:08:40 -0400 Subject: [PATCH 075/221] consolodated schema stuff into avro_ld --- avro_ld/__init__.py | 0 avro_ld/jsonld_context.py | 100 +++++++++++++ avro_ld/makedoc.py | 305 ++++++++++++++++++++++++++++++++++++++ avro_ld/schema.py | 44 ++++++ cwltool/main.py | 27 +++- cwltool/process.py | 42 +----- 6 files changed, 475 insertions(+), 43 deletions(-) create mode 100644 avro_ld/__init__.py create mode 100755 avro_ld/jsonld_context.py create mode 100644 avro_ld/makedoc.py create mode 100644 avro_ld/schema.py diff --git a/avro_ld/__init__.py b/avro_ld/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/avro_ld/jsonld_context.py b/avro_ld/jsonld_context.py new file mode 100755 index 000000000..1da690523 --- /dev/null +++ b/avro_ld/jsonld_context.py @@ -0,0 +1,100 @@ +import shutil +import json +import yaml +import os +import subprocess +import copy +import pprint +import re +import sys 
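+# rdflib (imported below) is used to emit the schema as an RDFS graph alongside the generated JSON-LD context.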
+import rdflib +from rdflib import Graph +import rdflib.namespace +from rdflib.namespace import RDF, RDFS + +def pred(datatype, field, name, context, defaultPrefix): + v = None + if field and "jsonldPredicate" in field: + v = field["jsonldPredicate"] + elif "jsonldPredicate" in datatype: + for d in datatype["jsonldPredicate"]: + if d["symbol"] == name: + v = d["predicate"] + elif field and "jsonldPrefix" in field: + defaultPrefix = field["jsonldPrefix"] + elif "jsonldPrefix" in datatype: + defaultPrefix = datatype["jsonldPrefix"] + + if not v: + v = "%s:%s" % (defaultPrefix, name) + + if name in context: + if context[name] != v: + raise Exception("Predicate collision on %s, %s != %s" % (name, context[name], v)) + else: + context[name] = v + + return v + +def avrold_to_jsonld_context(j): + context = {} + namespaces = {} + g = Graph() + defaultPrefix = "" + + for t in j: + if "jsonldVocab" in t: + for prefix in t["jsonldPrefixes"]: + context[prefix] = t["jsonldPrefixes"][prefix] + namespaces[prefix] = rdflib.namespace.Namespace(t["jsonldPrefixes"][prefix]) + if "jsonldVocab" in t: + defaultPrefix = t["jsonldVocab"] + + for k,v in namespaces.items(): + g.bind(k, v) + + for t in j: + if t["type"] == "record": + classnode = namespaces["cwl"][t["name"]] + g.add((classnode, RDF.type, RDFS.Class)) + + if "jsonldPrefix" in t: + predicate = "%s:%s" % (t["jsonldPrefix"], t["name"]) + else: + predicate = "%s:%s" % (defaultPrefix, t["name"]) + + if context.get(t["name"], predicate) != predicate: + raise Exception("Predicate collision on '%s', '%s' != '%s'" % (t["name"], context[t["name"]], predicate)) + + context[t["name"]] = predicate + + for i in t["fields"]: + v = pred(t, i, i["name"], context, defaultPrefix) + + if isinstance(v, basestring): + v = v if v[0] != "@" else None + else: + v = v["@id"] if v["@id"][0] != "@" else None + + if v: + (ns, ln) = rdflib.namespace.split_uri(unicode(v)) + propnode = namespaces[ns[0:-1]][ln] + g.add((propnode, RDF.type, RDF.Property)) + g.add((propnode, RDFS.domain, classnode)) + + # TODO generate range from datatype. + + if "extends" in t: + g.add((classnode, RDFS.subClassOf, namespaces["cwl"][t["extends"]])) + elif t["type"] == "enum": + for i in t["symbols"]: + pred(t, None, i, context, defaultPrefix) + + return (context, g) + +if __name__ == "__main__": + with open(sys.argv[1]) as f: + j = yaml.load(f) + (ctx, g) = avrold_to_jsonld_context(j) + print json.dumps(ctx, indent=4, sort_keys=True) + diff --git a/avro_ld/makedoc.py b/avro_ld/makedoc.py new file mode 100644 index 000000000..50a427c74 --- /dev/null +++ b/avro_ld/makedoc.py @@ -0,0 +1,305 @@ +import mistune +import schema +import json +import yaml +import os +import copy +import re +import sys +import StringIO + +def has_types(items): + r = [] + if isinstance(items, dict): + for n in ("type", "items", "values"): + if n in items: + r.extend(has_types(items[n])) + return r + if isinstance(items, list): + for i in items: + r.extend(has_types(i)) + return r + if isinstance(items, basestring): + return [items] + return [] + +class MyRenderer(mistune.Renderer): + def header(self, text, level, raw=None): + return """
<h1 id="%s">%s</h1>
""" % (to_id(text), text) + +def to_id(text): + textid = text + if text[0] in ("0", "1", "2", "3", "4", "5", "6", "7", "8", "9"): + try: + textid = text[text.index(" ")+1:] + except ValueError: + pass + textid = textid.lower().replace(" ", "_") + return textid + +class ToC(object): + def __init__(self): + self.first_toc_entry = True + self.numbering = [0] + self.toc = "" + self.start_numbering = True + + def add_entry(self, thisdepth, title): + depth = len(self.numbering) + if thisdepth < depth: + self.toc += "" + for n in range(0, depth-thisdepth): + self.numbering.pop() + self.toc += "" + self.numbering[-1] += 1 + elif thisdepth == depth: + if not self.first_toc_entry: + self.toc += "" + else: + self.first_toc_entry = False + self.numbering[-1] += 1 + elif thisdepth > depth: + self.numbering.append(1) + + if self.start_numbering: + num = "%i.%s" % (self.numbering[0], ".".join([str(n) for n in self.numbering[1:]])) + else: + num = "" + self.toc += """
  • %s %s" + c += """""" + return c + +def typefmt(tp, nbsp=False): + if isinstance(tp, list): + if nbsp: + return " | ".join([typefmt(n) for n in tp]) + else: + return " | ".join([typefmt(n) for n in tp]) + if isinstance(tp, dict): + if tp["type"] == "array": + return "array<%s>" % (typefmt(tp["items"], True)) + else: + if str(tp) in ("null", "boolean", "int", "long", "float", "double", "bytes", "string", "record", "enum", "array", "map"): + return """%s""" % str(tp) + else: + return """%s""" % (to_id(str(tp)), str(tp)) + +def add_dictlist(di, key, val): + if key not in di: + di[key] = [] + di[key].append(val) + +def number_headings(toc, maindoc): + mdlines = [] + for line in maindoc.splitlines(): + if line.strip() == "# Introduction": + toc.start_numbering = True + toc.numbering = [0] + + m = re.match(r'^(#+) (.*)', line) + if m: + num = toc.add_entry(len(m.group(1)), m.group(2)) + line = "%s %s %s" % (m.group(1), num, m.group(2)) + #elif len(line) > 0 and line[0] == "#": + # toc += """
<li><a href="#%s">%s</a></li>
  • \n""" % (to_id(line[2:]), line[2:]) + line = re.sub(r'^(https?://\S+)', r'[\1](\1)', line) + mdlines.append(line) + + maindoc = '\n'.join(mdlines) + return maindoc + +class RenderType(object): + def __init__(self, toc, j): + self.typedoc = StringIO.StringIO() + self.toc = toc + self.subs = {} + self.docParent = {} + for t in j: + if "extends" in t: + add_dictlist(self.subs, t["extends"], t["name"]) + if "docParent" not in t: + add_dictlist(self.docParent, t["extends"], t["name"]) + + if t.get("docParent"): + add_dictlist(self.docParent, t["docParent"], t["name"]) + + alltypes = schema.extend_avro(j) + + self.typemap = {} + self.uses = {} + for t in alltypes: + self.typemap[t["name"]] = t + if t["type"] == "record": + for f in t["fields"]: + p = has_types(f) + for tp in p: + if tp not in self.uses: + self.uses[tp] = [] + if (t["name"], f["name"]) not in self.uses[tp]: + self.uses[tp].append((t["name"], f["name"])) + + for f in alltypes: + if "extends" not in f and not f.get("docParent"): + self.render_type(f, 1) + + + def render_type(self, f, depth): + if "doc" not in f: + f["doc"] = "" + + f["type"] = copy.deepcopy(f) + f["doc"] = "" + f = f["type"] + + if "doc" not in f: + f["doc"] = "" + if f["type"] == "record": + for field in f["fields"]: + if "doc" not in field: + field["doc"] = "" + + if f["type"] != "doc": + lines = [] + for l in f["doc"].splitlines(): + if len(l) > 0 and l[0] == "#": + l = "#" + l + lines.append(l) + f["doc"] = "\n".join(lines) + + num = self.toc.add_entry(depth, f["name"]) + doc = "## %s %s\n" % (num, f["name"]) + + if f["type"] == "doc": + f["doc"] = number_headings(self.toc, f["doc"]) + + if "extends" in f: + doc += "\n\nExtends [%s](#%s)" % (f["extends"], to_id(f["extends"])) + if f["name"] in self.subs: + doc += "\n\nExtended by" + doc += ", ".join([" [%s](#%s)" % (s, to_id(s)) for s in self.subs[f["name"]]]) + if f["name"] in self.uses: + doc += "\n\nReferenced by" + doc += ", ".join([" [%s.%s](#%s)" % (s[0], s[1], to_id(s[0])) for s in self.uses[f["name"]]]) + doc = doc + "\n\n" + f["doc"] + + doc = mistune.markdown(doc, renderer=MyRenderer()) + + if f["type"] == "record": # and not f.get("abstract"): + doc += "
<h3>Fields</h3>
    " + doc += """""" + doc += "" + for i in f["fields"]: + doc += "" + tp = i["type"] + if isinstance(tp, list) and tp[0] == "null": + opt = False + tp = tp[1:] + else: + opt = True + doc += "" % (i["name"], typefmt(tp), opt, mistune.markdown(i["doc"])) + doc += "" + doc += """
    <tr><th>field</th><th>type</th><th>required</th><th>description</th></tr>
    <tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>
    """ + f["doc"] = doc + + self.typedoc.write(f["doc"]) + + for s in self.docParent.get(f["name"], []): + self.render_type(self.typemap[s], depth+1) + +def avrold_doc(j, outdoc): + toc = ToC() + toc.start_numbering = False + + rt = RenderType(toc, j) + + outdoc.write(""" + + + + + + + + + + + +
+ """) + outdoc.write("""<div id="lefttoc">""" + toc.toc + """</div>""") + outdoc.write("""<div id="main">""") + outdoc.write(rt.typedoc.getvalue().encode("utf-8")) + outdoc.write("""</div>""") + outdoc.write("""
    + + """) + +if __name__ == "__main__": + with open(sys.argv[1]) as f: + with open("index.html", "w") as i: + j = yaml.load(f) + avrold_doc(j, i) diff --git a/avro_ld/schema.py b/avro_ld/schema.py new file mode 100644 index 000000000..c305f537c --- /dev/null +++ b/avro_ld/schema.py @@ -0,0 +1,44 @@ +import avro +import copy + +def specialize(items, spec): + if isinstance(items, dict): + for n in ("type", "items", "values"): + if n in items: + items[n] = specialize(items[n], spec) + return items + if isinstance(items, list): + n = [] + for i in items: + n.append(specialize(i, spec)) + return n + if isinstance(items, basestring): + if items in spec: + return spec[items] + return items + +def extend_avro(items): + types = {t["name"]: t for t in items} + n = [] + for t in items: + if "extends" in t: + r = copy.deepcopy(types[t["extends"]]) + r["name"] = t["name"] + if "specialize" in t: + r["fields"] = specialize(r["fields"], t["specialize"]) + r["fields"].extend(t["fields"]) + r["extends"] = t["extends"] + r["abstract"] = t.get("abstract", False) + r["doc"] = t.get("doc", "") + types[t["name"]] = r + t = r + n.append(t) + return n + +def schema(j): + names = avro.schema.Names() + j = extend_avro(j) + for t in j: + if not t.get("abstract"): + avro.schema.make_avsc_object(t, names) + return names diff --git a/cwltool/main.py b/cwltool/main.py index a35ee5139..9193f0564 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -12,10 +12,13 @@ import workflow import validate import tempfile +import avro_ld.jsonld_context +import yaml _logger = logging.getLogger("cwltool") _logger.addHandler(logging.StreamHandler()) +module_dir = os.path.dirname(os.path.abspath(__file__)) def printrdf(workflow, sr): from rdflib import Graph, plugin @@ -26,7 +29,7 @@ def printrdf(workflow, sr): def main(): parser = argparse.ArgumentParser() - parser.add_argument("workflow", type=str) + parser.add_argument("workflow", type=str, nargs="?", default=None) parser.add_argument("job_order", type=str, nargs="?", default=None) parser.add_argument("--conformance-test", action="store_true") parser.add_argument("--basedir", type=str) @@ -35,11 +38,15 @@ def main(): parser.add_argument("--leave-container", action="store_true", help="Do not delete Docker container after it exits") parser.add_argument("--no-pull", default=False, action="store_true", help="Do not try to pull the Docker image") parser.add_argument("--dry-run", action="store_true", help="Do not execute") - parser.add_argument("--verbose", action="store_true", help="Print more logging") - parser.add_argument("--debug", action="store_true", help="Print even more logging") + + parser.add_argument("--print-jsonld-context", action="store_true", help="Print JSON-LD context for CWL file") parser.add_argument("--print-rdf", action="store_true", help="Print corresponding RDF graph for workflow") parser.add_argument("--rdf-serializer", help="Output RDF serialization format (one of turtle (default), n3, nt, xml)", default="turtle") + parser.add_argument("--verbose", action="store_true", help="Print more logging") + parser.add_argument("--debug", action="store_true", help="Print even more logging") + + args = parser.parse_args() if args.verbose: @@ -47,12 +54,26 @@ def main(): if args.debug: logging.getLogger("cwltool").setLevel(logging.DEBUG) + if args.print_jsonld_context: + cwl_avsc = os.path.join(module_dir, 'schemas/draft-2/cwl-avro.yml') + with open(cwl_avsc) as f: + j = yaml.load(f) + (ctx, g) = avro_ld.jsonld_context.avrold_to_jsonld_context(j) + print json.dumps(ctx, 
indent=4, sort_keys=True) + return 0 + + if not args.workflow: + _logger.error("CWL document required") + parser.print_help() + return 1 + if args.print_rdf: printrdf(args.workflow, args.rdf_serializer) return 0 if not args.job_order: _logger.error("Input object required") + parser.print_help() return 1 basedir = args.basedir if args.basedir else os.path.abspath(os.path.dirname(args.job_order)) diff --git a/cwltool/process.py b/cwltool/process.py index 88add6e92..b269f9ad1 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -8,6 +8,7 @@ import logging import pprint from aslist import aslist +import avro_ld.schema TOOL_CONTEXT_URL = "https://raw.githubusercontent.com/common-workflow-language/common-workflow-language/master/schemas/draft-2/cwl-context.json" module_dir = os.path.dirname(os.path.abspath(__file__)) @@ -17,50 +18,11 @@ class WorkflowException(Exception): pass -def specialize(items, spec): - if isinstance(items, dict): - for n in ("type", "items", "values"): - if n in items: - items[n] = specialize(items[n], spec) - return items - if isinstance(items, list): - n = [] - for i in items: - n.append(specialize(i, spec)) - return n - if isinstance(items, basestring): - if items in spec: - return spec[items] - return items - -def extend_avro(items): - types = {t["name"]: t for t in items} - n = [] - for t in items: - if "extends" in t: - r = copy.deepcopy(types[t["extends"]]) - r["name"] = t["name"] - if "specialize" in t: - r["fields"] = specialize(r["fields"], t["specialize"]) - r["fields"].extend(t["fields"]) - r["extends"] = t["extends"] - r["abstract"] = t.get("abstract", False) - r["doc"] = t.get("doc", "") - types[t["name"]] = r - t = r - n.append(t) - return n - def get_schema(): - names = avro.schema.Names() cwl_avsc = os.path.join(module_dir, 'schemas/draft-2/cwl-avro.yml') with open(cwl_avsc) as f: j = yaml.load(f) - j = extend_avro(j) - for t in j: - if not t.get("abstract"): - avro.schema.make_avsc_object(t, names) - return names + return avro_ld.schema.schema(j) class Process(object): def check_feature(self, feature, kwargs): From 0cb55372d922506315b546eb3092278ecadc4a90 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 8 May 2015 02:33:16 +0000 Subject: [PATCH 076/221] Build and run cwltool in Docker --- Dockerfile | 31 ++++++++++++ build-cwl-docker.sh | 13 +++++ cwl-docker.sh | 2 + wrapdocker | 115 ++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 161 insertions(+) create mode 100644 Dockerfile create mode 100755 build-cwl-docker.sh create mode 100755 cwl-docker.sh create mode 100755 wrapdocker diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000..4b524c57b --- /dev/null +++ b/Dockerfile @@ -0,0 +1,31 @@ +FROM ubuntu:14.04 +MAINTAINER jerome.petazzoni@docker.com + +# Based on https://github.com/jpetazzo/dind + +# Let's start with some basic stuff. +RUN apt-get update -qq && apt-get install -qqy \ + apt-transport-https \ + ca-certificates \ + curl \ + lxc \ + iptables \ + python-setuptools + +# Install Docker from Docker Inc. repositories. +RUN curl -sSL https://get.docker.com/ubuntu/ | sh + +# Install the magic wrapper. +ADD ./wrapdocker /usr/local/bin/wrapdocker +RUN chmod +x /usr/local/bin/wrapdocker + +# Install cwltool +ADD setup.py README.rst cwltool/ /root/cwltool/ +ADD cwltool/ /root/cwltool/cwltool +ADD cwltool/schemas/ /root/cwltool/cwltool/schemas +RUN cd /root/cwltool && easy_install . + +# Define additional metadata for our image. 
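+# (VOLUME keeps the nested Docker daemon's layer storage out of the container
+# filesystem; the wrapdocker entrypoint brings that daemon up and then execs
+# cwltool.)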
+VOLUME /var/lib/docker +ENTRYPOINT ["wrapdocker", "cwltool"] + diff --git a/build-cwl-docker.sh b/build-cwl-docker.sh new file mode 100755 index 000000000..d28561b28 --- /dev/null +++ b/build-cwl-docker.sh @@ -0,0 +1,13 @@ +#!/bin/sh + +restore=0 +if test -L cwltool/schemas ; then + rm cwltool/schemas + cp -r ../schemas cwltool/schemas + restore=1 +fi +docker build -t cwltool . +if test $restore = 1 ; then + rm -r cwltool/schemas + ln -s ../../schemas cwltool/schemas +fi diff --git a/cwl-docker.sh b/cwl-docker.sh new file mode 100755 index 000000000..190d1db8b --- /dev/null +++ b/cwl-docker.sh @@ -0,0 +1,2 @@ +#!/bin/sh +docker run --privileged -ti --volume=$PWD:/tmp/workdir -w=/tmp/workdir cwltool $* diff --git a/wrapdocker b/wrapdocker new file mode 100755 index 000000000..f84897528 --- /dev/null +++ b/wrapdocker @@ -0,0 +1,115 @@ +#!/bin/bash + +# Taken from https://github.com/jpetazzo/dind + +# Ensure that all nodes in /dev/mapper correspond to mapped devices currently loaded by the device-mapper kernel driver +dmsetup mknodes + +# First, make sure that cgroups are mounted correctly. +CGROUP=/sys/fs/cgroup +: {LOG:=stdio} + +[ -d $CGROUP ] || + mkdir $CGROUP + +mountpoint -q $CGROUP || + mount -n -t tmpfs -o uid=0,gid=0,mode=0755 cgroup $CGROUP || { + echo "Could not make a tmpfs mount. Did you use --privileged?" + exit 1 + } + +if [ -d /sys/kernel/security ] && ! mountpoint -q /sys/kernel/security +then + mount -t securityfs none /sys/kernel/security || { + echo "Could not mount /sys/kernel/security." + echo "AppArmor detection and --privileged mode might break." + } +fi + +# Mount the cgroup hierarchies exactly as they are in the parent system. +for SUBSYS in $(cut -d: -f2 /proc/1/cgroup) +do + [ -d $CGROUP/$SUBSYS ] || mkdir $CGROUP/$SUBSYS + mountpoint -q $CGROUP/$SUBSYS || + mount -n -t cgroup -o $SUBSYS cgroup $CGROUP/$SUBSYS + + # The two following sections address a bug which manifests itself + # by a cryptic "lxc-start: no ns_cgroup option specified" when + # trying to start containers withina container. + # The bug seems to appear when the cgroup hierarchies are not + # mounted on the exact same directories in the host, and in the + # container. + + # Named, control-less cgroups are mounted with "-o name=foo" + # (and appear as such under /proc//cgroup) but are usually + # mounted on a directory named "foo" (without the "name=" prefix). + # Systemd and OpenRC (and possibly others) both create such a + # cgroup. To avoid the aforementioned bug, we symlink "foo" to + # "name=foo". This shouldn't have any adverse effect. + echo $SUBSYS | grep -q ^name= && { + NAME=$(echo $SUBSYS | sed s/^name=//) + ln -s $SUBSYS $CGROUP/$NAME + } + + # Likewise, on at least one system, it has been reported that + # systemd would mount the CPU and CPU accounting controllers + # (respectively "cpu" and "cpuacct") with "-o cpuacct,cpu" + # but on a directory called "cpu,cpuacct" (note the inversion + # in the order of the groups). This tries to work around it. + [ $SUBSYS = cpuacct,cpu ] && ln -s $SUBSYS $CGROUP/cpu,cpuacct +done + +# Note: as I write those lines, the LXC userland tools cannot setup +# a "sub-container" properly if the "devices" cgroup is not in its +# own hierarchy. Let's detect this and issue a warning. +grep -q :devices: /proc/1/cgroup || + echo "WARNING: the 'devices' cgroup should be in its own hierarchy." +grep -qw devices /proc/1/cgroup || + echo "WARNING: it looks like the 'devices' cgroup is not mounted." + +# Now, close extraneous file descriptors. 
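+# (Every descriptor except stdin/stdout/stderr is closed below so the daemon
+# started later does not inherit stray handles; "eval exec" is used because
+# the descriptor number is only known at runtime.)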
+pushd /proc/self/fd >/dev/null +for FD in * +do + case "$FD" in + # Keep stdin/stdout/stderr + [012]) + ;; + # Nuke everything else + *) + eval exec "$FD>&-" + ;; + esac +done +popd >/dev/null + + +# If a pidfile is still around (for example after a container restart), +# delete it so that docker can start. +rm -rf /var/run/docker.pid + +# If we were given a PORT environment variable, start as a simple daemon; +# otherwise, spawn a shell as well +if [ "$PORT" ] +then + exec docker -d -H 0.0.0.0:$PORT -H unix:///var/run/docker.sock \ + $DOCKER_DAEMON_ARGS +else + if [ "$LOG" == "file" ] + then + docker -d $DOCKER_DAEMON_ARGS &>/var/log/docker.log & + else + docker -d $DOCKER_DAEMON_ARGS & + fi + (( timeout = 60 + SECONDS )) + until docker info >/dev/null 2>&1 + do + if (( SECONDS >= timeout )); then + echo 'Timed out trying to connect to internal docker host.' >&2 + break + fi + sleep 1 + done + [[ $1 ]] && exec "$@" + exec bash --login +fi From 46e610347766c16a31b16980d2ec412b726136e4 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 11 May 2015 13:15:23 -0400 Subject: [PATCH 077/221] Fix Docker loading from http. Add secondaryFiles support. --- cwltool/docker.py | 1 + cwltool/draft2tool.py | 30 +++++++++++++++++++++++------- cwltool/job.py | 5 +++++ cwltool/pathmapper.py | 7 +++---- 4 files changed, 32 insertions(+), 11 deletions(-) diff --git a/cwltool/docker.py b/cwltool/docker.py index 17318a364..d92bc1be6 100644 --- a/cwltool/docker.py +++ b/cwltool/docker.py @@ -44,6 +44,7 @@ def get_image(dockerRequirement, pull_image, dry_run=False): with open(dockerRequirement["dockerLoad"], "rb") as f: loadproc = subprocess.Popen(cmd, stdin=f, stdout=sys.stderr) else: + loadproc = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=sys.stderr) _logger.info("Sending GET request to %s", dockerRequirement["dockerLoad"]) req = requests.get(dockerRequirement["dockerLoad"], stream=True) n = 0 diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 9423a87d1..e6b45acbf 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -18,6 +18,7 @@ import validate from aslist import aslist import expression +import re _logger = logging.getLogger("cwltool") @@ -30,6 +31,12 @@ "ExpressionEngineRequirement", "ScatterFeature") +def substitute(value, replace): + if replace[0] == "^": + return substitute(value[0:value.rindex('.')], replace[1:]) + else: + return value + replace + class Builder(object): # def jseval(self, expression, context): # if isinstance(expression, list): @@ -91,6 +98,19 @@ def bind_input(self, schema, datum): with open(os.path.join(self.basedir, datum["path"]), "rb") as f: datum["contents"] = f.read(CONTENT_LIMIT) self.files.append(datum) + if "secondaryFiles" in schema: + if "secondaryFiles" not in datum: + datum["secondaryFiles"] = [] + for sf in aslist(schema["secondaryFiles"]): + if isinstance(sf, dict): + sfpath = expression.do_eval(sf, self.job, self.requirements, self.docpath, datum["path"]) + else: + sfpath = {"path": substitute(datum["path"], sf)} + if isinstance(sfpath, list): + datum["secondaryFiles"].extend(sfpath) + else: + datum["secondaryFiles"].append(sfpath) + self.files.append(sfpath) b = None if "commandLineBinding" in schema and isinstance(schema["commandLineBinding"], dict): @@ -157,11 +177,7 @@ def generate_arg(self, binding): class Tool(Process): def _init_job(self, joborder, basedir, **kwargs): # Validate job order - try: - validate.validate_ex(self.names.get_name("input_record_schema", ""), joborder) - except validate.ValidationException as v: - 
_logger.error("Failed to validate %s\n%s" % (pprint.pformat(joborder), v)) - raise + validate.validate_ex(self.names.get_name("input_record_schema", ""), joborder) for r in self.tool.get("requirements", []): if r["class"] not in supportedProcessRequirements: @@ -263,11 +279,11 @@ def job(self, joborder, basedir, output_callback, use_container=True, **kwargs): out["outputBinding"] = out.get("outputBinding", {}) out["outputBinding"]["glob"] = filename if not j.stdout: - raise Exception("stdout refers to invalid output") + raise validate.ValidationException("stdout refers to invalid output") else: j.stdout = self.tool["stdout"] if os.path.isabs(j.stdout): - raise Exception("stdout must be a relative path") + raise validate.ValidationException("stdout must be a relative path") j.requirements = self.requirements j.hints = self.hints diff --git a/cwltool/job.py b/cwltool/job.py index 751d8fa50..eccdea497 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -8,6 +8,7 @@ import sys import requests import docker +from process import WorkflowException _logger = logging.getLogger("cwltool") @@ -22,6 +23,10 @@ def run(self, outdir, dry_run=False, pull_image=True, rm_container=True): img_id = docker.get_from_requirements(self.requirements, self.hints, pull_image) + for f in self.pathmapper.files(): + if not os.path.exists(f): + raise WorkflowException("Required input file %s not found" % f) + if img_id: runtime = ["docker", "run", "-i"] for d in self.pathmapper.dirs: diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py index 45310f65e..cdbd187dc 100644 --- a/cwltool/pathmapper.py +++ b/cwltool/pathmapper.py @@ -12,8 +12,10 @@ def __init__(self, referenced_files, basedir): def mapper(self, src): return self._pathmap[src] + def files(self): + return self._pathmap.keys() -class DockerPathMapper(object): +class DockerPathMapper(PathMapper): def __init__(self, referenced_files, basedir): self._pathmap = {} self.dirs = {} @@ -51,6 +53,3 @@ def __init__(self, referenced_files, basedir): for d in self.dirs: if abs.startswith(d): self._pathmap[src] = os.path.join(self.dirs[d], abs[len(d)+1:]) - - def mapper(self, src): - return self._pathmap[src] From 8d30574de0e0f763f745cec9a3730bd5e513eae3 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 11 May 2015 13:22:42 -0400 Subject: [PATCH 078/221] Scatter specified as requirement --- cwltool/draft2tool.py | 2 +- cwltool/process.py | 8 ++++---- cwltool/workflow.py | 15 +++++++-------- 3 files changed, 12 insertions(+), 13 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index e6b45acbf..23f295855 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -29,7 +29,7 @@ supportedProcessRequirements = ("DockerRequirement", "MemoryRequirement", "ExpressionEngineRequirement", - "ScatterFeature") + "Scatter") def substitute(value, replace): if replace[0] == "^": diff --git a/cwltool/process.py b/cwltool/process.py index d712cd344..c2c07e086 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -61,14 +61,14 @@ def get_schema(): return names class Process(object): - def check_feature(self, feature, kwargs): + def get_feature(self, feature, kwargs): for t in kwargs.get("requirements", []): if t["class"] == feature: - return True + return t for t in kwargs.get("hints", []): if t["class"] == feature: - return True - return False + return t + return None def __init__(self, toolpath_object, validateAs, docpath): self.names = get_schema() diff --git a/cwltool/workflow.py b/cwltool/workflow.py index 9581c9763..0173ccde4 100644 --- 
a/cwltool/workflow.py +++ b/cwltool/workflow.py @@ -59,12 +59,11 @@ def receive_output(self, step, outputparms, jobout): def try_make_job(self, step, basedir, **kwargs): inputobj = {} - if "scatter" in step.tool: - if not self.check_feature("ScatterFeature", kwargs): - raise WorkflowException("Must include ScatterFeature in requirements.") + scatterSpec = self.get_feature("Scatter", requirements=self.tool["requirements"], hints=self.tool["hints"]) + if scatterSpec: inputparms = copy.deepcopy(step.tool["inputs"]) outputparms = copy.deepcopy(step.tool["outputs"]) - scatter = aslist(step.tool["scatter"]) + scatter = aslist(scatterSpec["scatter"]) inp_map = {i["id"]: i for i in inputparms} for s in aslist(step.tool["scatter"]): @@ -73,7 +72,7 @@ def try_make_job(self, step, basedir, **kwargs): inp_map[s]["type"] = {"type": "array", "items": inp_map[s]["type"]} - if step.tool.get("scatterMethod") == "nested_crossproduct": + if scatterSpec.get("scatterMethod") == "nested_crossproduct": nesting = len(aslist(step.tool["scatter"])) else: nesting = 1 @@ -124,9 +123,9 @@ def try_make_job(self, step, basedir, **kwargs): callback = functools.partial(self.receive_output, step, outputparms) - if step.tool.get("scatter"): - method = step.tool.get("scatterMethod") - if method is None and len(aslist(step.tool["scatter"])) != 1: + if scatterSpec: + method = scatterSpec.get("scatterMethod") + if method is None and len(aslist(scatterSpec["scatter"])) != 1: raise WorkflowException("Must specify scatterMethod when scattering over multiple inputs") if method == "dotproduct" or method is None: From bd56b0357bcf54bed639c4ea97ed0cf5e748a380 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 11 May 2015 21:55:18 -0400 Subject: [PATCH 079/221] Moved avro-ld under cwltool. Fixing tests. 
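An aside before the module move: the scatter methods dispatched on in patch 078 above can be illustrated standalone. A rough sketch of their semantics (illustrative data, not the cwltool code path):

import itertools

inputs = {"x": [1, 2], "y": ["a", "b"]}

# dotproduct pairs the scattered inputs positionally.
dot = [dict(zip(inputs, vals)) for vals in zip(*inputs.values())]
# -> [{"x": 1, "y": "a"}, {"x": 2, "y": "b"}]

# flat_crossproduct enumerates every combination; nested_crossproduct
# additionally keeps one level of list nesting per scattered input.
cross = [dict(zip(inputs, vals)) for vals in itertools.product(*inputs.values())]
# -> [{"x": 1, "y": "a"}, {"x": 1, "y": "b"}, {"x": 2, "y": "a"}, {"x": 2, "y": "b"}]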
---
 {avro_ld => cwltool/avro_ld}/__init__.py       | 0
 {avro_ld => cwltool/avro_ld}/jsonld_context.py | 0
 {avro_ld => cwltool/avro_ld}/makedoc.py        | 0
 {avro_ld => cwltool/avro_ld}/schema.py         | 0
 cwltool/docker.py                              | 2 +-
 cwltool/workflow.py                            | 2 +-
 6 files changed, 2 insertions(+), 2 deletions(-)
 rename {avro_ld => cwltool/avro_ld}/__init__.py (100%)
 rename {avro_ld => cwltool/avro_ld}/jsonld_context.py (100%)
 rename {avro_ld => cwltool/avro_ld}/makedoc.py (100%)
 rename {avro_ld => cwltool/avro_ld}/schema.py (100%)

diff --git a/avro_ld/__init__.py b/cwltool/avro_ld/__init__.py
similarity index 100%
rename from avro_ld/__init__.py
rename to cwltool/avro_ld/__init__.py
diff --git a/avro_ld/jsonld_context.py b/cwltool/avro_ld/jsonld_context.py
similarity index 100%
rename from avro_ld/jsonld_context.py
rename to cwltool/avro_ld/jsonld_context.py
diff --git a/avro_ld/makedoc.py b/cwltool/avro_ld/makedoc.py
similarity index 100%
rename from avro_ld/makedoc.py
rename to cwltool/avro_ld/makedoc.py
diff --git a/avro_ld/schema.py b/cwltool/avro_ld/schema.py
similarity index 100%
rename from avro_ld/schema.py
rename to cwltool/avro_ld/schema.py
diff --git a/cwltool/docker.py b/cwltool/docker.py
index 17318a364..cb278f691 100644
--- a/cwltool/docker.py
+++ b/cwltool/docker.py
@@ -67,7 +67,7 @@ def get_from_requirements(requirements, hints, pull_image, dry_run=False):
             if get_image(r, pull_image, dry_run):
                 return r["dockerImageId"]
             else:
-                raise Exception("Docker image %s not found" % (self.container["imageId"]))
+                raise Exception("Docker image %s not found" % r["dockerImageId"])
 
     if hints:
         for r in reversed(hints):
             if r["class"] == "DockerRequirement":
diff --git a/cwltool/workflow.py b/cwltool/workflow.py
index 9581c9763..920779622 100644
--- a/cwltool/workflow.py
+++ b/cwltool/workflow.py
@@ -228,7 +228,7 @@ def __init__(self, toolpath_object, docpath):
             i["id"] = toolid
 
-        super(External, self).__init__(toolpath_object, "Process", docpath)
+        super(External, self).__init__(toolpath_object, "External", docpath)
 
     def receive_output(self, jobout):
         self.output = {}

From 09f5112028b0e4cf570a8121a81f51ed6011d8c5 Mon Sep 17 00:00:00 2001
From: Peter Amstutz
Date: Tue, 12 May 2015 09:08:55 -0400
Subject: [PATCH 080/221] Updating to load on "id" instead of draft-1 "$ref"

---
 cwltool/ref_resolver.py | 56 +++++++++++++++++------------------------
 1 file changed, 23 insertions(+), 33 deletions(-)

diff --git a/cwltool/ref_resolver.py b/cwltool/ref_resolver.py
index c71f7cdb2..613a3a28b 100644
--- a/cwltool/ref_resolver.py
+++ b/cwltool/ref_resolver.py
@@ -35,11 +35,12 @@ def __init__(self):
 
     def load(self, url, base_url=None):
         base_url = base_url or 'file://%s/' % os.path.abspath('.')
-        return self.resolve_ref({'$ref': url}, base_url)
+        return self.resolve_ref({'id': url}, base_url)
 
     def resolve_ref(self, obj, base_url):
-        ref, mixin, checksum = obj.pop('$ref', None), obj.pop('$mixin', None), obj.pop('$checksum', None)
-        ref = ref or mixin
+        ref = obj.pop('id', None)
+        if ref[0] == "#":
+            return obj
         url = urlparse.urljoin(base_url, ref)
         if url in self.resolved:
             return self.resolved[url]
@@ -48,11 +49,9 @@ def resolve_ref(self, obj, base_url):
         self.resolving[url] = True
         doc_url, pointer = urlparse.urldefrag(url)
         document = self.fetch(doc_url)
-        fragment = copy.deepcopy(resolve_pointer(document, pointer))
+        fragment = copy.deepcopy(resolve_fragment(document, pointer))
         try:
-            self.verify_checksum(checksum, fragment)
-            if isinstance(fragment, dict) and mixin:
-                fragment = dict(obj, **fragment)
+            fragment = dict(obj, **fragment)
             result = self.resolve_all(fragment, doc_url)
         finally:
             del self.resolving[url]
@@ -62,7 +61,7 @@ def resolve_all(self, document, base_url):
         if isinstance(document, list):
             iterator = enumerate(document)
         elif isinstance(document, dict):
-            if '$ref' in document or '$mixin' in document:
+            if 'id' in document:
                 return self.resolve_ref(document, base_url)
             iterator = document.iteritems()
         else:
@@ -95,24 +94,24 @@ def fetch(self, url):
         self.fetched[url] = result
         return result
 
-    def verify_checksum(self, checksum, document):
-        if not checksum:
-            return
-        hash_method, hexdigest = checksum.split('$')
-        if hexdigest != self.checksum(document, hash_method):
-            raise RuntimeError('Checksum does not match: %s' % checksum)
-
-    def checksum(self, document, method='sha1'):
-        if method not in ('md5', 'sha1'):
-            raise NotImplementedError('Unsupported hash method: %s' % method)
-        normalized = json.dumps(document, sort_keys=True, separators=(',', ':'))
-        return getattr(hashlib, method)(normalized).hexdigest
-
-
 POINTER_DEFAULT = object()
 
-
-def resolve_pointer(document, pointer, default=POINTER_DEFAULT):
+def resolve_fragment(document, frag):
+    if isinstance(document, dict):
+        if document.get("id") == frag:
+            return document
+        for d in document:
+            r = resolve_fragment(document[d], frag)
+            if r:
+                return r
+    elif isinstance(document, list):
+        for d in document:
+            r = resolve_fragment(d, frag)
+            if r:
+                return r
+    return None
+
+def resolve_json_pointer(document, pointer, default=POINTER_DEFAULT):
     parts = urlparse.unquote(pointer.lstrip('/#')).split('/') \
         if pointer else []
     for part in parts:
@@ -130,16 +129,7 @@ def resolve_pointer(document, pointer, default=POINTER_DEFAULT):
             raise ValueError('Unresolvable JSON pointer: %r' % pointer)
     return document
 
-
 loader = Loader()
 
-
-def to_json(obj, fp=None):
-    default = lambda o: (o.__json__() if callable(getattr(o, '__json__', None))
-                         else str(o))
-    kwargs = dict(default=default, indent=2, sort_keys=True)
-    return json.dump(obj, fp, **kwargs) if fp else json.dumps(obj, **kwargs)
-
-
 def from_url(url, base_url=None):
     return loader.load(url, base_url)

From 59b8144e6ab62ab02c9ef57811a6aa869d4af4ca Mon Sep 17 00:00:00 2001
From: Peter Amstutz
Date: Tue, 12 May 2015 09:19:14 -0400
Subject: [PATCH 081/221] Switch to avro_ld.makedoc for generating specification.

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index dca391b03..d75b6638a 100644
--- a/setup.py
+++ b/setup.py
@@ -35,7 +35,7 @@
       url="https://github.com/common-workflow-language/common-workflow-language",
       download_url="https://github.com/common-workflow-language/common-workflow-language",
       license='Apache 2.0',
-      packages=["cwltool"],
+      packages=["cwltool", "cwltool.avro_ld"],
       package_data={'cwltool': ['schemas/draft-1/*', 'schemas/draft-2/*']},
       install_requires=[
           'jsonschema >= 2.4.0',

From 95c667d03aae6e535270324a6799a89c4bcc48f6 Mon Sep 17 00:00:00 2001
From: Peter Amstutz
Date: Tue, 12 May 2015 11:35:45 -0400
Subject: [PATCH 082/221] Use css media query to adjust behavior of table of
 contents based on screen size and Bootstrap behavior.
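(Looking back at PATCH 080 above, before the styling change below.) The resolver now keys on the draft-2 "id" field rather than draft-1's "$ref"/"$mixin", and an id beginning with "#" names a document-local node rather than something to fetch. A condensed sketch of just that dispatch rule (hypothetical helper, not the full Loader.resolve_ref):

    import urlparse  # Python 2 stdlib, as used throughout ref_resolver.py

    def classify_id(obj, base_url):
        # Pop the draft-2 "id" field; absent or fragment-only ids stay local.
        ref = obj.pop('id', None)
        if ref is None or ref.startswith('#'):
            return ('local', obj)
        # Anything else resolves against the base URL and would be fetched.
        return ('remote', urlparse.urljoin(base_url, ref))

    # classify_id({'id': '#main'}, 'file:///tools/t.json')
    #   -> ('local', {})
    # classify_id({'id': 'other.json#step'}, 'file:///tools/t.json')
    #   -> ('remote', 'file:///tools/other.json#step')

resolve_fragment then covers the fragment half of the story: instead of walking a JSON pointer path, it searches the whole document for a node whose "id" equals the fragment.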
---
 cwltool/avro_ld/makedoc.py | 27 ++++++++++++++++-----------
 1 file changed, 16 insertions(+), 11 deletions(-)

diff --git a/cwltool/avro_ld/makedoc.py b/cwltool/avro_ld/makedoc.py
index 50a427c74..30798ea74 100644
--- a/cwltool/avro_ld/makedoc.py
+++ b/cwltool/avro_ld/makedoc.py
@@ -237,7 +236,6 @@ def avrold_doc(j, outdoc):
     body {
         height:100%;
         position: relative;
-        background-color: aliceblue;
     }
 
     #main {
@@ -261,33 +260,38 @@ def avrold_doc(j, outdoc):
         background-color: transparent;
     }
 
-    .container-fluid {
-        height: 100%;
+    #main {
+        overflow-y: auto;
     }
 
-    .lefttoc {
-        height: 100%;
+    #lefttoc {
+        background-color: aliceblue;
         overflow-y: auto;
     }
 
+    @media (min-width: 992px) {
+        .full-height {
+            height: 100%;
+        }
+    }
+
-    [HTML container tags lost in extraction]
+    [HTML container tags lost in extraction]
     """)
 
     outdoc.write("""
-    [HTML tag lost in extraction]
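On the last patch: pinning both columns to height: 100% gives independent scrolling panes on desktop, but on narrow screens Bootstrap stacks the columns and a pinned height would clip them. Wrapping the rule in @media (min-width: 992px), Bootstrap 3's "md" breakpoint, keeps the split-pane layout for wide viewports only. A self-contained sketch of the pattern, with hypothetical names, not the actual makedoc output:

    # Demo of the media-query pattern above: the full-height rule only
    # applies at Bootstrap's "md" width and up; below that the page
    # scrolls as a single stacked column.
    STYLE = """
    #main { overflow-y: auto; }
    #lefttoc { background-color: aliceblue; overflow-y: auto; }
    @media (min-width: 992px) {
        .full-height { height: 100%; }
    }
    """

    def write_demo(outdoc):
        outdoc.write("<style>%s</style>\n" % STYLE)
        outdoc.write('<div class="row full-height">...</div>\n')

    if __name__ == "__main__":
        import sys
        write_demo(sys.stdout)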