From edde5ce1a09260056203b419d40f6fd15164593c Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 4 Jul 2016 01:10:46 -0400 Subject: [PATCH 01/11] Recover eliminate dirent work. --- cwltool/builder.py | 8 ++++- cwltool/cwltest.py | 13 +++----- cwltool/draft2tool.py | 57 +++++++++++++++++-------------- cwltool/main.py | 8 ++--- cwltool/pathmapper.py | 39 ++++++++-------------- cwltool/process.py | 78 +++++++++++++++++++++++++------------------ cwltool/update.py | 4 +-- cwltool/workflow.py | 2 ++ 8 files changed, 109 insertions(+), 100 deletions(-) diff --git a/cwltool/builder.py b/cwltool/builder.py index a417eeedb..8f6e8fca9 100644 --- a/cwltool/builder.py +++ b/cwltool/builder.py @@ -187,8 +187,14 @@ def generate_arg(self, binding): # type: (Dict[str,Any]) -> List[str] return [a for a in args if a is not None] - def do_eval(self, ex, context=None, pull_image=True): + def do_eval(self, ex, context=None, pull_image=True, recursive=False): # type: (Dict[str,str], Any, bool) -> Any + if recursive: + if isinstance(ex, dict): + return {k: self.do_eval(v, context, pull_image, recursive) for k,v in ex.iteritems()} + if isinstance(ex, list): + return [self.do_eval(v, context, pull_image, recursive) for v in ex] + return expression.do_eval(ex, self.job, self.requirements, self.outdir, self.tmpdir, self.resources, diff --git a/cwltool/cwltest.py b/cwltool/cwltest.py index d3a575eab..a7b7f95ff 100755 --- a/cwltool/cwltest.py +++ b/cwltool/cwltest.py @@ -30,18 +30,13 @@ def compare(a, b): # type: (Any, Any) -> bool try: if isinstance(a, dict): if a.get("class") == "File": - if "path" in a: - comp = "path" - else: - comp = "location" - if a[comp] == "Any" or b[comp] == "Any": + if a["path"] == "Any" or b["path"] == "Any": return True - if a[comp] and (not (b[comp].endswith("/" + a[comp]) - or ("/" not in b[comp] and a[comp] == b[comp]))): - raise CompareFail(u"%s does not end with %s" %(b[comp], a[comp])) + if not (b["path"].endswith("/" + a["path"]) or ("/" not in 
b["path"] and a["path"] == b["path"])): + raise CompareFail(u"%s does not end with %s" %(b["path"], a["path"])) # ignore empty collections b = {k: v for k, v in b.iteritems() - if not isinstance(v, (list, dict)) or len(v) > 0} + if not isinstance(v, (list, dict)) or len(v) > 0} elif a.get("class") == "Directory": if len(a["listing"]) != len(b["listing"]): return False diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index f0029ce92..b8f9dc2f6 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -6,7 +6,6 @@ import glob import logging import hashlib -import random import re import urlparse import tempfile @@ -17,7 +16,7 @@ import shellescape from typing import Callable, Any, Union, Generator, cast -from .process import Process, shortname, uniquename, getListing +from .process import Process, shortname, uniquename, getListing, normalizeFilesDirs from .errors import WorkflowException from .utils import aslist from . import expression @@ -50,7 +49,9 @@ def __init__(self): # type: () -> None def run(self, **kwargs): # type: (**Any) -> None try: - self.output_callback(self.builder.do_eval(self.script), "success") + ev = self.builder.do_eval(self.script) + normalizeFilesDirs(ev) + self.output_callback(ev, "success") except Exception as e: _logger.warn(u"Failed to evaluate expression:\n%s", e, exc_info=(e if kwargs.get('debug') else False)) self.output_callback({}, "permanentFail") @@ -110,6 +111,16 @@ def run(self, **kwargs): self.cachebuilder, self.outdir), "success") +# map files to assigned path inside a container. 
We need to also explicitly +# walk over input as implicit reassignment doesn't reach everything in builder.bindings +def check_adjust(builder, f): # type: (Dict[str,Any]) -> Dict[str,Any] + f["path"] = builder.pathmapper.mapper(f["location"])[1] + f["dirname"], f["basename"] = os.path.split(f["path"]) + if f["class"] == "File": + f["nameroot"], f["nameext"] = os.path.splitext(f["basename"]) + if not ACCEPTLIST_RE.match(f["basename"]): + raise WorkflowException("Invalid filename: '%s' contains illegal characters" % (f["basename"])) + return f class CommandLineTool(Process): def __init__(self, toolpath_object, **kwargs): @@ -141,8 +152,13 @@ def job(self, joborder, output_callback, **kwargs): cachebuilder = self._init_job(joborder, **cacheargs) cachebuilder.pathmapper = PathMapper(cachebuilder.files, kwargs["basedir"], - cachebuilder.stagedir) - + cachebuilder.stagedir, + separateDirs=False) + _check_adjust = partial(check_adjust, cachebuilder) + adjustFileObjs(cachebuilder.files, _check_adjust) + adjustFileObjs(cachebuilder.bindings, _check_adjust) + adjustDirObjs(cachebuilder.files, _check_adjust) + adjustDirObjs(cachebuilder.bindings, _check_adjust) cmdline = flatten(map(cachebuilder.generate_arg, cachebuilder.bindings)) (docker_req, docker_is_req) = self.get_requirement("DockerRequirement") if docker_req and kwargs.get("use_container") is not False: @@ -151,8 +167,9 @@ def job(self, joborder, output_callback, **kwargs): keydict = {u"cmdline": cmdline} for _,f in cachebuilder.pathmapper.items(): - st = os.stat(f[0]) - keydict[f[0]] = [st.st_size, int(st.st_mtime * 1000)] + if f.type == "File": + st = os.stat(f.resolved) + keydict[f.resolved] = [st.st_size, int(st.st_mtime * 1000)] interesting = {"DockerRequirement", "EnvVarRequirement", @@ -236,19 +253,10 @@ def rm_pending_output_callback(output_callback, jobcachepending, builder.pathmapper = self.makePathMapper(reffiles, builder.stagedir, **kwargs) builder.requirements = j.requirements - # map files to assigned 
path inside a container. We need to also explicitly - # walk over input as implicit reassignment doesn't reach everything in builder.bindings - def _check_adjust(f): # type: (Dict[str,Any]) -> Dict[str,Any] - f["path"] = builder.pathmapper.mapper(f["location"])[1] - f["dirname"], f["basename"] = os.path.split(f["path"]) - if f["class"] == "File": - f["nameroot"], f["nameext"] = os.path.splitext(f["basename"]) - if not ACCEPTLIST_RE.match(f["basename"]): - raise WorkflowException("Invalid filename: '%s' contains illegal characters" % (f["basename"])) - return f - _logger.debug(u"[job %s] path mappings is %s", j.name, json.dumps({p: builder.pathmapper.mapper(p) for p in builder.pathmapper.files()}, indent=4)) + _check_adjust = partial(check_adjust, builder) + adjustFileObjs(builder.files, _check_adjust) adjustFileObjs(builder.bindings, _check_adjust) adjustDirObjs(builder.files, _check_adjust) @@ -273,14 +281,10 @@ def _check_adjust(f): # type: (Dict[str,Any]) -> Dict[str,Any] j.stagedir = builder.stagedir initialWorkdir = self.get_requirement("InitialWorkDirRequirement")[0] - j.generatefiles = {"class": "Directory", "listing": []} + j.generatefiles = {"class": "Directory", "listing": [], "basename": ""} if initialWorkdir: - if isinstance(initialWorkdir["listing"], (str, unicode)): - j.generatefiles["listing"] = builder.do_eval(initialWorkdir["listing"]) - else: - for t in initialWorkdir["listing"]: - j.generatefiles["listing"].append({"entryname": builder.do_eval(t["entryname"]), - "entry": copy.deepcopy(builder.do_eval(t["entry"]))}) + j.generatefiles["listing"] = builder.do_eval(initialWorkdir["listing"], recursive=True) + normalizeFilesDirs(j.generatefiles) j.environment = {} evr = self.get_requirement("EnvVarRequirement")[0] @@ -321,6 +325,8 @@ def collect_output_ports(self, ports, builder, outdir): # https://github.com/python/mypy/issues/797 partial(revmap_file, builder, outdir))) adjustFileObjs(ret, remove_path) + adjustDirObjs(ret, remove_path) + 
normalizeFilesDirs(ret) validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret) return ret @@ -334,6 +340,7 @@ def collect_output_ports(self, ports, builder, outdir): if ret: adjustFileObjs(ret, remove_path) adjustDirObjs(ret, remove_path) + normalizeFilesDirs(ret) validate.validate_ex(self.names.get_name("outputs_record_schema", ""), ret) return ret if ret is not None else {} except validate.ValidationException as e: diff --git a/cwltool/main.py b/cwltool/main.py index 1bd3f14a7..88f704bbc 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -12,7 +12,6 @@ import urlparse import hashlib import pkg_resources # part of setuptools -import random import functools import rdflib @@ -26,7 +25,7 @@ from . import workflow from .errors import WorkflowException, UnsupportedRequirement from .cwlrdf import printrdf, printdot -from .process import shortname, Process, getListing, relocateOutputs, cleanIntermediate, scandeps +from .process import shortname, Process, getListing, relocateOutputs, cleanIntermediate, scandeps, normalizeFilesDirs from .load_tool import fetch_document, validate_document, make_tool from . 
import draft2tool from .builder import adjustFileObjs, adjustDirObjs @@ -418,12 +417,13 @@ def load_job_order(args, t, stdin, print_input_deps=False, relative_deps=False, return 0 def pathToLoc(p): - if "location" not in p: + if "location" not in p and "path" in p: p["location"] = p["path"] del p["path"] adjustDirObjs(job_order_object, pathToLoc) adjustFileObjs(job_order_object, pathToLoc) + normalizeFilesDirs(job_order_object) adjustDirObjs(job_order_object, functools.partial(getListing, StdFsAccess(input_basedir))) if "cwl:tool" in job_order_object: @@ -440,7 +440,7 @@ def printdeps(obj, document_loader, stdout, relative_deps, uri, basedir=None): "location": uri} def loadref(b, u): - return document_loader.resolve_ref(u, base_url=b)[0] + return document_loader.fetch(urlparse.urljoin(b, u)) sf = scandeps(basedir if basedir else uri, obj, set(("$import", "run")), diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py index 8455d648a..6be7f9487 100644 --- a/cwltool/pathmapper.py +++ b/cwltool/pathmapper.py @@ -1,8 +1,8 @@ import os -import random import logging import stat import collections +import uuid from typing import Tuple, Set, Union, Any _logger = logging.getLogger("cwltool") @@ -68,38 +68,25 @@ def __init__(self, referenced_files, basedir, stagedir, separateDirs=True): def visitlisting(self, listing, stagedir, basedir): for ld in listing: - if "entryname" in ld: - tgt = os.path.join(stagedir, ld["entryname"]) - if isinstance(ld["entry"], (str, unicode)): - self._pathmap[str(id(ld["entry"]))] = MapperEnt(ld["entry"], tgt, "CreateFile") + tgt = os.path.join(stagedir, ld["basename"]) + if "contents" in ld and ld["location"].startswith("_:"): + self._pathmap[ld["location"]] = MapperEnt(ld["contents"], tgt, "CreateFile") + else: + if ld["class"] == "Directory": + self.visit(ld, tgt, basedir, copy=ld.get("writable", False)) else: - if ld["entry"]["class"] == "Directory": - self.visit(ld["entry"], tgt, basedir, copy=ld.get("writable", False)) - else: - 
self.visit(ld["entry"], stagedir, basedir, entryname=ld["entryname"], copy=ld.get("writable", False)) - #ab = ld["entry"]["location"] - #if ab.startswith("file://"): - # ab = ab[7:] - #self._pathmap[ld["entry"]["location"]] = MapperEnt(ab, tgt, ld["entry"]["class"]) - elif ld.get("class") == "File": - self.visit(ld, stagedir, basedir, copy=ld.get("writable", False)) - - def visit(self, obj, stagedir, basedir, entryname=None, copy=False): + self.visit(ld, stagedir, basedir, copy=ld.get("writable", False)) + + def visit(self, obj, stagedir, basedir, copy=False): if obj["class"] == "Directory": - if "location" in obj: - self._pathmap[obj["location"]] = MapperEnt(obj["location"], stagedir, "Directory") - else: - self._pathmap[str(id(obj))] = MapperEnt(str(id(obj)), stagedir, "Directory") + self._pathmap[obj["location"]] = MapperEnt(obj["location"], stagedir, "Directory") self.visitlisting(obj.get("listing", []), stagedir, basedir) elif obj["class"] == "File": path = obj["location"] if path in self._pathmap: return ab = abspath(path, basedir) - if entryname: - tgt = os.path.join(stagedir, entryname) - else: - tgt = os.path.join(stagedir, os.path.basename(path)) + tgt = os.path.join(stagedir, obj["basename"]) if copy: self._pathmap[path] = MapperEnt(ab, tgt, "WritableFile") else: @@ -114,7 +101,7 @@ def setup(self, referenced_files, basedir): stagedir = self.stagedir for fob in referenced_files: if self.separateDirs: - stagedir = os.path.join(self.stagedir, "stg%x" % random.randint(1, 1000000000)) + stagedir = os.path.join(self.stagedir, "stg%s" % uuid.uuid4()) self.visit(fob, stagedir, basedir) # Dereference symbolic links diff --git a/cwltool/process.py b/cwltool/process.py index ea708c36c..a6951357b 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -11,8 +11,8 @@ import pprint from collections import Iterable import errno -import random import shutil +import uuid import abc import schema_salad.validate as validate @@ -80,8 +80,8 @@ SCHEMA_CACHE = {} # type: 
Dict[str, Tuple[Loader, Union[avro.schema.Names, avro.schema.SchemaParseException], Dict[unicode, Any], Loader]] SCHEMA_FILE = None # type: Dict[unicode, Any] SCHEMA_DIR = None # type: Dict[unicode, Any] -SCHEMA_DIRENT = None # type: Dict[unicode, Any] SCHEMA_ANY = None # type: Dict[unicode, Any] +SCHEMA_EXPR = None # type: Dict[unicode, Any] def get_schema(version): # type: (str) -> Tuple[Loader, Union[avro.schema.Names, avro.schema.SchemaParseException], Dict[unicode,Any], Loader] @@ -162,11 +162,9 @@ def getListing(fs_access, rec): ent = {"class": "Directory", "location": ld} getListing(fs_access, ent) - listing.append({"entryname": os.path.basename(ld), - "entry": ent}) + listing.append(ent) else: - listing.append({"entryname": os.path.basename(ld), - "entry": {"class": "File", "location": ld}}) + listing.append({"class": "File", "location": ld}) rec["listing"] = listing def stageFiles(pm, stageFunc): @@ -289,16 +287,33 @@ def avroize_type(field_type, name_prefix=""): """ adds missing information to a type so that CWL types are valid in schema_salad. 
""" - if type(field_type) == list: - field_type_result = [] - for idx, field_type_item in enumerate(field_type): - field_type_result.append(avroize_type(field_type_item, name_prefix+"_"+str(idx))) - return field_type_result - elif type(field_type) == dict and "type" in field_type and field_type["type"] == "enum": - if "name" not in field_type: - field_type["name"] = name_prefix+"_type_enum" + if isinstance(field_type, list): + for f in field_type: + avroize_type(f, name_prefix) + elif isinstance(field_type, dict): + if field_type["type"] in ("enum", "record"): + if "name" not in field_type: + field_type["name"] = name_prefix+unicode(uuid.uuid4()) + if field_type["type"] == "record": + avroize_type(field_type["fields"], name_prefix) + if field_type["type"] == "array": + avroize_type(field_type["items"], name_prefix) return field_type +def normalizeFilesDirs(job): + def addLocation(d): + if "location" not in d: + if d["class"] == "File" and ("contents" not in d or "basename" not in d): + raise validate.ValidationException("Anonymous file object must have 'contents' and 'basename' fields.") + if d["class"] == "Directory" and ("listing" not in d or "basename" not in d): + raise validate.ValidationException("Anonymous directory object must have 'listing' and 'basename' fields.") + d["location"] = "_:" + unicode(uuid.uuid4()) + elif "basename" not in d: + parse = urlparse.urlparse(d["location"]) + d["basename"] = os.path.basename(parse.path) + + adjustFileObjs(job, addLocation) + adjustDirObjs(job, addLocation) class Process(object): __metaclass__ = abc.ABCMeta @@ -308,8 +323,8 @@ def __init__(self, toolpath_object, **kwargs): self.metadata = kwargs.get("metadata", {}) # type: Dict[str,Any] self.names = None # type: avro.schema.Names + global SCHEMA_FILE, SCHEMA_DIR, SCHEMA_ANY, SCHEMA_EXPR # pylint: disable=global-statement if SCHEMA_FILE is None: - global SCHEMA_FILE, SCHEMA_DIR, SCHEMA_DIRENT, SCHEMA_ANY # pylint: disable=global-statement get_schema("draft-4") 
SCHEMA_ANY = cast(Dict[unicode, Any], SCHEMA_CACHE["draft-4"][3].idx["https://w3id.org/cwl/salad#Any"]) @@ -317,10 +332,10 @@ def __init__(self, toolpath_object, **kwargs): SCHEMA_CACHE["draft-4"][3].idx["https://w3id.org/cwl/cwl#File"]) SCHEMA_DIR = cast(Dict[unicode, Any], SCHEMA_CACHE["draft-4"][3].idx["https://w3id.org/cwl/cwl#Directory"]) - SCHEMA_DIRENT = cast(Dict[unicode, Any], - SCHEMA_CACHE["draft-4"][3].idx["https://w3id.org/cwl/cwl#Dirent"]) + SCHEMA_EXPR = cast(Dict[unicode, Any], + SCHEMA_CACHE["draft-4"][3].idx["https://w3id.org/cwl/cwl#Expression"]) - names = schema_salad.schema.make_avro_schema([SCHEMA_FILE, SCHEMA_DIR, SCHEMA_DIRENT, SCHEMA_ANY], + names = schema_salad.schema.make_avro_schema([SCHEMA_FILE, SCHEMA_DIR, SCHEMA_ANY, SCHEMA_EXPR], schema_salad.ref_resolver.Loader({}))[0] if isinstance(names, avro.schema.SchemaParseException): raise names @@ -368,7 +383,7 @@ def __init__(self, toolpath_object, **kwargs): c["type"] = ["null"] + aslist(c["type"]) else: c["type"] = c["type"] - c["type"] = avroize_type(c["type"],c["name"]) + c["type"] = avroize_type(c["type"], c["name"]) if key == "inputs": self.inputs_record_schema["fields"].append(c) # type: ignore elif key == "outputs": @@ -394,6 +409,7 @@ def _init_job(self, joborder, **kwargs): unicode]], copy.deepcopy(joborder)) fillInDefaults(self.tool[u"inputs"], builder.job) + normalizeFilesDirs(builder.job) # Validate job order try: @@ -573,11 +589,9 @@ def nestdir(base, deps): while sp: nx = sp.pop() deps = { - "entryname": nx, - "entry": { - "class": "Directory", - "listing": [deps] - } + "class": "Directory", + "basename": nx, + "listing": [deps] } return deps @@ -585,16 +599,13 @@ def mergedirs(listing): r = [] ents = {} for e in listing: - if "entryname" in e: - if e["entryname"] not in ents: - ents[e["entryname"]] = e - elif e["entry"]["class"] == "Directory": - ents[e["entryname"]]["entry"]["listing"].extend(e["entry"]["listing"]) - else: - r.append(e) + if e["basename"] not in ents: + 
ents[e["basename"]] = e + elif e["class"] == "Directory": + ents[e["basename"]]["listing"].extend(e["listing"]) for e in ents.itervalues(): - if e["entry"]["class"] == "Directory": - e["entry"]["listing"] = mergedirs(e["entry"]["listing"]) + if e["class"] == "Directory": + e["listing"] = mergedirs(e["listing"]) r.extend(ents.itervalues()) return r @@ -643,5 +654,6 @@ def scandeps(base, doc, reffields, urlfields, loadref): for d in doc: r.extend(scandeps(base, d, reffields, urlfields, loadref)) + normalizeFilesDirs(r) r = mergedirs(r) return r diff --git a/cwltool/update.py b/cwltool/update.py index dfe76b14e..4ce8ee731 100644 --- a/cwltool/update.py +++ b/cwltool/update.py @@ -393,8 +393,8 @@ def _draft4Dev2toDev3(doc, loader, baseuri): doc["listing"] = [] for f in doc["fileDef"]: doc["listing"].append({ - "entryname": f["filename"], - "entry": f["fileContent"] + "basename": f["filename"], + "contents": f["fileContent"] }) del doc["fileDef"] for key, value in doc.items(): diff --git a/cwltool/workflow.py b/cwltool/workflow.py index e793d901a..a32e8dc07 100644 --- a/cwltool/workflow.py +++ b/cwltool/workflow.py @@ -91,6 +91,8 @@ def match_types(sinktype, src, iid, inputobj, linkMerge, valueFrom): def can_assign_src_to_sink(src, sink): # type: (Any, Any) -> bool """Check for identical type specifications, ignoring extra keys like inputBinding. """ + if sink == "Any": + return True if isinstance(src, dict) and isinstance(sink, dict): if src["type"] == "array" and sink["type"] == "array": return can_assign_src_to_sink(src["items"], sink["items"]) From 92d948579bcae60c1010066b459afbdc19cbafed Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 4 Jul 2016 01:11:15 -0400 Subject: [PATCH 02/11] Squashed 'cwltool/schemas/' changes from afe31c1..b40896c b40896c Fix basename. 34d946e Make location optional to support File contents literals. f69576a Remove mapSubject entryname. 7d002ad Fix field name conflict. d873510 Use file content literals for InitialWorkDir. 
3fb9340 Get rid of Dirent. Update spec for basename. Add contents field to File spec. d5b2627 Fix ttl and context generation 01cf6db Site generation works again 7302ecc Make "location" field optional when "listing" is provided. 63dcbdc Merge branch 'master' of github.com:common-workflow-language/common-workflow-language 09bd8ef Updating site generation for draft-4. def6e52 Merge commit '6266928eac323672c0c44d16dca91d808fbc5e68' 6266928 Squashed 'draft-4/salad/' changes from 9c8c36f..c509a07 6f9cb1c fix the draft-4 tests cb78824 Merge pull request #236 from common-workflow-language/directory fcc5374 Merge remote-tracking branch 'origin/master' into directory a90b71f replace errant draft-4.dev3 with dev2 11b96f1 Merge pull request #242 from common-workflow-language/scatter-inputs 93b60d9 Merge remote-tracking branch 'origin/master' into scatter-inputs 9caa2cf Merge pull request #240 from common-workflow-language/dependency_hint db4113b Update text to say that 'inputs' on scatter is the post-scatter input object. 9ba0d38 Add test using value of inputs post-scatter. cae0407 Add test staging files into output directory for update. 972e3b7 Merge pull request #232 from common-workflow-language/move-file-related-fields 6c6ab97 fix failing tests 0b12a24 soften description, allow for list of specs, add identifier 43b5fb9 Describe LooseDependencyHints. 
7eeb70a reparent file specific fields from SchemaBase to Parameter git-subtree-dir: cwltool/schemas git-subtree-split: b40896cb6fe5d29a2f34b815340d833844f2461c --- draft-4/CommandLineTool.yml | 91 +++--- draft-4/Process.yml | 163 +++++++---- draft-4/Workflow.yml | 12 +- draft-4/conformance_test_draft-4.yaml | 46 +-- draft-4/draft-4/cat1-tool.cwl | 2 + draft-4/draft-4/dir4-job.yml | 7 +- draft-4/draft-4/rename.cwl | 5 +- draft-4/draft-4/scatter-valuefrom-wf5.cwl | 58 ++++ draft-4/draft-4/search.cwl | 2 +- draft-4/draft-4/stagefile-job.yml | 3 + draft-4/draft-4/stagefile.cwl | 17 ++ draft-4/draft-4/template-tool.cwl | 5 +- draft-4/salad/schema_salad/jsonld_context.py | 49 ++++ draft-4/salad/schema_salad/main.py | 13 +- .../metaschema/metaschema_base.yml | 23 +- draft-4/salad/schema_salad/ref_resolver.py | 5 +- draft-4/salad/schema_salad/schema.py | 6 +- draft-4/salad/setup.py | 2 +- draft-4/salad/tests/test_examples.py | 73 +++++ site/cwlsite-draft3-job.json | 6 +- site/cwlsite-draft3-schemas.json | 6 +- site/cwlsite-draft4-job.json | 6 +- site/cwlsite-draft4-schemas.json | 6 +- site/cwlsite.cwl | 161 +++++------ site/draft3-deps.json | 207 +++++--------- site/draft4-deps.json | 198 +++++-------- site/linkchecker.cwl | 81 ++---- site/makecontext.cwl | 14 +- site/makedoc.cwl | 32 ++- site/makerdfs.cwl | 14 +- site/makespec.cwl | 10 +- site/userguide-input.json | 266 +++++++++--------- 32 files changed, 844 insertions(+), 745 deletions(-) create mode 100644 draft-4/draft-4/scatter-valuefrom-wf5.cwl create mode 100644 draft-4/draft-4/stagefile-job.yml create mode 100644 draft-4/draft-4/stagefile.cwl diff --git a/draft-4/CommandLineTool.yml b/draft-4/CommandLineTool.yml index f33dadfc4..afa1f37e8 100644 --- a/draft-4/CommandLineTool.yml +++ b/draft-4/CommandLineTool.yml @@ -591,47 +591,62 @@ $graph: Docker container. 
-- name: DirentExt - type: record +- type: record + name: SoftwareRequirement + extends: "#ProcessRequirement" doc: | - Define a file or subdirectory that must be placed in the designated output - directory prior to executing the command line tool. May be the result of - executing an expression, such as building a configuration file from a - template. + Software that should be configured in the environment of the defined + process. + + While absolute portability between platforms can be achieved with container + technologies like Docker, this mechanism provides an inexact and imprecise + fallback. The inexact nature is due to this standard not specifying the + base operating environment of the process or how that environment is to + be configured as a result of these hints. The imprecise nature of these + hints is due to the `name` and `version` fields being unstructured and not + namespaced. The optional `identifier` field is recommended so that users + and platforms can distinguish between software that has the same name. fields: - - name: entryname - type: [string, Expression] - jsonldPredicate: - _id: cwl:entryname - doc: | - The name of the file or subdirectory to create in the output directory. - - name: entry - type: [string, Expression] - jsonldPredicate: - _id: cwl:entry - doc: | - If the value is a string literal or an expression which evaluates to a - string, a new file must be created with the string as the file contents. + - name: spec + type: + type: array + items: SoftwareSpec + doc: "The list of software to be configured." - If the value is an expression that evaluates to a `File` object, this - indicates the referenced file should be added to the designated output - directory prior to executing the tool. +- name: SoftwareSpec + type: record + fields: + - name: name + type: string + doc: "The name of the software to be configured." + jsonldPredicate: "@id" + - name: "version" + type: string? 
+ doc: "The (optional) version of the software to be configured." + - name: "identifier" + type: string? + doc: | + The (optional) identifier of the software to be configured. Should be an + IRI such as an [RRID](http://www.identifiers.org/rrid/SCR_001156). + Example: `http://identifiers.org/rrid/RRID:SCR_001156` - If the value is an expression that evaluates to a `Dirent` object, this - indicates that the File or Directory in `entry` should be added to the - designated output directory with the name in `entryname`. +- name: FileExt + type: record + extends: File + fields: + writable: + type: boolean? + doc: If true, the File must be writable. + jsonldPredicate: "cwl:writable" - If `writable` is false, the file may be made available using a bind - mount or file system link to avoid unnecessary copying of the input - file. - - name: writable +- name: DirectoryExt + type: record + extends: Directory + fields: + writable: type: boolean? - doc: | - If true, the file or directory must be writable by the tool. Changes - to the file or directory must be isolated and not visible by any other - CommandLineTool process. This may be implemented by making a copy of - the original file or directory. Default false (files and directories - read-only by default). + doc: If true, the Directory must be writable. + jsonldPredicate: "cwl:writable" - name: InitialWorkDirRequirement type: record @@ -644,19 +659,17 @@ $graph: - name: listing type: - type: array - items: [File, DirentExt] + items: [FileExt, DirectoryExt, string, Expression] - string - Expression jsonldPredicate: _id: "cwl:listing" - mapSubject: entryname - mapPredicate: entry doc: | The list of files or subdirectories that must be placed in the designated output directory prior to executing the command line tool. May be an expression. If so, the expression return value must validate - as `{type: array, items: [File, Dirent]}`. + as `{type: array, items: [File, Directory]}`. 
- name: EnvVarRequirement type: record diff --git a/draft-4/Process.yml b/draft-4/Process.yml index 351bab9d2..a0c877b19 100644 --- a/draft-4/Process.yml +++ b/draft-4/Process.yml @@ -65,7 +65,7 @@ $graph: _type: "@vocab" doc: Must be `File` to indicate this object describes a file. - name: location - type: string + type: ["null", string, Expression] doc: | A URI that identifies the file resource. This may be a relative reference, in which case it must be resolved using the base URI of the @@ -75,33 +75,51 @@ $graph: remote resource (due to unsupported protocol, access denied, or other issue) it must signal an error. + If the `location` field is not provided, the `contents` field must be + provided. The implementation must assign a unique identifier for + the `location` field. + If the `path` field is provided but the `location` field is not, an implementation may assign the value of the `path` field to `location`, then follow the rules above. + + Must be evaluated as an expression only when appearing in InitialWorkDirRequirement. jsonldPredicate: _id: "@id" _type: "@id" - name: path type: string? doc: | - The local path where the File is made available prior to executing a - CommandLineTool. This must be set by the implementation. This field + The local host path where the File is available when a CommandLineTool is + executed. This field must be set by the implementation. The final + path component must match the value of `basename`. This field must not be used in any other context. The command line tool being executed must be able to to access the file at `path` using the POSIX `open(2)` syscall. + + As a special case, if the `path` field is provided but the `location` + field is not, an implementation may assign the value of the `path` + field to `location`, and remove the `path` field. jsonldPredicate: "_id": "cwl:path" "_type": "@id" - name: basename - type: string? 
+ type: [string, Expression] doc: | - The base name of the file, that is, the path component following the - final slash in the path. + The base name of the file, that is, the name of the file without any + leading directory path. The base name must not contain a slash `/`. - The implementation must set this field based on the value of `path` - prior to evaluating parameter references or expressions in a - CommandLineTool document. This field must not be used in any other - context. + If not provided, the implementation must set this field based on the + `location` field by taking the final path component after parsing + `location` as an IRI. If `basename` is provided, it is not required to + match the value from `location`. + + When this file is made available to a CommandLineTool, it must be named + with `basename`, i.e. the final component of the `path` field must match + `basename`. + + Must be evaluated as an expression only when appearing in InitialWorkDirRequirement. + jsonldPredicate: "cwl:basename" - name: dirname type: string? doc: | @@ -121,10 +139,8 @@ $graph: period. Leading periods on the basename are ignored; a basename of `.cshrc` will have a nameroot of `.cshrc`. - The implementation must set this field based on the value of `path` - prior to evaluating parameter references or expressions in a - CommandLineTool document. This field must not be used in any other - context. + The implementation must set this field automatically based on the value + of `basename` prior to evaluating parameter references or expressions. - name: nameext type: string? doc: | @@ -133,10 +149,8 @@ $graph: period. Leading periods on the basename are ignored; a basename of `.cshrc` will have an empty `nameext`. - The implementation must set this field based on the value of `path` - prior to evaluating parameter references or expressions in a - CommandLineTool document. This field must not be used in any other - context. 
+ The implementation must set this field automatically based on the value + of `basename` prior to evaluating parameter references or expressions. - name: checksum type: ["null", string] doc: | @@ -149,7 +163,7 @@ $graph: type: - "null" - type: array - items: [File, Dirent] + items: [File, Directory] jsonldPredicate: "cwl:secondaryFiles" doc: | A list of additional files that are associated with the primary file @@ -179,26 +193,26 @@ $graph: File format ontologies may be provided in the "$schema" metadata at the root of the document. If no ontologies are specified in `$schema`, the runtime may perform exact file format matches. + - name: contents + type: ["null", string, Expression] + doc: | + File contents literal. Maximum of 64 KiB. + If neither `location` nor `path` is provided, `contents` must be + non-null. The implementation must assign a unique identifier for the + `location` field. When the file is staged as input to CommandLineTool, + the value of `contents` must be written to a file. + + If `loadContents` of `Binding` is true and `location` is valid, the + implementation must read up to the first 64 KiB of text from the file + and place it in the "contents" field. -- name: Dirent - type: record - fields: - - name: entryname - type: string - jsonldPredicate: - "_id": cwl:entryname - - name: entry - type: [File, Directory] - jsonldPredicate: - "_id": cwl:entry - name: Directory type: record docParent: "#CWLType" doc: | - Represents a directory to present to a command line tool. This could be a virtual - directory, made of files assembled from multiple locations. + Represents a directory to present to a command line tool. fields: - name: class type: @@ -211,7 +225,7 @@ $graph: _type: "@vocab" doc: Must be `Directory` to indicate this object describes a Directory. - name: location - type: string + type: string? doc: | A URI that identifies the directory resource. 
This may be a relative reference, in which case it must be resolved using the base URI of the @@ -222,6 +236,10 @@ $graph: unsupported protocol, access denied, or other issue) it must signal an error. + If the `location` field is not provided, the `listing` field must be + provided. The implementation must assign a unique identifier for + the `location` field. + If the `path` field is provided but the `location` field is not, an implementation may assign the value of the `path` field to `location`, then follow the rules above. @@ -239,21 +257,69 @@ $graph: jsonldPredicate: _id: "cwl:path" _type: "@id" + - name: basename + type: [string, Expression] + doc: | + The base name of the directory, that is, the name of the directory without any + leading directory path. The base name must not contain a slash `/`. + + If not provided, the implementation must set this field based on the + `location` field by taking the final path component after parsing + `location` as an IRI. If `basename` is provided, it is not required to + match the value from `location`. + + When this directory is made available to a CommandLineTool, it must be named + with `basename`, i.e. the final component of the `path` field must match + `basename`. + + Must be evaluated as an expression only when appearing in InitialWorkDirRequirement. + jsonldPredicate: "cwl:basename" - name: listing type: - "null" - type: array - items: [File, Dirent] - doc: List of files or subdirectories contained in this directory + items: [File, Directory] + - string + - Expression + doc: | + List of files or subdirectories contained in this directory. The name + of each file or subdirectory is determined by the `basename` field of + each `File` or `Directory` object. It is an error if a `File` shares a + `basename` with any other entry in `listing`. If two or more + `Directory` objects share the same `basename`, this must be treated as + equivalent to a single subdirectory with the listings recursively + merged.
+ + Must be evaluated as an expression only when appearing in InitialWorkDirRequirement. jsonldPredicate: _id: "cwl:listing" - mapSubject: entryname - mapPredicate: entry - name: SchemaBase type: record abstract: true + + +- name: Parameter + type: record + extends: "#SchemaBase" + abstract: true + doc: | + Define an input or output parameter to a process. + fields: + - name: label + type: + - "null" + - string + jsonldPredicate: "rdfs:label" + doc: "A short, human-readable label of this parameter object." + + - name: description + type: + - "null" + - string + jsonldPredicate: "rdfs:comment" + doc: "A long, human-readable description of this parameter object." - name: secondaryFiles type: - "null" @@ -312,29 +378,6 @@ $graph: pipe. Default: `false`. -- name: Parameter - type: record - extends: "#SchemaBase" - abstract: true - doc: | - Define an input or output parameter to a process. - - fields: - - name: label - type: - - "null" - - string - jsonldPredicate: "rdfs:label" - doc: "A short, human-readable label of this parameter object." - - - name: description - type: - - "null" - - string - jsonldPredicate: "rdfs:comment" - doc: "A long, human-readable description of this parameter object." - - - type: enum name: Expression doc: | diff --git a/draft-4/Workflow.yml b/draft-4/Workflow.yml index fbd1bef66..d1f00fc32 100644 --- a/draft-4/Workflow.yml +++ b/draft-4/Workflow.yml @@ -255,10 +255,12 @@ $graph: the value of the parameter(s) specified in the `source` field, or null if there is no `source` field. - The value of `inputs` in the parameter reference or expression is the - input object to the workflow step after assigning the `source` values, - but before evaluating any step with `valueFrom`. The order of - evaluating `valueFrom` among step input parameters is undefined. + The value of `inputs` in the parameter reference or expression must be + the input object to the workflow step after assigning the `source` + values and then scattering. 
The order of evaluating `valueFrom` among + step input parameters is undefined and the result of evaluating + `valueFrom` on a parameter must not be visible to evaluation of + `valueFrom` on other parameters. - type: record @@ -502,7 +504,7 @@ $graph: extends: ProcessRequirement doc: | Indicates that the workflow platform must support nested workflows in - the `run` field of (WorkflowStep)(#WorkflowStep). + the `run` field of [WorkflowStep](#WorkflowStep). - name: ScatterFeatureRequirement type: record diff --git a/draft-4/conformance_test_draft-4.yaml b/draft-4/conformance_test_draft-4.yaml index 7769640b8..7d4b31a6d 100644 --- a/draft-4/conformance_test_draft-4.yaml +++ b/draft-4/conformance_test_draft-4.yaml @@ -78,28 +78,24 @@ doc: Test command execution in Docker with stdout redirection - job: - output: { - "output_file": { - "checksum": "sha1$cec7b8746a78c42060c96505887449bca0142976", - "size": 84, - "location": "error.txt", - "class": "File" - } - } tool: draft-4/egrep-stderr.cwl doc: Test command line with stderr redirection + output: + output_file: + class: File + checksum: sha1$cec7b8746a78c42060c96505887449bca0142976 + size: 84 + location: error.txt - job: - output: { - "output_file": { - "checksum": "sha1$cec7b8746a78c42060c96505887449bca0142976", - "size": 84, - "location": "", - "class": "File" - } - } tool: draft-4/egrep-stderr-shortcut.cwl doc: Test command line with stderr redirection, brief syntax + output: + output_file: + class: File + checksum: sha1$cec7b8746a78c42060c96505887449bca0142976 + size: 84 + location: Any - output: output_file: @@ -690,6 +686,12 @@ tool: "draft-4/scatter-valuefrom-wf4.cwl#main" doc: Test workflow scatter with two scatter parameters and dotproduct join method and valueFrom on step input +- job: draft-4/scatter-valuefrom-job1.json + output: + out: ["foo one one", "foo two two", "foo three three", "foo four four"] + tool: draft-4/scatter-valuefrom-wf5.cwl + doc: Test workflow scatter with single scatter parameter and 
valueFrom on step input + - job: draft-4/conflict-job.json output: { "fileout": { @@ -771,3 +773,15 @@ } tool: draft-4/dir5.cwl doc: Test dynamic initial work dir + +- job: draft-4/stagefile-job.yml + output: { + "outfile": { + "checksum": "sha1$2d6c5e4430c4b200227cc77d3b0025082505ee19", + "size": 1107, + "location": "whale.txt", + "class": "File" + } + } + tool: draft-4/stagefile.cwl + doc: Test writable staged files. diff --git a/draft-4/draft-4/cat1-tool.cwl b/draft-4/draft-4/cat1-tool.cwl index 2ccde1553..4b14f7bda 100755 --- a/draft-4/draft-4/cat1-tool.cwl +++ b/draft-4/draft-4/cat1-tool.cwl @@ -5,6 +5,8 @@ description: "Print the contents of a file to stdout using 'cat' running in a do hints: DockerRequirement: dockerPull: debian:wheezy + SoftwareRequirement: + name: cat inputs: file1: type: File diff --git a/draft-4/draft-4/dir4-job.yml b/draft-4/draft-4/dir4-job.yml index 8da860d27..f4ef8c7d5 100644 --- a/draft-4/draft-4/dir4-job.yml +++ b/draft-4/draft-4/dir4-job.yml @@ -4,7 +4,6 @@ inf: secondaryFiles: - class: File location: index.py - - entryname: xtestdir - entry: - class: Directory - location: testdir + - class: Directory + basename: xtestdir + location: testdir diff --git a/draft-4/draft-4/rename.cwl b/draft-4/draft-4/rename.cwl index e97743ee0..f11b7bc82 100644 --- a/draft-4/draft-4/rename.cwl +++ b/draft-4/draft-4/rename.cwl @@ -4,8 +4,9 @@ baseCommand: "true" requirements: InitialWorkDirRequirement: listing: - - entryname: $(inputs.newname) - entry: $(inputs.srcfile) + - class: File + basename: $(inputs.newname) + location: $(inputs.srcfile.location) inputs: srcfile: File newname: string diff --git a/draft-4/draft-4/scatter-valuefrom-wf5.cwl b/draft-4/draft-4/scatter-valuefrom-wf5.cwl new file mode 100644 index 000000000..417ea4997 --- /dev/null +++ b/draft-4/draft-4/scatter-valuefrom-wf5.cwl @@ -0,0 +1,58 @@ +#!/usr/bin/env cwl-runner +cwlVersion: cwl:draft-4.dev3 +class: Workflow +inputs: + inp: + type: + type: array + items: + type: record + 
name: instr + fields: + - name: instr + type: string +outputs: + out: + type: + type: array + items: string + outputSource: step1/echo_out + +requirements: + - class: ScatterFeatureRequirement + - class: StepInputExpressionRequirement + +steps: + step1: + in: + echo_in: + source: inp + valueFrom: $(self.instr) + first: + source: inp + valueFrom: $(inputs.echo_in.instr) + out: [echo_out] + scatter: echo_in + run: + class: CommandLineTool + inputs: + first: + type: string + inputBinding: + position: 1 + echo_in: + type: string + inputBinding: + position: 2 + outputs: + echo_out: + type: string + outputBinding: + glob: "step1_out" + loadContents: true + outputEval: $(self[0].contents) + baseCommand: "echo" + arguments: + - "-n" + - "foo" + stdout: "step1_out" diff --git a/draft-4/draft-4/search.cwl b/draft-4/draft-4/search.cwl index dc32c0955..d6ed4341d 100644 --- a/draft-4/draft-4/search.cwl +++ b/draft-4/draft-4/search.cwl @@ -9,7 +9,7 @@ $graph: requirements: - class: InitialWorkDirRequirement listing: - input.txt: $(inputs.file) + - $(inputs.file) - class: InlineJavascriptRequirement inputs: diff --git a/draft-4/draft-4/stagefile-job.yml b/draft-4/draft-4/stagefile-job.yml new file mode 100644 index 000000000..b9852bce0 --- /dev/null +++ b/draft-4/draft-4/stagefile-job.yml @@ -0,0 +1,3 @@ +infile: + class: File + location: whale.txt \ No newline at end of file diff --git a/draft-4/draft-4/stagefile.cwl b/draft-4/draft-4/stagefile.cwl new file mode 100644 index 000000000..eedf736fa --- /dev/null +++ b/draft-4/draft-4/stagefile.cwl @@ -0,0 +1,17 @@ +class: CommandLineTool +cwlVersion: draft-4.dev3 +requirements: + InitialWorkDirRequirement: + listing: + - class: File + location: $(inputs.infile.location) + writable: true +inputs: + infile: File +outputs: + outfile: + type: File + outputBinding: + glob: $(inputs.infile.basename) +baseCommand: "sed" +arguments: ["-i", "s/Ishmael/Bob/", $(inputs.infile.basename)] diff --git a/draft-4/draft-4/template-tool.cwl 
b/draft-4/draft-4/template-tool.cwl index f7a00024f..c7d473d32 100755 --- a/draft-4/draft-4/template-tool.cwl +++ b/draft-4/draft-4/template-tool.cwl @@ -10,8 +10,9 @@ requirements: - "var t = function(s) { return _.template(s)({'inputs': inputs}); };" - class: InitialWorkDirRequirement listing: - foo.txt: > - $(t("The file is <%= inputs.file1.path.split('/').slice(-1)[0] %>\n")) + - class: File + basename: foo.txt + contents: $(t("The file is <%= inputs.file1.path.split('/').slice(-1)[0] %>\n")) inputs: - id: file1 type: File diff --git a/draft-4/salad/schema_salad/jsonld_context.py b/draft-4/salad/schema_salad/jsonld_context.py index 79d98333f..225b686f5 100755 --- a/draft-4/salad/schema_salad/jsonld_context.py +++ b/draft-4/salad/schema_salad/jsonld_context.py @@ -175,3 +175,52 @@ def salad_to_jsonld_context(j, schema_ctx): process_type(t, g, context, defaultBase, namespaces, defaultPrefix) return (context, g) + +def fix_jsonld_ids(obj, ids): + # type: (Union[Dict[unicode, Any], List[Dict[unicode, Any]]], List[unicode]) -> None + if isinstance(obj, dict): + for i in ids: + if i in obj: + obj["@id"] = obj[i] + for v in obj.values(): + fix_jsonld_ids(v, ids) + if isinstance(obj, list): + for entry in obj: + fix_jsonld_ids(entry, ids) + +def makerdf(workflow, wf, ctx): + # type: (Union[str, unicode], Union[List[Dict[unicode, Any]], Dict[unicode, Any]], Loader.ContextType) -> Graph + prefixes = {} + idfields = [] + for k,v in ctx.iteritems(): + if isinstance(v, dict): + url = v["@id"] + else: + url = v + if url == "@id": + idfields.append(k) + doc_url, frg = urlparse.urldefrag(url) + if "/" in frg: + p, _ = frg.split("/") + prefixes[p] = u"%s#%s/" % (doc_url, p) + + if isinstance(wf, list): + wf = { + "@context": ctx, + "@graph": wf + } + else: + wf["@context"] = ctx + + fix_jsonld_ids(wf, idfields) + + g = Graph().parse(data=json.dumps(wf), format='json-ld', location=workflow) + + # Bug in json-ld loader causes @id fields to be added to the graph + for s,p,o in 
g.triples((None, URIRef("@id"), None)): + g.remove((s, p, o)) + + for k2,v2 in prefixes.iteritems(): + g.namespace_manager.bind(k2, v2) + + return g diff --git a/draft-4/salad/schema_salad/main.py b/draft-4/salad/schema_salad/main.py index 109e225ba..3c9a634e5 100644 --- a/draft-4/salad/schema_salad/main.py +++ b/draft-4/salad/schema_salad/main.py @@ -23,9 +23,8 @@ def printrdf(workflow, wf, ctx, sr): - # type: (str, Union[List, Dict[Any, Any], str, unicode], Dict[unicode, Any], str) -> None - g = Graph().parse(data=json.dumps(wf), format='json-ld', - location=workflow, context=ctx) + # type: (str, Union[List[Dict[unicode, Any]], Dict[unicode, Any]], Dict[unicode, Any], str) -> None + g = jsonld_context.makerdf(workflow, wf, ctx) print(g.serialize(format=sr)) @@ -210,8 +209,12 @@ def main(argsl=None): # type: (List[str]) -> int # Optionally convert the document to RDF if args.print_rdf: - printrdf(args.document, document, schema_ctx, args.rdf_serializer) - return 0 + if isinstance(document, (dict, list)): + printrdf(args.document, document, schema_ctx, args.rdf_serializer) + return 0 + else: + print("Document must be a dictionary or list.") + return 1 if args.print_metadata: print(json.dumps(doc_metadata, indent=4)) diff --git a/draft-4/salad/schema_salad/metaschema/metaschema_base.yml b/draft-4/salad/schema_salad/metaschema/metaschema_base.yml index 42901a60e..9b5c2aebc 100644 --- a/draft-4/salad/schema_salad/metaschema/metaschema_base.yml +++ b/draft-4/salad/schema_salad/metaschema/metaschema_base.yml @@ -43,19 +43,19 @@ $graph: type: record doc: A field of a record. fields: - - name: name + name: type: string jsonldPredicate: "@id" doc: | The name of the field - - name: doc + doc: type: string? 
doc: | A documentation string for this field jsonldPredicate: "sld:doc" - - name: type + type: type: - PrimitiveType - RecordSchema @@ -81,7 +81,7 @@ $graph: - name: RecordSchema type: record fields: - - name: type + type: doc: "Must be `record`" type: name: Record_symbol @@ -93,9 +93,12 @@ $graph: _type: "@vocab" typeDSL: true refScope: 2 - - name: "fields" + fields: type: RecordField[]? - jsonldPredicate: "sld:fields" + jsonldPredicate: + _id: sld:fields + mapSubject: name + mapPredicate: type doc: "Defines the fields of the record." @@ -104,7 +107,7 @@ $graph: doc: | Define an enumerated type. fields: - - name: type + type: doc: "Must be `enum`" type: name: Enum_symbol @@ -116,7 +119,7 @@ $graph: _type: "@vocab" typeDSL: true refScope: 2 - - name: "symbols" + symbols: type: string[] jsonldPredicate: _id: "sld:symbols" @@ -128,7 +131,7 @@ $graph: - name: ArraySchema type: record fields: - - name: type + type: doc: "Must be `array`" type: name: Array_symbol @@ -140,7 +143,7 @@ $graph: _type: "@vocab" typeDSL: true refScope: 2 - - name: items + items: type: - PrimitiveType - RecordSchema diff --git a/draft-4/salad/schema_salad/ref_resolver.py b/draft-4/salad/schema_salad/ref_resolver.py index 7051dbc53..4c4bb9db0 100644 --- a/draft-4/salad/schema_salad/ref_resolver.py +++ b/draft-4/salad/schema_salad/ref_resolver.py @@ -135,8 +135,9 @@ def expand_url(self, url, base_url, scoped_id=False, vocab_term=False, scoped_re frg = splitbase.fragment + u"/" + split.path else: frg = split.path + pt = splitbase.path if splitbase.path else "/" url = urlparse.urlunsplit( - (splitbase.scheme, splitbase.netloc, splitbase.path, splitbase.query, frg)) + (splitbase.scheme, splitbase.netloc, pt, splitbase.query, frg)) elif scoped_ref is not None and not split.fragment: pass else: @@ -156,7 +157,7 @@ def _add_properties(self, s): # type: (unicode) -> None u"http://www.w3.org/2000/01/rdf-schema#Literal") if not literal: self.url_fields.add(unicode(s)) - self.foreign_properties.add(s) + 
self.foreign_properties.add(unicode(s)) def add_namespaces(self, ns): # type: (Dict[unicode, unicode]) -> None self.vocab.update(ns) diff --git a/draft-4/salad/schema_salad/schema.py b/draft-4/salad/schema_salad/schema.py index 4c997ba61..c90c3446b 100644 --- a/draft-4/salad/schema_salad/schema.py +++ b/draft-4/salad/schema_salad/schema.py @@ -97,7 +97,11 @@ def get_metaschema(): "@type": "@id", "refScope": 1 }, - "fields": "sld:fields", + "fields": { + "@id": "sld:fields", + "mapSubject": "name", + "mapPredicate": "type" + }, "float": "http://www.w3.org/2001/XMLSchema#float", "identity": "https://w3id.org/cwl/salad#JsonldPredicate/identity", "int": "http://www.w3.org/2001/XMLSchema#int", diff --git a/draft-4/salad/setup.py b/draft-4/salad/setup.py index 9c12ddaf3..e049f9f84 100755 --- a/draft-4/salad/setup.py +++ b/draft-4/salad/setup.py @@ -41,7 +41,7 @@ extras_require = {} # TODO: to be removed when the above is added setup(name='schema-salad', - version='1.12', + version='1.13', description='Schema Annotations for Linked Avro Data (SALAD)', long_description=open(README).read(), author='Common workflow language working group', diff --git a/draft-4/salad/tests/test_examples.py b/draft-4/salad/tests/test_examples.py index 36c823a6c..f49e9089b 100644 --- a/draft-4/salad/tests/test_examples.py +++ b/draft-4/salad/tests/test_examples.py @@ -2,8 +2,11 @@ import schema_salad.ref_resolver import schema_salad.main import schema_salad.schema +from schema_salad.jsonld_context import makerdf import rdflib import ruamel.yaml as yaml +import json + try: from ruamel.yaml import CSafeLoader as SafeLoader except ImportError: @@ -249,5 +252,75 @@ def test_typedsl_ref(self): ra, _ = ldr.resolve_all({"type": "File[]?"}, "") self.assertEqual({'type': ['null', {'items': 'File', 'type': 'array'}]}, ra) + def test_scoped_id(self): + ldr = schema_salad.ref_resolver.Loader({}) + ctx = { + "id": "@id", + "location": { + "@id": "@id", + "@type": "@id" + }, + "bar": 
"http://example.com/bar", + "ex": "http://example.com/" + } + ldr.add_context(ctx) + + ra, _ = ldr.resolve_all({ + "id": "foo", + "bar": { + "id": "baz" + } + }, "http://example.com") + self.assertEqual({'id': 'http://example.com/#foo', + 'bar': { + 'id': 'http://example.com/#foo/baz'}, + }, ra) + + g = makerdf(None, ra, ctx) + print(g.serialize(format="n3")) + + ra, _ = ldr.resolve_all({ + "location": "foo", + "bar": { + "location": "baz" + } + }, "http://example.com", checklinks=False) + self.assertEqual({'location': 'http://example.com/foo', + 'bar': { + 'location': 'http://example.com/baz'}, + }, ra) + + g = makerdf(None, ra, ctx) + print(g.serialize(format="n3")) + + ra, _ = ldr.resolve_all({ + "id": "foo", + "bar": { + "location": "baz" + } + }, "http://example.com", checklinks=False) + self.assertEqual({'id': 'http://example.com/#foo', + 'bar': { + 'location': 'http://example.com/baz'}, + }, ra) + + g = makerdf(None, ra, ctx) + print(g.serialize(format="n3")) + + ra, _ = ldr.resolve_all({ + "location": "foo", + "bar": { + "id": "baz" + } + }, "http://example.com", checklinks=False) + self.assertEqual({'location': 'http://example.com/foo', + 'bar': { + 'id': 'http://example.com/#baz'}, + }, ra) + + g = makerdf(None, ra, ctx) + print(g.serialize(format="n3")) + + if __name__ == '__main__': unittest.main() diff --git a/site/cwlsite-draft3-job.json b/site/cwlsite-draft3-job.json index 2311b68dd..24fc20202 100644 --- a/site/cwlsite-draft3-job.json +++ b/site/cwlsite-draft3-job.json @@ -50,11 +50,7 @@ "primtype": "#CWLType" }, { - "source": { - "class": "File", - "path": "../draft-3/salad/schema_salad/metaschema/metaschema.yml", - "secondaryFiles": {"$import": "draft3-deps.json"} - }, + "source": {$import: draft3-metaschema.json}, "target": "draft-3/SchemaSalad.html", "renderlist": [ "https://w3id.org/cwl/salad#Semantic_Annotations_for_Linked_Avro_Data", diff --git a/site/cwlsite-draft3-schemas.json b/site/cwlsite-draft3-schemas.json index 33c8f43cf..cc6f9cf27 
100644 --- a/site/cwlsite-draft3-schemas.json +++ b/site/cwlsite-draft3-schemas.json @@ -7,11 +7,7 @@ "context_target": "draft-3/cwl-context.json", "rdfs_target": "draft-3/cwl.ttl" }, { - "schema_in": { - "class": "File", - "path": "../draft-3/salad/schema_salad/metaschema/metaschema.yml", - "secondaryFiles": {"$import": "draft3-deps.json"} - }, + "schema_in": {$import: draft3-metaschema.json}, "context_target": "draft-3/salad-context.json", "rdfs_target": "draft-3/salad.ttl" }] diff --git a/site/cwlsite-draft4-job.json b/site/cwlsite-draft4-job.json index 273b5536e..1f3078d09 100644 --- a/site/cwlsite-draft4-job.json +++ b/site/cwlsite-draft4-job.json @@ -50,11 +50,7 @@ "primtype": "#CWLType" }, { - "source": { - "class": "File", - "path": "../draft-4/salad/schema_salad/metaschema/metaschema.yml", - "secondaryFiles": {"$import": "draft4-deps.json"} - }, + "source": {$import: draft4-metaschema.json}, "target": "draft-4/SchemaSalad.html", "renderlist": [ "https://w3id.org/cwl/salad#Semantic_Annotations_for_Linked_Avro_Data", diff --git a/site/cwlsite-draft4-schemas.json b/site/cwlsite-draft4-schemas.json index c01c31dd5..6681827f1 100644 --- a/site/cwlsite-draft4-schemas.json +++ b/site/cwlsite-draft4-schemas.json @@ -7,11 +7,7 @@ "context_target": "draft-4/cwl-context.json", "rdfs_target": "draft-4/cwl.ttl" }, { - "schema_in": { - "class": "File", - "path": "../draft-4/salad/schema_salad/metaschema/metaschema.yml", - "secondaryFiles": {"$import": "draft4-deps.json"} - }, + "schema_in": {$import: draft4-metaschema.json}, "context_target": "draft-4/salad-context.json", "rdfs_target": "draft-4/salad.ttl" } diff --git a/site/cwlsite.cwl b/site/cwlsite.cwl index d9af1e436..394c99e92 100755 --- a/site/cwlsite.cwl +++ b/site/cwlsite.cwl @@ -1,71 +1,41 @@ #!/usr/bin/env cwl-runner -cwlVersion: cwl:draft-3 +cwlVersion: cwl:draft-4.dev3 class: Workflow inputs: - - id: render + render: type: type: array items: - name: render type: record fields: - - name: source - type: File 
- - name: renderlist - type: - type: array - items: string - - name: redirect - type: - type: array - items: string - - name: target - type: string - - name: brandlink - type: string - - name: brandimg - type: string - - - id: schemas + source: File + renderlist: string[] + redirect: string[] + target: string + brandlink: string + brandimg: string + schemas: type: type: array items: - name: rdfs type: record fields: - - name: schema_in - type: File - - name: context_target - type: string - - name: rdfs_target - type: string - - id: brandimg - type: File - - id: empty + schema_in: File + context_target: string + rdfs_target: string + brandimg: File + empty: type: string default: "" outputs: - - id: doc_out - type: - type: array - items: File - source: ["#docs/out", "#brandimg"] - linkMerge: merge_flattened - - id: report + doc_out: type: File - source: "#report/out" - - id: context - type: - type: array - items: File - source: "#context/out" - - id: rdfs - type: - type: array - items: File - source: "#rdfs/out" + outputSource: merge/dir + report: + type: File + outputSource: report/out requirements: - class: ScatterFeatureRequirement @@ -73,62 +43,63 @@ requirements: - class: SubworkflowFeatureRequirement - class: MultipleInputFeatureRequirement - class: InlineJavascriptRequirement - expressionLib: - - $include: cwlpath.js hints: - class: DockerRequirement dockerPull: commonworkflowlanguage/cwltool_module steps: - - id: rdfs - inputs: - - {id: schema, source: "#schemas", valueFrom: $(self.schema_in) } - - {id: target, source: "#schemas", valueFrom: $(self.rdfs_target) } - outputs: - - { id: out } - scatter: ["#rdfs/schema", "#rdfs/target"] - scatterMethod: dotproduct + rdfs: + scatter: schemas + in: + schemas: schemas + schema: { valueFrom: $(inputs.schemas.schema_in) } + target: { valueFrom: $(inputs.schemas.rdfs_target) } + out: [out, targetdir] run: makerdfs.cwl - - id: context - inputs: - - {id: schema, source: "#schemas", valueFrom: $(self.schema_in) } - - {id: 
target, source: "#schemas", valueFrom: $(self.context_target) } - outputs: - - { id: out } - scatter: ["#context/schema", "#context/target"] - scatterMethod: dotproduct + context: + scatter: schemas + in: + schemas: schemas + schema: { valueFrom: $(inputs.schemas.schema_in) } + target: { valueFrom: $(inputs.schemas.context_target) } + out: [out, targetdir] run: makecontext.cwl - - id: docs - inputs: - - { id: source, source: "#render", valueFrom: $(self.source) } - - { id: target, source: "#render", valueFrom: $(self.target) } - - { id: renderlist, source: "#render", valueFrom: $(self.renderlist) } - - { id: redirect, source: "#render", valueFrom: $(self.redirect) } - - { id: brandlink, source: "#render", valueFrom: $(self.brandlink) } - - { id: brand, source: "#render", valueFrom: $(self.brandimg) } - - { id: primtype, source: "#render", valueFrom: $(self.primtype) } - outputs: - - { id: out } - - { id: targetdir } - scatter: - - "#docs/source" - - "#docs/target" - - "#docs/renderlist" - - "#docs/redirect" - - "#docs/brandlink" - - "#docs/primtype" - - "#docs/brand" - scatterMethod: dotproduct + docs: + scatter: render + in: + render: render + source: { valueFrom: $(inputs.render.source) } + target: { valueFrom: $(inputs.render.target) } + renderlist: { valueFrom: $(inputs.render.renderlist) } + redirect: { valueFrom: $(inputs.render.redirect) } + brandlink: { valueFrom: $(inputs.render.brandlink) } + brand: { valueFrom: $(inputs.render.brandimg) } + primtype: { valueFrom: $(inputs.render.primtype) } + out: [out, targetdir] run: makedoc.cwl - - id: report - inputs: - - {id: inp, source: ["#docs/out", "#brandimg"], linkMerge: merge_flattened } - - {id: dirs, source: ["#docs/targetdir", "#empty"], linkMerge: merge_flattened } - - {id: target, default: "linkchecker-report.txt"} - outputs: - - id: out + merge: + in: + primary: + source: docs/out + valueFrom: $(self[0]) + secondary: + source: [docs/out, rdfs/out, context/out, brandimg] + linkMerge: merge_flattened + 
valueFrom: $(self.slice(1)) + dirs: + source: [docs/targetdir, rdfs/targetdir, context/targetdir, empty] + linkMerge: merge_flattened + valueFrom: $(self.slice(1)) + out: [dir] + run: mergesecondary.cwl + + report: + in: + inp: merge/dir + target: { default: "linkchecker-report.txt"} + out: [out] run: linkchecker.cwl diff --git a/site/draft3-deps.json b/site/draft3-deps.json index 4cdcd0e2f..98b90f6e8 100644 --- a/site/draft3-deps.json +++ b/site/draft3-deps.json @@ -1,138 +1,77 @@ [ - { - "path": "../draft-3/Process.yml", - "class": "File", - "secondaryFiles": [ - { - "path": "../draft-3/concepts.md", - "class": "File" - }, - { - "path": "../draft-3/salad/schema_salad/metaschema/metaschema.yml", - "class": "File", - "secondaryFiles": [ - { - "path": "../draft-3/salad/schema_salad/metaschema/salad.md", - "class": "File" - }, - { - "path": "../draft-3/salad/schema_salad/metaschema/field_name.yml", - "class": "File", - "secondaryFiles": [ + { + "secondaryFiles": [ + { + "class": "File", + "location": "../draft-3/concepts.md" + }, + { + "entry": { + "class": "Directory", + "listing": [ { - "path": "../draft-3/salad/schema_salad/metaschema/field_name_schema.yml", - "class": "File" - }, - { - "path": "../draft-3/salad/schema_salad/metaschema/field_name_src.yml", - "class": "File" - }, - { - "path": "../draft-3/salad/schema_salad/metaschema/field_name_proc.yml", - "class": "File" - } - ] - }, - { - "path": "../draft-3/salad/schema_salad/metaschema/ident_res.yml", - "class": "File", - "secondaryFiles": [ - { - "path": "../draft-3/salad/schema_salad/metaschema/ident_res_schema.yml", - "class": "File" - }, - { - "path": "../draft-3/salad/schema_salad/metaschema/ident_res_src.yml", - "class": "File" - }, - { - "path": "../draft-3/salad/schema_salad/metaschema/ident_res_proc.yml", - "class": "File" - } - ] - }, - { - "path": "../draft-3/salad/schema_salad/metaschema/link_res.yml", - "class": "File", - "secondaryFiles": [ - { - "path": 
"../draft-3/salad/schema_salad/metaschema/link_res_schema.yml", - "class": "File" - }, - { - "path": "../draft-3/salad/schema_salad/metaschema/link_res_src.yml", - "class": "File" - }, - { - "path": "../draft-3/salad/schema_salad/metaschema/link_res_proc.yml", - "class": "File" - } - ] - }, - { - "path": "../draft-3/salad/schema_salad/metaschema/vocab_res.yml", - "class": "File", - "secondaryFiles": [ - { - "path": "../draft-3/salad/schema_salad/metaschema/vocab_res_schema.yml", - "class": "File" - }, - { - "path": "../draft-3/salad/schema_salad/metaschema/vocab_res_src.yml", - "class": "File" - }, - { - "path": "../draft-3/salad/schema_salad/metaschema/vocab_res_proc.yml", - "class": "File" + "entry": { + "class": "Directory", + "listing": [ + { + "entry": { + "class": "Directory", + "listing": [ + {$import: draft3-metaschema.json} + ] + }, + "entryname": "metaschema" + } + ] + }, + "entryname": "schema_salad" } ] - }, - { - "path": "../draft-3/salad/schema_salad/metaschema/import_include.md", - "class": "File" - } - ] - } - ] - }, - { - "path": "../draft-3/CommandLineTool.yml", - "class": "File", - "secondaryFiles": [ - { - "path": "../draft-3/contrib.md", - "class": "File" - }, - { - "path": "../draft-3/intro.md", - "class": "File" - }, - { - "path": "../draft-3/concepts.md", - "class": "File" - }, - { - "path": "../draft-3/invocation.md", - "class": "File" - } - ] - }, - { - "path": "../draft-3/Workflow.yml", - "class": "File", - "secondaryFiles": [ - { - "path": "../draft-3/contrib.md", - "class": "File" - }, - { - "path": "../draft-3/intro.md", - "class": "File" - }, - { - "path": "../draft-3/concepts.md", - "class": "File" - } - ] - } -] + }, + "entryname": "salad" + } + ], + "class": "File", + "location": "../draft-3/Process.yml" + }, + { + "secondaryFiles": [ + { + "class": "File", + "location": "../draft-3/contrib.md" + }, + { + "class": "File", + "location": "../draft-3/intro.md" + }, + { + "class": "File", + "location": "../draft-3/concepts.md" + }, + { 
+ "class": "File", + "location": "../draft-3/invocation.md" + } + ], + "class": "File", + "location": "../draft-3/CommandLineTool.yml" + }, + { + "secondaryFiles": [ + { + "class": "File", + "location": "../draft-3/contrib.md" + }, + { + "class": "File", + "location": "../draft-3/intro.md" + }, + { + "class": "File", + "location": "../draft-3/concepts.md" + } + ], + "class": "File", + "location": "../draft-3/Workflow.yml" + } + ] diff --git a/site/draft4-deps.json b/site/draft4-deps.json index 157ccb1ec..3cddeb186 100644 --- a/site/draft4-deps.json +++ b/site/draft4-deps.json @@ -1,126 +1,80 @@ [ { - "path": "../draft-4/Process.yml", - "class": "File" - }, - { - "path": "../draft-4/concepts.md", - "class": "File" - }, - { - "path": "../draft-4/salad/schema_salad/metaschema/metaschema.yml", - "class": "File" - }, - { - "path": "../draft-4/salad/schema_salad/metaschema/metaschema_base.yml", - "class": "File" - }, - { - "path": "../draft-4/salad/schema_salad/metaschema/salad.md", - "class": "File" - }, - { - "path": "../draft-4/salad/schema_salad/metaschema/field_name.yml", - "class": "File" - }, - { - "path": "../draft-4/salad/schema_salad/metaschema/field_name_schema.yml", - "class": "File" - }, - { - "path": "../draft-4/salad/schema_salad/metaschema/field_name_src.yml", - "class": "File" - }, - { - "path": "../draft-4/salad/schema_salad/metaschema/field_name_proc.yml", - "class": "File" - }, - { - "path": "../draft-4/salad/schema_salad/metaschema/ident_res.yml", - "class": "File" - }, - { - "path": "../draft-4/salad/schema_salad/metaschema/ident_res_schema.yml", - "class": "File" - }, - { - "path": "../draft-4/salad/schema_salad/metaschema/ident_res_src.yml", - "class": "File" - }, - { - "path": "../draft-4/salad/schema_salad/metaschema/ident_res_proc.yml", - "class": "File" - }, - { - "path": "../draft-4/salad/schema_salad/metaschema/link_res.yml", - "class": "File" - }, - { - "path": "../draft-4/salad/schema_salad/metaschema/link_res_schema.yml", - "class": 
"File" - }, - { - "path": "../draft-4/salad/schema_salad/metaschema/link_res_src.yml", - "class": "File" - }, - { - "path": "../draft-4/salad/schema_salad/metaschema/link_res_proc.yml", - "class": "File" - }, - { - "path": "../draft-4/salad/schema_salad/metaschema/vocab_res.yml", - "class": "File" - }, - { - "path": "../draft-4/salad/schema_salad/metaschema/vocab_res_schema.yml", - "class": "File" - }, - { - "path": "../draft-4/salad/schema_salad/metaschema/vocab_res_src.yml", - "class": "File" - }, - { - "path": "../draft-4/salad/schema_salad/metaschema/vocab_res_proc.yml", - "class": "File" - }, - { - "path": "../draft-4/salad/schema_salad/metaschema/import_include.md", - "class": "File" - }, - { - "path": "../draft-4/CommandLineTool.yml", - "class": "File" - }, - { - "path": "../draft-4/contrib.md", - "class": "File" - }, - { - "path": "../draft-4/intro.md", - "class": "File" - }, - { - "path": "../draft-4/concepts.md", - "class": "File" - }, - { - "path": "../draft-4/invocation.md", - "class": "File" - }, - { - "path": "../draft-4/Workflow.yml", - "class": "File" - }, - { - "path": "../draft-4/contrib.md", - "class": "File" - }, - { - "path": "../draft-4/intro.md", - "class": "File" - }, - { - "path": "../draft-4/concepts.md", - "class": "File" + "secondaryFiles": [ + { + "class": "File", + "location": "../draft-4/concepts.md" + }, + { + "entry": { + "class": "Directory", + "listing": [ + { + "entry": { + "class": "Directory", + "listing": [ + { + "entry": { + "class": "Directory", + "listing": [ + { + "class": "File", + "location": "../draft-4/salad/schema_salad/metaschema/metaschema_base.yml" + } + ] + }, + "entryname": "metaschema" + } + ] + }, + "entryname": "schema_salad" + } + ] + }, + "entryname": "salad" + } + ], + "class": "File", + "location": "../draft-4/Process.yml" + }, + { + "secondaryFiles": [ + { + "class": "File", + "location": "../draft-4/contrib.md" + }, + { + "class": "File", + "location": "../draft-4/intro.md" + }, + { + "class": "File", + 
"location": "../draft-4/concepts.md" + }, + { + "class": "File", + "location": "../draft-4/invocation.md" + } + ], + "class": "File", + "location": "../draft-4/CommandLineTool.yml" + }, + { + "secondaryFiles": [ + { + "class": "File", + "location": "../draft-4/contrib.md" + }, + { + "class": "File", + "location": "../draft-4/intro.md" + }, + { + "class": "File", + "location": "../draft-4/concepts.md" + } + ], + "class": "File", + "location": "../draft-4/Workflow.yml" } ] diff --git a/site/linkchecker.cwl b/site/linkchecker.cwl index ec5f3eecc..59218329f 100644 --- a/site/linkchecker.cwl +++ b/site/linkchecker.cwl @@ -1,76 +1,29 @@ class: CommandLineTool -cwlVersion: draft-3 -requirements: - - class: ShellCommandRequirement - - class: InlineJavascriptRequirement - expressionLib: - - $include: cwlpath.js +cwlVersion: draft-4.dev3 hints: - - class: DockerRequirement + DockerRequirement: dockerFile: | FROM debian:8 RUN apt-get update && \ DEBIAN_FRONTEND=noninteractive apt-get -yq install w3c-linkchecker \ dockerImageId: commonworkflowlanguage/checklink - inputs: - - id: inp - type: - type: array - items: File - - id: dirs - type: - type: array - items: string - - id: target - type: string + inp: + type: File + inputBinding: {position: 1} + target: string outputs: - - id: out + out: type: File outputBinding: glob: $(inputs.target) -baseCommand: [] -arguments: - - "mkdir" - - "-p" - - valueFrom: | - ${ - var r = []; - for (var i=0; i < inputs.dirs.length; i++) { - if (inputs.dirs[i] != "") { - r.push(inputs.dirs[i]); - } - } - return r; - } - - {valueFrom: "&&", shellQuote: false} - - valueFrom: | - ${ - var r = []; - for (var i=0; i < inputs.inp.length; i++) { - if (i > 0) { - r.push("&&"); - } - r.push("ln"); - r.push("-s"); - r.push(inputs.inp[i].path); - r.push(runtime.outdir + "/" + inputs.dirs[i]); - } - return r; - } - - {valueFrom: "&&", shellQuote: false} - - "checklink" - - "-X(http.*|mailto:.*)" - - "-q" - - valueFrom: | - ${ - var r = []; - for (var i=0; i < 
inputs.inp.length; i++) { - r.push(cwl.path.basename(inputs.inp[i].path)); - } - return r; - } - - {valueFrom: " > ", shellQuote: false} - - valueFrom: $(inputs.target) - - {valueFrom: " && ! test -s", shellQuote: false} - - valueFrom: $(inputs.target) \ No newline at end of file + loadContents: true + #outputEval: | + # ${ + # return if (self.contents.length > 0) { + # + # } + # } +baseCommand: checklink +arguments: ["-X(http.*|mailto:.*)", "-q"] +stdout: $(inputs.target) diff --git a/site/makecontext.cwl b/site/makecontext.cwl index ae025e14f..bdf07c2c4 100644 --- a/site/makecontext.cwl +++ b/site/makecontext.cwl @@ -1,15 +1,15 @@ -cwlVersion: "cwl:draft-3" +cwlVersion: draft-4.dev3 class: CommandLineTool inputs: - - id: schema + schema: type: File inputBinding: {position: 1} - - id: target - type: string + target: string outputs: - - id: out - type: File + out: stdout + targetdir: + type: string outputBinding: - glob: $(inputs.target) + outputEval: $(inputs.target.match(/^([^/]+)\/[^/]/)[1]) baseCommand: [python, "-mschema_salad", "--print-jsonld-context"] stdout: $(inputs.target) diff --git a/site/makedoc.cwl b/site/makedoc.cwl index c046800c0..ad8004b07 100644 --- a/site/makedoc.cwl +++ b/site/makedoc.cwl @@ -1,46 +1,50 @@ -cwlVersion: "cwl:draft-3" +cwlVersion: draft-4.dev3 class: CommandLineTool requirements: - class: InlineJavascriptRequirement expressionLib: - $include: cwlpath.js inputs: - - id: source + source: type: File inputBinding: {position: 1} - - id: renderlist + renderlist: type: - "null" - type: array items: string inputBinding: {prefix: "--only"} inputBinding: {position: 2} - - id: redirect + redirect: type: - "null" - type: array items: string inputBinding: {prefix: "--redirect"} inputBinding: {position: 2} - - id: brand + brand: type: string inputBinding: {prefix: "--brand"} - - id: brandlink + brandlink: type: string inputBinding: {prefix: "--brandlink"} - - id: target + target: type: string - - id: primtype + primtype: type: ["null", 
string] inputBinding: {prefix: "--primtype"} outputs: - - id: out - type: File - outputBinding: - glob: $(inputs.target) - - id: targetdir + out: stdout + targetdir: type: string outputBinding: - outputEval: $(cwl.path.dirname(inputs.target)) + outputEval: | + ${ + var m = inputs.target.match(/^([^/]+)\/[^/]/); + if (m) + return m[1]; + else + return ""; + } baseCommand: [python, "-mschema_salad.makedoc"] stdout: $(inputs.target) diff --git a/site/makerdfs.cwl b/site/makerdfs.cwl index afbb1e121..d8b59d29f 100644 --- a/site/makerdfs.cwl +++ b/site/makerdfs.cwl @@ -1,15 +1,15 @@ -cwlVersion: "cwl:draft-3" +cwlVersion: draft-4.dev3 class: CommandLineTool inputs: - - id: schema + schema: type: File inputBinding: {position: 1} - - id: target - type: string + target: string outputs: - - id: out - type: File + out: stdout + targetdir: + type: string outputBinding: - glob: $(inputs.target) + outputEval: $(inputs.target.match(/^([^/]+)\/[^/]/)[1]) baseCommand: [python, "-mschema_salad", "--print-rdfs"] stdout: $(inputs.target) diff --git a/site/makespec.cwl b/site/makespec.cwl index 0eb34ce32..f2fe8bba1 100644 --- a/site/makespec.cwl +++ b/site/makespec.cwl @@ -1,13 +1,13 @@ -cwlVersion: "cwl:draft-3" +cwlVersion: draft-4.dev3 class: Workflow inputs: - - {id: "#schema_in", type: string} - - {id: "#context_target", type: string} - - {id: "#rdfs_target", type: string} + schema_in: string + context_target: string + rdfs_target: string outputs: - - id: index_out + index_out: type: File source: "#doc/out" diff --git a/site/userguide-input.json b/site/userguide-input.json index f393dad56..86e500051 100644 --- a/site/userguide-input.json +++ b/site/userguide-input.json @@ -1,134 +1,142 @@ { - "path": "../draft-3/UserGuide.yml", - "class": "File", "secondaryFiles": [ { - "path": "../draft-3/userguide-intro.md", - "class": "File" - }, - { - "path": "../draft-3/examples/1st-tool.cwl", - "class": "File" - }, - { - "path": "../draft-3/examples/echo-job.yml", - "class": "File" - }, - { 
- "path": "../draft-3/examples/inp.cwl", - "class": "File" - }, - { - "path": "../draft-3/examples/inp-job.yml", - "class": "File" - }, - { - "path": "../draft-3/examples/tar.cwl", - "class": "File" - }, - { - "path": "../draft-3/examples/tar-job.yml", - "class": "File" - }, - { - "path": "../draft-3/examples/stdout.cwl", - "class": "File" - }, - { - "path": "../draft-3/examples/echo-job.yml", - "class": "File" - }, - { - "path": "../draft-3/examples/tar-param.cwl", - "class": "File" - }, - { - "path": "../draft-3/examples/tar-param-job.yml", - "class": "File" - }, - { - "path": "../draft-3/examples/docker.cwl", - "class": "File" - }, - { - "path": "../draft-3/examples/docker-job.yml", - "class": "File" - }, - { - "path": "../draft-3/examples/arguments.cwl", - "class": "File" - }, - { - "path": "../draft-3/examples/arguments-job.yml", - "class": "File" - }, - { - "path": "../draft-3/examples/array-inputs.cwl", - "class": "File" - }, - { - "path": "../draft-3/examples/array-inputs-job.yml", - "class": "File" - }, - { - "path": "../draft-3/examples/array-outputs.cwl", - "class": "File" - }, - { - "path": "../draft-3/examples/array-outputs-job.yml", - "class": "File" - }, - { - "path": "../draft-3/examples/record.cwl", - "class": "File" - }, - { - "path": "../draft-3/examples/record-job1.yml", - "class": "File" - }, - { - "path": "../draft-3/examples/record-job2.yml", - "class": "File" - }, - { - "path": "../draft-3/examples/record-job2.yml", - "class": "File" - }, - { - "path": "../draft-3/examples/env.cwl", - "class": "File" - }, - { - "path": "../draft-3/examples/echo-job.yml", - "class": "File" - }, - { - "path": "../draft-3/examples/expression.cwl", - "class": "File" - }, - { - "path": "../draft-3/examples/createfile.cwl", - "class": "File" - }, - { - "path": "../draft-3/examples/echo-job.yml", - "class": "File" - }, - { - "path": "../draft-3/examples/linkfile.cwl", - "class": "File" - }, - { - "path": "../draft-3/examples/arguments-job.yml", - "class": "File" - 
}, - { - "path": "../draft-3/examples/1st-workflow.cwl", - "class": "File" - }, - { - "path": "../draft-3/examples/1st-workflow-job.yml", - "class": "File" + "class": "File", + "location": "../draft-3/userguide-intro.md" + }, + { + "entry": { + "class": "Directory", + "listing": [ + { + "class": "File", + "location": "../draft-3/examples/1st-tool.cwl" + }, + { + "class": "File", + "location": "../draft-3/examples/echo-job.yml" + }, + { + "class": "File", + "location": "../draft-3/examples/inp.cwl" + }, + { + "class": "File", + "location": "../draft-3/examples/inp-job.yml" + }, + { + "class": "File", + "location": "../draft-3/examples/tar.cwl" + }, + { + "class": "File", + "location": "../draft-3/examples/tar-job.yml" + }, + { + "class": "File", + "location": "../draft-3/examples/stdout.cwl" + }, + { + "class": "File", + "location": "../draft-3/examples/echo-job.yml" + }, + { + "class": "File", + "location": "../draft-3/examples/tar-param.cwl" + }, + { + "class": "File", + "location": "../draft-3/examples/tar-param-job.yml" + }, + { + "class": "File", + "location": "../draft-3/examples/docker.cwl" + }, + { + "class": "File", + "location": "../draft-3/examples/docker-job.yml" + }, + { + "class": "File", + "location": "../draft-3/examples/arguments.cwl" + }, + { + "class": "File", + "location": "../draft-3/examples/arguments-job.yml" + }, + { + "class": "File", + "location": "../draft-3/examples/array-inputs.cwl" + }, + { + "class": "File", + "location": "../draft-3/examples/array-inputs-job.yml" + }, + { + "class": "File", + "location": "../draft-3/examples/array-outputs.cwl" + }, + { + "class": "File", + "location": "../draft-3/examples/array-outputs-job.yml" + }, + { + "class": "File", + "location": "../draft-3/examples/record.cwl" + }, + { + "class": "File", + "location": "../draft-3/examples/record-job1.yml" + }, + { + "class": "File", + "location": "../draft-3/examples/record-job2.yml" + }, + { + "class": "File", + "location": 
"../draft-3/examples/record-job2.yml" + }, + { + "class": "File", + "location": "../draft-3/examples/env.cwl" + }, + { + "class": "File", + "location": "../draft-3/examples/echo-job.yml" + }, + { + "class": "File", + "location": "../draft-3/examples/expression.cwl" + }, + { + "class": "File", + "location": "../draft-3/examples/createfile.cwl" + }, + { + "class": "File", + "location": "../draft-3/examples/echo-job.yml" + }, + { + "class": "File", + "location": "../draft-3/examples/linkfile.cwl" + }, + { + "class": "File", + "location": "../draft-3/examples/arguments-job.yml" + }, + { + "class": "File", + "location": "../draft-3/examples/1st-workflow.cwl" + }, + { + "class": "File", + "location": "../draft-3/examples/1st-workflow-job.yml" + } + ] + }, + "entryname": "examples" } - ] + ], + "class": "File", + "location": "../draft-3/UserGuide.yml" } \ No newline at end of file From 51747877c5ce8386527e9cb3403f826035ceea11 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 4 Jul 2016 01:14:58 -0400 Subject: [PATCH 03/11] Get cwltest from master. 
--- cwltool/cwltest.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/cwltool/cwltest.py b/cwltool/cwltest.py index a7b7f95ff..d3a575eab 100755 --- a/cwltool/cwltest.py +++ b/cwltool/cwltest.py @@ -30,13 +30,18 @@ def compare(a, b): # type: (Any, Any) -> bool try: if isinstance(a, dict): if a.get("class") == "File": - if a["path"] == "Any" or b["path"] == "Any": + if "path" in a: + comp = "path" + else: + comp = "location" + if a[comp] == "Any" or b[comp] == "Any": return True - if not (b["path"].endswith("/" + a["path"]) or ("/" not in b["path"] and a["path"] == b["path"])): - raise CompareFail(u"%s does not end with %s" %(b["path"], a["path"])) + if a[comp] and (not (b[comp].endswith("/" + a[comp]) + or ("/" not in b[comp] and a[comp] == b[comp]))): + raise CompareFail(u"%s does not end with %s" %(b[comp], a[comp])) # ignore empty collections b = {k: v for k, v in b.iteritems() - if not isinstance(v, (list, dict)) or len(v) > 0} + if not isinstance(v, (list, dict)) or len(v) > 0} elif a.get("class") == "Directory": if len(a["listing"]) != len(b["listing"]): return False From 7a8b38205c91808306600f9bbba0b12429119545 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 4 Jul 2016 01:52:47 -0400 Subject: [PATCH 04/11] Putting Dirent back in for InitialWorkDir --- cwltool/builder.py | 6 ++++-- cwltool/cwltest.py | 4 ++-- cwltool/draft2tool.py | 26 +++++++++++++++++++++++++- cwltool/pathmapper.py | 16 ++++++++++++++++ cwltool/process.py | 24 +++--------------------- cwltool/update.py | 4 ++-- 6 files changed, 52 insertions(+), 28 deletions(-) diff --git a/cwltool/builder.py b/cwltool/builder.py index 8f6e8fca9..6278cd3bf 100644 --- a/cwltool/builder.py +++ b/cwltool/builder.py @@ -6,7 +6,7 @@ from typing import Any, Union, AnyStr, Callable from .errors import WorkflowException from .stdfsaccess import StdFsAccess -from .pathmapper import PathMapper, adjustFileObjs, adjustDirObjs +from .pathmapper import PathMapper, adjustFileObjs, 
adjustDirObjs, normalizeFilesDirs CONTENT_LIMIT = 64 * 1024 @@ -112,7 +112,8 @@ def bind_input(self, schema, datum, lead_pos=[], tail_pos=[]): if isinstance(sf, dict) or "$(" in sf or "${" in sf: secondary_eval = self.do_eval(sf, context=datum) if isinstance(secondary_eval, basestring): - sfpath = {"location": secondary_eval, "class": "File"} + sfpath = {"location": secondary_eval, + "class": "File"} else: sfpath = secondary_eval else: @@ -121,6 +122,7 @@ def bind_input(self, schema, datum, lead_pos=[], tail_pos=[]): datum["secondaryFiles"].extend(sfpath) else: datum["secondaryFiles"].append(sfpath) + normalizeFilesDirs(datum["secondaryFiles"]) def _capture_files(f): self.files.append(f) diff --git a/cwltool/cwltest.py b/cwltool/cwltest.py index d3a575eab..18d850773 100755 --- a/cwltool/cwltest.py +++ b/cwltool/cwltest.py @@ -58,9 +58,9 @@ def compare(a, b): # type: (Any, Any) -> bool raise CompareFail(u"%s not in %s" % (json.dumps(i, indent=4, sort_keys=True), json.dumps(b, indent=4, sort_keys=True))) a = {k: v for k, v in a.iteritems() - if k not in ("path", "location", "listing")} + if k not in ("path", "location", "listing", "basename")} b = {k: v for k, v in b.iteritems() - if k not in ("path", "location", "listing")} + if k not in ("path", "location", "listing", "basename")} if len(a) != len(b): raise CompareFail(u"expected %s\ngot %s" % (json.dumps(a, indent=4, sort_keys=True), json.dumps(b, indent=4, sort_keys=True))) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index b8f9dc2f6..f670a38cf 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -283,7 +283,31 @@ def rm_pending_output_callback(output_callback, jobcachepending, initialWorkdir = self.get_requirement("InitialWorkDirRequirement")[0] j.generatefiles = {"class": "Directory", "listing": [], "basename": ""} if initialWorkdir: - j.generatefiles["listing"] = builder.do_eval(initialWorkdir["listing"], recursive=True) + ls = [] + if isinstance(initialWorkdir["listing"], (str, 
unicode)): + ls = builder.do_eval(initialWorkdir["listing"]) + else: + for t in initialWorkdir["listing"]: + if "entry" in t: + ls.append({ + "entryname": builder.do_eval(t["entryname"]), + "entry": builder.do_eval(t["entry"]) + }) + else: + ls.append(t) + for i,t in enumerate(ls): + if "entry" in t: + if isinstance(t["entry"], (str, unicode)): + ls[i] = { + "class": "File", + "basename": t["entryname"], + "contents": t["entry"] + } + else: + t["entry"]["basename"] = t["entryname"] + ls[i] = t["entry"] + j.generatefiles["listing"] = ls + normalizeFilesDirs(j.generatefiles) j.environment = {} diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py index 6be7f9487..51f9304d8 100644 --- a/cwltool/pathmapper.py +++ b/cwltool/pathmapper.py @@ -3,6 +3,7 @@ import stat import collections import uuid +import urlparse from typing import Tuple, Set, Union, Any _logger = logging.getLogger("cwltool") @@ -45,6 +46,21 @@ def adjustDirObjs(rec, op): # type: (Any, Callable[[Any], Any]) -> None for d in rec: adjustDirObjs(d, op) +def normalizeFilesDirs(job): + def addLocation(d): + if "location" not in d: + if d["class"] == "File" and ("contents" not in d or "basename" not in d): + raise validate.ValidationException("Anonymous file object must have 'contents' and 'basename' fields.") + if d["class"] == "Directory" and ("listing" not in d or "basename" not in d): + raise validate.ValidationException("Anonymous directory object must have 'listing' and 'basename' fields.") + d["location"] = "_:" + unicode(uuid.uuid4()) + elif "basename" not in d: + parse = urlparse.urlparse(d["location"]) + d["basename"] = os.path.basename(parse.path) + + adjustFileObjs(job, addLocation) + adjustDirObjs(job, addLocation) + def abspath(src, basedir): # type: (unicode, unicode) -> unicode if src.startswith(u"file://"): diff --git a/cwltool/process.py b/cwltool/process.py index a6951357b..eebac88c6 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -30,7 +30,7 @@ from .stdfsaccess import 
StdFsAccess from .builder import Builder, adjustFileObjs, adjustDirObjs from .errors import WorkflowException, UnsupportedRequirement -from .pathmapper import PathMapper, abspath +from .pathmapper import PathMapper, abspath, normalizeFilesDirs _logger = logging.getLogger("cwltool") @@ -81,7 +81,6 @@ SCHEMA_FILE = None # type: Dict[unicode, Any] SCHEMA_DIR = None # type: Dict[unicode, Any] SCHEMA_ANY = None # type: Dict[unicode, Any] -SCHEMA_EXPR = None # type: Dict[unicode, Any] def get_schema(version): # type: (str) -> Tuple[Loader, Union[avro.schema.Names, avro.schema.SchemaParseException], Dict[unicode,Any], Loader] @@ -300,21 +299,6 @@ def avroize_type(field_type, name_prefix=""): avroize_type(field_type["items"], name_prefix) return field_type -def normalizeFilesDirs(job): - def addLocation(d): - if "location" not in d: - if d["class"] == "File" and ("contents" not in d or "basename" not in d): - raise validate.ValidationException("Anonymous file object must have 'contents' and 'basename' fields.") - if d["class"] == "Directory" and ("listing" not in d or "basename" not in d): - raise validate.ValidationException("Anonymous directory object must have 'listing' and 'basename' fields.") - d["location"] = "_:" + unicode(uuid.uuid4()) - elif "basename" not in d: - parse = urlparse.urlparse(d["location"]) - d["basename"] = os.path.basename(parse.path) - - adjustFileObjs(job, addLocation) - adjustDirObjs(job, addLocation) - class Process(object): __metaclass__ = abc.ABCMeta @@ -323,7 +307,7 @@ def __init__(self, toolpath_object, **kwargs): self.metadata = kwargs.get("metadata", {}) # type: Dict[str,Any] self.names = None # type: avro.schema.Names - global SCHEMA_FILE, SCHEMA_DIR, SCHEMA_ANY, SCHEMA_EXPR # pylint: disable=global-statement + global SCHEMA_FILE, SCHEMA_DIR, SCHEMA_ANY # pylint: disable=global-statement if SCHEMA_FILE is None: get_schema("draft-4") SCHEMA_ANY = cast(Dict[unicode, Any], @@ -332,10 +316,8 @@ def __init__(self, toolpath_object, **kwargs): 
SCHEMA_CACHE["draft-4"][3].idx["https://w3id.org/cwl/cwl#File"]) SCHEMA_DIR = cast(Dict[unicode, Any], SCHEMA_CACHE["draft-4"][3].idx["https://w3id.org/cwl/cwl#Directory"]) - SCHEMA_EXPR = cast(Dict[unicode, Any], - SCHEMA_CACHE["draft-4"][3].idx["https://w3id.org/cwl/cwl#Expression"]) - names = schema_salad.schema.make_avro_schema([SCHEMA_FILE, SCHEMA_DIR, SCHEMA_ANY, SCHEMA_EXPR], + names = schema_salad.schema.make_avro_schema([SCHEMA_FILE, SCHEMA_DIR, SCHEMA_ANY], schema_salad.ref_resolver.Loader({}))[0] if isinstance(names, avro.schema.SchemaParseException): raise names diff --git a/cwltool/update.py b/cwltool/update.py index 4ce8ee731..dfe76b14e 100644 --- a/cwltool/update.py +++ b/cwltool/update.py @@ -393,8 +393,8 @@ def _draft4Dev2toDev3(doc, loader, baseuri): doc["listing"] = [] for f in doc["fileDef"]: doc["listing"].append({ - "basename": f["filename"], - "contents": f["fileContent"] + "entryname": f["filename"], + "entry": f["fileContent"] }) del doc["fileDef"] for key, value in doc.items(): From 79eda7829e6706107940d295ca19a9980bc9ff84 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 4 Jul 2016 01:53:07 -0400 Subject: [PATCH 05/11] Squashed 'cwltool/schemas/' changes from b40896c..d5f2322 d5f2322 Restore Dirent for InitialWorkDir only. git-subtree-dir: cwltool/schemas git-subtree-split: d5f23220259fc40b943bbe0ae4465375dd1e2089 --- draft-4/CommandLineTool.yml | 56 +++++++++++++++++++++++++++---------- draft-4/Process.yml | 18 +++--------- 2 files changed, 46 insertions(+), 28 deletions(-) diff --git a/draft-4/CommandLineTool.yml b/draft-4/CommandLineTool.yml index afa1f37e8..0eb06f063 100644 --- a/draft-4/CommandLineTool.yml +++ b/draft-4/CommandLineTool.yml @@ -630,23 +630,48 @@ $graph: IRI such as an [RRID](http://www.identifiers.org/rrid/SCR_001156). 
Example: `http://identifiers.org/rrid/RRID:SCR_001156` -- name: FileExt +- name: Dirent type: record - extends: File + doc: | + Define a file or subdirectory that must be placed in the designated output + directory prior to executing the command line tool. May be the result of + executing an expression, such as building a configuration file from a + template. fields: - writable: - type: boolean? - doc: If true, the File must be writable. - jsonldPredicate: "cwl:writable" + - name: entryname + type: [string, Expression] + jsonldPredicate: + _id: cwl:entryname + doc: | + The name of the file or subdirectory to create in the output directory. + - name: entry + type: [string, Expression] + jsonldPredicate: + _id: cwl:entry + doc: | + If the value is a string literal or an expression which evaluates to a + string, a new file must be created with the string as the file contents. -- name: DirectoryExt - type: record - extends: Directory - fields: - writable: + If the value is an expression that evaluates to a `File` object, this + indicates the referenced file should be added to the designated output + directory prior to executing the tool. + + If the value is an expression that evaluates to a `Dirent` object, this + indicates that the File or Directory in `entry` should be added to the + designated output directory with the name in `entryname`. + + If `writable` is false, the file may be made available using a bind + mount or file system link to avoid unnecessary copying of the input + file. + - name: writable type: boolean? - doc: If true, the Directory must be writable. - jsonldPredicate: "cwl:writable" + doc: | + If true, the file or directory must be writable by the tool. Changes + to the file or directory must be isolated and not visible by any other + CommandLineTool process. This may be implemented by making a copy of + the original file or directory. Default false (files and directories + read-only by default). 
+ - name: InitialWorkDirRequirement type: record @@ -659,11 +684,13 @@ $graph: - name: listing type: - type: array - items: [FileExt, DirectoryExt, string, Expression] + items: [File, Directory, Dirent, string, Expression] - string - Expression jsonldPredicate: _id: "cwl:listing" + mapSubject: entryname + mapPredicate: entry doc: | The list of files or subdirectories that must be placed in the designated output directory prior to executing the command line tool. @@ -671,6 +698,7 @@ $graph: May be an expression. If so, the expression return value must validate as `{type: array, items: [File, Directory]}`. + - name: EnvVarRequirement type: record extends: "#ProcessRequirement" diff --git a/draft-4/Process.yml b/draft-4/Process.yml index a0c877b19..6c74cacd5 100644 --- a/draft-4/Process.yml +++ b/draft-4/Process.yml @@ -65,7 +65,7 @@ $graph: _type: "@vocab" doc: Must be `File` to indicate this object describes a file. - name: location - type: ["null", string, Expression] + type: string? doc: | A URI that identifies the file resource. This may be a relative reference, in which case it must be resolved using the base URI of the @@ -82,8 +82,6 @@ $graph: If the `path` field is provided but the `location` field is not, an implementation may assign the value of the `path` field to `location`, then follow the rules above. - - Must be evaluated as an expression only when appearing in InitialWorkDirRequirement. jsonldPredicate: _id: "@id" _type: "@id" @@ -104,7 +102,7 @@ $graph: "_id": "cwl:path" "_type": "@id" - name: basename - type: [string, Expression] + type: string doc: | The base name of the file, that is, the name of the file without any leading directory path. The base name must not contain a slash `/`. @@ -117,8 +115,6 @@ $graph: When this file is made available to a CommandLineTool, it must be named with `basename`, i.e. the final component of the `path` field must match `basename`. 
- - Must be evaluated as an expression only when appearing in InitialWorkDirRequirement. jsonldPredicate: "cwl:basename" - name: dirname type: string? @@ -194,7 +190,7 @@ $graph: root of the document. If no ontologies are specified in `$schema`, the runtime may perform exact file format matches. - name: contents - type: ["null", string, Expression] + type: string? doc: | File contents literal. Maximum of 64 KiB. @@ -258,7 +254,7 @@ $graph: _id: "cwl:path" _type: "@id" - name: basename - type: [string, Expression] + type: string doc: | The base name of the directory, that is, the name of the file without any leading directory path. The base name must not contain a slash `/`. @@ -271,16 +267,12 @@ $graph: When this file is made available to a CommandLineTool, it must be named with `basename`, i.e. the final component of the `path` field must match `basename`. - - Must be evaluated as an expression only when appearing in InitialWorkDirRequirement. jsonldPredicate: "cwl:basename" - name: listing type: - "null" - type: array items: [File, Directory] - - string - - Expression doc: | List of files or subdirectories contained in this directory. The name of each file or subdirectory is determined by the `basename` field of @@ -289,8 +281,6 @@ $graph: `Directory` object share the same `basename`, this must be treated as equivalent to a single subdirectory with the listings recursively merged. - - Must be evaluated as an expression only when appearing in InitialWorkDirRequirement. jsonldPredicate: _id: "cwl:listing" From 83d57ec92261c837a202ad53023d9a178c6b772f Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 4 Jul 2016 01:57:59 -0400 Subject: [PATCH 06/11] Squashed 'cwltool/schemas/' changes from d5f2322..95b7454 95b7454 Using Dirent. 
git-subtree-dir: cwltool/schemas git-subtree-split: 95b74548745f47720194d1e46bb2c1d15b077a4d --- draft-4/CommandLineTool.yml | 3 ++- draft-4/draft-4/rename.cwl | 4 +--- draft-4/draft-4/search.cwl | 2 +- draft-4/draft-4/stagefile.cwl | 3 +-- draft-4/draft-4/template-tool.cwl | 4 +--- 5 files changed, 6 insertions(+), 10 deletions(-) diff --git a/draft-4/CommandLineTool.yml b/draft-4/CommandLineTool.yml index 0eb06f063..702ccae8d 100644 --- a/draft-4/CommandLineTool.yml +++ b/draft-4/CommandLineTool.yml @@ -639,11 +639,12 @@ $graph: template. fields: - name: entryname - type: [string, Expression] + type: ["null", string, Expression] jsonldPredicate: _id: cwl:entryname doc: | The name of the file or subdirectory to create in the output directory. + If `entry` is a File or Directory, this overrides `basename`. Optional. - name: entry type: [string, Expression] jsonldPredicate: diff --git a/draft-4/draft-4/rename.cwl b/draft-4/draft-4/rename.cwl index f11b7bc82..ca3ae58f4 100644 --- a/draft-4/draft-4/rename.cwl +++ b/draft-4/draft-4/rename.cwl @@ -4,9 +4,7 @@ baseCommand: "true" requirements: InitialWorkDirRequirement: listing: - - class: File - basename: $(inputs.newname) - location: $(inputs.srcfile.location) + $(inputs.newname): $(inputs.srcfile.location) inputs: srcfile: File newname: string diff --git a/draft-4/draft-4/search.cwl b/draft-4/draft-4/search.cwl index d6ed4341d..dc32c0955 100644 --- a/draft-4/draft-4/search.cwl +++ b/draft-4/draft-4/search.cwl @@ -9,7 +9,7 @@ $graph: requirements: - class: InitialWorkDirRequirement listing: - - $(inputs.file) + input.txt: $(inputs.file) - class: InlineJavascriptRequirement inputs: diff --git a/draft-4/draft-4/stagefile.cwl b/draft-4/draft-4/stagefile.cwl index eedf736fa..bf73c73db 100644 --- a/draft-4/draft-4/stagefile.cwl +++ b/draft-4/draft-4/stagefile.cwl @@ -3,8 +3,7 @@ cwlVersion: draft-4.dev3 requirements: InitialWorkDirRequirement: listing: - - class: File - location: $(inputs.infile.location) + - entry: 
$(inputs.infile) writable: true inputs: infile: File diff --git a/draft-4/draft-4/template-tool.cwl b/draft-4/draft-4/template-tool.cwl index c7d473d32..b3ec7b85d 100755 --- a/draft-4/draft-4/template-tool.cwl +++ b/draft-4/draft-4/template-tool.cwl @@ -10,9 +10,7 @@ requirements: - "var t = function(s) { return _.template(s)({'inputs': inputs}); };" - class: InitialWorkDirRequirement listing: - - class: File - basename: foo.txt - contents: $(t("The file is <%= inputs.file1.path.split('/').slice(-1)[0] %>\n")) + foo.txt: $(t("The file is <%= inputs.file1.path.split('/').slice(-1)[0] %>\n")) inputs: - id: file1 type: File From 6c567c51c1349d4a078ff3b566dec16860fceb5c Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 4 Jul 2016 02:03:02 -0400 Subject: [PATCH 07/11] Allow entryname to be optional. --- cwltool/draft2tool.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index f670a38cf..9d1aeae7b 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -289,10 +289,12 @@ def rm_pending_output_callback(output_callback, jobcachepending, else: for t in initialWorkdir["listing"]: if "entry" in t: - ls.append({ - "entryname": builder.do_eval(t["entryname"]), - "entry": builder.do_eval(t["entry"]) - }) + et = {"entry": builder.do_eval(t["entry"])} + if "entryname" in t: + et["entryname"] = builder.do_eval(t["entryname"]) + else: + et["entryname"] = None + ls.append(et) else: ls.append(t) for i,t in enumerate(ls): @@ -304,7 +306,9 @@ def rm_pending_output_callback(output_callback, jobcachepending, "contents": t["entry"] } else: - t["entry"]["basename"] = t["entryname"] + if t["entryname"]: + t = copy.deepcopy(t) + t["entry"]["basename"] = t["entryname"] ls[i] = t["entry"] j.generatefiles["listing"] = ls From abcbd3f012e16973f1c176a3734f204b091bb7e5 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 4 Jul 2016 09:57:46 -0400 Subject: [PATCH 08/11] Squashed 'cwltool/schemas/' 
changes from 95b7454..c1cd55f c1cd55f Remove conflicting mapSubject/mapPredicate git-subtree-dir: cwltool/schemas git-subtree-split: c1cd55f9a311d8206d2491cc472bbba62aaba218 --- draft-4/CommandLineTool.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/draft-4/CommandLineTool.yml b/draft-4/CommandLineTool.yml index 702ccae8d..9d4fa1bec 100644 --- a/draft-4/CommandLineTool.yml +++ b/draft-4/CommandLineTool.yml @@ -690,8 +690,6 @@ $graph: - Expression jsonldPredicate: _id: "cwl:listing" - mapSubject: entryname - mapPredicate: entry doc: | The list of files or subdirectories that must be placed in the designated output directory prior to executing the command line tool. From 66e6c361e394b0945859ee7bae90a6a22b6cfc3e Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 4 Jul 2016 10:31:12 -0400 Subject: [PATCH 09/11] Fix support for file literals. --- cwltool/draft2tool.py | 2 +- cwltool/job.py | 5 +++++ cwltool/pathmapper.py | 28 ++++++++++++++++------------ tests/echo.cwl | 2 +- tests/test_toolargparse.py | 4 ++-- 5 files changed, 25 insertions(+), 16 deletions(-) diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 9d1aeae7b..43c479eae 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -274,7 +274,7 @@ def rm_pending_output_callback(output_callback, jobcachepending, j.outdir = kwargs.get("outdir") or tempfile.mkdtemp(prefix=out_prefix) tmpdir_prefix = kwargs.get('tmpdir_prefix') j.tmpdir = kwargs.get("tmpdir") or tempfile.mkdtemp(prefix=tmpdir_prefix) - j.stagedir = None + j.stagedir = tempfile.mkdtemp(prefix=tmpdir_prefix) else: j.outdir = builder.outdir j.tmpdir = builder.tmpdir diff --git a/cwltool/job.py b/cwltool/job.py index 539c2f37e..22b6aa217 100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -97,6 +97,11 @@ def run(self, dry_run=False, pull_image=True, rm_container=True, vol = self.pathmapper.mapper(src) if vol.type == "File": runtime.append(u"--volume=%s:%s:ro" % (vol.resolved, vol.target)) + if vol.type == "CreateFile": 
+ createtmp = os.path.join(self.stagedir, os.path.basename(vol.target)) + with open(createtmp, "w") as f: + f.write(vol.resolved.encode("utf-8")) + runtime.append(u"--volume=%s:%s:ro" % (createtmp, vol.target)) runtime.append(u"--volume=%s:%s:rw" % (os.path.abspath(self.outdir), "/var/spool/cwl")) runtime.append(u"--volume=%s:%s:rw" % (os.path.abspath(self.tmpdir), "/tmp")) runtime.append(u"--workdir=%s" % ("/var/spool/cwl")) diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py index 51f9304d8..cd8772d27 100644 --- a/cwltool/pathmapper.py +++ b/cwltool/pathmapper.py @@ -5,6 +5,7 @@ import uuid import urlparse from typing import Tuple, Set, Union, Any +import schema_salad.validate as validate _logger = logging.getLogger("cwltool") @@ -49,12 +50,15 @@ def adjustDirObjs(rec, op): # type: (Any, Callable[[Any], Any]) -> None def normalizeFilesDirs(job): def addLocation(d): if "location" not in d: - if d["class"] == "File" and ("contents" not in d or "basename" not in d): + if d["class"] == "File" and ("contents" not in d): raise validate.ValidationException("Anonymous file object must have 'contents' and 'basename' fields.") if d["class"] == "Directory" and ("listing" not in d or "basename" not in d): raise validate.ValidationException("Anonymous directory object must have 'listing' and 'basename' fields.") d["location"] = "_:" + unicode(uuid.uuid4()) - elif "basename" not in d: + if "basename" not in d: + d["basename"] = unicode(uuid.uuid4()) + + if "basename" not in d: parse = urlparse.urlparse(d["location"]) d["basename"] = os.path.basename(parse.path) @@ -85,13 +89,10 @@ def __init__(self, referenced_files, basedir, stagedir, separateDirs=True): def visitlisting(self, listing, stagedir, basedir): for ld in listing: tgt = os.path.join(stagedir, ld["basename"]) - if "contents" in ld and ld["location"].startswith("_:"): - self._pathmap[ld["location"]] = MapperEnt(ld["contents"], tgt, "CreateFile") + if ld["class"] == "Directory": + self.visit(ld, tgt, basedir, 
copy=ld.get("writable", False)) else: - if ld["class"] == "Directory": - self.visit(ld, tgt, basedir, copy=ld.get("writable", False)) - else: - self.visit(ld, stagedir, basedir, copy=ld.get("writable", False)) + self.visit(ld, stagedir, basedir, copy=ld.get("writable", False)) def visit(self, obj, stagedir, basedir, copy=False): if obj["class"] == "Directory": @@ -103,11 +104,14 @@ def visit(self, obj, stagedir, basedir, copy=False): return ab = abspath(path, basedir) tgt = os.path.join(stagedir, obj["basename"]) - if copy: - self._pathmap[path] = MapperEnt(ab, tgt, "WritableFile") + if "contents" in obj and obj["location"].startswith("_:"): + self._pathmap[obj["location"]] = MapperEnt(obj["contents"], tgt, "CreateFile") else: - self._pathmap[path] = MapperEnt(ab, tgt, "File") - self.visitlisting(obj.get("secondaryFiles", []), stagedir, basedir) + if copy: + self._pathmap[path] = MapperEnt(ab, tgt, "WritableFile") + else: + self._pathmap[path] = MapperEnt(ab, tgt, "File") + self.visitlisting(obj.get("secondaryFiles", []), stagedir, basedir) def setup(self, referenced_files, basedir): # type: (Set[Any], unicode) -> None diff --git a/tests/echo.cwl b/tests/echo.cwl index da6943328..3333e57a8 100644 --- a/tests/echo.cwl +++ b/tests/echo.cwl @@ -1,4 +1,4 @@ -cwlVersion: cwl:draft-4.dev2 +cwlVersion: cwl:draft-4.dev3 class: CommandLineTool inputs: - id: inp diff --git a/tests/test_toolargparse.py b/tests/test_toolargparse.py index dfa108318..f21343b09 100644 --- a/tests/test_toolargparse.py +++ b/tests/test_toolargparse.py @@ -8,7 +8,7 @@ class ToolArgparse(unittest.TestCase): script=''' #!/usr/bin/env cwl-runner -cwlVersion: "draft-4.dev2" +cwlVersion: "draft-4.dev3" class: CommandLineTool description: "This tool is developed for SMC-RNA Challenge for detecting gene fusions (STAR fusion)" inputs: @@ -28,7 +28,7 @@ class ToolArgparse(unittest.TestCase): script2=''' #!/usr/bin/env cwl-runner -cwlVersion: 'cwl:draft-4.dev2' +cwlVersion: 'cwl:draft-4.dev3' class: 
CommandLineTool inputs: - id: bdg From ffd93d07e60216c1643044bad3633ffc30f3975d Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 4 Jul 2016 10:52:25 -0400 Subject: [PATCH 10/11] Add --on-error --- cwltool/main.py | 7 ++++++- cwltool/workflow.py | 5 +++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/cwltool/main.py b/cwltool/main.py index 88f704bbc..1ac8f6db5 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -154,6 +154,10 @@ def arg_parser(): # type: () -> argparse.ArgumentParser help="Will be passed to `docker run` as the '--net' " "parameter. Implies '--enable-net'.") + parser.add_argument("--on-error", type=str, + help="Desired workflow behavior when a step fails. One of 'stop' or 'continue'. " + "Default is 'stop.", default="stop") + parser.add_argument("workflow", type=str, nargs="?", default=None) parser.add_argument("job_order", nargs=argparse.REMAINDER) @@ -591,7 +595,8 @@ def main(argsl=None, 'tool_help': False, 'workflow': None, 'job_order': None, - 'pack': False}.iteritems(): + 'pack': False, + 'on_error': 'continue'}.iteritems(): if not hasattr(args, k): setattr(args, k, v) diff --git a/cwltool/workflow.py b/cwltool/workflow.py index a32e8dc07..95c23ad38 100644 --- a/cwltool/workflow.py +++ b/cwltool/workflow.py @@ -326,11 +326,16 @@ def job(self, joborder, output_callback, **kwargs): made_progress = False for step in self.steps: + if kwargs["on_error"] == "stop" and self.processStatus != "success": + break + if not step.submitted: step.iterable = self.try_make_job(step, **kwargs) if step.iterable: for newjob in step.iterable: + if kwargs["on_error"] == "stop" and self.processStatus != "success": + break if newjob: made_progress = True yield newjob From 81c4d7f5586230dfebfa003a3c35be20700f749b Mon Sep 17 00:00:00 2001 From: "Michael R. 
Crusoe" Date: Tue, 5 Jul 2016 10:05:49 -0400 Subject: [PATCH 11/11] update types to be consistent --- cwltool/builder.py | 5 +++-- cwltool/draft2tool.py | 11 ++++++----- cwltool/job.py | 13 ++++++++----- cwltool/load_tool.py | 13 ++++++------- cwltool/main.py | 9 +++++---- cwltool/pathmapper.py | 31 ++++++++++++++++++------------- cwltool/process.py | 21 ++++++++++++++------- cwltool/stdfsaccess.py | 6 +++--- cwltool/update.py | 2 +- cwltool/workflow.py | 3 ++- 10 files changed, 66 insertions(+), 48 deletions(-) diff --git a/cwltool/builder.py b/cwltool/builder.py index 6278cd3bf..d66b98888 100644 --- a/cwltool/builder.py +++ b/cwltool/builder.py @@ -31,7 +31,8 @@ def __init__(self): # type: () -> None self.resources = None # type: Dict[str, Union[int, str]] self.bindings = [] # type: List[Dict[str, Any]] self.timeout = None # type: int - self.pathmapper = None # type: PathMapper + self.pathmapper = None # type: PathMapper + self.stagedir = None # type: unicode def bind_input(self, schema, datum, lead_pos=[], tail_pos=[]): # type: (Dict[unicode, Any], Any, List[int], List[int]) -> List[Dict[str, Any]] @@ -190,7 +191,7 @@ def generate_arg(self, binding): # type: (Dict[str,Any]) -> List[str] return [a for a in args if a is not None] def do_eval(self, ex, context=None, pull_image=True, recursive=False): - # type: (Dict[str,str], Any, bool) -> Any + # type: (Union[Dict[str, str], unicode], Any, bool, bool) -> Any if recursive: if isinstance(ex, dict): return {k: self.do_eval(v, context, pull_image, recursive) for k,v in ex.iteritems()} diff --git a/cwltool/draft2tool.py b/cwltool/draft2tool.py index 43c479eae..e5a8658ec 100644 --- a/cwltool/draft2tool.py +++ b/cwltool/draft2tool.py @@ -113,7 +113,8 @@ def run(self, **kwargs): # map files to assigned path inside a container. 
We need to also explicitly # walk over input as implicit reassignment doesn't reach everything in builder.bindings -def check_adjust(builder, f): # type: (Dict[str,Any]) -> Dict[str,Any] +def check_adjust(builder, f): + # type: (Builder, Dict[str, Any]) -> Dict[str,Any] f["path"] = builder.pathmapper.mapper(f["location"])[1] f["dirname"], f["basename"] = os.path.split(f["path"]) if f["class"] == "File": @@ -131,7 +132,7 @@ def makeJobRunner(self): # type: () -> CommandLineJob return CommandLineJob() def makePathMapper(self, reffiles, stagedir, **kwargs): - # type: (Set[Any], unicode, **Any) -> PathMapper + # type: (List[Any], unicode, **Any) -> PathMapper dockerReq, _ = self.get_requirement("DockerRequirement") try: return PathMapper(reffiles, kwargs["basedir"], stagedir) @@ -268,7 +269,7 @@ def rm_pending_output_callback(output_callback, jobcachepending, _logger.debug(u"[job %s] command line bindings is %s", j.name, json.dumps(builder.bindings, indent=4)) - dockerReq, _ = self.get_requirement("DockerRequirement") + dockerReq = self.get_requirement("DockerRequirement")[0] if dockerReq and kwargs.get("use_container"): out_prefix = kwargs.get("tmp_outdir_prefix") j.outdir = kwargs.get("outdir") or tempfile.mkdtemp(prefix=out_prefix) @@ -283,7 +284,7 @@ def rm_pending_output_callback(output_callback, jobcachepending, initialWorkdir = self.get_requirement("InitialWorkDirRequirement")[0] j.generatefiles = {"class": "Directory", "listing": [], "basename": ""} if initialWorkdir: - ls = [] + ls = [] # type: List[Dict[str, Any]] if isinstance(initialWorkdir["listing"], (str, unicode)): ls = builder.do_eval(initialWorkdir["listing"]) else: @@ -310,7 +311,7 @@ def rm_pending_output_callback(output_callback, jobcachepending, t = copy.deepcopy(t) t["entry"]["basename"] = t["entryname"] ls[i] = t["entry"] - j.generatefiles["listing"] = ls + j.generatefiles[u"listing"] = ls normalizeFilesDirs(j.generatefiles) diff --git a/cwltool/job.py b/cwltool/job.py index 22b6aa217..1a0b51548 
100644 --- a/cwltool/job.py +++ b/cwltool/job.py @@ -58,11 +58,12 @@ def __init__(self): # type: () -> None self.outdir = None # type: str self.tmpdir = None # type: str self.environment = None # type: Dict[str,str] - self.generatefiles = None # type: Dict[str,Union[Dict[str,str],str]] + self.generatefiles = None # type: Dict[unicode, Union[List[Dict[str, str]], Dict[str,str], str]] + self.stagedir = None # type: unicode def run(self, dry_run=False, pull_image=True, rm_container=True, rm_tmpdir=True, move_outputs="move", **kwargs): - # type: (bool, bool, bool, bool, bool, **Any) -> Union[Tuple[str,Dict[None,None]],None] + # type: (bool, bool, bool, bool, bool, unicode, **Any) -> Union[Tuple[str,Dict[None,None]],None] if not os.path.exists(self.outdir): os.makedirs(self.outdir) @@ -78,10 +79,12 @@ def run(self, dry_run=False, pull_image=True, rm_container=True, (docker_req, docker_is_req) = get_feature(self, "DockerRequirement") - for f in self.pathmapper.files(): - p = self.pathmapper.mapper(f) + for knownfile in self.pathmapper.files(): + p = self.pathmapper.mapper(knownfile) if p.type == "File" and not os.path.isfile(p[0]): - raise WorkflowException(u"Input file %s (at %s) not found or is not a regular file." % (f, self.pathmapper.mapper(f)[0])) + raise WorkflowException( + u"Input file %s (at %s) not found or is not a regular file." + % (knownfile, self.pathmapper.mapper(knownfile)[0])) img_id = None if docker_req and kwargs.get("use_container") is not False: diff --git a/cwltool/load_tool.py b/cwltool/load_tool.py index f0f757dec..6a100b7ad 100644 --- a/cwltool/load_tool.py +++ b/cwltool/load_tool.py @@ -8,6 +8,7 @@ import urlparse from schema_salad.ref_resolver import Loader import schema_salad.validate as validate +from schema_salad.validate import ValidationException import schema_salad.schema as schema from avro.schema import Names from . 
import update @@ -37,13 +38,12 @@ def fetch_document(argsworkflow): workflowobj = argsworkflow uri = "#" + str(id(argsworkflow)) else: - raise validate.ValidationException( - "Must be URI or object: '%s'" % argsworkflow) + raise ValidationException("Must be URI or object: '%s'" % argsworkflow) return document_loader, workflowobj, uri def _convert_stdstreams_to_files(workflowobj): - # type: (Union[Dict[unicode, Any], List[Dict[unicode, Any]]) -> None + # type: (Union[Dict[unicode, Any], List[Dict[unicode, Any]]]) -> None if isinstance(workflowobj, dict): if ('class' in workflowobj @@ -53,7 +53,7 @@ def _convert_stdstreams_to_files(workflowobj): for streamtype in ['stdout', 'stderr']: if out['type'] == streamtype: if 'outputBinding' in out: - raise validate.ValidateException( + raise ValidationException( "Not allowed to specify outputBinding when" " using %s shortcut." % streamtype) if streamtype in workflowobj: @@ -109,12 +109,11 @@ def validate_document(document_loader, workflowobj, uri, workflowobj["id"] = fileuri processobj, metadata = document_loader.resolve_all(workflowobj, fileuri) if not isinstance(processobj, (dict, list)): - raise validate.ValidationException("Workflow must be a dict or list.") + raise ValidationException("Workflow must be a dict or list.") if not metadata: if not isinstance(processobj, dict): - raise validate.ValidationException( - "Draft-2 workflows must be a dict.") + raise ValidationException("Draft-2 workflows must be a dict.") metadata = {"$namespaces": processobj.get("$namespaces", {}), "$schemas": processobj.get("$schemas", []), "cwlVersion": processobj["cwlVersion"]} diff --git a/cwltool/main.py b/cwltool/main.py index 1ac8f6db5..4cb71f3eb 100755 --- a/cwltool/main.py +++ b/cwltool/main.py @@ -416,7 +416,7 @@ def load_job_order(args, t, stdin, print_input_deps=False, relative_deps=False, return 1 if print_input_deps: - printdeps(job_order_object, loader, stdout, relative_deps, + printdeps(job_order_object, loader, stdout, 
relative_deps, "", basedir=u"file://%s/" % input_basedir) return 0 @@ -428,7 +428,8 @@ def pathToLoc(p): adjustDirObjs(job_order_object, pathToLoc) adjustFileObjs(job_order_object, pathToLoc) normalizeFilesDirs(job_order_object) - adjustDirObjs(job_order_object, functools.partial(getListing, StdFsAccess(input_basedir))) + adjustDirObjs(job_order_object, cast(Callable[..., Any], + functools.partial(getListing, StdFsAccess(input_basedir)))) if "cwl:tool" in job_order_object: del job_order_object["cwl:tool"] @@ -439,9 +440,9 @@ def pathToLoc(p): def printdeps(obj, document_loader, stdout, relative_deps, uri, basedir=None): - # type: (Dict[unicode, Any], Loader, IO[Any], bool, str) -> None + # type: (Dict[unicode, Any], Loader, IO[Any], bool, unicode, str) -> None deps = {"class": "File", - "location": uri} + "location": uri} # type: Dict[unicode, Any] def loadref(b, u): return document_loader.fetch(urlparse.urljoin(b, u)) diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py index cd8772d27..3d707fce4 100644 --- a/cwltool/pathmapper.py +++ b/cwltool/pathmapper.py @@ -4,14 +4,15 @@ import collections import uuid import urlparse -from typing import Tuple, Set, Union, Any +from functools import partial +from typing import Any, Callable, Set, Tuple, Union import schema_salad.validate as validate _logger = logging.getLogger("cwltool") -MapperEnt = collections.namedtuple("MapperEnt", ("resolved", "target", "type")) +MapperEnt = collections.namedtuple("MapperEnt", ["resolved", "target", "type"]) -def adjustFiles(rec, op): # type: (Any, Callable[..., Any]) -> None +def adjustFiles(rec, op): # type: (Any, Union[Callable[..., Any], partial[Any]]) -> None """Apply a mapping function to each File path in the object `rec`.""" if isinstance(rec, dict): @@ -23,7 +24,7 @@ def adjustFiles(rec, op): # type: (Any, Callable[..., Any]) -> None for d in rec: adjustFiles(d, op) -def adjustFileObjs(rec, op): # type: (Any, Callable[[Any], Any]) -> None +def adjustFileObjs(rec, op): # 
type: (Any, Union[Callable[..., Any], partial[Any]]) -> None """Apply an update function to each File object in the object `rec`.""" if isinstance(rec, dict): @@ -35,19 +36,21 @@ def adjustFileObjs(rec, op): # type: (Any, Callable[[Any], Any]) -> None for d in rec: adjustFileObjs(d, op) -def adjustDirObjs(rec, op): # type: (Any, Callable[[Any], Any]) -> None +def adjustDirObjs(rec, op): + # type: (Any, Union[Callable[..., Any], partial[Any]]) -> None """Apply an update function to each Directory object in the object `rec`.""" if isinstance(rec, dict): if rec.get("class") == "Directory": op(rec) - for d in rec: - adjustDirObjs(rec[d], op) + for key in rec: + adjustDirObjs(rec[key], op) if isinstance(rec, list): for d in rec: adjustDirObjs(d, op) def normalizeFilesDirs(job): + # type: (Union[List[Dict[unicode, Any]], Dict[unicode, Any]]) -> None def addLocation(d): if "location" not in d: if d["class"] == "File" and ("contents" not in d): @@ -80,13 +83,14 @@ class PathMapper(object): (absolute local path, absolute container path)""" def __init__(self, referenced_files, basedir, stagedir, separateDirs=True): - # type: (Set[Any], unicode, unicode) -> None - self._pathmap = {} # type: Dict[unicode, Tuple[unicode, unicode]] + # type: (List[Any], unicode, unicode, bool) -> None + self._pathmap = {} # type: Dict[unicode, MapperEnt] self.stagedir = stagedir self.separateDirs = separateDirs self.setup(referenced_files, basedir) def visitlisting(self, listing, stagedir, basedir): + # type: (List[Dict[unicode, Any]], unicode, unicode) -> None for ld in listing: tgt = os.path.join(stagedir, ld["basename"]) if ld["class"] == "Directory": @@ -95,6 +99,7 @@ def visitlisting(self, listing, stagedir, basedir): self.visit(ld, stagedir, basedir, copy=ld.get("writable", False)) def visit(self, obj, stagedir, basedir, copy=False): + # type: (Dict[unicode, Any], unicode, unicode, bool) -> None if obj["class"] == "Directory": self._pathmap[obj["location"]] = MapperEnt(obj["location"], 
stagedir, "Directory") self.visitlisting(obj.get("listing", []), stagedir, basedir) @@ -114,7 +119,7 @@ def visit(self, obj, stagedir, basedir, copy=False): self.visitlisting(obj.get("secondaryFiles", []), stagedir, basedir) def setup(self, referenced_files, basedir): - # type: (Set[Any], unicode) -> None + # type: (List[Any], unicode) -> None # Go through each file and set the target to its own directory along # with any secondary files. @@ -138,18 +143,18 @@ def setup(self, referenced_files, basedir): self._pathmap[path] = MapperEnt(deref, tgt, "File") - def mapper(self, src): # type: (unicode) -> Tuple[unicode, unicode] + def mapper(self, src): # type: (unicode) -> MapperEnt if u"#" in src: i = src.index(u"#") p = self._pathmap[src[:i]] - return (p.resolved, p.target + src[i:]) + return MapperEnt(p.resolved, p.target + src[i:], None) else: return self._pathmap[src] def files(self): # type: () -> List[unicode] return self._pathmap.keys() - def items(self): # type: () -> List[Tuple[unicode, Tuple[unicode, unicode]]] + def items(self): # type: () -> List[Tuple[unicode, MapperEnt]] return self._pathmap.items() def reversemap(self, target): # type: (unicode) -> Tuple[unicode, unicode] diff --git a/cwltool/process.py b/cwltool/process.py index eebac88c6..243d89b22 100644 --- a/cwltool/process.py +++ b/cwltool/process.py @@ -153,6 +153,7 @@ def adjustFilesWithSecondary(rec, op, primary=None): adjustFilesWithSecondary(d, op, primary) def getListing(fs_access, rec): + # type: (StdFsAccess, Dict[str, Any]) -> None if "listing" not in rec: listing = [] loc = rec["location"] @@ -167,6 +168,7 @@ def getListing(fs_access, rec): rec["listing"] = listing def stageFiles(pm, stageFunc): + # type: (PathMapper, Callable[..., Any]) -> None for f, p in pm.items(): if not os.path.exists(os.path.dirname(p.target)): os.makedirs(os.path.dirname(p.target), 0755) @@ -179,6 +181,7 @@ def stageFiles(pm, stageFunc): n.write(p.resolved.encode("utf-8")) def collectFilesAndDirs(obj, out): + # 
type: (Union[Dict[unicode, Any], List[Dict[unicode, Any]]], List[Dict[unicode, Any]]) -> None if isinstance(obj, dict): if obj.get("class") in ("File", "Directory"): out.append(obj) @@ -190,6 +193,7 @@ def collectFilesAndDirs(obj, out): collectFilesAndDirs(l, out) def relocateOutputs(outputObj, outdir, output_dirs, action): + # type: (Union[Dict[unicode, Any], List[Dict[unicode, Any]]], unicode, Set[unicode], unicode) -> Union[Dict[unicode, Any], List[Dict[unicode, Any]]] if action not in ("move", "copy"): return outputObj @@ -203,7 +207,7 @@ def moveIt(src, dst): _logger.debug("Copying %s to %s", src, dst) shutil.copy(src, dst) - outfiles = [] + outfiles = [] # type: List[Dict[unicode, Any]] collectFilesAndDirs(outputObj, outfiles) pm = PathMapper(outfiles, "", outdir, separateDirs=False) stageFiles(pm, moveIt) @@ -217,7 +221,7 @@ def _check_adjust(f): return outputObj -def cleanIntermediate(output_dirs): +def cleanIntermediate(output_dirs): # type: (Set[unicode]) -> None for a in output_dirs: if os.path.exists(a) and empty_subtree(a): _logger.debug(u"Removing intermediate output directory %s", a) @@ -283,6 +287,7 @@ def fillInDefaults(inputs, job): def avroize_type(field_type, name_prefix=""): + # type: (Union[List[Dict[unicode, Any]], Dict[unicode, Any]], unicode) -> Any """ adds missing information to a type so that CWL types are valid in schema_salad. 
""" @@ -562,6 +567,7 @@ def uniquename(stem): # type: (unicode) -> unicode return u def nestdir(base, deps): + # type: (unicode, Dict[unicode, Any]) -> Dict[unicode, Any] dirname = os.path.dirname(base) + "/" subid = deps["location"] if subid.startswith(dirname): @@ -578,8 +584,9 @@ def nestdir(base, deps): return deps def mergedirs(listing): - r = [] - ents = {} + # type: (List[Dict[unicode, Any]]) -> List[Dict[unicode, Any]] + r = [] # type: List[Dict[unicode, Any]] + ents = {} # type: Dict[unicode, Any] for e in listing: if e["basename"] not in ents: ents[e["basename"]] = e @@ -592,8 +599,8 @@ def mergedirs(listing): return r def scandeps(base, doc, reffields, urlfields, loadref): - # type: (unicode, Any, Set[str], Set[str], Callable[[unicode, str], Any]) -> List[Dict[str, str]] - r = [] + # type: (unicode, Any, Set[unicode], Set[unicode], Callable[[unicode, unicode], Any]) -> List[Dict[unicode, unicode]] + r = [] # type: List[Dict[unicode, unicode]] if isinstance(doc, dict): if "id" in doc: if doc["id"].startswith("file://"): @@ -616,7 +623,7 @@ def scandeps(base, doc, reffields, urlfields, loadref): deps = { "class": "File", "location": subid - } # type: Dict[str, Any] + } # type: Dict[unicode, Any] sf = scandeps(subid, sub, reffields, urlfields, loadref) if sf: deps["secondaryFiles"] = sf diff --git a/cwltool/stdfsaccess.py b/cwltool/stdfsaccess.py index cfdcb657b..53498013c 100644 --- a/cwltool/stdfsaccess.py +++ b/cwltool/stdfsaccess.py @@ -21,11 +21,11 @@ def open(self, fn, mode): # type: (unicode, str) -> BinaryIO def exists(self, fn): # type: (unicode) -> bool return os.path.exists(self._abs(fn)) - def isfile(self, fn): + def isfile(self, fn): # type: (unicode) -> bool return os.path.isfile(self._abs(fn)) - def isdir(self, fn): + def isdir(self, fn): # type: (unicode) -> bool return os.path.isdir(self._abs(fn)) - def listdir(self, fn): + def listdir(self, fn): # type: (unicode) -> List[unicode] return [abspath(l, fn) for l in os.listdir(self._abs(fn))] 
diff --git a/cwltool/update.py b/cwltool/update.py index dfe76b14e..dd0dc4090 100644 --- a/cwltool/update.py +++ b/cwltool/update.py @@ -324,7 +324,7 @@ def _draft3toDraft4dev1(doc, loader, baseuri): # type: (Any, Loader, str) -> Any if isinstance(doc, dict): if "class" in doc and doc["class"] == "Workflow": - def fixup(f): + def fixup(f): # type: (str) -> str doc, frg = urlparse.urldefrag(f) frg = '/'.join(frg.rsplit('.', 1)) return doc + "#" + frg diff --git a/cwltool/workflow.py b/cwltool/workflow.py index 95c23ad38..475c26aa4 100644 --- a/cwltool/workflow.py +++ b/cwltool/workflow.py @@ -245,6 +245,7 @@ def try_make_job(self, step, **kwargs): vfinputs = {shortname(k): v for k,v in inputobj.iteritems()} def postScatterEval(io): + # type: (Dict[unicode, Any]) -> Dict[unicode, Any] shortio = {shortname(k): v for k,v in io.iteritems()} def valueFromFunc(k, v): # type: (Any, Any) -> Any if k in valueFrom: @@ -301,7 +302,7 @@ def run(self, **kwargs): _logger.debug(u"[%s] workflow starting", self.name) def job(self, joborder, output_callback, **kwargs): - # type: (Dict[unicode, Any], Callable[[Any, Any], Any], bool, **Any) -> Generator[WorkflowJob, None, None] + # type: (Dict[unicode, Any], Callable[[Any, Any], Any], **Any) -> Generator[WorkflowJob, None, None] self.state = {} self.processStatus = "success"