Skip to content

Commit f021126

Browse files
author
Peter Amstutz
committed
Major refactor of document loading to validate draft-3 and draft-4 according to
the declared schemas. This avoids the problem of naively updating to the latest version before validation, which fails to catch invalid documents (for example, documents declared as draft-3 that use draft-4 features).
1 parent d2552c0 commit f021126

File tree

6 files changed

+172
-126
lines changed

6 files changed

+172
-126
lines changed

cwltool/factory.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ def __init__(self, makeTool=workflow.defaultMakeTool,
2424
self.execkwargs = execkwargs
2525

2626
def make(self, cwl, frag=None, debug=False):
27-
l = main.load_tool(cwl, False, True, self.makeTool, debug, urifrag=frag)
27+
l = main.load_tool(cwl, self.makeTool)
2828
if type(l) == int:
2929
raise Exception("Error loading tool")
3030
return Callable(l, self)

cwltool/load_tool.py

Lines changed: 81 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -1,23 +1,17 @@
11
import os
2-
from schema_salad.ref_resolver import Loader
3-
import schema_salad.validate as validate
2+
import logging
3+
import re
44
import urlparse
55
import sys
6-
import update
7-
import process
8-
9-
def load_tool(argsworkflow, updateonly, strict, makeTool, debug,
10-
print_pre=False,
11-
print_rdf=False,
12-
print_dot=False,
13-
print_deps=False,
14-
relative_deps=False,
15-
rdf_serializer=None,
16-
enable_dev=False,
17-
stdout=sys.stdout,
18-
urifrag=None):
19-
# type: (Union[str,unicode,dict[unicode,Any]], bool, bool, Callable[...,Process], bool, bool, bool, bool, bool, bool, Any, Any, Any) -> Any
6+
from schema_salad.ref_resolver import Loader
7+
import schema_salad.validate as validate
8+
import schema_salad.schema as schema
9+
from . import update
10+
from . import process
11+
12+
_logger = logging.getLogger("cwltool")
2013

14+
def fetch_document(argsworkflow):
2115
document_loader = Loader({"cwl": "https://w3id.org/cwl/cwl#", "id": "@id"})
2216

2317
jobobj = None
@@ -33,87 +27,105 @@ def load_tool(argsworkflow, updateonly, strict, makeTool, debug,
3327
workflowobj = document_loader.fetch(fileuri)
3428
elif isinstance(argsworkflow, dict):
3529
workflowobj = argsworkflow
36-
uri = urifrag
37-
fileuri = "#"
30+
uri = "#" + str(id(argsworkflow))
3831
else:
39-
raise validate.ValidationException("Must be URI or dict")
32+
raise validate.ValidationException("Must be URI or object: '%s'" % argsworkflow)
33+
34+
return document_loader, workflowobj, uri
4035

36+
37+
def validate_document(document_loader, workflowobj, uri, defaultVersion=None, enable_dev=False, strict=True):
38+
jobobj = None
4139
if "cwl:tool" in workflowobj:
4240
jobobj = workflowobj
4341
uri = urlparse.urljoin(uri, jobobj["cwl:tool"])
44-
fileuri, urifrag = urlparse.urldefrag(uri)
45-
workflowobj = document_loader.fetch(fileuri)
4642
del jobobj["cwl:tool"]
43+
workflowobj = fetch_document(uri)
4744

48-
if isinstance(workflowobj, list):
45+
if not isinstance(workflowobj, dict):
4946
if enable_dev:
50-
# bare list without a version must be treated as draft-2
51-
workflowobj = {"cwlVersion": "https://w3id.org/cwl/cwl#draft-2",
52-
"id": fileuri,
53-
"@graph": workflowobj}
47+
workflowobj = {
48+
"cwlVersion": "draft-2",
49+
"@graph": workflowobj
50+
}
5451
else:
5552
raise validate.ValidationException("Missing 'cwlVersion'")
5653

57-
workflowobj = update.update(workflowobj, document_loader, fileuri, enable_dev)
58-
document_loader.idx.clear()
54+
fileuri, urifrag = urlparse.urldefrag(uri)
55+
56+
if "cwlVersion" in workflowobj:
57+
workflowobj["cwlVersion"] = re.sub(r"^(?:cwl:|https://w3id.org/cwl/cwl#)", "", workflowobj["cwlVersion"])
58+
elif defaultVersion:
59+
workflowobj["cwlVersion"] = defaultVersion
60+
else:
61+
raise validate.ValidationException("Missing 'cwlVersion'")
62+
63+
if workflowobj["cwlVersion"] == "draft-2" or ".dev" in workflowobj["cwlVersion"]:
64+
# can't validate draft-2 directly, must run updater
65+
workflowobj = update.update(workflowobj, document_loader, fileuri, enable_dev, {})
5966

6067
(document_loader, avsc_names, schema_metadata) = process.get_schema(workflowobj["cwlVersion"])
6168

6269
if isinstance(avsc_names, Exception):
6370
raise avsc_names
6471

65-
if updateonly:
66-
stdout.write(json.dumps(workflowobj, indent=4))
67-
return 0
72+
workflowobj["id"] = fileuri
73+
processobj, metadata = schema.load_and_validate(document_loader, avsc_names, workflowobj, strict)
74+
75+
if not metadata:
76+
metadata = {"$namespaces": processobj.get("$namespaces", {}),
77+
"$schemas": processobj.get("$schemas", []),
78+
"cwlVersion": processobj["cwlVersion"]}
6879

69-
if print_deps:
70-
printdeps(workflowobj, document_loader, stdout, relative_deps)
71-
return 0
80+
if metadata.get("cwlVersion") != update.latest:
81+
processobj = update.update(processobj, document_loader, fileuri, enable_dev, metadata)
7282

73-
try:
74-
processobj, metadata = schema_salad.schema.load_and_validate(document_loader, avsc_names, workflowobj, strict)
75-
except (validate.ValidationException, RuntimeError) as e:
76-
_logger.error(u"Tool definition failed validation:\n%s", e, exc_info=(e if debug else False))
77-
return 1
83+
if jobobj:
84+
metadata["cwl:defaults"] = jobobj
7885

79-
if print_pre:
80-
stdout.write(json.dumps(processobj, indent=4))
81-
return 0
86+
return document_loader, avsc_names, processobj, metadata, uri
8287

83-
if print_rdf:
84-
printrdf(argsworkflow, processobj, document_loader.ctx, rdf_serializer, stdout)
85-
return 0
8688

87-
if print_dot:
88-
printdot(argsworkflow, processobj, document_loader.ctx, stdout)
89-
return 0
89+
def make_tool(document_loader, avsc_names, processobj, metadata, uri, makeTool, kwargs):
90+
processobj, _ = document_loader.resolve_ref(uri)
9091

91-
if urifrag:
92-
processobj, _ = document_loader.resolve_ref(uri)
93-
elif isinstance(processobj, list):
92+
if isinstance(processobj, list):
9493
if 1 == len(processobj):
9594
processobj = processobj[0]
9695
else:
97-
_logger.error(u"Tool file contains graph of multiple objects, must specify one of #%s",
98-
", #".join(urlparse.urldefrag(i["id"])[1]
99-
for i in processobj if "id" in i))
100-
return 1
101-
102-
if not metadata:
103-
metadata = {"$namespaces": processobj.get("$namespaces", {}), "$schemas": processobj.get("$schemas", []), 'cwlVersion': processobj["cwlVersion"]}
104-
105-
try:
106-
t = makeTool(processobj, strict=strict, makeTool=makeTool, loader=document_loader, avsc_names=avsc_names, metadata=metadata)
107-
except (validate.ValidationException) as e:
108-
_logger.error(u"Tool definition failed validation:\n%s", e, exc_info=(e if debug else False))
109-
return 1
110-
except (RuntimeError, workflow.WorkflowException) as e:
111-
_logger.error(u"Tool definition failed initialization:\n%s", e, exc_info=(e if debug else False))
112-
return 1
113-
114-
if jobobj:
96+
raise WorkflowException(u"Tool file contains graph of multiple objects, "
97+
"must specify one of #%s" %
98+
", #".join(urlparse.urldefrag(i["id"])[1]
99+
for i in processobj if "id" in i))
100+
101+
kwargs = kwargs.copy()
102+
kwargs.update({
103+
"makeTool": makeTool,
104+
"loader": document_loader,
105+
"avsc_names": avsc_names,
106+
"metadata": metadata
107+
})
108+
t = makeTool(processobj, **kwargs)
109+
110+
if "cwl:defaults" in metadata:
111+
jobobj = metadata["cwl:defaults"]
115112
for inp in t.tool["inputs"]:
116113
if shortname(inp["id"]) in jobobj:
117114
inp["default"] = jobobj[shortname(inp["id"])]
118115

119116
return t
117+
118+
119+
def load_tool(argsworkflow, makeTool, kwargs,
120+
defaultVersion=None,
121+
enable_dev=False,
122+
strict=True):
123+
124+
document_loader, workflowobj, uri = fetch_document(argsworkflow)
125+
document_loader, avsc_names, processobj, metadata, uri = validate_document(document_loader,
126+
workflowobj,
127+
uri,
128+
defaultVersion=defaultVersion,
129+
enable_dev=enable_dev,
130+
strict=strict)
131+
return make_tool(document_loader, avsc_names, processobj, metadata, uri, makeTool, kwargs)

cwltool/main.py

Lines changed: 40 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import sys
1010
import logging
1111
from . import workflow
12+
from .errors import WorkflowException
1213
import schema_salad.validate as validate
1314
import tempfile
1415
import schema_salad.jsonld_context
@@ -22,7 +23,7 @@
2223
from . import update
2324
from .process import shortname, Process
2425
import rdflib
25-
from load_tool import load_tool
26+
from load_tool import load_tool, fetch_document, validate_document, make_tool
2627
import hashlib
2728
from .utils import aslist
2829
from typing import Union, Any, cast, Callable, Dict, Tuple, IO
@@ -116,7 +117,6 @@ def arg_parser(): # type: () -> argparse.ArgumentParser
116117
exgroup.add_argument("--print-deps", action="store_true", help="Print CWL document dependencies.")
117118
exgroup.add_argument("--print-input-deps", action="store_true", help="Print input object document dependencies.")
118119
exgroup.add_argument("--version", action="store_true", help="Print version and exit")
119-
exgroup.add_argument("--update", action="store_true", help="Update to latest CWL version, print and exit")
120120

121121
exgroup = parser.add_mutually_exclusive_group()
122122
exgroup.add_argument("--strict", action="store_true", help="Strict validation (unrecognized or out of place fields are error)",
@@ -134,6 +134,8 @@ def arg_parser(): # type: () -> argparse.ArgumentParser
134134
parser.add_argument("--relative-deps", choices=['primary', 'cwd'], default="primary",
135135
help="When using --print-deps, print paths relative to primary file or current working directory.")
136136

137+
parser.add_argument("--enable-dev", action="store_true", help="Allow loading and running development versions of CWL spec.", default=False)
138+
137139
parser.add_argument("--enable-net", action="store_true",
138140
help="Use docker's default networking for containers; the default is "
139141
"to disable networking.")
@@ -188,15 +190,15 @@ def output_callback(out, processStatus):
188190
if r:
189191
r.run(**kwargs)
190192
else:
191-
raise workflow.WorkflowException("Workflow cannot make any more progress.")
192-
except workflow.WorkflowException:
193+
raise WorkflowException("Workflow cannot make any more progress.")
194+
except WorkflowException:
193195
raise
194196
except Exception as e:
195197
_logger.exception("Got workflow error")
196-
raise workflow.WorkflowException(unicode(e))
198+
raise WorkflowException(unicode(e))
197199

198200
if final_status[0] != "success":
199-
raise workflow.WorkflowException(u"Process status is %s" % (final_status))
201+
raise WorkflowException(u"Process status is %s" % (final_status))
200202

201203
return final_output[0]
202204

@@ -458,15 +460,37 @@ def main(argsl=None,
458460
return 1
459461

460462
try:
461-
t = load_tool(args.workflow, args.update, args.strict, makeTool, args.debug,
462-
print_pre=args.print_pre,
463-
print_rdf=args.print_rdf,
464-
print_dot=args.print_dot,
465-
print_deps=args.print_deps,
466-
relative_deps=args.relative_deps,
467-
rdf_serializer=args.rdf_serializer,
468-
enable_dev=args.enable_dev,
469-
stdout=stdout)
463+
document_loader, workflowobj, uri = fetch_document(args.workflow)
464+
465+
if args.print_deps:
466+
printdeps(workflowobj, document_loader, stdout, args.relative_deps)
467+
return 0
468+
469+
document_loader, avsc_names, processobj, metadata, uri = validate_document(document_loader,
470+
workflowobj, uri,
471+
enable_dev=args.enable_dev,
472+
strict=args.strict)
473+
474+
if args.print_pre:
475+
stdout.write(json.dumps(processobj, indent=4))
476+
return 0
477+
478+
if args.print_rdf:
479+
printrdf(uri, processobj, document_loader.ctx, args.rdf_serializer, stdout)
480+
return 0
481+
482+
if args.print_dot:
483+
printdot(uri, processobj, document_loader.ctx, stdout)
484+
return 0
485+
486+
t = make_tool(document_loader, avsc_names, processobj,
487+
metadata, uri, makeTool, {})
488+
except (validate.ValidationException) as e:
489+
_logger.error(u"Tool definition failed validation:\n%s", e, exc_info=(e if args.debug else False))
490+
return 1
491+
except (RuntimeError, WorkflowException) as e:
492+
_logger.error(u"Tool definition failed initialization:\n%s", e, exc_info=(e if args.debug else False))
493+
return 1
470494
except Exception as e:
471495
_logger.error(u"I'm sorry, I couldn't load this CWL file, try again with --debug for more information.\n%s\n", e, exc_info=(e if args.debug else False))
472496
return 1
@@ -533,7 +557,7 @@ def main(argsl=None,
533557
except (validate.ValidationException) as e:
534558
_logger.error(u"Input object failed validation:\n%s", e, exc_info=(e if args.debug else False))
535559
return 1
536-
except workflow.WorkflowException as e:
560+
except WorkflowException as e:
537561
_logger.error(u"Workflow error, try again with --debug for more information:\n %s", e, exc_info=(e if args.debug else False))
538562
return 1
539563
except Exception as e:

0 commit comments

Comments
 (0)