Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions WDL/CLI.py
Original file line number Diff line number Diff line change
Expand Up @@ -860,7 +860,8 @@ def runner(
cfg.log_unused_options()

# report
outputs_json = {"outputs": values_to_json(output_env, namespace=target.name), "dir": rundir}
# TODO: For WDL 1.2, should we output Files and Directories in the "extended" format?
outputs_json = {"outputs": values_to_json(output_env, namespace=target.name, extended_format=False), "dir": rundir}
runner_standard_output(outputs_json, stdout_file, error_json, log_json)
return outputs_json

Expand Down Expand Up @@ -1080,11 +1081,13 @@ def runner_input(
f"missing required inputs for {target.name}: {', '.join(missing_inputs.keys())}"
)

# TODO: Is this where we should fill in the listing fields for Directory inputs that don't come with them?

# make a pass over the Env to create a dict for Cromwell-style input JSON
return (
target,
input_env,
values_to_json(input_env, namespace=(target.name if isinstance(target, Workflow) else "")),
values_to_json(input_env, namespace=(target.name if isinstance(target, Workflow) else ""), extended_format=True),
)


Expand Down Expand Up @@ -1144,7 +1147,7 @@ def runner_input_json_file(available_inputs, namespace, input_file, downloadable
ans = Value.rewrite_env_paths(
ans,
lambda v: validate_input_path(
v.value, isinstance(v, Value.Directory), downloadable, root
v.value["location"], isinstance(v, Value.Directory), downloadable, root
),
)

Expand Down Expand Up @@ -1299,6 +1302,9 @@ def validate_input_path(path, directory, downloadable, root):
2. resides within root
3. contains no symlinks pointing outside or to absolute paths
"""

print(f"Checking {path}")

if downloadable and downloadable(path, directory):
return path

Expand Down
4 changes: 2 additions & 2 deletions WDL/StdLib.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def _read(self, parse: Callable[[str], Value.Base]) -> Callable[[Value.File], Va
"generate read_* function implementation based on parse"

def f(file: Value.File) -> Value.Base:
with open(self._devirtualize_filename(file.value), "r") as infile:
with open(self._devirtualize_filename(file.value["location"]), "r") as infile:
return parse(infile.read())

return f
Expand Down Expand Up @@ -716,7 +716,7 @@ def _call_eager(self, expr: "Expr.Apply", arguments: List[Value.Base]) -> Value.
ans = []
for file in files.value:
if isinstance(file, Value.File):
ans.append(os.path.getsize(self.stdlib._devirtualize_filename(file.value)))
ans.append(os.path.getsize(self.stdlib._devirtualize_filename(file.value["location"])))
elif isinstance(file, Value.Null):
ans.append(0)
else:
Expand Down
198 changes: 184 additions & 14 deletions WDL/Value.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,13 @@
:top-classes: WDL.Value.Base
"""

import os
import json
import copy
import base64
import hashlib
from abc import ABC
from typing import Any, List, Optional, Tuple, Dict, Iterable, Union, Callable, Set, TYPE_CHECKING
from typing import cast, Any, List, Literal, Mapping, Optional, Tuple, Dict, Iterable, Union, Callable, Set, TypedDict, TYPE_CHECKING
from contextlib import suppress
from . import Error, Type, Env

Expand Down Expand Up @@ -165,21 +166,176 @@ def coerce(self, desired_type: Optional[Type.Base] = None) -> Base:
raise Error.EvalError(self.expr, msg) if self.expr else Error.RuntimeError(msg)
return super().coerce(desired_type)

# File values are recommended to support additional attributes by the spec, so
# we allow passing through unrecognized attributes.
#
# So we use a TypedDict to describe the attributes we know about.
class ExtendedFile(TypedDict, total=False):
    """
    Known attributes of a WDL 1.2 "extended" File JSON object.

    ``total=False`` because any key may be absent in input; at runtime the
    dict may also carry unrecognized extra keys alongside the declared ones
    (see the note above).
    """

    # discriminator distinguishing File objects from Directory objects
    type: Literal["File"]
    # URI or path of the file
    location: str
    # file name with no directory components
    basename: str

class File(String):
"""``value`` has Python type ``str``"""
def _parse_extended_file(value: Mapping[str, Any], parent_location: str | None = None) -> ExtendedFile:
    """
    Make an ExtendedFile-typed clone of the given dict, with inferable fields
    filled in, or raise Error.InputError if the input is not the right format.

    :param value: parsed JSON object describing a File; must carry a string
        "location", or a "basename" when ``parent_location`` is given
    :param parent_location: location of the enclosing Directory (if any), used
        to derive a missing "location" from "basename"
    :raises Error.InputError: if the object is not a valid extended File
    """
    # We're going to possibly modify the input object, so copy it. The copy
    # also carries through any unrecognized extra attributes.
    value = dict(value)
    # "type", when present, must identify this object as a File.
    if "type" in value:
        if value["type"] != "File":
            raise Error.InputError("WDL.Value.File invalid type: " + str(value["type"]))
    else:
        value["type"] = "File"
    # "basename", when present, must be a string with no path separators.
    if "basename" in value:
        if not isinstance(value["basename"], str):
            raise Error.InputError(f"WDL.Value.File invalid basename type: {type(value['basename'])}")
        if "/" in value["basename"]:
            raise Error.InputError(f"WDL.Value.File invalid basename: {value['basename']}")
    # Derive "location" from the enclosing Directory + "basename" when absent.
    if "location" not in value:
        if parent_location is None:
            raise Error.InputError("WDL.Value.File invalid JSON object: missing location without enclosing Directory available")
        elif "basename" not in value:
            raise Error.InputError("WDL.Value.File invalid JSON object: missing location and basename")
        else:
            value["location"] = os.path.join(parent_location, value["basename"])
    if not isinstance(value["location"], str):
        raise Error.InputError(f"WDL.Value.File invalid location type: {type(value['location'])}")
    # A File location may not end with "/" (that would denote a Directory).
    if value["location"] != value["location"].rstrip("/"):
        raise Error.InputError("WDL.Value.File invalid path: " + value["location"])
    if "basename" not in value:
        # Remember the basename if it wasn't provided.
        # TODO: Is this worth doing? Should this be reflected in our value TypedDict?
        value["basename"] = os.path.basename(value["location"])

    # Now we know it's a valid ExtendedFile
    return cast(ExtendedFile, value)

class File(Base):
    """``value`` has Python type ``ExtendedFile``, which is a TypedDict representing the WDL 1.2 "extended" file syntax."""

    value: ExtendedFile

    def __init__(self, value: str | Mapping[str, Any], expr: "Optional[Expr.Base]" = None) -> None:
        """
        Make a File from an input string or parsed JSON object.

        :raises Error.InputError: if a string path has a trailing slash, or a
            JSON object does not describe a valid extended File
        """
        if isinstance(value, str):
            # Always interpret strings as actual filenames at this level.
            if value != value.rstrip("/"):
                raise Error.InputError("WDL.Value.File invalid path: " + value)
            file_value: ExtendedFile = {"type": "File", "location": value, "basename": os.path.basename(value)}
        else:
            file_value = _parse_extended_file(value)

        super().__init__(Type.File(), file_value, expr=expr)

    def __str__(self) -> str:
        # Render as the plain location string, via the String coercion below.
        return str(self.coerce(Type.String()))

    def coerce(self, desired_type: Optional[Type.Base] = None) -> Base:
        if isinstance(desired_type, Type.File):
            return File(self.value, self.expr)
        if isinstance(desired_type, Type.String):
            # TODO: Do we need to think about localizing to a name ending in basename here?
            return String(self.value["location"], self.expr)
        return super().coerce(desired_type)

class ExtendedDirectory(TypedDict, total=False):
    """
    Known attributes of a WDL 1.2 "extended" Directory JSON object.

    ``total=False`` because any key may be absent in input.
    """

    # discriminator distinguishing Directory objects from File objects
    type: Literal["Directory"]
    # URI or path of the directory
    location: str
    # directory name with no parent path components
    basename: str
    # TODO: Should we let full File or Directory objects in the listing?
    # parsed contents of the directory (Files and sub-Directories)
    listing: List[Union[ExtendedFile, "ExtendedDirectory"]]

def _parse_extended_directory(value: Mapping[str, Any], parent_location: str | None = None) -> ExtendedDirectory:
    """
    Make an ExtendedDirectory-typed clone of the given dict, with inferable
    fields filled in, or raise Error.InputError if the input is not the right
    format. Listing entries are parsed recursively.

    :param value: parsed JSON object describing a Directory
    :param parent_location: location of the enclosing Directory (if any), used
        to derive a missing "location" from "basename"
    :raises Error.InputError: if the object is not a valid extended Directory
    """

    # We don't want to just clone the whole object immediately, so we go key by
    # key through the keys we care about and then update with the rest.
    dir_value: Dict[str, Any] = {}

    # "type", when present, must identify this object as a Directory.
    if "type" in value:
        if value["type"] != "Directory":
            raise Error.InputError("WDL.Value.Directory invalid type: " + str(value["type"]))
        dir_value["type"] = value["type"]
    else:
        dir_value["type"] = "Directory"
    # "basename", when present, must be a string with no path separators.
    if "basename" in value:
        if not isinstance(value["basename"], str):
            raise Error.InputError(f"WDL.Value.Directory invalid basename type: {type(value['basename'])}")
        if "/" in value["basename"]:
            raise Error.InputError(f"WDL.Value.Directory invalid basename: {value['basename']}")
        dir_value["basename"] = value["basename"]
    # Derive "location" from the enclosing Directory + "basename" when absent.
    if "location" in value:
        if not isinstance(value["location"], str):
            raise Error.InputError(f"WDL.Value.Directory invalid location type: {type(value['location'])}")
        dir_value["location"] = value["location"]
    else:
        if parent_location is None:
            raise Error.InputError("WDL.Value.Directory invalid JSON object: missing location without enclosing Directory available")
        elif "basename" not in value:
            raise Error.InputError("WDL.Value.Directory invalid JSON object: missing location and basename")
        else:
            dir_value["location"] = os.path.join(parent_location, value["basename"])
    if "basename" not in value:
        # Remember the basename if it wasn't provided.
        # TODO: Is this worth doing? Should this be reflected in our value TypedDict?
        dir_value["basename"] = os.path.basename(dir_value["location"].rstrip("/"))

    # Parse each listing entry recursively, dispatching on its "type".
    if "listing" in value:
        if not isinstance(value["listing"], list):
            raise Error.InputError(f"WDL.Value.Directory invalid listing type: {type(value['listing'])}")
        dir_value["listing"] = []
        for item in value["listing"]:
            if not isinstance(item, dict):
                raise Error.InputError(f"WDL.Value.Directory invalid listing entry type: {type(item)}")
            if "type" not in item:
                raise Error.InputError("WDL.Value.Directory invalid listing entry has no type")
            if item["type"] == "File":
                dir_value["listing"].append(_parse_extended_file(item, dir_value["location"]))
            elif item["type"] == "Directory":
                dir_value["listing"].append(_parse_extended_directory(item, dir_value["location"]))
            else:
                raise Error.InputError(f"WDL.Value.Directory invalid listing entry type value: " + str(item["type"]))
    else:
        raise Error.InputError("WDL.Value.Directory has no listing")

    # Pass through any unrecognized extra attributes, as promised above and
    # consistent with _parse_extended_file.
    for key, item_value in value.items():
        if key not in dir_value:
            dir_value[key] = item_value

    # Now we know this is a ExtendedDirectory
    return cast(ExtendedDirectory, dir_value)

class Directory(Base):
    """``value`` has Python type ``ExtendedDirectory``"""

    value: ExtendedDirectory

    def __init__(self, value: str | Mapping[str, Any], expr: "Optional[Expr.Base]" = None) -> None:
        """
        Make a Directory from an input string or parsed JSON object.

        :raises Error.InputError: if a JSON object does not describe a valid
            extended Directory
        """
        if isinstance(value, str):
            # Always interpret strings as actual filenames at this level.
            # Strip any trailing slash before taking the basename, so e.g.
            # "a/b/" yields "b" rather than "" (matching
            # _parse_extended_directory's basename inference).
            dir_value: ExtendedDirectory = {
                "type": "Directory",
                "location": value,
                "basename": os.path.basename(value.rstrip("/")),
            }
            # TODO: fill the listing recursively somewhere where we have access to the config/plugins
        else:
            dir_value = _parse_extended_directory(value)

        super().__init__(Type.Directory(), dir_value, expr=expr)

    def __str__(self) -> str:
        # Render as the plain location string, via the String coercion below.
        return str(self.coerce(Type.String()))

    def coerce(self, desired_type: Optional[Type.Base] = None) -> Base:
        if isinstance(desired_type, Type.Directory):
            return Directory(self.value, self.expr)
        if isinstance(desired_type, Type.String):
            # TODO: Do we need to think about localizing to a name ending in basename here?
            return String(self.value["location"], self.expr)
        return super().coerce(desired_type)


class Array(Base):
Expand Down Expand Up @@ -553,7 +709,21 @@ def from_json(type: Type.Base, value: Any) -> Base:
return Int(value)
if isinstance(type, (Type.Float, Type.Any)) and isinstance(value, (float, int)):
return Float(float(value))
if isinstance(type, Type.File) and isinstance(value, str):
if isinstance(type, Type.File) and isinstance(value, (str, dict)):
if isinstance(value, str):
try:
# The spec says an extended-syntax File value can come as a
# string encoding a JSON object.
parsed_value = json.loads(value)
if isinstance(parsed_value, dict):
return File(parsed_value)
except Error.InputError as e:
# A filename might look like a JSON dict but not describe a
# File value.
pass
except json.JSONDecodeError as e:
# A filename probably isn't actually a serialized JSON object.
pass
return File(value)
if isinstance(type, Type.Directory) and isinstance(value, str):
return Directory(value)
Expand Down Expand Up @@ -650,7 +820,7 @@ def map_paths(w: Base) -> Base:
fw = f(w)
if fw is None:
return Null(expr=w.expr)
w.value = fw
w.value["location"] = fw
# recursive descent into compound Values
elif isinstance(w.value, list):
value2: List[Any] = []
Expand Down Expand Up @@ -696,7 +866,7 @@ def rewrite_files(v: Base, f: Callable[[str], Optional[str]]) -> Base:
(deprecated: use ``rewrite_paths`` to handle Directory values as well)
"""

return rewrite_paths(v, lambda fd: f(fd.value) if isinstance(fd, File) else fd.value)
return rewrite_paths(v, lambda fd: f(fd.value["location"]) if isinstance(fd, File) else fd.value["location"])


def rewrite_env_files(
Expand Down
12 changes: 10 additions & 2 deletions WDL/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,23 +300,31 @@ def values_from_json(

def values_to_json(
values_env: Union[Env.Bindings[Value.Base], Env.Bindings[Tree.Decl], Env.Bindings[Type.Base]],
namespace: str = "",
namespace: str = "", extended_format: bool | None = None
) -> Dict[str, Any]:
"""
Convert a ``WDL.Env.Bindings[WDL.Value.Base]`` to a dict which ``json.dumps`` to
Cromwell-style JSON.

:param namespace: prefix this namespace to each key (e.g. workflow name)
:param extended_format: If set to True, outputs File and Directory values in WDL 1.2+ extended syntax.
"""
# also can be used on Env.Bindings[Tree.Decl] or Env.Types, then the right-hand side of
# each entry will be the type string.
if namespace and not namespace.endswith("."):
namespace += "."
if extended_format is None:
extended_format = False
ans = {}
for item in values_env:
v = item.value
if isinstance(v, Value.Base):
j = v.json
if not extended_format and isinstance(v, (Value.File, Value.Directory)):
# The JSON property spits out extended syntax. We need
# location-only syntax, which comes from string coercion.
j = v.coerce(Type.String()).json
else:
j = v.json
elif isinstance(item.value, Tree.Decl):
j = str(item.value.type)
else:
Expand Down
1 change: 1 addition & 0 deletions WDL/_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ def write_values_json(
values_to_json(values_env, namespace=namespace),
indent=2,
sort_keys=True,
extended_format=True,
),
filename,
)
Expand Down
Loading
Loading