# Review notes for this patch. These lines sit ahead of the first "diff --git"
# header, so they are commentary and belong to no hunk.
#
# NOTE(review): this copy of the patch arrived with its line breaks and
# indentation collapsed; both were reconstructed from the hunk line counts and
# Python syntax. Confirm whitespace against the upstream patch before applying.
#
# What the patch does:
#   * cwltool/pathmapper.py: adds downloadHttpFile(httpurl), which GETs the URL
#     through a CacheControl-wrapped requests.Session backed by a FileCache at
#     <directory>/.cache/cwltool, streams the body in 16384-byte chunks into a
#     NamedTemporaryFile(delete=False), and returns that file's name.
#     PathMapper.visit now calls it when the location's scheme is http or https
#     and only runs the symlink-dereferencing loop otherwise.
#   * cwltool/stdfsaccess.py: abspath() returns http/https sources unchanged.
#   * tests/test_http_input.py: new test that maps a File whose location is a
#     raw.githubusercontent.com URL and checks the resolved local copy.
#
# Points to confirm before merging:
#   * <directory> is $XDG_CACHE_HOME when set, yet ".cache" is still appended,
#     giving $XDG_CACHE_HOME/.cache/cwltool.
#   * The response status is never checked before the body is written, and
#     r.close() is skipped if iter_content() or f.write() raises.
#   * The temporary file is created with delete=False; no code in this patch
#     removes it.
#   * Neither hunk adds an import for urllib although both new branches call
#     urllib.parse.urlsplit; presumably each module already imports it - verify.
#   * The new test fetches a live URL, so it presumably needs network access;
#     its f.close() inside the with block is redundant.
diff --git a/cwltool/pathmapper.py b/cwltool/pathmapper.py
index 6802a91eb..dd7c09ae7 100644
--- a/cwltool/pathmapper.py
+++ b/cwltool/pathmapper.py
@@ -5,6 +5,11 @@
 import stat
 import uuid
 from functools import partial
+from tempfile import NamedTemporaryFile
+
+import requests
+from cachecontrol import CacheControl
+from cachecontrol.caches import FileCache
 from typing import Any, Callable, Dict, Iterable, List, Set, Text, Tuple, Union
 
 import schema_salad.validate as validate
@@ -139,6 +144,29 @@ def trim_listing(obj):
     if obj.get("location", "").startswith("file://") and "listing" in obj:
         del obj["listing"]
 
+# Download http Files
+def downloadHttpFile(httpurl):
+    # type: (Text) -> Text
+    cache_session = None
+    if "XDG_CACHE_HOME" in os.environ:
+        directory = os.environ["XDG_CACHE_HOME"]
+    elif "HOME" in os.environ:
+        directory = os.environ["HOME"]
+    else:
+        directory = os.path.expanduser('~')
+
+    cache_session = CacheControl(
+        requests.Session(),
+        cache=FileCache(
+            os.path.join(directory, ".cache", "cwltool")))
+
+    r = cache_session.get(httpurl, stream=True)
+    with NamedTemporaryFile(mode='wb', delete=False) as f:
+        for chunk in r.iter_content(chunk_size=16384):
+            if chunk: # filter out keep-alive new chunks
+                f.write(chunk)
+    r.close()
+    return f.name
 
 class PathMapper(object):
     """Mapping of files from relative path provided in the file to a tuple of
@@ -208,14 +236,18 @@ def visit(self, obj, stagedir, basedir, copy=False, staged=False):
                 self._pathmap[obj["location"]] = MapperEnt(obj["contents"], tgt, "CreateFile", staged)
             else:
                 with SourceLine(obj, "location", validate.ValidationException):
-                    # Dereference symbolic links
                     deref = ab
-                    st = os.lstat(deref)
-                    while stat.S_ISLNK(st.st_mode):
-                        rl = os.readlink(deref)
-                        deref = rl if os.path.isabs(rl) else os.path.join(
-                            os.path.dirname(deref), rl)
+                    if urllib.parse.urlsplit(deref).scheme in ['http','https']:
+                        deref = downloadHttpFile(path)
+                    else:
+                        # Dereference symbolic links
                         st = os.lstat(deref)
+                        while stat.S_ISLNK(st.st_mode):
+                            rl = os.readlink(deref)
+                            deref = rl if os.path.isabs(rl) else os.path.join(
+                                os.path.dirname(deref), rl)
+                            st = os.lstat(deref)
+
                     self._pathmap[path] = MapperEnt(deref, tgt, "WritableFile" if copy else "File", staged)
                     self.visitlisting(obj.get("secondaryFiles", []), stagedir, basedir, copy=copy, staged=staged)
 
diff --git a/cwltool/stdfsaccess.py b/cwltool/stdfsaccess.py
index df5056b04..72016a861 100644
--- a/cwltool/stdfsaccess.py
+++ b/cwltool/stdfsaccess.py
@@ -13,6 +13,8 @@
 def abspath(src, basedir):  # type: (Text, Text) -> Text
     if src.startswith(u"file://"):
         ab = six.text_type(uri_file_path(str(src)))
+    elif urllib.parse.urlsplit(src).scheme in ['http','https']:
+        return src
     else:
         if basedir.startswith(u"file://"):
             ab = src if os.path.isabs(src) else basedir+ '/'+ src
diff --git a/tests/test_http_input.py b/tests/test_http_input.py
new file mode 100644
index 000000000..e3a298190
--- /dev/null
+++ b/tests/test_http_input.py
@@ -0,0 +1,26 @@
+from __future__ import absolute_import
+import unittest
+import os
+import tempfile
+from cwltool.pathmapper import PathMapper
+
+
+class TestHttpInput(unittest.TestCase):
+    def test_http_path_mapping(self):
+        class SubPathMapper(PathMapper):
+            def __init__(self, referenced_files, basedir, stagedir):
+                super(SubPathMapper, self).__init__(referenced_files, basedir, stagedir)
+        input_file_path = "https://raw.githubusercontent.com/common-workflow-language/cwltool/master/tests/2.fasta"
+        tempdir = tempfile.mkdtemp()
+        base_file = [{
+            "class": "File",
+            "location": "https://raw.githubusercontent.com/common-workflow-language/cwltool/master/tests/2.fasta",
+            "basename": "chr20.fa"
+        }]
+        path_map_obj = SubPathMapper(base_file, os.getcwd(), tempdir)
+
+        self.assertIn(input_file_path,path_map_obj._pathmap)
+        assert os.path.exists(path_map_obj._pathmap[input_file_path].resolved) == 1
+        with open(path_map_obj._pathmap[input_file_path].resolved) as f:
+            self.assertIn(">Sequence 561 BP; 135 A; 106 C; 98 G; 222 T; 0 other;",f.read())
+            f.close()
\ No newline at end of file