Skip to content

Commit bed0226

Browse files
authored
Merge pull request #63 from mrc-ide/mrc-5015
Add support for pushing to http locations.
2 parents 919111e + 982a9a8 commit bed0226

23 files changed

+470
-175
lines changed

example/src/data/data.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
import random
2+
3+
d = [random.random() for _ in range(10)]
4+
with open("result.txt", "w") as f:
5+
f.writelines(f"{x}\n" for x in d)

example/src/data/orderly.R

Lines changed: 0 additions & 3 deletions
This file was deleted.

example/src/depends/depends.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
import pyorderly
2+
3+
pyorderly.dependency(
4+
None, "latest(name == 'data')", {"input.txt": "result.txt"}
5+
)
6+
pyorderly.artefact("Summary", "summary.txt")
7+
8+
with open("input.txt") as f:
9+
d = [float(x.strip()) for x in f.readlines()]
10+
total = sum(d)
11+
with open("summary.txt", "w") as f:
12+
f.write(f"{total}\n")

example/src/depends/orderly.R

Lines changed: 0 additions & 7 deletions
This file was deleted.

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ exclude_lines = [
193193
"@abstractmethod",
194194
"@abc.abstractmethod",
195195
"@overload",
196+
"raise NotImplementedError",
196197
]
197198

198199
[tool.pytest.ini_options]

src/pyorderly/outpack/init.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from pathlib import Path
22

33
from pyorderly.outpack.config import Config, read_config, write_config
4+
from pyorderly.outpack.root import OutpackRoot
45

56

67
def outpack_init(
@@ -31,16 +32,15 @@ def outpack_init(
3132
_validate_same_core_configuration(config.core, read_config(path).core)
3233
else:
3334
path_outpack.mkdir(parents=True, exist_ok=True)
34-
path_outpack.joinpath("metadata").mkdir(parents=True, exist_ok=True)
35-
path_outpack.joinpath("location").mkdir(parents=True, exist_ok=True)
36-
path_outpack.joinpath("location/local").mkdir(
37-
parents=True, exist_ok=True
38-
)
35+
path_outpack.joinpath("metadata").mkdir(exist_ok=True)
36+
path_outpack.joinpath("location").mkdir(exist_ok=True)
37+
path_outpack.joinpath("location/local").mkdir(exist_ok=True)
3938
if path_archive is not None:
4039
path.joinpath(path_archive).mkdir(exist_ok=True)
40+
4141
write_config(config, path)
4242

43-
return path
43+
return OutpackRoot(path).path
4444

4545

4646
def _validate_same_core_configuration(now, then):

src/pyorderly/outpack/location.py

Lines changed: 56 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
11
import collections
22
import shutil
33
from pathlib import PurePath
4+
from typing import Union
45

56
from pyorderly.outpack.config import Location, update_config
67
from pyorderly.outpack.location_driver import LocationDriver
78
from pyorderly.outpack.location_http import OutpackLocationHTTP
89
from pyorderly.outpack.location_packit import outpack_location_packit
910
from pyorderly.outpack.location_path import OutpackLocationPath
1011
from pyorderly.outpack.location_ssh import OutpackLocationSSH, parse_ssh_url
12+
from pyorderly.outpack.metadata import MetadataCore
1113
from pyorderly.outpack.root import OutpackRoot, root_open
1214
from pyorderly.outpack.static import (
1315
LOCATION_LOCAL,
1416
LOCATION_ORPHAN,
1517
LOCATION_RESERVED_NAME,
1618
)
1719

20+
LocationSelector = Union[None, str, list[str]]
21+
1822

1923
def outpack_location_list(root=None, *, locate=True):
2024
root = root_open(root, locate=locate)
@@ -94,15 +98,20 @@ def outpack_location_rename(old, new, root=None, *, locate=True):
9498

9599

96100
def location_resolve_valid(
97-
location, root, *, include_local, include_orphan, allow_no_locations
98-
):
101+
location: LocationSelector,
102+
root: OutpackRoot,
103+
*,
104+
include_local: bool,
105+
include_orphan: bool,
106+
allow_no_locations: bool,
107+
) -> list[str]:
99108
if location is None:
100-
location = outpack_location_list(root)
109+
result = outpack_location_list(root)
101110
elif isinstance(location, str):
102111
if location not in outpack_location_list(root):
103112
msg = f"Unknown location: '{location}'"
104113
raise Exception(msg)
105-
location = [location]
114+
result = [location]
106115
elif isinstance(location, collections.abc.Iterable) and all(
107116
isinstance(item, str) for item in location
108117
):
@@ -111,24 +120,24 @@ def location_resolve_valid(
111120
unknown_text = "', '".join(unknown)
112121
msg = f"Unknown location: '{unknown_text}'"
113122
raise Exception(msg)
114-
location = list(location)
123+
result = list(location)
115124
else:
116125
msg = (
117126
"Invalid input for 'location'; expected None or a list of "
118127
"strings"
119128
)
120129
raise Exception(msg)
121130

122-
if not include_local and LOCATION_LOCAL in location:
123-
location.remove(LOCATION_LOCAL)
124-
if not include_orphan and LOCATION_ORPHAN in location: # pragma: no cover
125-
location.remove(LOCATION_ORPHAN)
131+
if not include_local and LOCATION_LOCAL in result:
132+
result.remove(LOCATION_LOCAL)
133+
if not include_orphan and LOCATION_ORPHAN in result: # pragma: no cover
134+
result.remove(LOCATION_ORPHAN)
126135

127-
if len(location) == 0 and not allow_no_locations:
136+
if len(result) == 0 and not allow_no_locations:
128137
msg = "No suitable location found"
129138
raise Exception(msg)
130139

131-
return location
140+
return result
132141

133142

134143
def _location_check_new_name(root, name):
@@ -169,3 +178,39 @@ def _location_driver(location_name, root) -> LocationDriver:
169178

170179
msg = "invalid location type"
171180
raise Exception(msg)
181+
182+
183+
def _find_all_dependencies(
184+
packet_ids: list[str],
185+
metadata: dict[str, MetadataCore],
186+
*,
187+
allow_missing_packets: bool = False,
188+
) -> list[str]:
189+
result = []
190+
191+
# This is a standard depth first search through the packet graph.
192+
seen = set(packet_ids)
193+
todo = list(packet_ids)
194+
while todo:
195+
packet_id = todo.pop()
196+
result.append(packet_id)
197+
198+
m = metadata.get(packet_id)
199+
if m is not None:
200+
for dep in m.depends:
201+
if dep.packet not in seen:
202+
seen.add(dep.packet)
203+
todo.append(dep.packet)
204+
elif not allow_missing_packets:
205+
msg = f"Unknown packet {packet_id}"
206+
raise Exception(msg)
207+
208+
# We want the result to be reverse-topologically sorted, such that
209+
# dependencies come before their dependents. In principle, we could get
210+
# such an order directly from the graph traversal, but doing this with
211+
# multiple start points is not as easy as it seems.
212+
#
213+
# Using a lexicographic sort on the packet IDs is a reasonable alternative
214+
# because the IDs start with their timestamp, and dependencies have smaller
215+
# timestamps than dependents.
216+
return sorted(result)

src/pyorderly/outpack/location_driver.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
import builtins
21
from abc import abstractmethod
32
from contextlib import AbstractContextManager
3+
from pathlib import Path
44

55
from pyorderly.outpack.metadata import MetadataCore, PacketFile, PacketLocation
66

@@ -14,12 +14,24 @@ class LocationDriver(AbstractContextManager):
1414
"""
1515

1616
@abstractmethod
17-
def list(self) -> dict[str, PacketLocation]: ...
17+
def list_packets(self) -> dict[str, PacketLocation]: ...
1818

1919
@abstractmethod
20-
def metadata(self, packet_ids: builtins.list[str]) -> dict[str, str]: ...
20+
def metadata(self, packet_ids: list[str]) -> dict[str, str]: ...
2121

2222
@abstractmethod
2323
def fetch_file(
2424
self, packet: MetadataCore, file: PacketFile, dest: str
2525
) -> None: ...
26+
27+
@abstractmethod
28+
def list_unknown_packets(self, ids: list[str]) -> list[str]: ...
29+
30+
@abstractmethod
31+
def list_unknown_files(self, hashes: list[str]) -> list[str]: ...
32+
33+
@abstractmethod
34+
def push_file(self, src: Path, hash: str): ...
35+
36+
@abstractmethod
37+
def push_metadata(self, src: Path, hash: str): ...

src/pyorderly/outpack/location_http.py

Lines changed: 34 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
import builtins
21
import shutil
2+
from pathlib import Path
33
from urllib.parse import urljoin
44

55
import requests
@@ -58,15 +58,15 @@ def __exit__(self, *args):
5858
self._client.__exit__(*args)
5959

6060
@override
61-
def list(self) -> dict[str, PacketLocation]:
61+
def list_packets(self) -> dict[str, PacketLocation]:
6262
response = self._client.get("metadata/list").json()
6363
data = response["data"]
6464
return {
6565
entry["packet"]: PacketLocation.from_dict(entry) for entry in data
6666
}
6767

6868
@override
69-
def metadata(self, ids: builtins.list[str]) -> dict[str, str]:
69+
def metadata(self, ids: list[str]) -> dict[str, str]:
7070
result = {}
7171
for i in ids:
7272
result[i] = self._client.get(f"metadata/{i}/text").text
@@ -78,3 +78,34 @@ def fetch_file(self, packet: MetadataCore, file: PacketFile, dest: str):
7878
response = self._client.get(f"file/{file.hash}", stream=True)
7979
with open(dest, "wb") as f:
8080
shutil.copyfileobj(response.raw, f)
81+
82+
@override
83+
def list_unknown_packets(self, ids: list[str]) -> list[str]:
84+
response = self._client.post(
85+
"packets/missing",
86+
json={
87+
"ids": ids,
88+
"unpacked": True,
89+
},
90+
).json()
91+
return response["data"]
92+
93+
@override
94+
def list_unknown_files(self, hashes: list[str]) -> list[str]:
95+
response = self._client.post(
96+
"files/missing",
97+
json={
98+
"hashes": hashes,
99+
},
100+
).json()
101+
return response["data"]
102+
103+
@override
104+
def push_file(self, src: Path, hash: str):
105+
with open(src, "rb") as f:
106+
self._client.post(f"file/{hash}", stream=True, data=f)
107+
108+
@override
109+
def push_metadata(self, src: Path, hash: str):
110+
with open(src, "rb") as f:
111+
self._client.post(f"packet/{hash}", stream=True, data=f)

src/pyorderly/outpack/location_path.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
import builtins
21
import os
32
import shutil
3+
from pathlib import Path
44

55
from typing_extensions import override
66

@@ -24,11 +24,11 @@ def __exit__(self, exc_type, exc_value, exc_tb):
2424
pass
2525

2626
@override
27-
def list(self) -> dict[str, PacketLocation]:
27+
def list_packets(self) -> dict[str, PacketLocation]:
2828
return self.__root.index.location(LOCATION_LOCAL)
2929

3030
@override
31-
def metadata(self, packet_ids: builtins.list[str]) -> dict[str, str]:
31+
def metadata(self, packet_ids: list[str]) -> dict[str, str]:
3232
all_ids = self.__root.index.location(LOCATION_LOCAL).keys()
3333
missing_ids = set(packet_ids).difference(all_ids)
3434
if missing_ids:
@@ -54,3 +54,19 @@ def fetch_file(self, _packet: MetadataCore, file: PacketFile, dest: str):
5454
msg = f"Hash '{file.hash}' not found at location"
5555
raise Exception(msg)
5656
shutil.copyfile(path, dest)
57+
58+
@override
59+
def list_unknown_packets(self, ids: list[str]) -> list[str]:
60+
raise NotImplementedError()
61+
62+
@override
63+
def list_unknown_files(self, hashes: list[str]) -> list[str]:
64+
raise NotImplementedError()
65+
66+
@override
67+
def push_file(self, src: Path, hash: str):
68+
raise NotImplementedError()
69+
70+
@override
71+
def push_metadata(self, src: Path, hash: str):
72+
raise NotImplementedError()

0 commit comments

Comments
 (0)