-
Notifications
You must be signed in to change notification settings - Fork 264
Conditionally use indexed_gzip #552
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 25 commits
9e60eeb
3d170be
32e3b05
572ab00
7cbbab2
052ec60
172bb61
bd39417
cc73dd6
9ceaf7f
bf2ad0d
388daae
382dcf8
89a2d06
a6972c0
0a111ca
961f882
cd65af4
bfc0013
d9a1ebd
093ae6e
d7c7cac
39dea17
82e7c84
191eb5c
16f191e
aae3417
9008696
ed55620
0b41c49
dc07879
67405b0
790c037
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,13 +25,16 @@ | |
|
||
See :mod:`nibabel.tests.test_proxy_api` for proxy API conformance checks. | ||
""" | ||
from contextlib import contextmanager | ||
from threading import RLock | ||
|
||
import numpy as np | ||
|
||
from .deprecated import deprecate_with_version | ||
from .volumeutils import array_from_file, apply_read_scaling | ||
from .fileslice import fileslice | ||
from .keywordonly import kw_only_meth | ||
from .openers import ImageOpener | ||
from .openers import ImageOpener, HAVE_INDEXED_GZIP | ||
|
||
|
||
class ArrayProxy(object): | ||
|
@@ -69,8 +72,8 @@ class ArrayProxy(object): | |
_header = None | ||
|
||
@kw_only_meth(2) | ||
def __init__(self, file_like, spec, mmap=True): | ||
""" Initialize array proxy instance | ||
def __init__(self, file_like, spec, mmap=True, keep_file_open='auto'): | ||
"""Initialize array proxy instance | ||
|
||
Parameters | ||
---------- | ||
|
@@ -99,8 +102,17 @@ def __init__(self, file_like, spec, mmap=True): | |
True gives the same behavior as ``mmap='c'``. If `file_like` | ||
cannot be memory-mapped, ignore `mmap` value and read array from | ||
file. | ||
scaling : {'fp', 'dv'}, optional, keyword only | ||
Type of scaling to use - see header ``get_data_scaling`` method. | ||
keep_file_open : { 'auto', True, False }, optional, keyword only | ||
`keep_file_open` controls whether a new file handle is created | ||
every time the image is accessed, or a single file handle is | ||
created and used for the lifetime of this ``ArrayProxy``. If | ||
``True``, a single file handle is created and used. If ``False``, | ||
a new file handle is created every time the image is accessed. If | ||
``'auto'`` (the default), and the optional ``indexed_gzip`` | ||
dependency is present, a single file handle is created and | ||
persisted. If ``indexed_gzip`` is not available, behaviour is the | ||
same as if ``keep_file_open is False``. If ``file_like`` is an | ||
open file handle, this setting has no effect. | ||
""" | ||
if mmap not in (True, False, 'c', 'r'): | ||
raise ValueError("mmap should be one of {True, False, 'c', 'r'}") | ||
|
@@ -125,6 +137,68 @@ def __init__(self, file_like, spec, mmap=True): | |
# Permit any specifier that can be interpreted as a numpy dtype | ||
self._dtype = np.dtype(self._dtype) | ||
self._mmap = mmap | ||
self._keep_file_open = self._should_keep_file_open(file_like, | ||
keep_file_open) | ||
self._lock = RLock() | ||
|
||
def __del__(self): | ||
"""If this ``ArrayProxy`` was created with ``keep_file_open=True``, | ||
the open file object is closed if necessary. | ||
""" | ||
if hasattr(self, '_opener') and not self._opener.closed: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What happens if the user passes an open file to the original image creation routine? Does that get closed too? |
||
self._opener.close_if_mine() | ||
self._opener = None | ||
|
||
def __getstate__(self): | ||
"""Returns the state of this ``ArrayProxy`` during pickling. """ | ||
state = self.__dict__.copy() | ||
state.pop('_lock', None) | ||
return state | ||
|
||
def __setstate__(self, state): | ||
"""Sets the state of this ``ArrayProxy`` during unpickling. """ | ||
self.__dict__.update(state) | ||
self._lock = RLock() | ||
|
||
def _should_keep_file_open(self, file_like, keep_file_open): | ||
"""Called by ``__init__``, and used to determine the final value of | ||
``keep_file_open``. | ||
|
||
The return value is derived from these rules: | ||
|
||
- If ``file_like`` is a file(-like) object, ``False`` is returned. | ||
Otherwise, ``file_like`` is assumed to be a file name. | ||
- if ``file_like`` ends with ``'gz'``, and the ``indexed_gzip`` | ||
library is available, ``True`` is returned. | ||
- Otherwise, ``False`` is returned. | ||
|
||
Parameters | ||
---------- | ||
|
||
file_like : object | ||
File-like object or filename, as passed to ``__init__``. | ||
keep_file_open : { 'auto', True, False } | ||
Flag as passed to ``__init__``. | ||
|
||
Returns | ||
------- | ||
|
||
The value of ``keep_file_open`` that will be used by this | ||
``ArrayProxy``. | ||
""" | ||
# if keep_file_open is True/False, we do what the user wants us to do | ||
if isinstance(keep_file_open, bool): | ||
return keep_file_open | ||
if keep_file_open != 'auto': | ||
raise ValueError( | ||
'keep_file_open should be one of {\'auto\', True, False }') | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add None here too I think. |
||
|
||
# file_like is a handle - keep_file_open is irrelevant | ||
if hasattr(file_like, 'read') and hasattr(file_like, 'seek'): | ||
return False | ||
# Otherwise, if file_like is gzipped, and we have_indexed_gzip, we set | ||
# keep_file_open to True, else we set it to False | ||
return HAVE_INDEXED_GZIP and file_like.endswith('gz') | ||
|
||
@property | ||
@deprecate_with_version('ArrayProxy.header deprecated', '2.2', '3.0') | ||
|
@@ -155,12 +229,34 @@ def inter(self): | |
def is_proxy(self): | ||
return True | ||
|
||
@contextmanager | ||
def _get_fileobj(self): | ||
"""Create and return a new ``ImageOpener``, or return an existing one. | ||
|
||
The specific behaviour depends on the value of the ``keep_file_open`` | ||
flag that was passed to ``__init__``. | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Extra blank line. |
||
|
||
Yields | ||
------ | ||
ImageOpener | ||
A newly created ``ImageOpener`` instance, or an existing one, | ||
which provides access to the file. | ||
""" | ||
if self._keep_file_open: | ||
if not hasattr(self, '_opener'): | ||
self._opener = ImageOpener(self.file_like) | ||
yield self._opener | ||
else: | ||
with ImageOpener(self.file_like) as opener: | ||
yield opener | ||
|
||
def get_unscaled(self): | ||
''' Read of data from file | ||
""" Read of data from file | ||
|
||
This is an optional part of the proxy API | ||
''' | ||
with ImageOpener(self.file_like) as fileobj: | ||
""" | ||
with self._get_fileobj() as fileobj, self._lock: | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I was thinking There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think, given that the This situation is a bit different to the locking needed in the |
||
raw_data = array_from_file(self._shape, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does this call not need to lock, as well? It also has a seek/read pairing. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, you're right, well spotted. I suppose the lock can occur at the ArrayProxy level, although it looks like there are other things in |
||
self._dtype, | ||
fileobj, | ||
|
@@ -175,18 +271,19 @@ def __array__(self): | |
return apply_read_scaling(raw_data, self._slope, self._inter) | ||
|
||
def __getitem__(self, slicer): | ||
with ImageOpener(self.file_like) as fileobj: | ||
with self._get_fileobj() as fileobj: | ||
raw_data = fileslice(fileobj, | ||
slicer, | ||
self._shape, | ||
self._dtype, | ||
self._offset, | ||
order=self.order) | ||
order=self.order, | ||
lock=self._lock) | ||
# Upcast as necessary for big slopes, intercepts | ||
return apply_read_scaling(raw_data, self._slope, self._inter) | ||
|
||
def reshape(self, shape): | ||
''' Return an ArrayProxy with a new shape, without modifying data ''' | ||
""" Return an ArrayProxy with a new shape, without modifying data """ | ||
size = np.prod(self._shape) | ||
|
||
# Calculate new shape if not fully specified | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -17,6 +17,20 @@ | |
SKIP_THRESH = 2 ** 8 | ||
|
||
|
||
class _NullLock(object): | ||
"""The ``_NullLock`` is an object which can be used in place of a | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Single standalone line as first line in docstring. Maybe:
|
||
``threading.Lock`` object, but doesn't actually do anything. | ||
|
||
It is used by the ``read_segments`` function in the event that a | ||
``Lock`` is not provided by the caller. | ||
""" | ||
def __enter__(self): | ||
pass | ||
|
||
def __exit__(self, exc_type, exc_val, exc_tb): | ||
return False | ||
|
||
|
||
def is_fancy(sliceobj): | ||
""" Returns True if sliceobj is attempting fancy indexing | ||
|
||
|
@@ -622,7 +636,7 @@ def slicers2segments(read_slicers, in_shape, offset, itemsize): | |
return all_segments | ||
|
||
|
||
def read_segments(fileobj, segments, n_bytes): | ||
def read_segments(fileobj, segments, n_bytes, lock=None): | ||
""" Read `n_bytes` byte data implied by `segments` from `fileobj` | ||
|
||
Parameters | ||
|
@@ -634,29 +648,41 @@ def read_segments(fileobj, segments, n_bytes): | |
absolute file offset in bytes and number of bytes to read | ||
n_bytes : int | ||
total number of bytes that will be read | ||
lock : threading.Lock | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Can actually be any context manager - or at least the NullLock, no? Maybe:
|
||
If provided, used to ensure that paired calls to ``seek`` and ``read`` | ||
cannot be interrupted by another thread accessing the same ``fileobj``. | ||
Each thread which accesses the same file via ``read_segments`` must | ||
share a lock in order to ensure that the file access is thread-safe. | ||
A lock does not need to be provided for single-threaded access. | ||
|
||
Returns | ||
------- | ||
buffer : buffer object | ||
object implementing buffer protocol, such as byte string or ndarray or | ||
mmap or ctypes ``c_char_array`` | ||
""" | ||
# Make a lock-like thing to make the code below a bit nicer | ||
if lock is None: | ||
lock = _NullLock() | ||
|
||
if len(segments) == 0: | ||
if n_bytes != 0: | ||
raise ValueError("No segments, but non-zero n_bytes") | ||
return b'' | ||
if len(segments) == 1: | ||
offset, length = segments[0] | ||
fileobj.seek(offset) | ||
bytes = fileobj.read(length) | ||
with lock: | ||
fileobj.seek(offset) | ||
bytes = fileobj.read(length) | ||
if len(bytes) != n_bytes: | ||
raise ValueError("Whoops, not enough data in file") | ||
return bytes | ||
# More than one segment | ||
bytes = mmap(-1, n_bytes) | ||
for offset, length in segments: | ||
fileobj.seek(offset) | ||
bytes.write(fileobj.read(length)) | ||
with lock: | ||
fileobj.seek(offset) | ||
bytes.write(fileobj.read(length)) | ||
if bytes.tell() != n_bytes: | ||
raise ValueError("Oh dear, n_bytes does not look right") | ||
return bytes | ||
|
@@ -700,7 +726,7 @@ def _simple_fileslice(fileobj, sliceobj, shape, dtype, offset=0, order='C', | |
|
||
|
||
def fileslice(fileobj, sliceobj, shape, dtype, offset=0, order='C', | ||
heuristic=threshold_heuristic): | ||
heuristic=threshold_heuristic, lock=None): | ||
""" Slice array in `fileobj` using `sliceobj` slicer and array definitions | ||
|
||
`fileobj` contains the contiguous binary data for an array ``A`` of shape, | ||
|
@@ -737,6 +763,9 @@ def fileslice(fileobj, sliceobj, shape, dtype, offset=0, order='C', | |
returning one of 'full', 'contiguous', None. See | ||
:func:`optimize_slicer` and see :func:`threshold_heuristic` for an | ||
example. | ||
lock: threading.Lock, optional | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As for |
||
If provided, used to ensure that paired calls to ``seek`` and ``read`` | ||
cannot be interrupted by another thread accessing the same ``fileobj``. | ||
|
||
Returns | ||
------- | ||
|
@@ -750,7 +779,7 @@ def fileslice(fileobj, sliceobj, shape, dtype, offset=0, order='C', | |
segments, sliced_shape, post_slicers = calc_slicedefs( | ||
sliceobj, shape, itemsize, offset, order) | ||
n_bytes = reduce(operator.mul, sliced_shape, 1) * itemsize | ||
bytes = read_segments(fileobj, segments, n_bytes) | ||
bytes = read_segments(fileobj, segments, n_bytes, lock) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Maybe |
||
sliced = np.ndarray(sliced_shape, dtype, buffer=bytes, order=order) | ||
return sliced[post_slicers] | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think you now need
{None, 'auto', True, False}
, and to say that None is the default, and gives the KEEP_FILE_OPEN_DEFAULT value.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Likewise for the other docstrings.