Skip to content

Commit 4db40b9

Browse files
authored
Merge pull request #2204 from pupil-labs/avoid_loading_cloud_200hz_gaze_multiple_times
Fix loading 200 Hz gaze data in Pupil Invisible recordings
2 parents 7f56765 + f2dd0ea commit 4db40b9

File tree

2 files changed

+217
-150
lines changed

2 files changed

+217
-150
lines changed

pupil_src/shared_modules/pupil_recording/update/invisible.py

Lines changed: 217 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,15 @@
1313
import re
1414
import shutil
1515
import tempfile
16+
import typing as T
1617
from pathlib import Path
1718

1819
import av
1920
import file_methods as fm
2021
import methods as m
2122
import numpy as np
23+
import player_methods as pm
2224
from version_utils import parse_version
23-
from video_capture.utils import pi_gaze_items
2425

2526
from ..info import RecordingInfoFile
2627
from ..info import recording_info_utils as utils
@@ -320,3 +321,218 @@ def _pi_raw_time_load(path):
320321
@staticmethod
def _pi_raw_time_save(path, arr):
    """Write `arr` to the file at `path` via the array's own `tofile` method.

    The path is converted to `str` before it is handed to `tofile`.
    """
    destination = str(path)
    arr.tofile(destination)
324+
325+
326+
def pi_gaze_items(root_dir):
    r"""Yields one (location, timestamp, confidence) triplet for each gaze point

    Pupil Invisible Companion records this information into three different sets of
    files. Their names can be matched by the following regex patterns:
        - `^gaze ps[0-9]+\.raw$`
        - `^gaze ps[0-9]+\.time$`
        - `^worn ps[0-9]+\.raw$`

    The worn data is a stream of values of either 0 or 255, indicating that the glasses
    were (not) worn. Pupil Player maps these to gaze confidence values of 0.0 and 1.0
    respectively.

    Since all `*.time` files are converted to the Pupil Player format before this
    function is being called, we match the `^gaze ps[0-9]+_timestamps\.npy$` pattern on
    the recording files instead. When looking for the location and worn data, the
    function just replaces the necessary parts of the timestamp file names instead of
    performing separate regex matches.

    If the recording was successfully post-processed and downloaded from Pupil Cloud, it
    will contain 200Hz-densified gaze data. This data replaces the real-time recorded
    data by Pupil Invisible Companion and is stored in three files:
        - `gaze_200hz.raw`
        - `gaze_200hz.time` (or `gaze_200hz_timestamps.npy` if upgraded)
        - `worn_200hz.raw`

    The worn data is a special case as it was introduced at different points in time to
    Pupil Invisible Companion and Pupil Cloud. In other words, it is possible that there
    is no worn data, only real-time recorded worn data, or 200 Hz worn data. The latter
    is preferred. If 200 Hz gaze data is only available with real-time recorded worn
    data, the latter is interpolated to 200 Hz using a k-nearest-neighbour (k=1)
    approach. If no worn data is available, or the numbers of worn samples and gaze
    timestamps are not consistent, Pupil Player assumes a confidence value of 1.0 for
    every gaze point.

    Args:
        root_dir: Recording directory; anything accepted by `pathlib.Path`.

    Yields:
        (location, timestamp, confidence) triplets, either from the 200 Hz files or
        from the real-time recorded files, never from both.
    """
    root_dir = Path(root_dir)
    # This pattern will match any filename that:
    # - starts with "gaze ps"
    # - is followed by one or more digits
    # - ends with "_timestamps.npy"
    # The dot is escaped so that it only matches a literal "." instead of any
    # character.
    timestamps_realtime_pattern = r"^gaze ps[0-9]+_timestamps\.npy$"
    timestamps_realtime_paths = matched_files_by_name_pattern(
        root_dir, timestamps_realtime_pattern
    )
    # Use 200hz data only if both gaze data and timestamps are available at 200hz
    raw_200hz_path = _find_raw_200hz_path(root_dir)
    timestamps_200hz_path = _find_timestamps_200hz_path(root_dir)
    if raw_200hz_path and timestamps_200hz_path:
        worn_200hz_path = _find_worn_200hz_path(root_dir)
        yield from _pi_posthoc_200hz_gaze_items(
            raw_200hz_path,
            timestamps_200hz_path,
            worn_200hz_path,
            timestamps_realtime_paths,
        )
    else:
        yield from _pi_realtime_recorded_gaze_items(timestamps_realtime_paths)
383+
384+
385+
def _pi_posthoc_200hz_gaze_items(
    raw_200hz_path, timestamps_200hz_path, worn_200hz_path, timestamps_realtime_paths
):
    """Yields (location, timestamp, confidence) triplets from the 200 Hz gaze files

    Confidence is taken from the 200 Hz worn file if `worn_200hz_path` is not None.
    Otherwise it is derived from the real-time recorded worn data belonging to
    `timestamps_realtime_paths`, densified to the 200 Hz timestamps. If neither is
    usable, `_validated_conf_data` substitutes a confidence of 1.0 for every sample.

    Args:
        raw_200hz_path: Path to the 200 Hz gaze location file.
        timestamps_200hz_path: Path to the 200 Hz gaze timestamp file.
        worn_200hz_path: Path to the 200 Hz worn file, or None if not available.
        timestamps_realtime_paths: Paths of the real-time recorded gaze timestamp
            files; only used if `worn_200hz_path` is None.
    """
    raw_data = _load_raw_data(raw_200hz_path)
    timestamps = _load_timestamps_data(timestamps_200hz_path)

    # Equalize lengths *before* deriving the confidence. Densified confidence has
    # exactly one value per timestamp passed in; if the timestamps were truncated
    # afterwards, the densified data would fail the length validation below and be
    # discarded although it is perfectly usable.
    raw_data, timestamps = _equalize_length_if_necessary(raw_data, timestamps)

    if worn_200hz_path is not None:
        conf_data = _load_worn_data(worn_200hz_path)
    else:
        conf_data = _find_and_load_densified_worn_data(
            timestamps, timestamps_realtime_paths
        )

    conf_data = _validated_conf_data(conf_data, timestamps)
    yield from zip(raw_data, timestamps, conf_data)
401+
402+
403+
def _pi_realtime_recorded_gaze_items(timestamps_realtime_paths):
404+
for timestamps_path in timestamps_realtime_paths:
405+
raw_data = _load_raw_data(_find_raw_path(timestamps_path))
406+
timestamps = _load_timestamps_data(timestamps_path)
407+
conf_data = _load_worn_data(_find_worn_path(timestamps_path))
408+
409+
raw_data, timestamps = _equalize_length_if_necessary(raw_data, timestamps)
410+
conf_data = _validated_conf_data(conf_data, timestamps)
411+
yield from zip(raw_data, timestamps, conf_data)
412+
413+
414+
def _find_timestamps_200hz_path(root_dir: Path):
415+
path = root_dir / "gaze_200hz_timestamps.npy"
416+
if path.is_file():
417+
return path
418+
else:
419+
return None
420+
421+
422+
def _find_raw_200hz_path(root_dir: Path):
423+
path = root_dir / "gaze_200hz.raw"
424+
if path.is_file():
425+
return path
426+
else:
427+
return None
428+
429+
430+
def _find_worn_200hz_path(root_dir: Path):
431+
path = root_dir / "worn_200hz.raw"
432+
if path.is_file():
433+
return path
434+
else:
435+
return None
436+
437+
438+
def _find_raw_path(timestamps_path: Path):
439+
name = timestamps_path.name.replace("_timestamps", "")
440+
path = timestamps_path.with_name(name).with_suffix(".raw")
441+
assert path.is_file(), f"The file does not exist at path: {path}"
442+
return path
443+
444+
445+
def _find_worn_path(timestamps_path: Path):
446+
name = timestamps_path.name
447+
name = name.replace("gaze", "worn")
448+
name = name.replace("_timestamps", "")
449+
path = timestamps_path.with_name(name).with_suffix(".raw")
450+
if path.is_file():
451+
return path
452+
else:
453+
return None
454+
455+
456+
def _load_timestamps_data(path):
457+
timestamps = np.load(str(path))
458+
return timestamps
459+
460+
461+
def _load_raw_data(path):
462+
raw_data = np.fromfile(str(path), "<f4")
463+
raw_data_dtype = raw_data.dtype
464+
raw_data.shape = (-1, 2)
465+
return np.asarray(raw_data, dtype=raw_data_dtype)
466+
467+
468+
def _load_worn_data(path: Path):
469+
if not (path and path.exists()):
470+
return None
471+
472+
confidences = np.fromfile(str(path), "<u1") / 255.0
473+
return np.clip(confidences, 0.0, 1.0)
474+
475+
476+
def _find_and_load_densified_worn_data(
477+
timestamps_200hz, timestamps_realtime_paths: T.List[Path]
478+
):
479+
if not timestamps_realtime_paths:
480+
return None
481+
# Load and densify confidence data when 200hz gaze is available, but only
482+
# non-200hz confidence is available
483+
conf_data, timestamps_realtime = _find_and_load_realtime_recorded_worn_data(
484+
timestamps_realtime_paths
485+
)
486+
densification_idc = pm.find_closest(timestamps_realtime, timestamps_200hz)
487+
return conf_data[densification_idc]
488+
489+
490+
def _find_and_load_realtime_recorded_worn_data(timestamps_realtime_paths: T.List[Path]):
491+
# assumes at least one path in `timestamps_realtime_paths`, otherwise np.concatenate
492+
# will raise ValueError: need at least one array to concatenate
493+
assert (
494+
len(timestamps_realtime_paths) > 0
495+
), "Requires at least one real-time recorded gaze timestamp path"
496+
conf_all = []
497+
ts_all = []
498+
for timestamps_path in timestamps_realtime_paths:
499+
ts = _load_timestamps_data(timestamps_path)
500+
conf_data = _load_worn_data(_find_worn_path(timestamps_path))
501+
conf_data = _validated_conf_data(conf_data, ts)
502+
conf_all.append(conf_data)
503+
ts_all.append(ts)
504+
conf_all = np.concatenate(conf_all)
505+
ts_all = np.concatenate(ts_all)
506+
return conf_all, ts_all
507+
508+
509+
def _equalize_length_if_necessary(raw_data, timestamps):
510+
if len(raw_data) != len(timestamps):
511+
logger.warning(
512+
f"There is a mismatch between the number of raw data ({len(raw_data)}) "
513+
f"and the number of timestamps ({len(timestamps)})!"
514+
)
515+
size = min(len(raw_data), len(timestamps))
516+
raw_data = raw_data[:size]
517+
timestamps = timestamps[:size]
518+
return raw_data, timestamps
519+
520+
521+
def _validated_conf_data(conf_data, timestamps):
522+
if conf_data is not None and len(conf_data) != len(timestamps):
523+
logger.warning(
524+
"There is a mismatch between the number of confidence data "
525+
f"({len(conf_data)}) and the number of timestamps ({len(timestamps)})! "
526+
"Not using confidence data."
527+
)
528+
conf_data = None
529+
if conf_data is None:
530+
conf_data = np.ones(len(timestamps))
531+
return conf_data
532+
533+
534+
def matched_files_by_name_pattern(parent_dir: Path, name_pattern: str) -> T.List[Path]:
    """Returns the sorted files directly inside `parent_dir` whose name matches a regex

    Only regular files are considered; the directory is not searched recursively.
    `name_pattern` is applied with `re.match`, i.e. anchored at the start of the name.

    NOTE(review): paths are sorted by plain path comparison, so e.g. "ps10" sorts
    before "ps2" - confirm that callers do not rely on numeric part order.
    """
    matches = [
        entry
        for entry in parent_dir.iterdir()
        if entry.is_file() and re.match(name_pattern, entry.name) is not None
    ]
    matches.sort()
    return matches

0 commit comments

Comments
 (0)