|
13 | 13 | import re |
14 | 14 | import shutil |
15 | 15 | import tempfile |
| 16 | +import typing as T |
16 | 17 | from pathlib import Path |
17 | 18 |
|
18 | 19 | import av |
19 | 20 | import file_methods as fm |
20 | 21 | import methods as m |
21 | 22 | import numpy as np |
| 23 | +import player_methods as pm |
22 | 24 | from version_utils import parse_version |
23 | | -from video_capture.utils import pi_gaze_items |
24 | 25 |
|
25 | 26 | from ..info import RecordingInfoFile |
26 | 27 | from ..info import recording_info_utils as utils |
@@ -320,3 +321,218 @@ def _pi_raw_time_load(path): |
@staticmethod
def _pi_raw_time_save(path, arr):
    """Write the contents of `arr` to the file at `path` using `arr.tofile`."""
    target = str(path)
    arr.tofile(target)
| 324 | + |
| 325 | + |
def pi_gaze_items(root_dir):
    """Yield one (location, timestamp, confidence) triplet for each gaze point.

    Pupil Invisible Companion records this information into three different sets of
    files, where `<N>` stands for one or more digits:
        - `gaze ps<N>.raw`
        - `gaze ps<N>.time`
        - `worn ps<N>.raw`

    The worn data is a stream of values of either 0 or 255, indicating whether the
    glasses were worn (255) or not (0). Pupil Player maps these to gaze confidence
    values of 0.0 and 1.0 respectively.

    Since all `*.time` files are converted to the Pupil Player format before this
    function is called, the `gaze ps<N>_timestamps.npy` files are matched instead.
    When looking for the location and worn data, the function just replaces the
    necessary parts of the timestamp file names instead of performing separate regex
    matches.

    If the recording was successfully post-processed and downloaded from Pupil Cloud,
    it will contain 200 Hz-densified gaze data. This data replaces the data recorded
    in real time by Pupil Invisible Companion and is stored in three files:
        - `gaze_200hz.raw`
        - `gaze_200hz.time` (or `gaze_200hz_timestamps.npy` if upgraded)
        - `worn_200hz.raw`

    The worn data is a special case as it was introduced at different points in time
    to Pupil Invisible Companion and Pupil Cloud. In other words, it is possible that
    there is no worn data, only real-time recorded worn data, or 200 Hz worn data.
    The latter is preferred. If 200 Hz gaze data is only available with real-time
    recorded worn data, the worn data is interpolated to 200 Hz using a
    k-nearest-neighbour (k=1) approach. If no worn data is available, or the numbers
    of worn samples and gaze timestamps are not consistent, Pupil Player assumes a
    confidence value of 1.0 for every gaze point.

    Args:
        root_dir: Recording directory; anything accepted by `pathlib.Path`.

    Yields:
        `(location, timestamp, confidence)` triplets, one per gaze point.
    """
    root_dir = Path(root_dir)
    # This pattern will match any filename that:
    # - starts with "gaze ps"
    # - is followed by one or more digits
    # - ends with "_timestamps.npy"
    # The dot is escaped so that it matches a literal "." only; an unescaped dot
    # would accept any character in that position.
    timestamps_realtime_pattern = r"^gaze ps[0-9]+_timestamps\.npy$"
    timestamps_realtime_paths = matched_files_by_name_pattern(
        root_dir, timestamps_realtime_pattern
    )
    # Use 200hz data only if both gaze data and timestamps are available at 200hz
    raw_200hz_path = _find_raw_200hz_path(root_dir)
    timestamps_200hz_path = _find_timestamps_200hz_path(root_dir)
    if raw_200hz_path is not None and timestamps_200hz_path is not None:
        worn_200hz_path = _find_worn_200hz_path(root_dir)
        yield from _pi_posthoc_200hz_gaze_items(
            raw_200hz_path,
            timestamps_200hz_path,
            worn_200hz_path,
            timestamps_realtime_paths,
        )
    else:
        yield from _pi_realtime_recorded_gaze_items(timestamps_realtime_paths)
| 383 | + |
| 384 | + |
def _pi_posthoc_200hz_gaze_items(
    raw_200hz_path, timestamps_200hz_path, worn_200hz_path, timestamps_realtime_paths
):
    """Yield (location, timestamp, confidence) triplets from 200 Hz gaze data.

    Args:
        raw_200hz_path: Path of the 200 Hz gaze location file.
        timestamps_200hz_path: Path of the 200 Hz gaze timestamps file.
        worn_200hz_path: Path of the 200 Hz worn file, or None if there is none. In
            the latter case the real-time recorded worn data is densified instead.
        timestamps_realtime_paths: Paths of the real-time recorded gaze timestamp
            files, used to locate the real-time recorded worn data.

    Yields:
        `(location, timestamp, confidence)` triplets, one per gaze point.
    """
    raw_data = _load_raw_data(raw_200hz_path)
    timestamps = _load_timestamps_data(timestamps_200hz_path)

    # Equalize BEFORE building the confidence data: the densified worn data contains
    # one entry per timestamp passed in, so densifying against the un-truncated
    # timestamps would produce a length mismatch further down and the worn data
    # would be discarded although it is valid.
    raw_data, timestamps = _equalize_length_if_necessary(raw_data, timestamps)

    if worn_200hz_path is not None:
        conf_data = _load_worn_data(worn_200hz_path)
    else:
        conf_data = _find_and_load_densified_worn_data(
            timestamps, timestamps_realtime_paths
        )

    # Falls back to a confidence of 1.0 if no (consistent) worn data is available
    conf_data = _validated_conf_data(conf_data, timestamps)
    yield from zip(raw_data, timestamps, conf_data)
| 401 | + |
| 402 | + |
| 403 | +def _pi_realtime_recorded_gaze_items(timestamps_realtime_paths): |
| 404 | + for timestamps_path in timestamps_realtime_paths: |
| 405 | + raw_data = _load_raw_data(_find_raw_path(timestamps_path)) |
| 406 | + timestamps = _load_timestamps_data(timestamps_path) |
| 407 | + conf_data = _load_worn_data(_find_worn_path(timestamps_path)) |
| 408 | + |
| 409 | + raw_data, timestamps = _equalize_length_if_necessary(raw_data, timestamps) |
| 410 | + conf_data = _validated_conf_data(conf_data, timestamps) |
| 411 | + yield from zip(raw_data, timestamps, conf_data) |
| 412 | + |
| 413 | + |
| 414 | +def _find_timestamps_200hz_path(root_dir: Path): |
| 415 | + path = root_dir / "gaze_200hz_timestamps.npy" |
| 416 | + if path.is_file(): |
| 417 | + return path |
| 418 | + else: |
| 419 | + return None |
| 420 | + |
| 421 | + |
| 422 | +def _find_raw_200hz_path(root_dir: Path): |
| 423 | + path = root_dir / "gaze_200hz.raw" |
| 424 | + if path.is_file(): |
| 425 | + return path |
| 426 | + else: |
| 427 | + return None |
| 428 | + |
| 429 | + |
| 430 | +def _find_worn_200hz_path(root_dir: Path): |
| 431 | + path = root_dir / "worn_200hz.raw" |
| 432 | + if path.is_file(): |
| 433 | + return path |
| 434 | + else: |
| 435 | + return None |
| 436 | + |
| 437 | + |
| 438 | +def _find_raw_path(timestamps_path: Path): |
| 439 | + name = timestamps_path.name.replace("_timestamps", "") |
| 440 | + path = timestamps_path.with_name(name).with_suffix(".raw") |
| 441 | + assert path.is_file(), f"The file does not exist at path: {path}" |
| 442 | + return path |
| 443 | + |
| 444 | + |
| 445 | +def _find_worn_path(timestamps_path: Path): |
| 446 | + name = timestamps_path.name |
| 447 | + name = name.replace("gaze", "worn") |
| 448 | + name = name.replace("_timestamps", "") |
| 449 | + path = timestamps_path.with_name(name).with_suffix(".raw") |
| 450 | + if path.is_file(): |
| 451 | + return path |
| 452 | + else: |
| 453 | + return None |
| 454 | + |
| 455 | + |
| 456 | +def _load_timestamps_data(path): |
| 457 | + timestamps = np.load(str(path)) |
| 458 | + return timestamps |
| 459 | + |
| 460 | + |
| 461 | +def _load_raw_data(path): |
| 462 | + raw_data = np.fromfile(str(path), "<f4") |
| 463 | + raw_data_dtype = raw_data.dtype |
| 464 | + raw_data.shape = (-1, 2) |
| 465 | + return np.asarray(raw_data, dtype=raw_data_dtype) |
| 466 | + |
| 467 | + |
| 468 | +def _load_worn_data(path: Path): |
| 469 | + if not (path and path.exists()): |
| 470 | + return None |
| 471 | + |
| 472 | + confidences = np.fromfile(str(path), "<u1") / 255.0 |
| 473 | + return np.clip(confidences, 0.0, 1.0) |
| 474 | + |
| 475 | + |
| 476 | +def _find_and_load_densified_worn_data( |
| 477 | + timestamps_200hz, timestamps_realtime_paths: T.List[Path] |
| 478 | +): |
| 479 | + if not timestamps_realtime_paths: |
| 480 | + return None |
| 481 | + # Load and densify confidence data when 200hz gaze is available, but only |
| 482 | + # non-200hz confidence is available |
| 483 | + conf_data, timestamps_realtime = _find_and_load_realtime_recorded_worn_data( |
| 484 | + timestamps_realtime_paths |
| 485 | + ) |
| 486 | + densification_idc = pm.find_closest(timestamps_realtime, timestamps_200hz) |
| 487 | + return conf_data[densification_idc] |
| 488 | + |
| 489 | + |
| 490 | +def _find_and_load_realtime_recorded_worn_data(timestamps_realtime_paths: T.List[Path]): |
| 491 | + # assumes at least one path in `timestamps_realtime_paths`, otherwise np.concatenate |
| 492 | + # will raise ValueError: need at least one array to concatenate |
| 493 | + assert ( |
| 494 | + len(timestamps_realtime_paths) > 0 |
| 495 | + ), "Requires at least one real-time recorded gaze timestamp path" |
| 496 | + conf_all = [] |
| 497 | + ts_all = [] |
| 498 | + for timestamps_path in timestamps_realtime_paths: |
| 499 | + ts = _load_timestamps_data(timestamps_path) |
| 500 | + conf_data = _load_worn_data(_find_worn_path(timestamps_path)) |
| 501 | + conf_data = _validated_conf_data(conf_data, ts) |
| 502 | + conf_all.append(conf_data) |
| 503 | + ts_all.append(ts) |
| 504 | + conf_all = np.concatenate(conf_all) |
| 505 | + ts_all = np.concatenate(ts_all) |
| 506 | + return conf_all, ts_all |
| 507 | + |
| 508 | + |
| 509 | +def _equalize_length_if_necessary(raw_data, timestamps): |
| 510 | + if len(raw_data) != len(timestamps): |
| 511 | + logger.warning( |
| 512 | + f"There is a mismatch between the number of raw data ({len(raw_data)}) " |
| 513 | + f"and the number of timestamps ({len(timestamps)})!" |
| 514 | + ) |
| 515 | + size = min(len(raw_data), len(timestamps)) |
| 516 | + raw_data = raw_data[:size] |
| 517 | + timestamps = timestamps[:size] |
| 518 | + return raw_data, timestamps |
| 519 | + |
| 520 | + |
| 521 | +def _validated_conf_data(conf_data, timestamps): |
| 522 | + if conf_data is not None and len(conf_data) != len(timestamps): |
| 523 | + logger.warning( |
| 524 | + "There is a mismatch between the number of confidence data " |
| 525 | + f"({len(conf_data)}) and the number of timestamps ({len(timestamps)})! " |
| 526 | + "Not using confidence data." |
| 527 | + ) |
| 528 | + conf_data = None |
| 529 | + if conf_data is None: |
| 530 | + conf_data = np.ones(len(timestamps)) |
| 531 | + return conf_data |
| 532 | + |
| 533 | + |
def matched_files_by_name_pattern(parent_dir: Path, name_pattern: str) -> T.List[Path]:
    """Return the sorted files directly in `parent_dir` whose name matches a regex.

    Only regular files are considered and the directory is not searched recursively.
    The pattern is applied to the file name with `re.match`.
    """
    hits = [
        entry
        for entry in parent_dir.iterdir()
        if entry.is_file() and re.match(name_pattern, entry.name) is not None
    ]
    hits.sort()
    return hits
0 commit comments