-
Notifications
You must be signed in to change notification settings - Fork 168
Fixing HeaderIterDP's __len__ function #166
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
# Copyright (c) Facebook, Inc. and its affiliates. | ||
from typing import Iterator, TypeVar | ||
from warnings import warn | ||
|
||
from torchdata.datapipes import functional_datapipe | ||
from torchdata.datapipes.iter import IterDataPipe | ||
|
@@ -20,14 +21,28 @@ class HeaderIterDataPipe(IterDataPipe[T_co]): | |
def __init__(self, source_datapipe: IterDataPipe[T_co], limit: int = 10) -> None: | ||
self.source_datapipe: IterDataPipe[T_co] = source_datapipe | ||
self.limit: int = limit | ||
self.length: int = -1 | ||
|
||
def __iter__(self) -> Iterator[T_co]: | ||
for i, value in enumerate(self.source_datapipe): | ||
if i < self.limit: | ||
i: int = 0 | ||
for value in self.source_datapipe: | ||
i += 1 | ||
if i <= self.limit: | ||
yield value | ||
else: | ||
break | ||
self.length = min(i, self.limit) # We know length with certainty when we reach here | ||
|
||
# TODO(134): Fix the case that the length of source_datapipe is shorter than limit | ||
def __len__(self) -> int: | ||
return self.limit | ||
if self.length != -1: | ||
return self.length | ||
try: | ||
source_len = len(self.source_datapipe) | ||
self.length = min(source_len, self.limit) | ||
return self.length | ||
except TypeError: | ||
warn( | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Is raising a warning and returning the best guess (i.e. `self.limit`) [… rest of comment truncated] There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This looks good to me. |
||
"The length of this HeaderIterDataPipe is inferred to be equal to its limit." | ||
"The actual value may be smaller if the actual length of source_datapipe is smaller than the limit." | ||
) | ||
return self.limit |
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These four tests show the expected behaviors.