@@ -14,16 +14,18 @@ public class PaginatedAddFilesIteratorImpl implements PaginatedAddFilesIterator
14
14
private final long pageSize ; // max num of files to return in this page
15
15
16
16
private long numAddFilesReturned = 0 ;
17
- private String lastLogFileName = null ; // when reading first page, lastLogFileName is absent
18
- private long rowIdxInLastFile = 0 ;
17
+ private String currentLogFileName = null ; // log file of the most recently read batch; null until the first batch is read
18
+ private long currentRowIdxInLastFile = 0 ;
19
19
private FilteredColumnarBatch nextBatch = null ;
20
+ private String startingLogFileName ;
21
+ private long startingRowIdxInLastFile ;
20
22
21
23
public PaginatedAddFilesIteratorImpl (
22
24
Iterator <FilteredColumnarBatch > originalIterator , PaginationContext paginationContext ) {
23
25
this .originalIterator = originalIterator ;
24
26
this .pageSize = paginationContext .pageSize ;
25
- this .lastLogFileName = paginationContext .lastReadLogFileName ;
26
- this .rowIdxInLastFile = paginationContext .lastReadRowIdxInFile ;
27
+ this .startingLogFileName = paginationContext .lastReadLogFileName ;
28
+ this .startingRowIdxInLastFile = paginationContext .lastReadRowIdxInFile ;
27
29
}
28
30
29
31
@ Override
@@ -34,25 +36,29 @@ public boolean hasNext() {
34
36
if (numAddFilesReturned >= pageSize ) {
35
37
return false ;
36
38
}
37
- if (originalIterator .hasNext ()) {
38
- FilteredColumnarBatch batch = originalIterator .next ();
39
- String fileName = batch .getFileName (); // TODO: get parquet reader PR merged first
40
- if (!fileName .equals (lastLogFileName )) {
41
- lastLogFileName = fileName ;
39
+ while (originalIterator .hasNext ()) {
40
+ nextBatch = originalIterator .next ();
41
+ String fileName = nextBatch .getFileName (); // TODO: get parquet reader PR merged first
42
+ if (!fileName .equals (currentLogFileName )) {
43
+ currentLogFileName = fileName ;
42
44
System .out .println ("fileName " + fileName );
43
- rowIdxInLastFile = 0 ; // row idx starts from 1
45
+ currentRowIdxInLastFile = 0 ;// new log file: reset the per-file row count (row idx starts from 1)
44
46
}
45
- long numActiveAddFiles = batch .getNumOfTrueRows ();
46
- long rowNum =
47
- batch . getData (). getSize (); // number of rows, if 5 AddFile and 7 RemoveFile -> this is 12.
47
+ long numActiveAddFiles = nextBatch .getNumOfTrueRows ();
48
+ long rowNum = nextBatch . getData (). getSize ();
49
+ currentRowIdxInLastFile += rowNum ;
48
50
49
51
System .out .println ("numActiveAddFiles: " + numActiveAddFiles );
50
- System .out .println ("numTotalAddFiles: " + batch .getData ().getColumnVector (0 ).getSize ());
52
+ System .out .println ("numTotalAddFiles: " + nextBatch .getData ().getColumnVector (0 ).getSize ());
51
53
System .out .println ("numOfRows: " + rowNum );
52
54
53
- nextBatch = batch ;
55
+ if (currentLogFileName .compareTo (startingLogFileName ) < 0 ||
56
+ (currentLogFileName .equals (startingLogFileName ) && currentRowIdxInLastFile < startingRowIdxInLastFile )) {
57
+ //skip this batch
58
+ nextBatch = originalIterator .next ();
59
+ continue ;
60
+ }
54
61
numAddFilesReturned += numActiveAddFiles ;
55
- rowIdxInLastFile += rowNum ;
56
62
System .out .println ("numAddFilesReturned: " + numAddFilesReturned );
57
63
return true ;
58
64
}
@@ -78,6 +84,6 @@ public void close() throws IOException {
78
84
}
79
85
80
86
public Row getCurrentPageToken () {
81
- return new PageToken (lastLogFileName , rowIdxInLastFile ).getRow ();
87
+ return new PageToken (currentLogFileName , currentRowIdxInLastFile ).getRow ();
82
88
}
83
89
}
0 commit comments