Skip to content

Commit 23af529

Browse files
feat: optimize scan filters (#3651)
When possible, bubble up the maxVersions filter to the start of the filter chain. Semantically, we can reorder maxVersions anywhere in a filter chain as long as it doesn't skip over any cell filters (timerange, value, etc.).
1 parent 49bfe24 commit 23af529

File tree

4 files changed

+114
-19
lines changed

4 files changed

+114
-19
lines changed

bigtable-client-core-parent/bigtable-hbase/src/main/java/com/google/cloud/bigtable/hbase/adapters/read/ScanAdapter.java

Lines changed: 25 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@
3636
import com.google.common.collect.TreeRangeSet;
3737
import com.google.protobuf.ByteString;
3838
import java.io.IOException;
39+
import java.util.ArrayList;
40+
import java.util.List;
3941
import java.util.Map;
4042
import java.util.NavigableSet;
4143
import org.apache.hadoop.hbase.client.Scan;
@@ -115,18 +117,8 @@ public void throwIfUnsupportedScan(Scan scan) {
115117
*/
116118
public Filters.Filter buildFilter(Scan scan, ReadHooks hooks) {
117119
ChainFilter chain = FILTERS.chain();
118-
Optional<Filters.Filter> familyFilter = createColumnFamilyFilter(scan);
119-
if (familyFilter.isPresent()) {
120-
chain.filter(familyFilter.get());
121-
}
122-
123-
if (scan.getTimeRange() != null && !scan.getTimeRange().isAllTime()) {
124-
chain.filter(createTimeRangeFilter(scan.getTimeRange()));
125-
}
126120

127-
if (scan.getMaxVersions() != Integer.MAX_VALUE) {
128-
chain.filter(createColumnLimitFilter(scan.getMaxVersions()));
129-
}
121+
buildStartFilter(scan).forEach(chain::filter);
130122

131123
Optional<Filters.Filter> userFilter = createUserFilter(scan, hooks);
132124
if (userFilter.isPresent()) {
@@ -140,6 +132,28 @@ public Filters.Filter buildFilter(Scan scan, ReadHooks hooks) {
140132
return chain;
141133
}
142134

135+
private List<Filters.Filter> buildStartFilter(Scan scan) {
136+
List<Filters.Filter> filterList = new ArrayList<>();
137+
138+
Optional<Filters.Filter> familyFilter = createColumnFamilyFilter(scan);
139+
if (familyFilter.isPresent()) {
140+
filterList.add(familyFilter.get());
141+
}
142+
143+
boolean hasTimeRange = false;
144+
if (scan.getTimeRange() != null && !scan.getTimeRange().isAllTime()) {
145+
filterList.add(createTimeRangeFilter(scan.getTimeRange()));
146+
hasTimeRange = true;
147+
}
148+
149+
// maxVersions should appear as early as possible, but it must appear after timeRange
150+
if (scan.getMaxVersions() != Integer.MAX_VALUE) {
151+
int i = hasTimeRange ? filterList.size() : 0;
152+
filterList.add(i, createColumnLimitFilter(scan.getMaxVersions()));
153+
}
154+
return filterList;
155+
}
156+
143157
/** {@inheritDoc} */
144158
@Override
145159
public void adapt(Scan scan, ReadHooks readHooks, Query query) {

bigtable-client-core-parent/bigtable-hbase/src/test/java/com/google/cloud/bigtable/hbase/TestBigtableTable.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,12 +160,12 @@ public void getRequestsAreFullyPopulated() throws Exception {
160160
Assert.assertEquals(
161161
FILTERS
162162
.chain()
163+
.filter(FILTERS.limit().cellsPerColumn(1))
163164
.filter(
164165
FILTERS
165166
.chain()
166167
.filter(FILTERS.family().regex("family"))
167168
.filter(FILTERS.qualifier().regex("qualifier")))
168-
.filter(FILTERS.limit().cellsPerColumn(1))
169169
.toProto(),
170170
filterCaptor.getValue().toProto());
171171
}

bigtable-client-core-parent/bigtable-hbase/src/test/java/com/google/cloud/bigtable/hbase/adapters/TestCheckAndMutateUtil.java

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
package com.google.cloud.bigtable.hbase.adapters;
1717

1818
import static com.google.cloud.bigtable.data.v2.models.Filters.FILTERS;
19+
import static com.google.common.truth.Truth.assertThat;
1920

2021
import com.google.bigtable.v2.CheckAndMutateRowRequest;
2122
import com.google.bigtable.v2.Mutation;
@@ -75,8 +76,8 @@ private static void checkPredicate(CheckAndMutateRowRequest result) {
7576
RowFilter expected =
7677
FILTERS
7778
.chain()
78-
.filter(FAMILY_AND_QUAL_FILTER)
7979
.filter(FILTERS.limit().cellsPerColumn(1))
80+
.filter(FAMILY_AND_QUAL_FILTER)
8081
.filter(
8182
FILTERS
8283
.value()
@@ -260,10 +261,10 @@ public void testIfNotExists() throws DoNotRetryIOException {
260261
RowFilter expected =
261262
FILTERS
262263
.chain()
263-
.filter(FAMILY_AND_QUAL_FILTER)
264264
.filter(FILTERS.limit().cellsPerColumn(1))
265+
.filter(FAMILY_AND_QUAL_FILTER)
265266
.toProto();
266-
Assert.assertEquals(expected, result.getPredicateFilter());
267+
assertThat(result.getPredicateFilter()).isEqualTo(expected);
267268
}
268269

269270
@Test
@@ -287,12 +288,12 @@ public void testNotEqualsNull() throws DoNotRetryIOException {
287288
RowFilter expected =
288289
FILTERS
289290
.chain()
290-
.filter(FAMILY_AND_QUAL_FILTER)
291291
.filter(FILTERS.limit().cellsPerColumn(1))
292+
.filter(FAMILY_AND_QUAL_FILTER)
292293
.toProto();
293294

294295
checkPutMutation(result.getTrueMutations(0));
295-
Assert.assertEquals(expected, result.getPredicateFilter());
296+
assertThat(result.getPredicateFilter()).isEqualTo(expected);
296297
}
297298

298299
@Test
@@ -320,11 +321,11 @@ public void testCompareOpsOtherThanNotEqualsNull() throws DoNotRetryIOException
320321
RowFilter expected =
321322
FILTERS
322323
.chain()
323-
.filter(FAMILY_AND_QUAL_FILTER)
324324
.filter(FILTERS.limit().cellsPerColumn(1))
325+
.filter(FAMILY_AND_QUAL_FILTER)
325326
.toProto();
326327

327328
checkPutMutation(result.getFalseMutations(0));
328-
Assert.assertEquals(expected, result.getPredicateFilter());
329+
assertThat(result.getPredicateFilter()).isEqualTo(expected);
329330
}
330331
}

bigtable-client-core-parent/bigtable-hbase/src/test/java/com/google/cloud/bigtable/hbase/adapters/read/TestScanAdapter.java

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
import org.apache.hadoop.hbase.filter.Filter;
4343
import org.apache.hadoop.hbase.filter.FilterBase;
4444
import org.apache.hadoop.hbase.filter.FilterList;
45+
import org.apache.hadoop.hbase.filter.PrefixFilter;
4546
import org.apache.hadoop.hbase.util.Bytes;
4647
import org.junit.Assert;
4748
import org.junit.Test;
@@ -368,4 +369,83 @@ public void testColFamilyTimeRange() throws IOException {
368369

369370
Assert.assertEquals(expected.toProto(), query.toProto(requestContext).getFilter());
370371
}
372+
373+
@Test
374+
public void testMaxVersionsOptimizationDefault() {
375+
Scan scan =
376+
new Scan()
377+
.addColumn("cf".getBytes(), "q".getBytes())
378+
.setFilter(new PrefixFilter("blah".getBytes()));
379+
380+
scanAdapter.adapt(scan, throwingReadHooks, query);
381+
382+
Filters.Filter expected =
383+
FILTERS
384+
.chain()
385+
// Optimization: limit is first
386+
.filter(FILTERS.limit().cellsPerColumn(1))
387+
// scan columns next
388+
.filter(
389+
FILTERS
390+
.chain()
391+
.filter(FILTERS.family().exactMatch("cf"))
392+
.filter(FILTERS.qualifier().exactMatch("q")))
393+
// user filter
394+
.filter(FILTERS.key().regex("blah\\C*"));
395+
Assert.assertEquals(expected.toProto(), query.toProto(requestContext).getFilter());
396+
}
397+
398+
@Test
399+
public void testMaxVersionsOptimization() {
400+
Scan scan =
401+
new Scan()
402+
.addColumn("cf".getBytes(), "q".getBytes())
403+
.setFilter(new PrefixFilter("blah".getBytes()))
404+
.setMaxVersions(10);
405+
406+
scanAdapter.adapt(scan, throwingReadHooks, query);
407+
408+
Filters.Filter expected =
409+
FILTERS
410+
.chain()
411+
// Optimization: limit is first
412+
.filter(FILTERS.limit().cellsPerColumn(10))
413+
// scan columns next
414+
.filter(
415+
FILTERS
416+
.chain()
417+
.filter(FILTERS.family().exactMatch("cf"))
418+
.filter(FILTERS.qualifier().exactMatch("q")))
419+
// user filter
420+
.filter(FILTERS.key().regex("blah\\C*"));
421+
Assert.assertEquals(expected.toProto(), query.toProto(requestContext).getFilter());
422+
}
423+
424+
@Test
425+
public void testMaxVersionsWithTimeRanges() throws IOException {
426+
Scan scan =
427+
new Scan()
428+
.setTimeRange(0, 1_000)
429+
.addColumn("cf".getBytes(), "q".getBytes())
430+
.setFilter(new PrefixFilter("blah".getBytes()));
431+
432+
scanAdapter.adapt(scan, throwingReadHooks, query);
433+
434+
Filters.Filter expected =
435+
FILTERS
436+
.chain()
437+
// scan columns first, since maxVersion must come after timeRange
438+
.filter(
439+
FILTERS
440+
.chain()
441+
.filter(FILTERS.family().exactMatch("cf"))
442+
.filter(FILTERS.qualifier().exactMatch("q")))
443+
// Timestamp range next
444+
.filter(FILTERS.timestamp().range().of(0L, 1_000 * 1_000L))
445+
// maxVersions after range
446+
.filter(FILTERS.limit().cellsPerColumn(1))
447+
// user filter
448+
.filter(FILTERS.key().regex("blah\\C*"));
449+
Assert.assertEquals(expected.toProto(), query.toProto(requestContext).getFilter());
450+
}
371451
}

0 commit comments

Comments
 (0)