@@ -262,9 +262,25 @@ func (s *levelsController) dropTree() (int, error) {
262262// tables that only have keys with this prefix are quickly dropped. The ones which have other keys
263263// are run through MergeIterator and compacted to create new tables. All the mechanisms of
264264// compactions apply, i.e. level sizes and MANIFEST are updated as in the normal flow.
265- func (s * levelsController ) dropPrefix (prefix []byte ) error {
265+ func (s * levelsController ) dropPrefixes (prefixes [][]byte ) error {
266+ // Internal move keys related to the given prefix should also be skipped.
267+ for _ , prefix := range prefixes {
268+ key := make ([]byte , 0 , len (badgerMove )+ len (prefix ))
269+ key = append (key , badgerMove ... )
270+ key = append (key , prefix ... )
271+ prefixes = append (prefixes , key )
272+ }
273+
266274 opt := s .kv .opt
267- for _ , l := range s .levels {
275+ // Iterate levels in the reverse order because if we were to iterate from
276+ // lower level (say level 0) to a higher level (say level 3) we could have
277+ // a state in which level 0 is compacted and an older version of a key exists in a higher level.
278+ // At this point, if someone creates an iterator, they would see an old
279+ // value for a key from the higher levels. Iterating in reverse order ensures we
280+ // drop the oldest data first so that lookups never return stale data.
281+ for i := len (s .levels ) - 1 ; i >= 0 ; i -- {
282+ l := s .levels [i ]
283+
268284 l .RLock ()
269285 if l .level == 0 {
270286 size := len (l .tables )
@@ -276,7 +292,7 @@ func (s *levelsController) dropPrefix(prefix []byte) error {
276292 score : 1.74 ,
277293 // A unique number greater than 1.0 does two things. Helps identify this
278294 // function in logs, and forces a compaction.
279- dropPrefix : prefix ,
295+ dropPrefixes : prefixes ,
280296 }
281297 if err := s .doCompact (cp ); err != nil {
282298 opt .Warningf ("While compacting level 0: %v" , err )
@@ -286,39 +302,49 @@ func (s *levelsController) dropPrefix(prefix []byte) error {
286302 continue
287303 }
288304
289- var tables []* table.Table
290- // Internal move keys related to the given prefix should also be skipped.
291- moveKeyForPrefix := append (badgerMove , prefix ... )
292- prefixesToSkip := [][]byte {prefix , moveKeyForPrefix }
293- for _ , table := range l .tables {
294- var absent bool
295- switch {
296- case hasAnyPrefixes (table .Smallest (), prefixesToSkip ):
297- case hasAnyPrefixes (table .Biggest (), prefixesToSkip ):
298- case containsAnyPrefixes (table .Smallest (), table .Biggest (), prefixesToSkip ):
299- default :
300- absent = true
305+ // Build a list of compaction tableGroups affecting all the prefixes we
306+ // need to drop. We need to build tableGroups that satisfy the invariant that
307+ // bottom tables are consecutive.
308+ // tableGroup contains groups of consecutive tables.
309+ var tableGroups [][]* table.Table
310+ var tableGroup []* table.Table
311+
312+ finishGroup := func () {
313+ if len (tableGroup ) > 0 {
314+ tableGroups = append (tableGroups , tableGroup )
315+ tableGroup = nil
301316 }
302- if ! absent {
303- tables = append (tables , table )
317+ }
318+
319+ for _ , table := range l .tables {
320+ if containsAnyPrefixes (table .Smallest (), table .Biggest (), prefixes ) {
321+ tableGroup = append (tableGroup , table )
322+ } else {
323+ finishGroup ()
304324 }
305325 }
326+ finishGroup ()
327+
306328 l .RUnlock ()
307- if len (tables ) == 0 {
329+
330+ if len (tableGroups ) == 0 {
308331 continue
309332 }
310333
311- cd := compactDef {
312- elog : trace .New (fmt .Sprintf ("Badger.L%d" , l .level ), "Compact" ),
313- thisLevel : l ,
314- nextLevel : l ,
315- top : []* table.Table {},
316- bot : tables ,
317- dropPrefix : prefix ,
318- }
319- if err := s .runCompactDef (l .level , cd ); err != nil {
320- opt .Warningf ("While running compact def: %+v. Error: %v" , cd , err )
321- return err
334+ opt .Infof ("Dropping prefix at level %d (%d tableGroups)" , l .level , len (tableGroups ))
335+ for _ , operation := range tableGroups {
336+ cd := compactDef {
337+ elog : trace .New (fmt .Sprintf ("Badger.L%d" , l .level ), "Compact" ),
338+ thisLevel : l ,
339+ nextLevel : l ,
340+ top : nil ,
341+ bot : operation ,
342+ dropPrefixes : prefixes ,
343+ }
344+ if err := s .runCompactDef (l .level , cd ); err != nil {
345+ opt .Warningf ("While running compact def: %+v. Error: %v" , cd , err )
346+ return err
347+ }
322348 }
323349 }
324350 return nil
@@ -380,9 +406,9 @@ func (l *levelHandler) isCompactable(delSize int64) bool {
380406}
381407
382408type compactionPriority struct { // describes one compaction request handed to doCompact
383- level int
384- score float64
385- dropPrefix []byte
409+ level int // NOTE(review): presumably the index of the level to compact — confirm against doCompact
410+ score float64 // priority score; the prefix-drop path passes 1.74 (> 1.0) to force a compaction and stand out in logs
411+ dropPrefixes [] []byte // key prefixes to discard; doCompact copies this into compactDef.dropPrefixes
386412}
387413
388414// pickCompactLevel determines which level to compact.
@@ -470,13 +496,19 @@ func (s *levelsController) compactBuildTables(
470496
471497 // Next level has level>=1 and we can use ConcatIterator as key ranges do not overlap.
472498 var valid []* table.Table
499+
500+ nextTable:
473501 for _ , table := range botTables {
474- if len (cd .dropPrefix ) > 0 &&
475- bytes .HasPrefix (table .Smallest (), cd .dropPrefix ) &&
476- bytes .HasPrefix (table .Biggest (), cd .dropPrefix ) {
477- // All the keys in this table have the dropPrefix. So, this table does not need to be
478- // in the iterator and can be dropped immediately.
479- continue
502+ if len (cd .dropPrefixes ) > 0 {
503+ for _ , prefix := range cd .dropPrefixes {
504+ if bytes .HasPrefix (table .Smallest (), prefix ) &&
505+ bytes .HasPrefix (table .Biggest (), prefix ) {
506+ // All the keys in this table have the dropPrefix. So, this
507+ // table does not need to be in the iterator and can be
508+ // dropped immediately.
509+ continue nextTable
510+ }
511+ }
480512 }
481513 valid = append (valid , table )
482514 }
@@ -503,12 +535,9 @@ func (s *levelsController) compactBuildTables(
503535 timeStart := time .Now ()
504536 builder := table .NewTableBuilder ()
505537 var numKeys , numSkips uint64
506- // Internal move keys related to the given prefix should also be skipped.
507- moveKeyForPrefix := append (badgerMove , cd .dropPrefix ... )
508- prefixesToSkip := [][]byte {cd .dropPrefix , moveKeyForPrefix }
509538 for ; it .Valid (); it .Next () {
510539 // See if we need to skip the prefix.
511- if len (cd .dropPrefix ) > 0 && hasAnyPrefixes (it .Key (), prefixesToSkip ) {
540+ if len (cd .dropPrefixes ) > 0 && hasAnyPrefixes (it .Key (), cd . dropPrefixes ) {
512541 numSkips ++
513542 updateStats (it .Value ())
514543 continue
@@ -672,10 +701,24 @@ func hasAnyPrefixes(s []byte, listOfPrefixes [][]byte) bool {
672701 return false
673702}
674703
// containsPrefix reports whether the key range bounded by smallValue and
// largeValue may hold keys that start with prefix. It returns true when either
// bound itself starts with prefix, or when prefix sorts strictly between the
// two bounds (bytewise comparison).
func containsPrefix(smallValue, largeValue, prefix []byte) bool {
	switch {
	case bytes.HasPrefix(smallValue, prefix), bytes.HasPrefix(largeValue, prefix):
		// One of the boundary keys carries the prefix directly.
		return true
	default:
		// Neither bound has the prefix; it can only be present if it falls
		// strictly inside the (smallValue, largeValue) interval.
		return bytes.Compare(smallValue, prefix) < 0 &&
			bytes.Compare(largeValue, prefix) > 0
	}
}
718+
675719func containsAnyPrefixes (smallValue , largeValue []byte , listOfPrefixes [][]byte ) bool {
676720 for _ , prefix := range listOfPrefixes {
677- if bytes .Compare (prefix , smallValue ) > 0 &&
678- bytes .Compare (prefix , largeValue ) < 0 {
721+ if containsPrefix (smallValue , largeValue , prefix ) {
679722 return true
680723 }
681724 }
@@ -697,7 +740,7 @@ type compactDef struct {
697740
698741 thisSize int64
699742
700- dropPrefix []byte
743+ dropPrefixes [] []byte
701744}
702745
703746func (cd * compactDef ) lockLevels () {
@@ -859,10 +902,10 @@ func (s *levelsController) doCompact(p compactionPriority) error {
859902 y .AssertTrue (l + 1 < s .kv .opt .MaxLevels ) // Sanity check.
860903
861904 cd := compactDef {
862- elog : trace .New (fmt .Sprintf ("Badger.L%d" , l ), "Compact" ),
863- thisLevel : s .levels [l ],
864- nextLevel : s .levels [l + 1 ],
865- dropPrefix : p .dropPrefix ,
905+ elog : trace .New (fmt .Sprintf ("Badger.L%d" , l ), "Compact" ),
906+ thisLevel : s .levels [l ],
907+ nextLevel : s .levels [l + 1 ],
908+ dropPrefixes : p .dropPrefixes ,
866909 }
867910 cd .elog .SetMaxEvents (100 )
868911 defer cd .elog .Finish ()
0 commit comments