@@ -12,60 +12,32 @@ import (
1212
1313// IndexDB manages the SQLite database for the file index.
1414// It wraps the underlying sql.DB connection and provides type-safe methods.
15- // Uses DELETE journal mode for maximum write performance with SQLite's built-in locking for concurrency.
1615type IndexDB struct {
1716 * TempDB
1817}
1918
2019// NewIndexDB creates a new index database in the cache directory.
21- // Optimized for low memory usage - no mmap, smaller cache, EXCLUSIVE locking.
2220func NewIndexDB (name string ) (* IndexDB , error ) {
23- // Create a temp DB for indexing (ID based on source name)
2421 db , err := NewTempDB ("index_" + name , & TempDBConfig {
25- BatchSize : 2500 ,
26- // cache_size: Negative values = pages, positive = KB
27- // Using 4KB pages for small entries reduces storage waste and RAM usage
28- CacheSizeKB : - 1000 , // ~4MB cache - reduced to minimize memory footprint
29- MmapSize : 0 , // DISABLED - mmap shows up in Docker RSS outside Go heap
30- Synchronous : "OFF" , // OFF for maximum write performance - data integrity not critical
31- TempStore : "FILE" , // FILE instead of MEMORY to reduce memory overhead
32- JournalMode : "DELETE" , // DELETE mode - simpler, no WAL overhead, lower memory usage
33- LockingMode : "EXCLUSIVE" , // EXCLUSIVE mode - single writer, prevents concurrent access issues
34- PageSize : 4096 , // 4KB page size - optimal for small entries (reduces storage waste)
35- AutoVacuum : "INCREMENTAL" , // Incremental auto-vacuum - prevents bloat without blocking operations
36- EnableLogging : false ,
22+ BatchSize : 5000 , // items per batch transaction
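23+ CacheSizeKB : - 1000 , // ~4MB cache if read as 1000 x 4KB pages - reduced to minimize memory footprint. NOTE(review): SQLite's PRAGMA cache_size treats a negative N as KiB (~1MB here) - confirm how TempDB applies this value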
24+ MmapSize : 0 , // DISABLED - mmap shows up in Docker RSS outside Go heap
25+ Synchronous : "OFF" , // OFF for maximum write performance - data integrity not critical
26+ TempStore : "FILE" , // FILE instead of MEMORY to reduce memory overhead
27+ JournalMode : "OFF" , // OFF for maximum write performance - data integrity not critical
28+ LockingMode : "EXCLUSIVE" , // EXCLUSIVE mode - better cache retention, no change counter overhead
29+ PageSize : 4096 , // 4KB page size - optimal for small entries (reduces storage waste)
30+ AutoVacuum : "INCREMENTAL" , // Incremental auto-vacuum - prevents bloat without blocking operations
31+ EnableLogging : false ,
32+ HardHeapLimitBytes : defaultHardHeapLimitBytes , // Hard limit - operations fail if exceeded
33+ CacheSpillThreshold : 500 , // ~2MB threshold - triggers early spilling to reduce memory
3734 })
3835 if err != nil {
3936 return nil , err
4037 }
4138
4239 idxDB := & IndexDB {TempDB : db }
4340
44- // Set busy_timeout to 200ms - balance between responsiveness and reducing busy errors
45- if _ , err := db .Exec ("PRAGMA busy_timeout = 200" ); err != nil {
46- idxDB .Close ()
47- return nil , fmt .Errorf ("failed to set busy_timeout: %w" , err )
48- }
49-
50- // Limit SQLite's memory usage to reduce Docker RSS
51- // cache_spill = 1 allows page cache to spill to disk, reducing memory pressure
52- if _ , err := db .Exec ("PRAGMA cache_spill = 1" ); err != nil {
53- logger .Debugf ("Failed to set cache_spill: %v" , err )
54- }
55-
56- // CRITICAL: Set soft heap limit to 8MB - constrains SQLite's memory usage at OS level
57- // This prevents memory balloon during large transactions (UPSERT operations hold 2x data)
58- // SQLite will spill to disk when approaching this limit
59- if _ , err := db .Exec ("PRAGMA soft_heap_limit = 8388608" ); err != nil { // 8MB
60- logger .Debugf ("Failed to set soft_heap_limit: %v" , err )
61- }
62-
63- // Run incremental vacuum periodically (1000 pages = ~4MB at a time)
64- // This happens async and doesn't block - keeps DB size in check
65- if _ , err := db .Exec ("PRAGMA incremental_vacuum(1000)" ); err != nil {
66- logger .Debugf ("Incremental vacuum skipped: %v" , err )
67- }
68-
6941 if err := idxDB .CreateIndexTable (); err != nil {
7042 idxDB .Close ()
7143 return nil , err
@@ -182,6 +154,7 @@ func (db *IndexDB) BulkInsertItems(source string, items []*iteminfo.FileInfo) er
182154 }
183155 return err
184156 }
157+ defer stmt .Close () // Ensure statement is always closed
185158
186159 nowUnix := time .Now ().Unix ()
187160 for _ , info := range items {
@@ -200,16 +173,13 @@ func (db *IndexDB) BulkInsertItems(source string, items []*iteminfo.FileInfo) er
200173 nowUnix ,
201174 )
202175 if err != nil {
203- stmt .Close ()
204176 if isBusyError (err ) || isTransactionError (err ) {
205177 return nil // Non-fatal
206178 }
207179 return err
208180 }
209181 }
210182
211- stmt .Close ()
212-
213183 // Try to commit
214184 if err := tx .Commit (); err != nil {
215185 if isBusyError (err ) || isTransactionError (err ) {
@@ -222,15 +192,14 @@ func (db *IndexDB) BulkInsertItems(source string, items []*iteminfo.FileInfo) er
222192 if itemCount > 100 {
223193 logger .Debugf ("[DB_MEMORY] Transaction completed: %d items in %v (%.0f items/sec)" ,
224194 itemCount , duration , float64 (itemCount )/ duration .Seconds ())
225-
226- // CRITICAL: Release page cache after large transactions to prevent OS-level memory buildup
227- // Large UPSERT transactions can accumulate 10-20MB of page cache that doesn't auto-release
228- if itemCount >= 5000 {
229- db .Exec ("PRAGMA shrink_memory" ) // Forces SQLite to release unused memory back to OS
230- logger .Debugf ("[DB_MEMORY] Released page cache after %d-item transaction" , itemCount )
231- }
232195 }
233196
197+ // CRITICAL: Force SQLite to release memory back to OS
198+ // shrink_memory hints SQLite to release unused memory back to the OS
199+ // We call this after every transaction to prevent memory accumulation
200+ if _ , err := db .Exec ("PRAGMA shrink_memory" ); err != nil {
201+ logger .Debugf ("BulkInsertItems: failed to shrink memory: %v" , err )
202+ }
234203 return nil
235204}
236205
@@ -368,6 +337,11 @@ func (db *IndexDB) BulkUpdateSizes(source string, pathSizeUpdates map[string]int
368337 return err
369338 }
370339
340+ // CRITICAL: Force SQLite to release memory back to OS
341+ // shrink_memory hints SQLite to release unused memory back to the OS
342+ if _ , err := db .Exec ("PRAGMA shrink_memory" ); err != nil {
343+ logger .Debugf ("BulkUpdateSizes: failed to shrink memory: %v" , err )
344+ }
371345 return nil
372346}
373347
@@ -763,21 +737,18 @@ func (db *IndexDB) GetTotalSize(source string) (uint64, error) {
763737 return uint64 (totalSize ), nil
764738}
765739
766- // RecalculateDirectorySizes recalculates and updates all directory sizes based on their children
740+ // RecalculateDirectorySizes recalculates and updates all directory sizes based on their children.
741+ // This uses a bottom-up approach (deepest directories first) to avoid redundant SUM queries.
767742func (db * IndexDB ) RecalculateDirectorySizes (source , pathPrefix string ) (int , error ) {
768- startTime := time .Now ()
769-
770- // Get all directories under the path prefix, ordered by depth (deepest first)
771- cutoffTime := time .Now ().Add (- 1 * time .Second ).Unix ()
772-
743+ // 1. Get all directories under the path prefix, ordered by depth (deepest first)
744+ // Depth is determined by counting slashes in the path
773745 query := `
774746 SELECT path FROM index_items
775747 WHERE source = ? AND is_dir = 1 AND path GLOB ?
776- AND last_updated < ?
777748 ORDER BY LENGTH(path) - LENGTH(REPLACE(path, '/', '')) DESC
778749 `
779750
780- rows , err := db .Query (query , source , pathPrefix + "*" , cutoffTime )
751+ rows , err := db .Query (query , source , pathPrefix + "*" )
781752 if err != nil {
782753 if isBusyError (err ) || isTransactionError (err ) {
783754 return 0 , nil
@@ -789,28 +760,58 @@ func (db *IndexDB) RecalculateDirectorySizes(source, pathPrefix string) (int, er
789760 var directories []string
790761 for rows .Next () {
791762 var path string
792- if err := rows .Scan (& path ); err != nil {
763+ if err = rows .Scan (& path ); err != nil {
793764 return 0 , err
794765 }
795766 directories = append (directories , path )
796767 }
797- if err := rows .Err (); err != nil {
768+ if err = rows .Err (); err != nil {
798769 return 0 , err
799770 }
800771
801- // For each directory, calculate the sum of direct children sizes
802- updateCount := 0
803- for _ , dirPath := range directories {
804- // Calculate total size of all files (not directories) under this path
805- // Only sum file sizes to avoid double-counting (directory sizes are derived)
806- sizeQuery := `
772+ if len (directories ) == 0 {
773+ return 0 , nil
774+ }
775+
776+ // 2. Start a transaction for bulk updates
777+ tx , err := db .BeginTransaction ()
778+ if err != nil {
779+ if isBusyError (err ) || isTransactionError (err ) {
780+ return 0 , nil
781+ }
782+ return 0 , err
783+ }
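784+ // No defer rollback - data integrity not critical, performance is priority. NOTE(review): the early returns on Prepare failure below leave tx neither committed nor rolled back; if tx is a database/sql Tx this keeps its connection (and any lock it holds) pinned - confirm and call tx.Rollback() on those paths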
785+
786+ // Prepare statements for the loop
787+ // We only sum DIRECT children because we are processing bottom-up
788+ sizeStmt , err := tx .Prepare (`
807789 SELECT COALESCE(SUM(size), 0)
808790 FROM index_items
809- WHERE source = ? AND path GLOB ? AND path != ? AND is_dir = 0
810- `
791+ WHERE source = ? AND parent_path = ?
792+ ` )
793+ if err != nil {
794+ return 0 , err
795+ }
796+ defer sizeStmt .Close ()
811797
798+ updateStmt , err := tx .Prepare (`
799+ UPDATE index_items
800+ SET size = ?, last_updated = ?
801+ WHERE source = ? AND path = ?
802+ ` )
803+ if err != nil {
804+ return 0 , err
805+ }
806+ defer updateStmt .Close ()
807+
808+ nowUnix := time .Now ().Unix ()
809+ updateCount := 0
810+
811+ // 3. Process directories bottom-up
812+ for _ , dirPath := range directories {
812813 var totalSize int64
813- err := db .QueryRow (sizeQuery , source , dirPath + "*" , dirPath ).Scan (& totalSize )
814+ err := sizeStmt .QueryRow (source , dirPath ).Scan (& totalSize )
814815 if err != nil {
815816 if isBusyError (err ) || isTransactionError (err ) {
816817 continue
@@ -819,29 +820,31 @@ func (db *IndexDB) RecalculateDirectorySizes(source, pathPrefix string) (int, er
819820 continue
820821 }
821822
822- // Update the directory's size and last_updated timestamp
823- // This prevents the directory from being deleted as stale during cleanup
824- nowUnix := time .Now ().Unix ()
825- updateQuery := `UPDATE index_items SET size = ?, last_updated = ? WHERE source = ? AND path = ?`
826- result , err := db .Exec (updateQuery , totalSize , nowUnix , source , dirPath )
823+ _ , err = updateStmt .Exec (totalSize , nowUnix , source , dirPath )
827824 if err != nil {
828825 if isBusyError (err ) || isTransactionError (err ) {
829826 continue
830827 }
831828 logger .Errorf ("[DB_SIZE_CALC] Failed to update size for %s: %v" , dirPath , err )
832829 continue
833830 }
831+ updateCount ++
832+ }
834833
835- rowsAffected , _ := result .RowsAffected ()
836- if rowsAffected > 0 {
837- updateCount ++
834+ // 4. Commit transaction
835+ if err := tx .Commit (); err != nil {
836+ if isBusyError (err ) || isTransactionError (err ) {
837+ return 0 , nil
838838 }
839+ return 0 , err
839840 }
840841
841- duration := time .Since (startTime )
842842 if updateCount > 0 {
843- logger .Debugf ("[DB_MEMORY] RecalculateDirectorySizes: updated %d directories in %v (path: %s)" ,
844- updateCount , duration , pathPrefix )
843+ // CRITICAL: Force SQLite to release memory back to OS
844+ // shrink_memory hints SQLite to release unused memory back to the OS
845+ if _ , err := db .Exec ("PRAGMA shrink_memory" ); err != nil {
846+ logger .Debugf ("[DB_SIZE_CALC] Failed to shrink memory: %v" , err )
847+ }
845848 }
846849
847850 return updateCount , nil
0 commit comments