Skip to content

Commit 455ef2e

Browse files
authored
Tweak indexing (#763)
1 parent 629dcbc commit 455ef2e

File tree

20 files changed

+164
-129
lines changed

20 files changed

+164
-129
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,14 @@ All notable changes to this project will be documented in this file. For commit
99
- oidc groups header updates admin permission of existing user (either add/remove if role exists)
1010
- builds amd64 binary with musl for compatibility (glibc error) https://github.com/gtsteffaniak/filebrowser/issues/755
1111
- renamed `server.sources.config.disabled` to `server.sources.config.disableIndexing`
12+
- small indexing behavior tweaks.
13+
- markdown viewer hides sidebar https://github.com/gtsteffaniak/filebrowser/issues/744
1214

1315
**BugFixes**:
1416
- subtitles filename issue https://github.com/gtsteffaniak/filebrowser/issues/678
17+
- search result links not working with custom baseUrl https://github.com/gtsteffaniak/filebrowser/issues/746
18+
- preview error for office native preview https://github.com/gtsteffaniak/filebrowser/issues/744
19+
- more source name safety for special characters.
1520

1621
## v0.7.8-beta
1722

backend/adapters/fs/files/files.go

Lines changed: 13 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -41,32 +41,21 @@ func FileInfoFaster(opts iteminfo.FileOptions) (iteminfo.ExtendedFileInfo, error
4141
return response, err
4242
}
4343
opts.IsDir = isDir
44-
// TODO: whats the best way to save trips to disk here?
45-
// disabled using cache because its not clear if this is helping or hurting
46-
// check if the file exists in the index
47-
//info, exists := index.GetReducedMetadata(opts.Path, opts.IsDir)
48-
//if exists {
49-
// err := RefreshFileInfo(opts)
50-
// if err != nil {
51-
// return info, err
52-
// }
53-
// if opts.Content {
54-
// content := ""
55-
// content, err = getContent(opts.Path)
56-
// if err != nil {
57-
// return info, err
58-
// }
59-
// info.Content = content
60-
// }
61-
// return info, nil
62-
//}
44+
var info *iteminfo.FileInfo
45+
var exists bool
6346
err = index.RefreshFileInfo(opts)
6447
if err != nil {
65-
return response, err
66-
}
67-
info, exists := index.GetReducedMetadata(opts.Path, opts.IsDir)
68-
if !exists {
69-
return response, fmt.Errorf("could not get metadata for path: %v", opts.Path)
48+
if err == errors.ErrNotIndexed {
49+
info, err = index.GetFsDirInfo(opts.Path)
50+
if err != nil {
51+
return response, err
52+
}
53+
}
54+
} else {
55+
info, exists = index.GetReducedMetadata(opts.Path, opts.IsDir)
56+
if !exists {
57+
return response, fmt.Errorf("could not get metadata for path: %v", opts.Path)
58+
}
7059
}
7160
if opts.Content {
7261
if info.Size < 20*1024*1024 { // 20 megabytes in bytes

backend/cmd/root.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,17 +7,18 @@ import (
77
"os/signal"
88
"syscall"
99

10+
_ "net/http/pprof"
11+
1012
"github.com/gtsteffaniak/filebrowser/backend/adapters/fs/fileutils"
1113
"github.com/gtsteffaniak/filebrowser/backend/common/settings"
14+
"github.com/gtsteffaniak/filebrowser/backend/common/version"
1215
"github.com/gtsteffaniak/filebrowser/backend/database/storage"
1316
fbhttp "github.com/gtsteffaniak/filebrowser/backend/http"
1417
"github.com/gtsteffaniak/filebrowser/backend/indexing"
1518
"github.com/gtsteffaniak/filebrowser/backend/preview"
1619
"github.com/gtsteffaniak/filebrowser/backend/swagger/docs"
1720
"github.com/gtsteffaniak/go-logger/logger"
1821
"github.com/swaggo/swag"
19-
20-
"github.com/gtsteffaniak/filebrowser/backend/common/version"
2122
)
2223

2324
var store *storage.Storage

backend/common/errors/errors.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,5 @@ var (
2121
ErrNoTotpProvided = errors.New("OTP code is required for user")
2222
ErrNoTotpConfigured = errors.New("OTP is enforced, but user is not yet configured")
2323
ErrUnauthorized = errors.New("user unauthorized")
24+
ErrNotIndexed = errors.New("directory or item excluded from indexing")
2425
)

backend/common/settings/structs.go

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -78,17 +78,17 @@ type Source struct {
7878
}
7979

8080
type SourceConfig struct {
81-
IndexingInterval uint32 `json:"indexingInterval"` // optional manual overide interval in seconds to re-index the source
82-
DisableIndexing bool `json:"disableIndexing"` // disable the indexing of this source
83-
MaxWatchers int `json:"maxWatchers"` // number of concurrent watchers to use for this source, currently not supported
84-
NeverWatch []string `json:"neverWatchPaths"` // paths to never watch, relative to the source path (eg. "/folder/file.txt")
85-
IgnoreHidden bool `json:"ignoreHidden"` // ignore hidden files and folders.
86-
IgnoreZeroSizeFolders bool `json:"ignoreZeroSizeFolders"` // ignore folders with 0 size
87-
Exclude IndexFilter `json:"exclude"` // exclude files and folders from indexing, if include is not set
88-
Include IndexFilter `json:"include"` // include files and folders from indexing, if exclude is not set
89-
DefaultUserScope string `json:"defaultUserScope"` // default "/" should match folders under path
90-
DefaultEnabled bool `json:"defaultEnabled"` // should be added as a default source for new users?
91-
CreateUserDir bool `json:"createUserDir"` // create a user directory for each user
81+
IndexingInterval uint32 `json:"indexingIntervalMinutes"` // optional manual overide interval in seconds to re-index the source
82+
DisableIndexing bool `json:"disableIndexing"` // disable the indexing of this source
83+
MaxWatchers int `json:"maxWatchers"` // number of concurrent watchers to use for this source, currently not supported
84+
NeverWatch []string `json:"neverWatchPaths"` // paths to never watch, relative to the source path (eg. "/folder/file.txt")
85+
IgnoreHidden bool `json:"ignoreHidden"` // ignore hidden files and folders.
86+
IgnoreZeroSizeFolders bool `json:"ignoreZeroSizeFolders"` // ignore folders with 0 size
87+
Exclude IndexFilter `json:"exclude"` // exclude files and folders from indexing, if include is not set
88+
Include IndexFilter `json:"include"` // include files and folders from indexing, if exclude is not set
89+
DefaultUserScope string `json:"defaultUserScope"` // default "/" should match folders under path
90+
DefaultEnabled bool `json:"defaultEnabled"` // should be added as a default source for new users?
91+
CreateUserDir bool `json:"createUserDir"` // create a user directory for each user
9292
}
9393

9494
type IndexFilter struct {

backend/http/search.go

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -60,13 +60,6 @@ func searchHandler(w http.ResponseWriter, r *http.Request, d *requestContext) (i
6060
source := r.URL.Query().Get("source")
6161
if source == "" {
6262
source = config.Server.DefaultSource.Name
63-
} else {
64-
var err error
65-
// decode url encoded source name
66-
source, err = url.QueryUnescape(source)
67-
if err != nil {
68-
return http.StatusBadRequest, fmt.Errorf("invalid source encoding: %v", err)
69-
}
7063
}
7164
scope := r.URL.Query().Get("scope")
7265
unencodedScope, err := url.QueryUnescape(scope)

backend/indexing/indexingFiles.go

Lines changed: 83 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ import (
1010
"sync"
1111
"time"
1212

13+
"github.com/gtsteffaniak/filebrowser/backend/common/errors"
1314
"github.com/gtsteffaniak/filebrowser/backend/common/settings"
1415
"github.com/gtsteffaniak/filebrowser/backend/common/utils"
1516
"github.com/gtsteffaniak/filebrowser/backend/indexing/iteminfo"
@@ -39,6 +40,7 @@ type Index struct {
3940
CurrentSchedule int `json:"-"`
4041
settings.Source `json:"-"`
4142
Directories map[string]*iteminfo.FileInfo `json:"-"`
43+
DirectoriesLedger map[string]bool `json:"-"`
4244
runningScannerCount int `json:"-"`
4345
SmartModifier time.Duration `json:"-"`
4446
FilesChangedDuringIndexing bool `json:"-"`
@@ -66,9 +68,10 @@ func init() {
6668
func Initialize(source settings.Source, mock bool) {
6769
indexesMutex.Lock()
6870
newIndex := Index{
69-
mock: mock,
70-
Source: source,
71-
Directories: make(map[string]*iteminfo.FileInfo),
71+
mock: mock,
72+
Source: source,
73+
Directories: make(map[string]*iteminfo.FileInfo),
74+
DirectoriesLedger: make(map[string]bool),
7275
}
7376
newIndex.ReducedIndex = ReducedIndex{
7477
Status: "indexing",
@@ -83,7 +86,7 @@ func Initialize(source settings.Source, mock bool) {
8386
newIndex.RunIndexing("/", false)
8487
go newIndex.setupIndexingScanners()
8588
} else {
86-
newIndex.Status = "disabled"
89+
newIndex.Status = "ready"
8790
logger.Debug("indexing disabled for source: " + newIndex.Name)
8891
}
8992
}
@@ -94,7 +97,7 @@ func (idx *Index) indexDirectory(adjustedPath string, quick, recursive bool) err
9497
// Open the directory
9598
dir, err := os.Open(realPath)
9699
if err != nil {
97-
idx.RemoveDirectory(adjustedPath) // Remove, must have been deleted
100+
// must have been deleted
98101
return err
99102
}
100103
defer dir.Close()
@@ -103,72 +106,121 @@ func (idx *Index) indexDirectory(adjustedPath string, quick, recursive bool) err
103106
if err != nil {
104107
return err
105108
}
109+
110+
// check if excluded from indexing
111+
hidden := isHidden(dirInfo, idx.Path+adjustedPath)
112+
if !recursive && idx.shouldSkip(true, hidden, adjustedPath) {
113+
return errors.ErrNotIndexed
114+
}
115+
116+
// if indexing, mark the directory as valid and indexed.
117+
if recursive {
118+
// Prevent race conditions if scanning becomes concurrent in the future.
119+
idx.mu.Lock()
120+
idx.DirectoriesLedger[adjustedPath] = true
121+
idx.mu.Unlock()
122+
}
106123
combinedPath := adjustedPath + "/"
107124
if adjustedPath == "/" {
108125
combinedPath = "/"
109126
}
110127
// get whats currently in cache
111128
idx.mu.RLock()
112129
cacheDirItems := []iteminfo.ItemInfo{}
113-
modChange := true // default to true
130+
modChange := false
114131
cachedDir, exists := idx.Directories[adjustedPath]
115-
if exists && quick {
132+
if exists {
116133
modChange = dirInfo.ModTime() != cachedDir.ModTime
117134
cacheDirItems = cachedDir.Folders
118135
}
119136
idx.mu.RUnlock()
120137

121138
// If the directory has not been modified since the last index, skip expensive readdir
122139
// recursively check cached dirs for mod time changes as well
123-
if !modChange && recursive {
124-
for _, item := range cacheDirItems {
125-
err = idx.indexDirectory(combinedPath+item.Name, quick, true)
126-
if err != nil {
127-
logger.Errorf("error indexing directory %v : %v", combinedPath+item.Name, err)
140+
if recursive {
141+
if modChange {
142+
idx.mu.Lock()
143+
idx.FilesChangedDuringIndexing = true
144+
idx.mu.Unlock()
145+
} else if quick {
146+
for _, item := range cacheDirItems {
147+
err = idx.indexDirectory(combinedPath+item.Name, quick, true)
148+
if err != nil && err != errors.ErrNotIndexed {
149+
logger.Errorf("error indexing directory %v : %v", combinedPath+item.Name, err)
150+
}
128151
}
152+
return nil
129153
}
130-
return nil
131154
}
155+
dirFileInfo, err2 := idx.GetDirInfo(dir, dirInfo, realPath, adjustedPath, combinedPath, quick, recursive)
156+
if err2 != nil {
157+
return err2
158+
}
159+
// Update the current directory metadata in the index
160+
idx.UpdateMetadata(dirFileInfo)
161+
return nil
162+
}
132163

133-
if quick {
134-
idx.mu.Lock()
135-
idx.FilesChangedDuringIndexing = true
136-
idx.mu.Unlock()
164+
func (idx *Index) GetFsDirInfo(adjustedPath string) (*iteminfo.FileInfo, error) {
165+
realPath, isDir, err := idx.GetRealPath(adjustedPath)
166+
if err != nil {
167+
return nil, err
137168
}
169+
if !isDir {
170+
return nil, fmt.Errorf("path is not a directory: %s", adjustedPath)
171+
}
172+
dir, err := os.Open(realPath)
173+
if err != nil {
174+
return nil, err
175+
}
176+
defer dir.Close()
138177

139-
// Read directory contents
140-
files, err := dir.Readdir(-1)
178+
dirInfo, err := dir.Stat()
141179
if err != nil {
142-
return err
180+
return nil, err
181+
}
182+
fmt.Println(dir.Name())
183+
combinedPath := adjustedPath + "/"
184+
if adjustedPath == "/" {
185+
combinedPath = "/"
143186
}
187+
return idx.GetDirInfo(dir, dirInfo, realPath, adjustedPath, combinedPath, false, false)
188+
}
144189

190+
func (idx *Index) GetDirInfo(dirInfo *os.File, stat os.FileInfo, realPath, adjustedPath, combinedPath string, quick, recursive bool) (*iteminfo.FileInfo, error) {
191+
// Read directory contents
192+
files, err := dirInfo.Readdir(-1)
193+
if err != nil {
194+
return nil, err
195+
}
145196
var totalSize int64
146197
fileInfos := []iteminfo.ItemInfo{}
147198
dirInfos := []iteminfo.ItemInfo{}
148199

149200
// Process each file and directory in the current directory
150201
for _, file := range files {
151-
isHidden := isHidden(file, idx.Path+combinedPath)
202+
hidden := isHidden(file, idx.Path+combinedPath)
152203
isDir := iteminfo.IsDirectory(file)
153204
fullCombined := combinedPath + file.Name()
154-
if idx.shouldSkip(isDir, isHidden, fullCombined) {
205+
if idx.shouldSkip(isDir, hidden, fullCombined) {
155206
continue
156207
}
157208
itemInfo := &iteminfo.ItemInfo{
158209
Name: file.Name(),
159210
ModTime: file.ModTime(),
160-
Hidden: isHidden,
211+
Hidden: hidden,
161212
}
162213

163214
if isDir {
164-
165215
// skip non-indexable dirs.
166216
if file.Name() == "$RECYCLE.BIN" || file.Name() == "System Volume Information" {
167217
continue
168218
}
169219

170220
dirPath := combinedPath + file.Name()
171221
if recursive {
222+
// clear for garbage collection
223+
file = nil
172224
// Recursively index the subdirectory
173225
err = idx.indexDirectory(dirPath, quick, recursive)
174226
if err != nil {
@@ -192,26 +244,25 @@ func (idx *Index) indexDirectory(adjustedPath string, quick, recursive bool) err
192244
idx.NumFiles++
193245
}
194246
}
247+
195248
if totalSize == 0 && idx.Config.IgnoreZeroSizeFolders {
196-
return nil
249+
return nil, errors.ErrNotIndexed
197250
}
251+
198252
// Create FileInfo for the current directory
199253
dirFileInfo := &iteminfo.FileInfo{
200254
Path: adjustedPath,
201255
Files: fileInfos,
202256
Folders: dirInfos,
203257
}
204258
dirFileInfo.ItemInfo = iteminfo.ItemInfo{
205-
Name: dirInfo.Name(),
259+
Name: filepath.Base(dirInfo.Name()),
206260
Type: "directory",
207261
Size: totalSize,
208-
ModTime: dirInfo.ModTime(),
262+
ModTime: stat.ModTime(),
209263
}
210-
211264
dirFileInfo.SortItems()
212-
// Update the current directory metadata in the index
213-
idx.UpdateMetadata(dirFileInfo)
214-
return nil
265+
return dirFileInfo, nil
215266
}
216267

217268
// input should be non-index path.
@@ -280,7 +331,7 @@ func (idx *Index) RefreshFileInfo(opts iteminfo.FileOptions) error {
280331
}
281332
err := idx.indexDirectory(refreshOptions.Path, false, false)
282333
if err != nil {
283-
return fmt.Errorf("file/folder does not exist to refresh data: %s", refreshOptions.Path)
334+
return err
284335
}
285336
file, exists := idx.GetMetadataInfo(refreshOptions.Path, true)
286337
if !exists {

0 commit comments

Comments
 (0)