Skip to content

Commit 4787038

Browse files
authored
Merge pull request #81 from joshbeard/skip-index
feat: skip index
2 parents 62bb06f + ef55ce0 commit 4787038

File tree

10 files changed

+364
-91
lines changed

10 files changed

+364
-91
lines changed

README.md

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ Flags:
4545
-q, --quiet Suppress log output
4646
-r, --recursive List files recursively
4747
-S, --skip strings A list of files or directories to skip. Comma separated or specified multiple times
48+
--skipindex-files strings A list of files that indicate a directory should be skipped for indexing but still included in the parent directory listing. Comma separated or specified multiple times (default [.skipindex])
4849
--sort-by string The order for the index page. One of: last_modified, name, natural_name (default "natural_name")
4950
-s, --source string REQUIRED. The source directory or S3 URI to list
5051
-t, --target string REQUIRED. The target directory or S3 URI to write to
@@ -277,6 +278,11 @@ order: "asc"
277278
# recursive enables indexing the source recursively.
278279
recursive: false
279280

281+
# skipindex_files is a list of filenames that, when present in a directory,
282+
# indicate that the directory should be skipped for indexing but still
283+
# included in the parent directory's listing.
284+
skipindex_files: [".skipindex"]
285+
280286
# skips is a list of filenames to skip.
281287
skips: []
282288

@@ -334,22 +340,46 @@ This is useful for:
334340
- Preventing indexing of directories that contain temporary or build files
335341
- Selectively controlling which directories appear in your indexes
336342

343+
## Excluding Directories from Indexing but Including in Parent Listing
344+
345+
You can also use the `--skipindex-files` flag to specify files that, when present in a directory, indicate that the directory should be excluded from indexing but still appear in the parent directory's listing. When the indexer finds one of these files in a directory, it will:
346+
347+
1. Skip generating an index for that directory (same as `--noindex-files`)
348+
2. **Still include the directory in the parent directory's index** (different from `--noindex-files`)
349+
3. Skip all subdirectories beneath it (same as `--noindex-files`)
350+
351+
This is particularly useful for directories that already have their own index pages (e.g., generated by a web framework) where you want to link to them from the parent directory, but don't want to overwrite their custom index.
352+
337353
### Examples
338354

339355
To exclude a directory using the default `.noindex` file:
340356

341357
```shell
342-
# Create an empty .noindex file in a directory you want to exclude
358+
# Create an empty .noindex file in a directory you want to exclude completely
343359
touch /path/to/directory/.noindex
344360
```
345361

362+
To exclude a directory from indexing but still include it in the parent's index:
363+
364+
```shell
365+
# Create an empty .skipindex file
366+
touch /path/to/directory/.skipindex
367+
```
368+
346369
To specify custom noindex filenames:
347370

348371
```shell
349372
# Use custom filenames instead of the default .noindex
350373
web-indexer --source /path/to/directory --target /path/to/output --noindex-files .private,.hidden,DO_NOT_INDEX
351374
```
352375

376+
To specify custom skipindex filenames:
377+
378+
```shell
379+
# Use custom filenames instead of the default .skipindex
380+
web-indexer --source /path/to/directory --target /path/to/output --skipindex-files index.html,.custom-index
381+
```
382+
353383
In your YAML configuration:
354384

355385
```yaml
@@ -359,10 +389,19 @@ noindex_files:
359389
- .private
360390
- .hidden
361391
- DO_NOT_INDEX
392+
skipindex_files:
393+
- index.html
394+
- .custom-index
362395
```
363396

364397
When the indexer encounters a directory with a noindex file, it will log a message at the INFO level:
365398

366399
```
367400
INFO Skipping /path/to/directory (found noindex file .noindex)
368401
```
402+
403+
When the indexer encounters a directory with a skipindex file, it will log a message at the INFO level:
404+
405+
```
406+
INFO Skipping indexing of /path/to/directory (found skipindex file .skipindex), will include in parent directory
407+
```

action.yml

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,13 +28,11 @@ inputs:
2828
required: false
2929
minify:
3030
description: Boolean toggling minification of the generated HTML
31-
noindex_files:
32-
description: >
33-
A comma-separated list of filenames that will cause a directory/path to be skipped if present
31+
noindex-files:
32+
description: 'A list of files that indicate a directory should be skipped'
3433
required: false
3534
order:
36-
description: The order for the items. One of asc, desc (default "asc")
37-
required: false
35+
description: 'The order for the items. One of: asc, desc'
3836
recursive:
3937
description: Index files recursively
4038
skip:
@@ -59,6 +57,13 @@ inputs:
5957
description: 'The Docker image tag to use (e.g., latest, dev-pr123)'
6058
required: false
6159
default: 'latest'
60+
skipindex-files:
61+
description: 'A list of files that indicate a directory should be skipped for indexing but still included in the parent directory listing'
62+
required: false
63+
image_tag:
64+
description: 'The Docker image tag to use (e.g., latest, dev-pr123)'
65+
required: false
66+
default: 'latest'
6267

6368
runs:
6469
using: 'docker'
@@ -71,7 +76,8 @@ runs:
7176
LINK_TO_INDEX: ${{ inputs.link_to_index }}
7277
LOG_LEVEL: ${{ inputs.log_level }}
7378
MINIFY: ${{ inputs.minify }}
74-
NOINDEX_FILES: ${{ inputs.noindex_files }}
79+
NOINDEX_FILES: ${{ inputs.noindex-files }}
80+
SKIPINDEX_FILES: ${{ inputs.skipindex-files }}
7581
ORDER: ${{ inputs.order }}
7682
RECURSIVE: ${{ inputs.recursive }}
7783
SKIP: ${{ inputs.skip }}

entrypoint.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ cmd="web-indexer"
1313
[[ -n "$LOG_LEVEL" ]] && cmd="$cmd --log-level \"$LOG_LEVEL\""
1414
[[ "$MINIFY" == "true" ]] && cmd="$cmd --minify"
1515
[[ -n "$NOINDEX_FILES" ]] && cmd="$cmd --noindex-files \"$NOINDEX_FILES\""
16+
[[ -n "$SKIPINDEX_FILES" ]] && cmd="$cmd --skipindex-files \"$SKIPINDEX_FILES\""
1617
[[ -n "$ORDER" ]] && cmd="$cmd --order \"$ORDER\""
1718
[[ "$RECURSIVE" == "true" ]] && cmd="$cmd --recursive"
1819
[[ -n "$SKIP" ]] && cmd="$cmd --skip \"$SKIP\""

internal/webindexer/config.go

Lines changed: 22 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,27 +5,28 @@ import (
55
)
66

77
type Config struct {
8-
BaseURL string `yaml:"base_url" mapstructure:"base_url"`
9-
DateFormat string `yaml:"date_format" mapstructure:"date_format"`
10-
DirsFirst bool `yaml:"dirs_first" mapstructure:"dirs_first"`
11-
IndexFile string `yaml:"index_file" mapstructure:"index_file"`
12-
LinkToIndexes bool `yaml:"link_to_index" mapstructure:"link_to_index"`
13-
LogLevel string `yaml:"log_level" mapstructure:"log_level"`
14-
LogFile string `yaml:"log_file" mapstructure:"log_file"`
15-
Minify bool `yaml:"minify" mapstructure:"minify"`
16-
NoIndexFiles []string `yaml:"noindex_files" mapstructure:"noindex_files"`
17-
Order string `yaml:"order" mapstructure:"order"`
18-
Quiet bool `yaml:"quiet" mapstructure:"quiet"`
19-
Recursive bool `yaml:"recursive" mapstructure:"recursive"`
20-
Skips []string `yaml:"skips" mapstructure:"skips"`
21-
SortBy string `yaml:"sort_by" mapstructure:"sort_by"`
22-
Source string `yaml:"source" mapstructure:"source"`
23-
Target string `yaml:"target" mapstructure:"target"`
24-
Template string `yaml:"template" mapstructure:"template"`
25-
Theme string `yaml:"theme" mapstructure:"theme"`
26-
Title string `yaml:"title" mapstructure:"title"`
27-
CfgFile string `yaml:"-"`
28-
BasePath string `yaml:"-"`
8+
BaseURL string `yaml:"base_url" mapstructure:"base_url"`
9+
DateFormat string `yaml:"date_format" mapstructure:"date_format"`
10+
DirsFirst bool `yaml:"dirs_first" mapstructure:"dirs_first"`
11+
IndexFile string `yaml:"index_file" mapstructure:"index_file"`
12+
LinkToIndexes bool `yaml:"link_to_index" mapstructure:"link_to_index"`
13+
LogLevel string `yaml:"log_level" mapstructure:"log_level"`
14+
LogFile string `yaml:"log_file" mapstructure:"log_file"`
15+
Minify bool `yaml:"minify" mapstructure:"minify"`
16+
NoIndexFiles []string `yaml:"noindex_files" mapstructure:"noindex_files"`
17+
SkipIndexFiles []string `yaml:"skipindex_files" mapstructure:"skipindex_files"`
18+
Order string `yaml:"order" mapstructure:"order"`
19+
Quiet bool `yaml:"quiet" mapstructure:"quiet"`
20+
Recursive bool `yaml:"recursive" mapstructure:"recursive"`
21+
Skips []string `yaml:"skips" mapstructure:"skips"`
22+
SortBy string `yaml:"sort_by" mapstructure:"sort_by"`
23+
Source string `yaml:"source" mapstructure:"source"`
24+
Target string `yaml:"target" mapstructure:"target"`
25+
Template string `yaml:"template" mapstructure:"template"`
26+
Theme string `yaml:"theme" mapstructure:"theme"`
27+
Title string `yaml:"title" mapstructure:"title"`
28+
CfgFile string `yaml:"-"`
29+
BasePath string `yaml:"-"`
2930
}
3031

3132
type SortBy string

internal/webindexer/local.go

Lines changed: 27 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -26,9 +26,20 @@ func (l *LocalBackend) Read(path string) ([]Item, bool, error) {
2626

2727
// First check for noindex or skipindex files before processing anything else
2828
for _, file := range files {
29-
if !file.IsDir() && len(l.cfg.NoIndexFiles) > 0 && contains(l.cfg.NoIndexFiles, file.Name()) {
30-
log.Infof("Skipping %s (found noindex file %s)", path, file.Name())
31-
return nil, true, nil
29+
if !file.IsDir() {
30+
// Check for noindex files (skip directory entirely)
31+
if len(l.cfg.NoIndexFiles) > 0 && contains(l.cfg.NoIndexFiles, file.Name()) {
32+
log.Infof("Skipping %s (found noindex file %s)", path, file.Name())
33+
return nil, true, nil
34+
}
35+
36+
// Check for skipindex files (skip indexing but include in parent)
37+
if len(l.cfg.SkipIndexFiles) > 0 && contains(l.cfg.SkipIndexFiles, file.Name()) {
38+
log.Infof("Skipping indexing of %s (found skipindex file %s), will include in parent directory", path, file.Name())
39+
// Return empty items but mark as not having noindex file
40+
// This will prevent indexing this directory but still include it in the parent
41+
return []Item{}, false, nil
42+
}
3243
}
3344
}
3445

@@ -44,23 +55,23 @@ func (l *LocalBackend) Read(path string) ([]Item, bool, error) {
4455
return nil, false, fmt.Errorf("unable to stat file %s: %w", file.Name(), err)
4556
}
4657

47-
// If it's a directory, check if it contains a noindex file before adding it
58+
// If it's a directory, check if it contains a noindex file before adding it (a directory with only a skipindex file is still added)
4859
if stat.IsDir() {
4960
subFiles, err := os.ReadDir(fullPath)
5061
if err != nil {
5162
return nil, false, fmt.Errorf("unable to read directory %s: %w", fullPath, err)
5263
}
5364

5465
// Skip this directory if it contains a noindex file
55-
hasNoIndex := false
66+
skipDir := false
5667
for _, subFile := range subFiles {
5768
if !subFile.IsDir() && len(l.cfg.NoIndexFiles) > 0 && contains(l.cfg.NoIndexFiles, subFile.Name()) {
5869
log.Infof("Skipping %s (found noindex file %s)", fullPath, subFile.Name())
59-
hasNoIndex = true
70+
skipDir = true
6071
break
6172
}
6273
}
63-
if hasNoIndex {
74+
if skipDir {
6475
continue
6576
}
6677
}
@@ -82,6 +93,15 @@ func (l *LocalBackend) Read(path string) ([]Item, bool, error) {
8293
return items, false, nil
8394
}
8495

96+
func (l *LocalBackend) EnsureDirExists(relativePath string) error {
97+
localPath := filepath.Join(l.cfg.Target, relativePath)
98+
if err := os.MkdirAll(localPath, 0o750); err != nil {
99+
return fmt.Errorf("failed to ensure directory exists %s: %w", localPath, err)
100+
}
101+
log.Debugf("Ensured directory exists: %s", localPath)
102+
return nil
103+
}
104+
85105
func (l *LocalBackend) Write(data Data, content string) error {
86106
prefix := data.RelativePath
87107
prefix = strings.TrimPrefix(prefix, l.cfg.BasePath)

internal/webindexer/s3.go

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,13 +48,22 @@ func (s *S3Backend) Read(prefix string) ([]Item, bool, error) {
4848
return nil, false, fmt.Errorf("unable to list S3 objects: %w", err)
4949
}
5050

51-
// First check for noindex files before processing anything else
51+
// First check for noindex files or skipindex files before processing anything else
5252
for _, content := range resp.Contents {
5353
fileName := filepath.Base(*content.Key)
54+
// Check for noindex files (skip directory entirely)
5455
if len(s.cfg.NoIndexFiles) > 0 && contains(s.cfg.NoIndexFiles, fileName) {
5556
log.Infof("Skipping %s/%s (found noindex file %s)", s.bucket, prefix, fileName)
5657
return nil, true, nil
5758
}
59+
60+
// Check for skipindex files (skip indexing but include in parent)
61+
if len(s.cfg.SkipIndexFiles) > 0 && contains(s.cfg.SkipIndexFiles, fileName) {
62+
log.Infof("Skipping indexing of %s/%s (found skipindex file %s), will include in parent directory", s.bucket, prefix, fileName)
63+
// Return empty items but mark as not having noindex file
64+
// This will prevent indexing this directory but still include it in the parent
65+
return []Item{}, false, nil
66+
}
5867
}
5968

6069
var items []Item
@@ -94,16 +103,16 @@ func (s *S3Backend) Read(prefix string) ([]Item, bool, error) {
94103
}
95104

96105
// Skip this prefix if it contains a noindex file
97-
hasNoIndex := false
106+
skipDir := false
98107
for _, content := range subResp.Contents {
99108
fileName := filepath.Base(*content.Key)
100109
if len(s.cfg.NoIndexFiles) > 0 && contains(s.cfg.NoIndexFiles, fileName) {
101110
log.Infof("Skipping %s/%s (found noindex file %s)", s.bucket, *commonPrefix.Prefix, fileName)
102-
hasNoIndex = true
111+
skipDir = true
103112
break
104113
}
105114
}
106-
if hasNoIndex {
115+
if skipDir {
107116
continue
108117
}
109118

@@ -118,6 +127,14 @@ func (s *S3Backend) Read(prefix string) ([]Item, bool, error) {
118127
return items, false, nil
119128
}
120129

130+
// EnsureDirExists is a no-op for S3 as directories are implicit.
131+
func (s *S3Backend) EnsureDirExists(relativePath string) error {
132+
log.Debugf("EnsureDirExists called for S3 (no-op): %s/%s", s.bucket, relativePath)
133+
// S3 directories are created implicitly by object keys.
134+
// We could potentially check if the bucket exists here if needed.
135+
return nil
136+
}
137+
121138
func (s *S3Backend) Write(data Data, content string) error {
122139
bucket, target := uriToBucketAndPrefix(s.cfg.Target)
123140
target = strings.TrimPrefix(target, s.cfg.BasePath)

0 commit comments

Comments
 (0)