Skip to content

Commit 8b59bef

Browse files
fix: make cache aware of subpaths (#481)
1 parent 4b190fc commit 8b59bef

File tree

3 files changed

+10
-4
lines changed

3 files changed

+10
-4
lines changed

compose.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ x-prod-environment: &prod-environment
2121

2222
x-dev-environment: &dev-environment
2323
DEBUG: "true"
24-
LOG_LEVEL: "debug"
24+
LOG_LEVEL: "DEBUG"
2525
RELOAD: "true"
2626
GITINGEST_SENTRY_ENVIRONMENT: ${GITINGEST_SENTRY_ENVIRONMENT:-development}
2727
# S3 Configuration for development

src/server/query_processor.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ async def _check_s3_cache(
9090
user_name=cast("str", query.user_name),
9191
repo_name=cast("str", query.repo_name),
9292
commit=query.commit,
93+
subpath=query.subpath,
9394
include_patterns=query.include_patterns,
9495
ignore_patterns=query.ignore_patterns,
9596
)
@@ -168,6 +169,7 @@ def _store_digest_content(
168169
user_name=cast("str", query.user_name),
169170
repo_name=cast("str", query.repo_name),
170171
commit=query.commit,
172+
subpath=query.subpath,
171173
include_patterns=query.include_patterns,
172174
ignore_patterns=query.ignore_patterns,
173175
)

src/server/s3_utils.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -62,14 +62,15 @@ def generate_s3_file_path(
6262
user_name: str,
6363
repo_name: str,
6464
commit: str,
65+
subpath: str,
6566
include_patterns: set[str] | None,
6667
ignore_patterns: set[str],
6768
) -> str:
6869
"""Generate S3 file path with proper naming convention.
6970
7071
The file path is formatted as:
7172
[<S3_DIRECTORY_PREFIX>/]ingest/<provider>/<repo-owner>/<repo-name>/<commit-ID>/
72-
<exclude&include hash>/<owner>-<repo-name>.txt
73+
<exclude&include hash>/<owner>-<repo-name>-<subpath-hash>.txt
7374
7475
If S3_DIRECTORY_PREFIX environment variable is set, it will be prefixed to the path.
7576
The commit-ID is always included in the URL.
@@ -85,6 +86,8 @@ def generate_s3_file_path(
8586
Repository name.
8687
commit : str
8788
Commit hash.
89+
subpath : str
90+
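Subpath within the repository; a hash of it is embedded in the file name so that different subpaths map to different cache paths.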
8891
include_patterns : set[str] | None
8992
Set of patterns specifying which files to include.
9093
ignore_patterns : set[str]
@@ -111,9 +114,10 @@ def generate_s3_file_path(
111114
patterns_str = f"include:{sorted(include_patterns) if include_patterns else []}"
112115
patterns_str += f"exclude:{sorted(ignore_patterns)}"
113116
patterns_hash = hashlib.sha256(patterns_str.encode()).hexdigest()[:16]
117+
subpath_hash = hashlib.sha256(subpath.encode()).hexdigest()[:16]
114118

115-
# Build the base path using hostname directly
116-
base_path = f"ingest/{hostname}/{user_name}/{repo_name}/{commit}/{patterns_hash}/{user_name}-{repo_name}.txt"
119+
file_name = f"{user_name}-{repo_name}-{subpath_hash}.txt"
120+
base_path = f"ingest/{hostname}/{user_name}/{repo_name}/{commit}/{patterns_hash}/{file_name}"
117121

118122
# Check for S3_DIRECTORY_PREFIX environment variable
119123
s3_directory_prefix = os.getenv("S3_DIRECTORY_PREFIX")

0 commit comments

Comments
 (0)