Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/gitingest/schemas/ingestion.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class IngestionQuery(BaseModel): # pylint: disable=too-many-instance-attributes
tag : str | None
The tag of the repository.
max_file_size : int
The maximum file size to ingest (default: 10 MB).
The maximum file size to ingest in bytes (default: 10 MB).
ignore_patterns : set[str]
The patterns to ignore (default: ``set()``).
include_patterns : set[str] | None
Expand Down
8 changes: 5 additions & 3 deletions src/server/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,16 @@
from __future__ import annotations

from enum import Enum
from typing import Union
from typing import TYPE_CHECKING, Union

from pydantic import BaseModel, Field, field_validator

from gitingest.utils.compat_func import removesuffix
from server.server_config import MAX_FILE_SIZE_KB

# needed for type checking (pydantic)
from server.form_types import IntForm, OptStrForm, StrForm # noqa: TC001 (typing-only-first-party-import)
if TYPE_CHECKING:
from server.form_types import IntForm, OptStrForm, StrForm


class PatternType(str, Enum):
Expand Down Expand Up @@ -39,7 +41,7 @@ class IngestRequest(BaseModel):
"""

input_text: str = Field(..., description="Git repository URL or slug to ingest")
max_file_size: int = Field(..., ge=0, le=500, description="File size slider position (0-500)")
max_file_size: int = Field(..., ge=1, le=MAX_FILE_SIZE_KB, description="File size in KB")
pattern_type: PatternType = Field(default=PatternType.EXCLUDE, description="Pattern type for file filtering")
pattern: str = Field(default="", description="Glob/regex pattern for file filtering")
token: str | None = Field(default=None, description="GitHub PAT for private repositories")
Expand Down
14 changes: 6 additions & 8 deletions src/server/query_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@
from server.models import IngestErrorResponse, IngestResponse, IngestSuccessResponse, PatternType
from server.s3_utils import generate_s3_file_path, is_s3_enabled, upload_to_s3
from server.server_config import MAX_DISPLAY_SIZE
from server.server_utils import Colors, log_slider_to_size
from server.server_utils import Colors


async def process_query(
input_text: str,
slider_position: int,
max_file_size: int,
pattern_type: PatternType,
pattern: str,
token: str | None = None,
Expand All @@ -32,8 +32,8 @@ async def process_query(
----------
input_text : str
Input text provided by the user, typically a Git repository URL or slug.
slider_position : int
Position of the slider, representing the maximum file size in the query.
max_file_size : int
Maximum file size in KB to be included in the digest.
pattern_type : PatternType
Type of pattern to use (either "include" or "exclude")
pattern : str
Expand All @@ -55,8 +55,6 @@ async def process_query(
if token:
validate_github_token(token)

max_file_size = log_slider_to_size(slider_position)

try:
query = await parse_remote_repo(input_text, token=token)
except Exception as exc:
Expand All @@ -65,7 +63,7 @@ async def process_query(
return IngestErrorResponse(error=str(exc))

query.url = cast("str", query.url)
query.max_file_size = max_file_size
query.max_file_size = max_file_size * 1024 # Convert to bytes since we currently use KB in higher levels
query.ignore_patterns, query.include_patterns = process_patterns(
exclude_patterns=pattern if pattern_type == PatternType.EXCLUDE else None,
include_patterns=pattern if pattern_type == PatternType.INCLUDE else None,
Expand Down Expand Up @@ -142,7 +140,7 @@ async def process_query(
digest_url=digest_url,
tree=tree,
content=content,
default_max_file_size=slider_position,
default_max_file_size=max_file_size,
pattern_type=pattern_type,
pattern=pattern,
)
Expand Down
6 changes: 3 additions & 3 deletions src/server/routers/ingest.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from server.models import IngestRequest
from server.routers_utils import COMMON_INGEST_RESPONSES, _perform_ingestion
from server.s3_utils import is_s3_enabled
from server.server_config import MAX_DISPLAY_SIZE
from server.server_config import DEFAULT_FILE_SIZE_KB
from server.server_utils import limiter

ingest_counter = Counter("gitingest_ingest_total", "Number of ingests", ["status", "url"])
Expand Down Expand Up @@ -58,7 +58,7 @@ async def api_ingest_get(
request: Request, # noqa: ARG001 (unused-function-argument) # pylint: disable=unused-argument
user: str,
repository: str,
max_file_size: int = MAX_DISPLAY_SIZE,
max_file_size: int = DEFAULT_FILE_SIZE_KB,
pattern_type: str = "exclude",
pattern: str = "",
token: str = "",
Expand All @@ -74,7 +74,7 @@ async def api_ingest_get(
- **repository** (`str`): GitHub repository name

**Query Parameters**
- **max_file_size** (`int`, optional): Maximum file size to include in the digest (default: 50 KB)
- **max_file_size** (`int`, optional): Maximum file size in KB to include in the digest (default: 5120 KB)
- **pattern_type** (`str`, optional): Type of pattern to use ("include" or "exclude", default: "exclude")
- **pattern** (`str`, optional): Pattern to include or exclude in the query (default: "")
- **token** (`str`, optional): GitHub personal access token for private repositories (default: "")
Expand Down
2 changes: 1 addition & 1 deletion src/server/routers_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ async def _perform_ingestion(

result = await process_query(
input_text=input_text,
slider_position=max_file_size,
max_file_size=max_file_size,
pattern_type=pattern_type,
pattern=pattern,
token=token,
Expand Down
4 changes: 2 additions & 2 deletions src/server/server_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
DELETE_REPO_AFTER: int = 60 * 60 # In seconds (1 hour)

# Slider configuration (if updated, update the logSliderToSize function in src/static/js/utils.js)
MAX_FILE_SIZE_KB: int = 100 * 1024 # 100 MB
MAX_SLIDER_POSITION: int = 500 # Maximum slider position
DEFAULT_FILE_SIZE_KB: int = 5 * 1024 # 5 MB
MAX_FILE_SIZE_KB: int = 100 * 1024 # 100 MB

EXAMPLE_REPOS: list[dict[str, str]] = [
{"name": "Gitingest", "url": "https://github.com/coderamp-labs/gitingest"},
Expand Down
21 changes: 1 addition & 20 deletions src/server/server_utils.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Utility functions for the server."""

import asyncio
import math
import shutil
import time
from contextlib import asynccontextmanager, suppress
Expand All @@ -15,7 +14,7 @@
from slowapi.util import get_remote_address

from gitingest.config import TMP_BASE_PATH
from server.server_config import DELETE_REPO_AFTER, MAX_FILE_SIZE_KB, MAX_SLIDER_POSITION
from server.server_config import DELETE_REPO_AFTER

# Initialize a rate limiter
limiter = Limiter(key_func=get_remote_address)
Expand Down Expand Up @@ -161,24 +160,6 @@ def _append_line(path: Path, line: str) -> None:
fp.write(f"{line}\n")


def log_slider_to_size(position: int) -> int:
    """Map a slider position onto a file size using a logarithmic curve.

    Parameters
    ----------
    position : int
        Slider position; it is divided by ``MAX_SLIDER_POSITION`` to obtain the
        fraction of the slider's travel (documented range: 0 to 500).

    Returns
    -------
    int
        File size in bytes. The curve produces a value in KB, which is rounded
        to the nearest integer and then multiplied by 1024.

    """
    # Fraction of the slider's travel, bent by a 1.5 exponent.
    curve = pow(position / MAX_SLIDER_POSITION, 1.5)
    # Scale the natural log of the KB ceiling by the curve, then undo the log.
    size_kb = round(math.exp(math.log(MAX_FILE_SIZE_KB) * curve))
    return size_kb * 1024


## Color printing utility
class Colors:
"""ANSI color codes."""
Expand Down
4 changes: 2 additions & 2 deletions src/server/templates/components/git_form.jinja
Original file line number Diff line number Diff line change
Expand Up @@ -76,12 +76,12 @@
</label>
<input type="range"
id="file_size"
name="max_file_size"
min="0"
min="1"
max="500"
required
value="{{ default_max_file_size }}"
class="w-full h-3 bg-[#FAFAFA] bg-no-repeat bg-[length:50%_100%] bg-[#ebdbb7] appearance-none border-[3px] border-gray-900 rounded-sm focus:outline-none bg-gradient-to-r from-[#FE4A60] to-[#FE4A60] [&::-webkit-slider-thumb]:w-5 [&::-webkit-slider-thumb]:h-7 [&::-webkit-slider-thumb]:appearance-none [&::-webkit-slider-thumb]:bg-white [&::-webkit-slider-thumb]:rounded-sm [&::-webkit-slider-thumb]:cursor-pointer [&::-webkit-slider-thumb]:border-solid [&::-webkit-slider-thumb]:border-[3px] [&::-webkit-slider-thumb]:border-gray-900 [&::-webkit-slider-thumb]:shadow-[3px_3px_0_#000]">
<input type="hidden" id="max_file_size_kb" name="max_file_size" value="">
</div>
<!-- PAT checkbox with PAT field below -->
<div class="flex flex-col items-start w-full sm:col-span-2 lg:col-span-1 lg:row-span-2 lg:pt-3.5">
Expand Down
44 changes: 37 additions & 7 deletions src/static/js/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -126,13 +126,13 @@ function collectFormData(form) {
const json_data = {};
const inputText = form.querySelector('[name="input_text"]');
const token = form.querySelector('[name="token"]');
const slider = document.getElementById('file_size');
const hiddenInput = document.getElementById('max_file_size_kb');
const patternType = document.getElementById('pattern_type');
const pattern = document.getElementById('pattern');

if (inputText) {json_data.input_text = inputText.value;}
if (token) {json_data.token = token.value;}
if (slider) {json_data.max_file_size = slider.value;}
if (hiddenInput) {json_data.max_file_size = hiddenInput.value;}
if (patternType) {json_data.pattern_type = patternType.value;}
if (pattern) {json_data.pattern = pattern.value;}

Expand Down Expand Up @@ -206,6 +206,14 @@ function handleSubmit(event, showLoadingSpinner = false) {

if (!form) {return;}

// Ensure hidden input is updated before collecting form data
const slider = document.getElementById('file_size');
const hiddenInput = document.getElementById('max_file_size_kb');

if (slider && hiddenInput) {
hiddenInput.value = logSliderToSize(slider.value);
}

if (showLoadingSpinner) {
showLoading();
}
Expand All @@ -226,12 +234,32 @@ function handleSubmit(event, showLoadingSpinner = false) {
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(json_data)
})
.then((response) => response.json())
.then( (data) => {
// Hide loading overlay
.then(async (response) => {
let data;

try {
data = await response.json();
} catch {
data = {};
}
setButtonLoadingState(submitButton, false);

// Handle error
if (!response.ok) {
// Show all error details if present
if (Array.isArray(data.detail)) {
const details = data.detail.map((d) => `<li>${d.msg || JSON.stringify(d)}</li>`).join('');

showError(`<div class='mb-6 p-4 bg-red-50 border border-red-200 rounded-lg text-red-700'><b>Error(s):</b><ul>${details}</ul></div>`);

return;
}
// Other errors
showError(`<div class='mb-6 p-4 bg-red-50 border border-red-200 rounded-lg text-red-700'>${data.error || JSON.stringify(data) || 'An error occurred.'}</div>`);

return;
}

// Handle error in data
if (data.error) {
showError(`<div class='mb-6 p-4 bg-red-50 border border-red-200 rounded-lg text-red-700'>${data.error}</div>`);

Expand Down Expand Up @@ -327,14 +355,16 @@ function logSliderToSize(position) {
function initializeSlider() {
const slider = document.getElementById('file_size');
const sizeValue = document.getElementById('size_value');
const hiddenInput = document.getElementById('max_file_size_kb');

if (!slider || !sizeValue) {return;}
if (!slider || !sizeValue || !hiddenInput) {return;}

function updateSlider() {
const value = logSliderToSize(slider.value);

sizeValue.textContent = formatSize(value);
slider.style.backgroundSize = `${(slider.value / slider.max) * 100}% 100%`;
hiddenInput.value = value; // Set hidden input to KB value
}

// Update on slider change
Expand Down
Loading