Skip to content
Open
2 changes: 1 addition & 1 deletion api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ dependencies = [
"soundfile==0.13.1",
"tensorboardX==2.6.2.2",
"timm==1.0.15",
"transformerlab==0.0.53",
"transformerlab==0.0.54",
"transformerlab-inference==0.2.51",
"transformers==4.57.1",
"wandb==0.19.10",
Expand Down
11 changes: 6 additions & 5 deletions api/transformerlab/routers/compute_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class ProviderTaskLaunchRequest(BaseModel):
cluster_name: Optional[str] = Field(None, description="Base cluster name, suffix is appended automatically")
command: str = Field(..., description="Command to execute on the cluster")
subtype: Optional[str] = Field(None, description="Optional subtype for filtering")
interactive_type: Optional[str] = Field(None, description="Interactive task type (e.g. vscode)")
cpus: Optional[str] = None
memory: Optional[str] = None
disk_space: Optional[str] = None
Expand Down Expand Up @@ -758,11 +759,10 @@ async def launch_task_on_provider(

provider_instance = get_provider_instance(provider)

job_id = job_service.job_create(
type="REMOTE",
status="LAUNCHING",
experiment_id=request.experiment_id,
)
# Interactive tasks should start directly in INTERACTIVE state instead of LAUNCHING
initial_status = "INTERACTIVE" if request.subtype == "interactive" else "LAUNCHING"

job_id = job_service.job_create(type="REMOTE", status=initial_status, experiment_id=request.experiment_id)

base_name = request.cluster_name or request.task_name or provider.name
formatted_cluster_name = f"{_sanitize_cluster_basename(base_name)}-job-{job_id}"
Expand Down Expand Up @@ -836,6 +836,7 @@ async def launch_task_on_provider(
"command": request.command,
"cluster_name": formatted_cluster_name,
"subtype": request.subtype,
"interactive_type": request.interactive_type,
"cpus": request.cpus,
"memory": request.memory,
"disk_space": request.disk_space,
Expand Down
100 changes: 96 additions & 4 deletions api/transformerlab/routers/experiment/jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,11 @@

import transformerlab.services.job_service as job_service
from transformerlab.services.job_service import job_update_status
from transformerlab.services.provider_service import (
get_team_provider,
get_provider_instance,
)
from transformerlab.services.provider_service import get_team_provider, get_provider_instance
from transformerlab.routers.auth import get_user_and_team
from transformerlab.shared.models.user_model import get_async_session
from transformerlab.compute_providers.models import JobState
from transformerlab.utils.vscode_parser import get_vscode_tunnel_info
from lab import Job
from lab.dirs import get_workspace_dir

Expand Down Expand Up @@ -335,6 +333,100 @@ async def get_provider_job_logs(
}


@router.get("/{job_id}/vscode_tunnel_info")
async def get_vscode_tunnel_info_for_job(
    experimentId: str,
    job_id: str,
    tail_lines: int = Query(400, ge=100, le=2000),
    user_and_team=Depends(get_user_and_team),
    session: AsyncSession = Depends(get_async_session),
):
    """
    Parse provider logs for a REMOTE job and extract VS Code tunnel information.

    This uses the shared vscode_parser helper to extract auth code and tunnel URL
    from the provider logs, and is intended for interactive VS Code tasks.

    Args:
        experimentId: Experiment the job must belong to (ownership check below).
        job_id: Local job id whose job_data carries the provider metadata.
        tail_lines: How many trailing log lines to fetch (clamped to 100-2000).
        user_and_team: Injected auth context; team_id scopes provider lookup.
        session: Async DB session for the provider query.

    Raises:
        HTTPException: 404 (job/provider/provider-job not found), 400 (missing
            provider metadata), 500 (provider init failed), 502 (provider calls failed).
    """

    # Ownership check: the job must exist AND belong to the experiment in the path.
    job = job_service.job_get(job_id)
    if not job or str(job.get("experiment_id")) != str(experimentId):
        raise HTTPException(status_code=404, detail="Job not found")

    # job_data may be stored as a dict or a JSON string; normalize to a dict,
    # falling back to {} on malformed JSON rather than failing the request.
    # NOTE(review): assumes json / JSONDecodeError are imported at module top — confirm.
    job_data = job.get("job_data") or {}
    if not isinstance(job_data, dict):
        try:
            job_data = json.loads(job_data)
        except JSONDecodeError:
            job_data = {}

    provider_id = job_data.get("provider_id")
    cluster_name = job_data.get("cluster_name")
    if not provider_id or not cluster_name:
        raise HTTPException(
            status_code=400, detail="Job does not contain provider metadata (provider_id/cluster_name missing)"
        )

    # Provider lookup is scoped to the caller's team so users can't read other teams' clusters.
    provider = await get_team_provider(session, user_and_team["team_id"], provider_id)
    if not provider:
        raise HTTPException(status_code=404, detail="Provider not found")

    try:
        provider_instance = get_provider_instance(provider)
    except Exception as exc:
        raise HTTPException(status_code=500, detail=f"Failed to initialize provider: {exc}") from exc

    # Determine provider-side job id in the same way as provider_logs
    # Fallback chain: explicit id -> last of recorded id list -> live enumeration.
    provider_job_id: Optional[str | int] = job_data.get("provider_job_id")

    if provider_job_id is None:
        provider_job_ids = job_data.get("provider_job_ids")
        if isinstance(provider_job_ids, list) and provider_job_ids:
            # Most recently recorded provider job id wins.
            provider_job_id = provider_job_ids[-1]

    if provider_job_id is None:
        # Last resort: ask the provider for the cluster's jobs and prefer an
        # active (RUNNING/PENDING) one, else the most recent.
        try:
            provider_jobs = provider_instance.list_jobs(cluster_name)
        except Exception as exc:
            raise HTTPException(status_code=502, detail=f"Failed to enumerate provider jobs: {exc}") from exc

        if provider_jobs:
            running_states = {JobState.RUNNING, JobState.PENDING}
            chosen_job = next((pj for pj in provider_jobs if pj.state in running_states), provider_jobs[-1])
            provider_job_id = chosen_job.job_id

    if provider_job_id is None:
        raise HTTPException(status_code=404, detail="Unable to determine provider job id for this job")

    try:
        raw_logs = provider_instance.get_job_logs(
            cluster_name,
            provider_job_id,
            tail_lines=tail_lines or None,
            follow=False,
        )
    except Exception as exc:
        raise HTTPException(status_code=502, detail=f"Failed to fetch provider logs: {exc}") from exc

    # Providers may return bytes, str, or structured data; coerce to text so the
    # regex-based tunnel parser always receives a string.
    if isinstance(raw_logs, (bytes, bytearray)):
        logs_text = raw_logs.decode("utf-8", errors="replace")
    elif isinstance(raw_logs, str):
        logs_text = raw_logs
    else:
        try:
            logs_text = json.dumps(raw_logs, indent=2)
        except TypeError:
            logs_text = str(raw_logs)

    tunnel_info = get_vscode_tunnel_info(logs_text)

    # Merge parsed tunnel fields (auth_code/tunnel_url/is_ready/status) with
    # the provider coordinates so the client can correlate follow-up calls.
    return {
        **tunnel_info,
        "cluster_name": cluster_name,
        "provider_id": provider_id,
        "provider_job_id": str(provider_job_id),
    }


# Templates


Expand Down
86 changes: 86 additions & 0 deletions api/transformerlab/utils/vscode_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import re
from typing import Optional, Tuple


def parse_vscode_tunnel_logs(logs: str) -> Tuple[Optional[str], Optional[str]]:
    """
    Parse VSCode tunnel logs to extract auth code and tunnel URL.

    Scans the log line by line looking for the device-login prompt
    ("use code XXXX-XXXX") and the tunnel URL ("https://vscode.dev/tunnel/...").

    Args:
        logs: Job logs as string (may be empty)

    Returns:
        Tuple of (auth_code, tunnel_url) - both can be None if not found
    """
    auth_code: Optional[str] = None
    tunnel_url: Optional[str] = None

    # Best-effort on falsy input; callers treat (None, None) as "not ready yet".
    if not logs:
        return None, None

    # splitlines() handles \n, \r\n and \r uniformly (provider logs vary).
    for line in logs.splitlines():
        # Parse auth code: "use code 9669-7DED"
        if auth_code is None and "use code" in line:
            match = re.search(r"use code (\w+-\w+)", line)
            if match:
                auth_code = match.group(1)

        # Parse tunnel URL: "https://vscode.dev/tunnel/maclan/..."
        if tunnel_url is None and "vscode.dev/tunnel" in line:
            # Prefer the full https URL; fall back to a bare tunnel path.
            match = re.search(r"(https://vscode\.dev/tunnel/\S+)", line)
            if match:
                tunnel_url = match.group(1)
            else:
                match = re.search(r"(vscode\.dev/tunnel/\S+)", line)
                if match:
                    tunnel_url = f"https://{match.group(1)}"

        # Both values found - no need to scan the rest of the log.
        if auth_code is not None and tunnel_url is not None:
            break

    return auth_code, tunnel_url


def is_vscode_tunnel_ready(logs: str) -> bool:
    """
    Check if VSCode tunnel is ready based on logs.

    The tunnel is considered ready only when both the auth code and the
    tunnel URL have appeared in the logs.

    Args:
        logs: Job logs as string

    Returns:
        True if tunnel appears to be ready
    """
    try:
        # Ready means neither parsed value is missing.
        return all(part is not None for part in parse_vscode_tunnel_logs(logs))
    except Exception as exc:
        print(f"Error checking VSCode tunnel readiness: {exc}")
        return False


def get_vscode_tunnel_info(logs: str) -> dict:
    """
    Get complete VSCode tunnel information from logs.

    Args:
        logs: Job logs as string

    Returns:
        Dictionary with tunnel information: auth_code, tunnel_url,
        is_ready flag, and a "ready"/"loading" status string.
    """
    auth_code, tunnel_url = parse_vscode_tunnel_logs(logs)

    # Compute readiness once and reuse it for both the flag and the status label.
    ready = auth_code is not None and tunnel_url is not None

    return {
        "auth_code": auth_code,
        "tunnel_url": tunnel_url,
        "is_ready": ready,
        "status": "ready" if ready else "loading",
    }
2 changes: 1 addition & 1 deletion lab-sdk/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "transformerlab"
version = "0.0.53"
version = "0.0.54"
description = "Python SDK for Transformer Lab"
readme = "README.md"
requires-python = ">=3.10"
Expand Down
3 changes: 2 additions & 1 deletion lab-sdk/src/lab/experiment.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,11 @@ def get_jobs(self, type: str = "", status: str = ""):
if job_id in cached_jobs:
# Use cached data for completed jobs
job_json = cached_jobs[job_id]
# Check status of job if not RUNNING, LAUNCHING or NOT_STARTED, then remove from cache
# Check status of job if not RUNNING, LAUNCHING, INTERACTIVE or NOT_STARTED, then remove from cache
if job_json.get("status", "") in [
"RUNNING",
"LAUNCHING",
"INTERACTIVE",
"NOT_STARTED",
]:
old_status = job_json.get("status", "")
Expand Down
Loading
Loading