From 007ac55a16fccaf531b350fedb916d5782f56c3c Mon Sep 17 00:00:00 2001 From: Aryan Bhati Date: Mon, 18 May 2026 02:24:19 +0530 Subject: [PATCH 1/4] infra: add Dockerized development stack and dependency parity fixes --- backend/.dockerignore | 44 ++++++++++++++++++++++ backend/Dockerfile | 55 ++++++++++++++++++++++++++++ backend/requirements.txt | 63 ++++++++++++++++++++++++++------ docker-compose.yml | 79 +++++++++++++++++++++++++--------------- frontend/.dockerignore | 18 +++++++++ frontend/Dockerfile | 20 ++++++++++ frontend/vite.config.ts | 6 ++- package.json | 19 ++++++++++ 8 files changed, 261 insertions(+), 43 deletions(-) create mode 100644 backend/.dockerignore create mode 100644 backend/Dockerfile create mode 100644 frontend/.dockerignore create mode 100644 frontend/Dockerfile create mode 100644 package.json diff --git a/backend/.dockerignore b/backend/.dockerignore new file mode 100644 index 0000000..918ca19 --- /dev/null +++ b/backend/.dockerignore @@ -0,0 +1,44 @@ +# Python caches +__pycache__/ +*.py[cod] +*.pyo +*.pyd + +# Virtual environments +.venv/ +venv/ +env/ +.env/ + +# Local session data (runtime, not source) +sessions/ + +# Test / benchmark outputs +results/ +.pytest_cache/ + +# Build artefacts +*.egg-info/ +dist/ +build/ + +# DB files +*.db +*.sqlite + +# Editor / OS +.idea/ +.vscode/ +.DS_Store +Thumbs.db + +# Git metadata +.git/ +.gitignore + +# Jupyter +*.ipynb +.ipynb_checkpoints/ + +# Docs / non-runtime assets +docs/ diff --git a/backend/Dockerfile b/backend/Dockerfile new file mode 100644 index 0000000..89d8858 --- /dev/null +++ b/backend/Dockerfile @@ -0,0 +1,55 @@ +# Backend Dockerfile +# Python 3.11 slim — small base, fast builds +FROM python:3.11-slim + +# System packages +# git – GitPython binary requirement +# build-essential – compiles C extensions (tree-sitter, scipy) +# curl – lightweight HTTP tool (optional local debugging) +# libgomp1 – OpenMP runtime required by PyTorch / numpy +RUN apt-get update && apt-get install -y --no-install-recommends \ + git \ + build-essential \ + curl \ + libgomp1 \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /app + +#Python dependencies (layer-cache order) +COPY requirements.txt . + +# 1. Upgrade pip +RUN pip install --no-cache-dir --upgrade pip + +# 1b. Pin NumPy to 1.x BEFORE any compiled extension is installed. +# torch 2.2.x and PyG wheels are built against NumPy 1.x ABI; +# installing numpy first prevents any later dep from upgrading to 2.x. +RUN pip install --no-cache-dir "numpy==1.26.4" + +# 2. CPU-only PyTorch (avoids the ~2 GB CUDA wheel) +RUN pip install --no-cache-dir \ + torch==2.2.2 torchvision torchaudio \ + --index-url https://download.pytorch.org/whl/cpu + +# 3. PyTorch Geometric + required sparse/scatter extensions (CPU wheels) +# Must be installed AFTER torch and AGAINST the same torch version. +RUN pip install --no-cache-dir torch_geometric==2.5.3 +RUN pip install --no-cache-dir \ + torch_scatter \ + torch_sparse \ + torch_cluster \ + torch_spline_conv \ + -f https://data.pyg.org/whl/torch-2.2.2+cpu.html + +# 4. Everything else from requirements.txt +RUN pip install --no-cache-dir -r requirements.txt + +#Application source +COPY . . + +EXPOSE 8000 + +# Uvicorn: bind to 0.0.0.0 so Docker port-mapping works. +# No --reload in production image; add a volume mount for dev hot-reload. +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/backend/requirements.txt b/backend/requirements.txt index e223c2c..7b729d8 100644 --- a/backend/requirements.txt +++ b/backend/requirements.txt @@ -1,27 +1,66 @@ + +# Atlas Backend – complete runtime requirements + +# Web framework & server fastapi==0.115.0 uvicorn==0.30.6 python-multipart==0.0.9 -gitpython==3.1.43 + +#Data validation pydantic==2.11.2 + +# HTTP client httpx==0.28.1 + +#Git integration +gitpython==3.1.43 + +#Task queue celery==5.3.4 redis==5.0.1 +#Code parsing (tree-sitter) tree-sitter==0.21.3 tree-sitter-python==0.21.0 tree-sitter-javascript==0.21.4 tree-sitter-typescript==0.21.2 + +#Graph analysis networkx==3.3 -# NOTE: Install PyTorch with CUDA 11.8 support FIRST (before the rest): -# pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118 -# Then install torch-geometric: -# pip install torch_geometric -# pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv \ -# -f https://data.pyg.org/whl/torch-2.1.0+cu118.html -# Then install the remaining deps normally: -# pip install -r requirements.txt -numpy>=1.26.0 -scipy>=1.12.0 -datasets>=2.18.0 # HuggingFace datasets — CodeSearchNet loader +# Scientific Python core +# Hard-pinned to 1.x: torch 2.2.x and PyG are compiled against NumPy 1.x ABI. +# NumPy 2.x breaks _ARRAY_API and causes "compiled using NumPy 1.x" crash. +numpy==1.26.4 +scipy>=1.12.0,<2 + +#Vector database client +# Required by core/retrieval/qdrant_store.py and training/umap_viz.py +qdrant-client>=1.9.0 + +#BM25 lexical retrieval +# Required by core/retrieval/bm25_index.py +rank_bm25>=0.2.2 + +# PyTorch (CPU) +# Installed explicitly in Dockerfile with CPU index URL. +# Listed here for reference / local venv installs. +# torch torchvision torchaudio (see Dockerfile) + +#PyTorch Geometric (GNN) +# Required by: core/model/function_encoder.py, core/model/dataset.py, +# core/retrieval/agentic_retrieval.py, core/drift/drift_detector.py, +# training/index_repo.py, eval/eval_codesearcheval.py +# Installed from the PyG CPU wheel index in the Dockerfile. +# torch_geometric (see Dockerfile) + +#HuggingFace ecosystem +datasets>=2.18.0 # CodeSearchNet loader huggingface-hub>=0.22.0 # required by datasets + +# UMAP visualisation +# Required by training/umap_viz.py +umap-learn>=0.5.6 + +#Plotting (training/umap_viz.py) +matplotlib>=3.8.0 diff --git a/docker-compose.yml b/docker-compose.yml index 3861e49..9e0344a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,73 +1,92 @@ -version: '3.8' - services: - # ── Atlas Backend (FastAPI + Celery beat) ── + #Atlas Backend (FastAPI) backend: - build: ./backend + build: + context: ./backend + dockerfile: Dockerfile ports: - "8000:8000" environment: - REDIS_URL=redis://redis:6379/0 - QDRANT_HOST=qdrant - QDRANT_PORT=6333 + - SESSIONS_DIR=/app/sessions depends_on: redis: - condition: service_started - qdrant: condition: service_healthy + qdrant: + condition: service_started volumes: - ./backend/training:/app/training - ./sessions:/app/sessions restart: unless-stopped - - # ── Atlas Frontend (React + Vite) ── + healthcheck: + test: + - "CMD" + - "python" + - "-c" + - "import urllib.request; urllib.request.urlopen('http://localhost:8000/api/health')" + interval: 15s + timeout: 10s + retries: 6 + start_period: 40s frontend: - build: ./frontend + build: + context: ./frontend + dockerfile: Dockerfile ports: - "5173:5173" depends_on: - - backend - restart: unless-stopped - - # ── Redis (Celery broker + cache) ── - redis: - image: redis:7-alpine - ports: - - "6379:6379" - volumes: - - redis_data:/data + backend: + condition: service_healthy restart: unless-stopped - - # ── Celery Worker ── celery_worker: - build: ./backend + build: + context: ./backend + dockerfile: Dockerfile command: celery -A workers.celery_app worker --loglevel=info environment: - REDIS_URL=redis://redis:6379/0 - QDRANT_HOST=qdrant - QDRANT_PORT=6333 + - SESSIONS_DIR=/app/sessions depends_on: - - redis - - qdrant + redis: + condition: service_healthy + qdrant: + condition: service_started volumes: - ./backend/training:/app/training - ./sessions:/app/sessions restart: unless-stopped - # ── Qdrant Vector Database ── + redis: + image: redis:7-alpine + ports: + - "6379:6379" + volumes: + - redis_data:/data + restart: unless-stopped + healthcheck: + test: ["CMD", "redis-cli", "ping"] + interval: 10s + timeout: 3s + retries: 5 + qdrant: image: qdrant/qdrant:latest ports: - - "6333:6333" # REST API - - "6334:6334" # gRPC + - "6333:6333" + - "6334:6334" volumes: - qdrant_data:/qdrant/storage + restart: unless-stopped healthcheck: - test: ["CMD", "curl", "-f", "http://localhost:6333/healthz"] + test: ["CMD-SHELL", "wget -qO- http://localhost:6333/healthz || exit 1"] interval: 10s timeout: 5s - retries: 5 - restart: unless-stopped + retries: 10 + start_period: 10s volumes: qdrant_data: diff --git a/frontend/.dockerignore b/frontend/.dockerignore new file mode 100644 index 0000000..936bda3 --- /dev/null +++ b/frontend/.dockerignore @@ -0,0 +1,18 @@ +# Node modules (rebuilt inside the image) +node_modules/ + +# Vite production build output +dist/ + +# Editor / OS +.idea/ +.vscode/ +.DS_Store +Thumbs.db + +# Git metadata +.git/ +.gitignore + +# TypeScript build info +*.tsbuildinfo diff --git a/frontend/Dockerfile b/frontend/Dockerfile new file mode 100644 index 0000000..f75985d --- /dev/null +++ b/frontend/Dockerfile @@ -0,0 +1,20 @@ +#Frontend Dockerfile +# Node 20 Alpine — lightweight, fast +FROM node:20-alpine + +WORKDIR /app + +# Copy lock files first for layer-cache efficiency +COPY package.json package-lock.json ./ + +# Install exact versions from lock file +RUN npm ci + +# Copy the rest of the source +COPY . . + +EXPOSE 5173 + +# host (0.0.0.0) and port (5173) are declared in vite.config.ts, +# so no extra CLI flags are needed here. +CMD ["npm", "run", "dev"] diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts index d6191c7..17ea1df 100644 --- a/frontend/vite.config.ts +++ b/frontend/vite.config.ts @@ -5,7 +5,11 @@ export default defineConfig({ plugins: [react()], server: { port: 5173, - open: true, + host: "0.0.0.0", + open: false, + hmr: { + clientPort: 5173, + }, }, worker: { format: "es", diff --git a/package.json b/package.json new file mode 100644 index 0000000..ecbc222 --- /dev/null +++ b/package.json @@ -0,0 +1,19 @@ +{ + "name": "atlas-codebase_intelligence_system", + "version": "1.0.0", + "description": "> AI-powered codebase analysis that understands what code **DOES**, not just what it says. GATv2 graph neural network + behavioral search + MCP integration for Claude Code.", + "main": "index.js", + "directories": { + "doc": "docs" + }, + "dependencies": { + "strip-comments": "^2.0.1" + }, + "devDependencies": {}, + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "keywords": [], + "author": "", + "license": "ISC" +} From 8cd11ee3318f16d3f27190081859d014fd9371de Mon Sep 17 00:00:00 2001 From: Aryan Bhati Date: Mon, 18 May 2026 02:24:41 +0530 Subject: [PATCH 2/4] backend: harden ingest pipeline, drift evaluation, and async analysis flow --- backend/api/mcp_server.py | 4 +- backend/api/routes/analyze.py | 50 +++++++++- backend/api/routes/debug.py | 84 ++++++++++++++++ backend/api/routes/ingest.py | 130 ++++++++++++++++++++++--- backend/api/routes/mcp_status.py | 1 - backend/config.py | 13 ++- backend/core/drift/drift_detector.py | 1 - backend/core/ingest/git_ingest.py | 74 +++++++++++--- backend/core/model/function_encoder.py | 14 --- backend/core/pipeline.py | 8 +- backend/eval/debug_drift_analysis.py | 40 -------- backend/eval/eval_drift.py | 7 +- backend/eval/run_all_benchmarks.py | 14 --- backend/workers/celery_app.py | 9 +- backend/workers/tasks.py | 44 +++++++++ 15 files changed, 379 insertions(+), 114 deletions(-) create mode 100644 backend/api/routes/debug.py diff --git a/backend/api/mcp_server.py b/backend/api/mcp_server.py index 1fe8870..8e7666a 100644 --- a/backend/api/mcp_server.py +++ b/backend/api/mcp_server.py @@ -30,7 +30,7 @@ logging.basicConfig( level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", - stream=sys.stderr, # MCP uses stdout for the protocol; log to stderr + stream=sys.stderr, ) mcp = FastMCP("Atlas — Behavioral Code Intelligence") @@ -337,7 +337,7 @@ async def get_hot_paths(top_k: int = 10) -> str: fan_in = graph.in_degree(node_id) fan_out = graph.out_degree(node_id) complexity = int(node_data.get("complexity", 0)) - # Impact: heavy fan-in + high complexity = highest risk + impact = fan_in * 2 + complexity + fan_out * 0.5 scored.append( { diff --git a/backend/api/routes/analyze.py b/backend/api/routes/analyze.py index 5cb25f9..f10a0dc 100644 --- a/backend/api/routes/analyze.py +++ b/backend/api/routes/analyze.py @@ -1,6 +1,7 @@ import asyncio import json import logging +import time import threading from pathlib import Path from typing import Protocol, cast @@ -21,17 +22,64 @@ router = APIRouter(prefix="/analyze", tags=["Analyze"]) +# Sentinel files written by the ingest route — same constants as tasks.py +_INGEST_READY = ".ingest_ready" +_INGEST_FAILED = ".ingest_failed" +_READY_WAIT_SECS = 30 +_READY_POLL_INTERVAL = 1 + class CeleryTask(Protocol): def delay(self, *args: object, **kwargs: object) -> object: ... + +def _wait_for_ingest_sentinel(session_id: str, session_dir: Path) -> bool: + """ + Block (in a thread) until .ingest_ready appears or .ingest_failed is found. + Returns True if ready, False if failed/timed-out. + """ + log = logging.getLogger(f"codebase-intel.thread.{session_id[:8]}") + ready_file = session_dir / _INGEST_READY + failed_file = session_dir / _INGEST_FAILED + + log.info(f"[INGEST_WAIT] Thread fallback waiting for sentinel {ready_file}") + for i in range(_READY_WAIT_SECS): + if failed_file.exists(): + reason = failed_file.read_text(encoding="utf-8").strip() + log.error(f"[INGEST_FAILED] Ingestion failed before thread pipeline: {reason}") + progress_store.update_sync( + session_id, + status="error", + error_message=f"Ingestion failed before analysis could start: {reason}", + ) + return False + if ready_file.exists(): + log.info(f"[INGEST_READY] Sentinel found after {i}s — starting thread pipeline") + return True + time.sleep(_READY_POLL_INTERVAL) + + log.error(f"[INGEST_TIMEOUT] Repo not ready after {_READY_WAIT_SECS}s (thread)") + progress_store.update_sync( + session_id, + status="error", + error_message=( + f"Ingestion timed out — repository was not ready after " + f"{_READY_WAIT_SECS}s. Please re-ingest the repository." + ), + ) + return False + + def _run_pipeline_in_thread(session_id: str, session_dir: Path) -> None: from core.pipeline import PipelineError, run_analysis_pipeline log = logging.getLogger(f"codebase-intel.thread.{session_id[:8]}") log.info("Starting pipeline in thread fallback mode") + if not _wait_for_ingest_sentinel(session_id, session_dir): + return + try: asyncio.run(run_analysis_pipeline(session_id, session_dir)) @@ -52,7 +100,7 @@ def _run_pipeline_in_thread(session_id: str, session_dir: Path) -> None: status="error", error_message="Out of memory. Try a smaller repository.", ) - except Exception as exc: + except Exception as exc: log.error(f"Unexpected error: {exc}", exc_info=True) progress_store.update_sync( session_id, diff --git a/backend/api/routes/debug.py b/backend/api/routes/debug.py new file mode 100644 index 0000000..4349a2e --- /dev/null +++ b/backend/api/routes/debug.py @@ -0,0 +1,84 @@ +""" +GET /api/debug/session/{session_id} + +Returns diagnostic information about a session directory so operators can +verify that: + 1. The session directory exists and is on the correct volume mount. + 2. The repo/ sub-directory was populated by git clone / ZIP extract. + 3. The .ingest_ready sentinel was written by the ingest route. + 4. The .ingest_failed sentinel was NOT written (i.e. no ingest error). + +This endpoint is intentionally read-only and has no side-effects. +""" +import logging +from pathlib import Path + +from fastapi import APIRouter, HTTPException +from config import SESSIONS_DIR + +logger = logging.getLogger("codebase-intel.routes.debug") + +router = APIRouter(prefix="/debug", tags=["Debug"]) + + +@router.get("/session/{session_id}") +async def debug_session(session_id: str): + """ + Returns a JSON snapshot of the session directory state. + Useful for diagnosing path / volume-mount / sentinel issues in Docker. + """ + session_dir: Path = SESSIONS_DIR / session_id + + session_exists = session_dir.exists() and session_dir.is_dir() + if not session_exists: + + return { + "session_id": session_id, + "sessions_dir": str(SESSIONS_DIR.resolve()), + "session_exists": False, + "repo_exists": False, + "repo_file_count": 0, + "ready_exists": False, + "failed_exists": False, + "failed_reason": None, + "repo_absolute_path": str((session_dir / "repo").resolve()), + "ready_absolute_path": str((session_dir / ".ingest_ready").resolve()), + "failed_absolute_path": str((session_dir / ".ingest_failed").resolve()), + "meta_exists": False, + "file_entries_exist": False, + } + + repo_dir = session_dir / "repo" + ready_file = session_dir / ".ingest_ready" + failed_file = session_dir / ".ingest_failed" + + repo_exists = repo_dir.exists() and repo_dir.is_dir() + repo_file_count = 0 + if repo_exists: + try: + repo_file_count = sum(1 for _ in repo_dir.rglob("*") if _.is_file()) + except Exception: + repo_file_count = -1 + + failed_reason: str | None = None + if failed_file.exists(): + try: + failed_reason = failed_file.read_text(encoding="utf-8").strip() + except Exception: + failed_reason = "" + + return { + "session_id": session_id, + "sessions_dir": str(SESSIONS_DIR.resolve()), + "session_exists": True, + "repo_exists": repo_exists, + "repo_file_count": repo_file_count, + "ready_exists": ready_file.exists(), + "failed_exists": failed_file.exists(), + "failed_reason": failed_reason, + "repo_absolute_path": str(repo_dir.resolve()), + "ready_absolute_path": str(ready_file.resolve()), + "failed_absolute_path": str(failed_file.resolve()), + "meta_exists": (session_dir / "meta.json").exists(), + "file_entries_exist": (session_dir / "file_entries.json").exists(), + } diff --git a/backend/api/routes/ingest.py b/backend/api/routes/ingest.py index 0f0bc2c..0039296 100644 --- a/backend/api/routes/ingest.py +++ b/backend/api/routes/ingest.py @@ -1,6 +1,8 @@ import json import logging +import time from datetime import datetime, timezone +from pathlib import Path from fastapi import APIRouter, File, HTTPException, UploadFile @@ -11,25 +13,80 @@ logger = logging.getLogger("codebase-intel.routes.ingest") +_INGEST_READY = ".ingest_ready" +_INGEST_FAILED = ".ingest_failed" + + +def _mark_ready(session_dir: Path) -> None: + """Write the readiness sentinel so the Celery task can proceed.""" + ready_path = session_dir / _INGEST_READY + ready_path.write_text(str(time.time()), encoding="utf-8") + logger.info( + "[INGEST_READY] Sentinel written: %s (exists=%s)", + ready_path.resolve(), + ready_path.exists(), + ) + + +def _mark_failed(session_dir: Path, reason: str) -> None: + """Write the failure sentinel so the Celery task bails immediately.""" + try: + failed_path = session_dir / _INGEST_FAILED + failed_path.write_text(reason[:300], encoding="utf-8") + logger.error( + "[INGEST_FAILED] Failure sentinel written: %s reason=%r", + failed_path.resolve(), + reason[:120], + ) + except Exception as write_exc: + logger.error("[INGEST_FAILED] Could not write failure sentinel: %s", write_exc) + + router = APIRouter(prefix="/ingest", tags=["Ingest"]) -def _save_session_meta(session_dir, repo_name: str, files, source_type: str) -> None: + +def _save_session_meta(session_dir: Path, repo_name: str, files: list, source_type: str) -> None: meta = {"repo_name": repo_name, "source_type": source_type} (session_dir / "meta.json").write_text(json.dumps(meta), encoding="utf-8") - entries = [f.model_dump() for f in files] (session_dir / "file_entries.json").write_text(json.dumps(entries), encoding="utf-8") + @router.post("/github", response_model=IngestResponse) async def ingest_github(request: GitHubIngestRequest): session_id, session_dir = create_session() - logger.info(f"[{session_id}] GitHub ingest requested: {request.url}") + + logger.info( + "[INGEST_START] [%s] GitHub ingest requested: %s | session_dir=%s", + session_id, request.url, session_dir.resolve(), + ) + + repo_dir = session_dir / "repo" try: + + logger.info( + "[INGEST_CLONE_START] [%s] Calling clone_repository_async | repo_dir=%s", + session_id, repo_dir.resolve(), + ) + repo_name, files = await clone_repository_async(request.url, session_dir) + file_count_on_disk = sum(1 for _ in repo_dir.rglob("*") if _.is_file()) + logger.info( + "[INGEST_CLONE_DONE] [%s] Clone finished: %s | " + "files_from_scan=%d files_on_disk=%d repo_dir=%s", + session_id, repo_name, len(files), file_count_on_disk, repo_dir.resolve(), + ) + + if not repo_dir.exists() or not any(repo_dir.iterdir()): + raise RuntimeError( + f"Clone reported success but repo_dir is empty: {repo_dir.resolve()}" + ) + except ValueError as exc: - logger.warning(f"[{session_id}] Invalid URL: {exc}") + logger.warning("[INGEST_FAILED] [%s] Invalid URL: %s", session_id, exc) + _mark_failed(session_dir, f"INVALID_URL: {exc}") raise HTTPException( status_code=400, detail={ @@ -40,7 +97,11 @@ async def ingest_github(request: GitHubIngestRequest): ) except GitIngestError as exc: - logger.error(f"[{session_id}] Clone error [{exc.error_code}]: {exc}") + logger.error( + "[INGEST_FAILED] [%s] Clone error [%s]: %s", + session_id, exc.error_code, exc, + ) + _mark_failed(session_dir, f"{exc.error_code}: {exc}") status = 404 if exc.error_code == "REPO_NOT_FOUND" else 500 raise HTTPException( status_code=status, @@ -51,8 +112,12 @@ async def ingest_github(request: GitHubIngestRequest): }, ) - except Exception as exc: - logger.error(f"[{session_id}] Unexpected ingest error: {exc}", exc_info=True) + except Exception as exc: + logger.error( + "[INGEST_FAILED] [%s] Unexpected ingest error: %s", + session_id, exc, exc_info=True, + ) + _mark_failed(session_dir, f"INGEST_FAILED: {exc}") raise HTTPException( status_code=500, detail={ @@ -61,9 +126,22 @@ async def ingest_github(request: GitHubIngestRequest): "session_id": session_id, }, ) - + _save_session_meta(session_dir, repo_name, files, "github") - logger.info(f"[{session_id}] Ingested {len(files)} files from {repo_name}") + logger.info( + "[INGEST_SCAN_DONE] [%s] Metadata saved | " + "file_entries=%d session_dir=%s", + session_id, len(files), session_dir.resolve(), + ) + + _mark_ready(session_dir) + logger.info( + "[INGEST_READY] [%s] Ingested %d files from %s | " + "sentinel=%s repo_dir=%s", + session_id, len(files), repo_name, + (session_dir / _INGEST_READY).resolve(), + repo_dir.resolve(), + ) return IngestResponse( session_id=session_id, @@ -74,6 +152,7 @@ async def ingest_github(request: GitHubIngestRequest): source_type="github", ) + @router.post("/upload", response_model=IngestResponse) async def ingest_upload(file: UploadFile = File(...)): if not file.filename or not file.filename.lower().endswith(".zip"): @@ -87,7 +166,10 @@ async def ingest_upload(file: UploadFile = File(...)): session_id, session_dir = create_session() zip_path = session_dir / "upload.zip" - logger.info(f"[{session_id}] ZIP upload started: {file.filename}") + logger.info( + "[INGEST_START] [%s] ZIP upload started: %s | session_dir=%s", + session_id, file.filename, session_dir.resolve(), + ) try: with open(zip_path, "wb") as fh: @@ -97,7 +179,8 @@ async def ingest_upload(file: UploadFile = File(...)): break fh.write(chunk) except OSError as exc: - logger.error(f"[{session_id}] Failed to write upload: {exc}") + logger.error("[INGEST_FAILED] [%s] Failed to write upload: %s", session_id, exc) + _mark_failed(session_dir, f"UPLOAD_WRITE_ERROR: {exc}") raise HTTPException( status_code=500, detail={ @@ -108,10 +191,19 @@ async def ingest_upload(file: UploadFile = File(...)): ) try: + logger.info("[INGEST_CLONE_START] [%s] Starting ZIP extraction", session_id) repo_name, files = await extract_zip_async(zip_path, session_dir) + repo_dir = session_dir / "repo" + file_count_on_disk = sum(1 for _ in repo_dir.rglob("*") if _.is_file()) + logger.info( + "[INGEST_CLONE_DONE] [%s] ZIP extracted: %s | " + "files_from_scan=%d files_on_disk=%d", + session_id, repo_name, len(files), file_count_on_disk, + ) except ValueError as exc: - logger.warning(f"[{session_id}] ZIP validation failed: {exc}") + logger.warning("[INGEST_FAILED] [%s] ZIP validation failed: %s", session_id, exc) + _mark_failed(session_dir, f"ZIP_INVALID: {exc}") raise HTTPException( status_code=400, detail={ @@ -121,8 +213,9 @@ async def ingest_upload(file: UploadFile = File(...)): }, ) - except Exception as exc: - logger.error(f"[{session_id}] ZIP extraction failed: {exc}", exc_info=True) + except Exception as exc: + logger.error("[INGEST_FAILED] [%s] ZIP extraction failed: %s", session_id, exc, exc_info=True) + _mark_failed(session_dir, f"ZIP_EXTRACT_FAILED: {exc}") raise HTTPException( status_code=500, detail={ @@ -133,7 +226,14 @@ async def ingest_upload(file: UploadFile = File(...)): ) _save_session_meta(session_dir, repo_name, files, "zip") - logger.info(f"[{session_id}] Extracted {len(files)} files from {repo_name}") + logger.info("[INGEST_SCAN_DONE] [%s] Metadata saved for %d files", session_id, len(files)) + + _mark_ready(session_dir) + logger.info( + "[INGEST_READY] [%s] Extracted %d files from %s | sentinel=%s", + session_id, len(files), repo_name, + (session_dir / _INGEST_READY).resolve(), + ) return IngestResponse( session_id=session_id, diff --git a/backend/api/routes/mcp_status.py b/backend/api/routes/mcp_status.py index a71acc0..b1fa979 100644 --- a/backend/api/routes/mcp_status.py +++ b/backend/api/routes/mcp_status.py @@ -37,7 +37,6 @@ async def mcp_status() -> dict: - model_loaded : True if the model checkpoint file exists on disk. - bm25_loaded : True if the BM25 index file exists on disk. """ - #Qdrant health check qdrant_connected = False indexed_functions: int = 0 collection_name = "atlas_functions" diff --git a/backend/config.py b/backend/config.py index 5e72cfb..83ffc7e 100644 --- a/backend/config.py +++ b/backend/config.py @@ -1,16 +1,22 @@ import logging +import os from pathlib import Path -BASE_DIR = Path(__file__).resolve().parent # backend/ -PROJECT_ROOT = BASE_DIR.parent # Atlas-Codebase_Intelligence_System/ +BASE_DIR = Path(__file__).resolve().parent # backend/ (or /app in Docker) +PROJECT_ROOT = BASE_DIR.parent # Atlas-Codebase_Intelligence_System/ (or / in Docker) -SESSIONS_DIR = PROJECT_ROOT / "sessions" +_sessions_env = os.getenv("SESSIONS_DIR", "") +if _sessions_env: + SESSIONS_DIR = Path(_sessions_env) +else: + SESSIONS_DIR = PROJECT_ROOT / "sessions" SESSIONS_DIR.mkdir(parents=True, exist_ok=True) _cfg_logger = logging.getLogger("atlas.config") _cfg_logger.info("Project root : %s", PROJECT_ROOT) _cfg_logger.info("Sessions root: %s", SESSIONS_DIR) + IGNORED_DIRS: set[str] = { "node_modules", ".git", "__pycache__", ".venv", "venv", "env", ".env", "dist", "build", ".next", ".nuxt", @@ -51,6 +57,7 @@ CORS_ORIGINS: list[str] = [ "http://localhost:5173", "http://127.0.0.1:5173", + "http://frontend:5173", ] SESSION_LIFETIME_HOURS: int = 4 diff --git a/backend/core/drift/drift_detector.py b/backend/core/drift/drift_detector.py index a90be04..e9d3678 100644 --- a/backend/core/drift/drift_detector.py +++ b/backend/core/drift/drift_detector.py @@ -48,7 +48,6 @@ def __init__(self, encoder, vocab, device: str = "cpu"): self.device = device self.encoder.eval() self.encoder.to(device) - # Match training: window_size=5, max_seq_len=64 self._max_seq_len = 64 self._window_size = 5 diff --git a/backend/core/ingest/git_ingest.py b/backend/core/ingest/git_ingest.py index 3c0395a..bf00ffa 100644 --- a/backend/core/ingest/git_ingest.py +++ b/backend/core/ingest/git_ingest.py @@ -1,5 +1,6 @@ import asyncio import logging +import shutil import subprocess from pathlib import Path @@ -8,12 +9,14 @@ logger = logging.getLogger("codebase-intel.ingest.git") + class GitIngestError(Exception): def __init__(self, message: str, error_code: str): super().__init__(message) self.error_code = error_code + def validate_github_url(url: str) -> bool: url = url.strip().lower() valid_prefixes = ( @@ -26,6 +29,7 @@ def validate_github_url(url: str) -> bool: path_parts = url.split("github.com/")[-1].strip("/").split("/") return len(path_parts) >= 2 and all(part for part in path_parts[:2]) + def extract_repo_name(url: str) -> str: url = url.rstrip("/") if url.endswith(".git"): @@ -35,22 +39,41 @@ def extract_repo_name(url: str) -> str: return f"{parts[-2]}/{parts[-1]}" return parts[-1] if parts else "unknown" -def _do_clone_sync(url: str, repo_dir: Path, depth: int = 100) -> None: + +def _do_clone_sync(url: str, repo_dir: Path, depth: int = 1) -> None: + abs_repo_dir = repo_dir.resolve() + + target_exists = abs_repo_dir.exists() + target_contents = list(abs_repo_dir.iterdir()) if target_exists else [] + logger.info( + "[CLONE_PROCESS_START] url=%s target=%s " + "target_exists=%s target_file_count=%d", + url, abs_repo_dir, target_exists, len(target_contents), + ) + + if target_exists: + logger.info( + "[CLONE_PROCESS_START] Removing existing target dir before clone: %s", + abs_repo_dir, + ) + shutil.rmtree(abs_repo_dir) + cmd = [ "git", "clone", f"--depth={depth}", "--single-branch", "--no-tags", url, - str(repo_dir), + str(abs_repo_dir), ] - logger.info(f"Cloning {url} (depth={depth})") + logger.info("[CLONE_PROCESS_START] cmd=%s", " ".join(cmd)) + proc = None try: - result = subprocess.run( + proc = subprocess.run( cmd, - capture_output=True, - text=True, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, timeout=120, ) except subprocess.TimeoutExpired: @@ -66,9 +89,17 @@ def _do_clone_sync(url: str, repo_dir: Path, depth: int = 100) -> None: error_code="GIT_NOT_INSTALLED", ) - if result.returncode != 0: - stderr = result.stderr.strip() - lower = stderr.lower() + stdout_text = proc.stdout.decode("utf-8", errors="replace").strip() + stderr_text = proc.stderr.decode("utf-8", errors="replace").strip() + + logger.info("[CLONE_PROCESS_EXIT] returncode=%d", proc.returncode) + if stdout_text: + logger.info("[CLONE_STDOUT] %s", stdout_text[:2000]) + if stderr_text: + logger.info("[CLONE_STDERR] %s", stderr_text[:2000]) + + if proc.returncode != 0: + lower = stderr_text.lower() if "not found" in lower or "repository not found" in lower or "404" in lower: raise GitIngestError( f"Repository not found: {url}. " @@ -87,11 +118,31 @@ def _do_clone_sync(url: str, repo_dir: Path, depth: int = 100) -> None: error_code="NETWORK_ERROR", ) raise GitIngestError( - f"Git clone failed (exit {result.returncode}): {stderr[:300]}", + f"Git clone failed (exit {proc.returncode}): {stderr_text[:300]}", error_code="CLONE_FAILED", ) - logger.info(f"Clone complete → {repo_dir}") + # Verify the clone actually produced files before returning. + if not abs_repo_dir.exists(): + raise GitIngestError( + f"Clone exit 0 but target dir missing: {abs_repo_dir}", + error_code="CLONE_EMPTY", + ) + + cloned_files = [p for p in abs_repo_dir.rglob("*") if p.is_file()] + file_count = len(cloned_files) + logger.info( + "[CLONE_PROCESS_EXIT] Clone verified: %d files in %s", + file_count, abs_repo_dir, + ) + + if file_count == 0: + raise GitIngestError( + f"Clone exit 0 but repository is empty at {abs_repo_dir}. " + "The repository may contain no files.", + error_code="CLONE_EMPTY", + ) + async def clone_repository_async( url: str, @@ -112,6 +163,7 @@ async def clone_repository_async( files = await asyncio.to_thread(scan_directory, repo_dir) return repo_name, files + def clone_repository(url: str, session_dir: Path) -> tuple[str, list[FileEntry]]: url = url.strip() if not validate_github_url(url): diff --git a/backend/core/model/function_encoder.py b/backend/core/model/function_encoder.py index 7696711..0095d2e 100644 --- a/backend/core/model/function_encoder.py +++ b/backend/core/model/function_encoder.py @@ -84,10 +84,6 @@ def __init__( self.use_checkpointing: bool = True - - - - def forward( self, x: torch.Tensor, @@ -126,10 +122,6 @@ def forward( h = self.norm(h) return F.normalize(h, dim=-1) - - - - def _gat_forward( self, h: torch.Tensor, @@ -142,12 +134,6 @@ def _gat_forward( h = self.gat2(h, edge_index, edge_attr=edge_attr) return h - - - - - - def infonce_loss( z_a: torch.Tensor, z_b: torch.Tensor, diff --git a/backend/core/pipeline.py b/backend/core/pipeline.py index 429ff25..36684fd 100644 --- a/backend/core/pipeline.py +++ b/backend/core/pipeline.py @@ -33,9 +33,11 @@ def _check_timeout() -> None: ) repo_dir = session_dir / "repo" - if not repo_dir.exists(): + + repo_is_empty = not repo_dir.exists() or not any(repo_dir.iterdir()) + if repo_is_empty: raise PipelineError( - f"Repository directory not found for session {session_id}. " + f"Repository directory not found or is empty for session {session_id}. " "Ensure ingestion completed successfully before starting analysis.", error_code="REPO_NOT_FOUND", ) @@ -91,7 +93,6 @@ def _on_progress(current: int, total_: int) -> None: (session_dir / "graph.json").write_text, graph_json, "utf-8" ) - # Free large intermediate results before function-graph stage parsed_count = len(parsed) del parsed, parsed_json, graph_data, graph_json @@ -136,7 +137,6 @@ def _on_progress(current: int, total_: int) -> None: FusionEngine().fuse, fn_graph, coedit_data ) - # Serialize & save sequentially to avoid holding multiple copies fn_graph_json = await asyncio.to_thread( lambda: json.dumps(graph_to_json(fn_graph), ensure_ascii=False) ) diff --git a/backend/eval/debug_drift_analysis.py b/backend/eval/debug_drift_analysis.py index f477fde..9e1b864 100644 --- a/backend/eval/debug_drift_analysis.py +++ b/backend/eval/debug_drift_analysis.py @@ -41,7 +41,6 @@ ) logger = logging.getLogger("debug_drift") -# ── re-use helpers from eval_drift ───────────────────────────────────────── from eval.eval_drift import ( get_commits_with_python_changes, get_changed_line_ranges, @@ -54,10 +53,6 @@ ) -# ─────────────────────────────────────────────────────────────────────────── -# Statistical helpers -# ─────────────────────────────────────────────────────────────────────────── - def _percentile(sorted_values: list[float], p: float) -> float: if not sorted_values: return 0.0 @@ -115,11 +110,6 @@ def _threshold_sweep(distances: list[float], ground_truth_ids: set[str], def _find_optimal_threshold(sweep: list[dict]) -> dict: return max(sweep, key=lambda r: r["f1"]) - -# ─────────────────────────────────────────────────────────────────────────── -# Core analysis per commit -# ─────────────────────────────────────────────────────────────────────────── - def analyze_commit( detector, parser, @@ -136,12 +126,10 @@ def analyze_commit( logger.info(f"ANALYZING COMMIT {commit_hash}") logger.info(f"{'='*60}") - # ── 1. Git diff ────────────────────────────────────────────────────── changed_ranges = get_changed_line_ranges(repo_path, commit_hash) logger.info(f" Git diff: {len(changed_ranges)} changed files.") logger.info(f" Diff keys (sample): {list(changed_ranges.keys())[:5]}") - # ── 2. Parse parent and current snapshots ──────────────────────────── logger.info(f" Checking out {commit_hash}^1 (parent)…") checkout_commit(repo_path, f"{commit_hash}^1") old_nodes = parser.parse_repository(repo_path) @@ -157,12 +145,10 @@ def analyze_commit( return {"commit": commit_hash, "error": "zero_functions", "old_count": len(old_nodes), "new_count": len(new_nodes)} - # ── 3. Path normalization ───────────────────────────────────────────── changed_ranges_norm = _normalize_diff_paths(changed_ranges, new_nodes, commit_hash) logger.info( f" After path normalization: {len(changed_ranges_norm)} effective changed-file entries.") - # ── 4. Function ID intersection analysis ───────────────────────────── old_ids = {n.id for n in old_nodes} new_ids = {n.id for n in new_nodes} intersection = old_ids & new_ids @@ -183,7 +169,6 @@ def analyze_commit( "Possible cause: file renamed, class refactor, or ID includes line number." ) - # ── 5. Ground truth ─────────────────────────────────────────────────── def _get_gt(nodes, ranges): changed_ids: set[str] = set() for node in nodes: @@ -211,7 +196,6 @@ def _get_gt(nodes, ranges): logger.warning(f" Sample new nodes: {sample_nodes}") logger.warning(f" Sample changed ranges: {sample_ranges}") - # ── 6. Embed + compute cosine distances ────────────────────────────── logger.info(f" Embedding {len(intersection)} matched functions…") if not intersection: logger.warning(" Cannot compute cosine distances: intersection is empty.") @@ -224,7 +208,6 @@ def _get_gt(nodes, ranges): "error": "empty_intersection", } - # Build subsets to embed only matched functions old_matched = [n for n in old_nodes if n.id in intersection] new_matched = [n for n in new_nodes if n.id in intersection] @@ -251,14 +234,12 @@ def _get_gt(nodes, ranges): f" Computed {len(distances)} cosine distances for matched function pairs." ) - # ── 7. Distribution statistics ──────────────────────────────────────── stats = _dist_stats(distances) logger.info(f" Cosine distance distribution:") logger.info(f" min={stats['min']} max={stats['max']} mean={stats['mean']}") logger.info(f" p25={stats['p25']} p50={stats['p50']} p75={stats['p75']}") logger.info(f" p90={stats['p90']} p95={stats['p95']}") - # ── 8. Atlas predictions at given threshold ──────────────────────────── predicted_drifted = {fid for fid, d in id_to_dist.items() if d > threshold} logger.info( f" At threshold={threshold}: {len(predicted_drifted)}/{len(id_to_dist)} " @@ -271,13 +252,11 @@ def _get_gt(nodes, ranges): f"This is the primary cause of F1=0." ) - # Also count "added" functions (in new but not old) — they are always flagged added_count = len(only_in_new) removed_count = len(only_in_old) logger.info(f" Added (always flagged as drifted): {added_count}") logger.info(f" Removed (in old only): {removed_count}") - # ── 9. FP / FN analysis ─────────────────────────────────────────────── tp = len(predicted_drifted & ground_truth_ids) fp = len(predicted_drifted - ground_truth_ids) fn = len(ground_truth_ids - predicted_drifted) @@ -288,7 +267,6 @@ def _get_gt(nodes, ranges): logger.info(f" Atlas @ threshold={threshold}: TP={tp} FP={fp} FN={fn} " f"P={prec:.4f} R={rec:.4f} F1={f1:.4f}") - # Collect top false negatives: ground truth that Atlas missed fn_ids = list(ground_truth_ids - predicted_drifted)[:10] fn_details = [] for fid in fn_ids: @@ -296,7 +274,6 @@ def _get_gt(nodes, ranges): fn_details.append({"id": fid, "cosine_dist": dist, "threshold": threshold}) logger.info(f" FN: {fid[:80]} cosine_dist={dist!r}") - # Collect top false positives: Atlas flagged but not in ground truth fp_ids = list(predicted_drifted - ground_truth_ids)[:10] fp_details = [] for fid in fp_ids: @@ -304,7 +281,6 @@ def _get_gt(nodes, ranges): fp_details.append({"id": fid, "cosine_dist": dist}) logger.info(f" FP: {fid[:80]} cosine_dist={dist!r}") - # ── 10. Baseline predictions ────────────────────────────────────────── changed_files = set(changed_ranges_norm.keys()) baseline_predicted = {n.id for n in new_nodes if n.file_path in changed_files} b_tp = len(baseline_predicted & ground_truth_ids) @@ -316,7 +292,6 @@ def _get_gt(nodes, ranges): logger.info(f" Baseline (file-level): TP={b_tp} FP={b_fp} FN={b_fn} " f"P={b_prec:.4f} R={b_rec:.4f} F1={b_f1:.4f}") - # ── 11. Threshold sensitivity sweep ──────────────────────────────────── sweep = _threshold_sweep(distances, ground_truth_ids, id_to_dist) opt = _find_optimal_threshold(sweep) logger.info( @@ -324,7 +299,6 @@ def _get_gt(nodes, ranges): f"→ F1={opt['f1']} (TP={opt['tp']} FP={opt['fp']} FN={opt['fn']})" ) - # Also show what thresholds give non-zero F1 nonzero_f1 = [r for r in sweep if r["f1"] > 0] if nonzero_f1: logger.info( @@ -334,7 +308,6 @@ def _get_gt(nodes, ranges): else: logger.warning(" NO threshold gives F1>0 for this commit!") - # ── 12. Key diagnostic: distance of ground-truth functions ────────────── gt_in_intersection = [fid for fid in ground_truth_ids if fid in id_to_dist] gt_distances = [id_to_dist[fid] for fid in gt_in_intersection] gt_stats = _dist_stats(sorted(gt_distances)) @@ -360,7 +333,6 @@ def _get_gt(nodes, ranges): f"{'[POSITIVE = model gives GT higher dist — good]' if separability > 0 else '[NEGATIVE = model cannot separate GT from non-GT — failure]'}" ) - # ── 13. Save artifact ───────────────────────────────────────────────── artifact = { "commit": commit_hash, "old_count": len(old_nodes), @@ -402,11 +374,6 @@ def _get_gt(nodes, ranges): return artifact - -# ─────────────────────────────────────────────────────────────────────────── -# Main -# ─────────────────────────────────────────────────────────────────────────── - def main() -> None: parser = argparse.ArgumentParser( description=( @@ -466,14 +433,12 @@ def main() -> None: else backend_root / args.output_dir) output_dir.mkdir(parents=True, exist_ok=True) - # ── Check for shallow clone ────────────────────────────────────────── if _check_shallow_clone(args.repo_path): logger.warning( "SHALLOW CLONE: git history is truncated. " "Results may be incomplete. Clone with full depth for best results." ) - # ── Get commits ─────────────────────────────────────────────────────── original_branch = get_current_branch(args.repo_path) logger.info(f"Repo: {args.repo_path} branch={original_branch}") commits = get_commits_with_python_changes(args.repo_path, n=args.commits) @@ -484,7 +449,6 @@ def main() -> None: logger.info(f"Will analyze {len(commits)} commits: {commits}") - # ── Per-commit analysis ──────────────────────────────────────────────── commit_results: list[dict] = [] for commit_hash in commits: try: @@ -501,7 +465,6 @@ def main() -> None: except Exception: pass - # ── Cross-commit summary ─────────────────────────────────────────────── print("\n" + "="*70) print(" CROSS-COMMIT DIAGNOSTIC SUMMARY") print("="*70) @@ -526,7 +489,6 @@ def main() -> None: zero_gt += 1 stats = r.get("cosine_dist_stats", {}) if stats.get("mean") is not None: - # approximate: we can't recover individual distances from stats pass gt_stats = r.get("gt_cosine_dist_stats", {}) @@ -567,7 +529,6 @@ def main() -> None: if avg_opt: print(f" Average OPTIMAL threshold: {avg_opt:.2f}") - # ── Hypotheses verdict ──────────────────────────────────────────────── print("\n" + "="*70) print(" FAILURE MODE HYPOTHESES") print("="*70) @@ -597,7 +558,6 @@ def main() -> None: for all matched pairs, regardless of actual code change. """) - # ── Save summary ────────────────────────────────────────────────────── summary = { "repo_path": args.repo_path, "threshold_used": args.threshold, diff --git a/backend/eval/eval_drift.py b/backend/eval/eval_drift.py index 3a8544d..845ad93 100644 --- a/backend/eval/eval_drift.py +++ b/backend/eval/eval_drift.py @@ -388,7 +388,6 @@ def evaluate_on_repo(self, repo_path: str, num_commits: int = 10, threshold: flo # --- Path normalization: align git-diff keys with node.file_path --- changed_ranges = _normalize_diff_paths(changed_ranges, new_nodes, commit_hash) - # Ground truth ground_truth_ids = self._get_changed_function_ids(new_nodes, changed_ranges) logger.info( f" Ground truth: {len(ground_truth_ids)} functions overlap with diff " @@ -406,17 +405,13 @@ def evaluate_on_repo(self, repo_path: str, num_commits: int = 10, threshold: flo skipped_no_groundtruth += 1 continue - # Atlas predictions - # NOTE: 'added' functions are included because a newly-added function - # that overlaps the diff's changed lines IS a true positive ground-truth - # function. Excluding 'added' silently removes valid TP hits. drift_results = self.detector.detect_drift(old_nodes, new_nodes, threshold=threshold) predicted_drifted = { r.function_id for r in drift_results if r.is_drifted and r.drift_type in ("semantic", "structural", "added") } - # Debug: break down by drift type and show distance distribution + type_counts: dict[str, int] = {} dist_values: list[float] = [] for r in drift_results: diff --git a/backend/eval/run_all_benchmarks.py b/backend/eval/run_all_benchmarks.py index 78fea95..ed65def 100644 --- a/backend/eval/run_all_benchmarks.py +++ b/backend/eval/run_all_benchmarks.py @@ -109,9 +109,7 @@ def main() -> None: } benchmarks_run: list[str] = [] - # ==================================================================== # 1. MRR@10 - # ==================================================================== print("\n" + "=" * 60) print(" [1/4] MRR@10 Evaluation") print("=" * 60) @@ -130,9 +128,7 @@ def main() -> None: logger.warning(f"MRR script not found: {mrr_script}") results["mrr"] = "SKIPPED — script not found" - # ==================================================================== # 2. CodeSearchEval - # ==================================================================== print("\n" + "=" * 60) print(" [2/4] CodeSearchEval") print("=" * 60) @@ -154,9 +150,7 @@ def main() -> None: logger.warning(f"CodeSearchEval script not found: {cse_script}") results["codesearcheval"] = "SKIPPED — script not found" - # ==================================================================== # 3. Drift Detection - # ==================================================================== print("\n" + "=" * 60) print(" [3/4] Drift Detection") print("=" * 60) @@ -181,9 +175,7 @@ def main() -> None: logger.warning(f"Drift script not found: {drift_script}") results["drift"] = "SKIPPED — script not found" - # ==================================================================== # 4. SWE-Bench - # ==================================================================== print("\n" + "=" * 60) print(" [4/4] SWE-Bench") print("=" * 60) @@ -211,9 +203,7 @@ def main() -> None: logger.warning(f"SWE-Bench script not found: {swe_script}") results["swebench"] = "SKIPPED — script not found" - # ==================================================================== # Combined report - # ==================================================================== results["benchmarks_run"] = benchmarks_run report_path = out_dir / "benchmark_report.json" @@ -221,9 +211,7 @@ def main() -> None: json.dump(results, f, indent=2, ensure_ascii=False) logger.info(f"Combined report → {report_path}") - # ==================================================================== # Markdown summary - # ==================================================================== md_lines = [ "# Atlas Benchmark Summary", "", @@ -262,9 +250,7 @@ def main() -> None: f.write("\n".join(md_lines)) logger.info(f"Markdown summary → {summary_path}") - # ==================================================================== # Print table - # ==================================================================== print("\n" + "=" * 60) print(" COMBINED BENCHMARK RESULTS") print("=" * 60) diff --git a/backend/workers/celery_app.py b/backend/workers/celery_app.py index 95e56b9..ccc7989 100644 --- a/backend/workers/celery_app.py +++ b/backend/workers/celery_app.py @@ -1,3 +1,4 @@ +import os from pathlib import Path from celery import Celery @@ -6,10 +7,14 @@ print("🔥 USING CELERY APP FROM:", __file__) print("🔥 BACKEND DIR:", _BACKEND_DIR) +# REDIS_URL is injected by Docker Compose (redis://redis:6379/0). +# Falls back to localhost so local `uvicorn` / `celery` runs still work. +_REDIS_URL = os.getenv("REDIS_URL", "redis://localhost:6379/0") + celery_app = Celery( "codebase_intel", - broker="redis://localhost:6379/0", - backend="redis://localhost:6379/0", + broker=_REDIS_URL, + backend=_REDIS_URL, include=["workers.tasks"], ) diff --git a/backend/workers/tasks.py b/backend/workers/tasks.py index e561e8a..c80417b 100644 --- a/backend/workers/tasks.py +++ b/backend/workers/tasks.py @@ -1,10 +1,18 @@ import asyncio import logging +import time from workers.celery_app import celery_app logger = logging.getLogger("codebase-intel.tasks") +# Sentinel files written by the ingest route +_INGEST_READY = ".ingest_ready" +_INGEST_FAILED = ".ingest_failed" +# How long to wait for ingestion to complete before giving up +_READY_WAIT_SECS = 30 +_READY_POLL_INTERVAL = 1 + @celery_app.task( name="tasks.run_analysis_pipeline", max_retries=0, @@ -30,6 +38,42 @@ def run_analysis_pipeline_task(session_id: str, source_type: str) -> dict: log = logging.getLogger(f"codebase-intel.tasks.{session_id[:8]}") log.info(f"Task started — source_type={source_type}") + ready_file = session_dir / _INGEST_READY + failed_file = session_dir / _INGEST_FAILED + + log.info(f"[INGEST_WAIT] Waiting for sentinel {ready_file}") + ingest_ready = False + for i in range(_READY_WAIT_SECS): + if failed_file.exists(): + reason = failed_file.read_text(encoding="utf-8").strip() + log.error(f"[INGEST_FAILED] Ingestion failed before analysis could start: {reason}") + progress_store.update_sync( + session_id, + status="error", + error_message=f"Ingestion failed before analysis could start: {reason}", + ) + return {"status": "error", "error_code": "INGEST_FAILED", "session_id": session_id} + if ready_file.exists(): + log.info(f"[INGEST_READY] Sentinel found after {i}s — proceeding with pipeline") + ingest_ready = True + break + time.sleep(_READY_POLL_INTERVAL) + + if not ingest_ready: + log.error( + f"[INGEST_TIMEOUT] Repository not ready after {_READY_WAIT_SECS}s " + f"for session {session_id}" + ) + progress_store.update_sync( + session_id, + status="error", + error_message=( + f"Ingestion timed out — repository was not ready after " + f"{_READY_WAIT_SECS}s. Please re-ingest the repository." + ), + ) + return {"status": "error", "error_code": "INGEST_TIMEOUT", "session_id": session_id} + try: asyncio.run(run_analysis_pipeline(session_id, session_dir)) log.info("Task completed successfully") From 9c060dc16a556718011e48ff5148a2bd7c5bd401 Mon Sep 17 00:00:00 2001 From: Aryan Bhati Date: Mon, 18 May 2026 02:24:55 +0530 Subject: [PATCH 3/4] frontend: improve runtime resilience, polling recovery, and session persistence --- frontend/src/api/aiStream.ts | 3 +- frontend/src/api/api.ts | 65 ++- frontend/src/api/client.ts | 17 +- frontend/src/components/DebugOverlay.tsx | 157 ++++++++ .../src/components/dashboard/Dashboard.tsx | 377 +++++++++--------- frontend/src/components/graph/GitTimeline.tsx | 2 +- frontend/src/store/sessionStore.ts | 133 +++--- 7 files changed, 480 insertions(+), 274 deletions(-) create mode 100644 frontend/src/components/DebugOverlay.tsx diff --git a/frontend/src/api/aiStream.ts b/frontend/src/api/aiStream.ts index 08adeab..7cacbdb 100644 --- a/frontend/src/api/aiStream.ts +++ b/frontend/src/api/aiStream.ts @@ -1,4 +1,5 @@ -const API_BASE = import.meta.env.VITE_API_BASE || "http://localhost:8000/api"; +const API_BASE = + ((import.meta.env.VITE_API_URL as string | undefined) || "http://localhost:8000") + "/api"; const MAX_RETRIES = 3; export interface StreamCallbacks { diff --git a/frontend/src/api/api.ts b/frontend/src/api/api.ts index e2d3471..42e0165 100644 --- a/frontend/src/api/api.ts +++ b/frontend/src/api/api.ts @@ -41,25 +41,31 @@ export interface CancellableAnalysis { abort: () => void; } +const ANALYSIS_TIMEOUT_MS = 600_000; // 10 minutes + export function analyzeWithProgress( sessionId: string, onProgress: (stage: string, current: number, total: number) => void ): CancellableAnalysis { let aborted = false; - const ANALYSIS_TIMEOUT_MS = 120_000; - const promise = new Promise((resolve, reject) => { const timeoutId = setTimeout(() => { if (!aborted) { aborted = true; - reject(new Error("Clone timed out — try again.")); + console.warn(`[poll:${sessionId.slice(0, 8)}] Frontend timeout after ${ANALYSIS_TIMEOUT_MS / 1000}s`); + reject(new Error( + "Analysis timed out after 10 minutes. The repository may be very large. " + + "Check if the backend is still running and try again." + )); } }, ANALYSIS_TIMEOUT_MS); void (async () => { try { - await client.post(`/api/analyze/start/${sessionId}`); + console.log(`[poll:${sessionId.slice(0, 8)}] Calling /analyze/start`); + const startRes = await client.post(`/api/analyze/start/${sessionId}`); + console.log(`[poll:${sessionId.slice(0, 8)}] Start response:`, startRes.data); if (aborted) { clearTimeout(timeoutId); @@ -68,11 +74,13 @@ export function analyzeWithProgress( } let consecutiveErrors = 0; - const MAX_CONSECUTIVE_ERRORS = 3; + let pollCount = 0; + let backoffMs = 500; while (!aborted) { - await new Promise((r) => setTimeout(r, 500)); + await new Promise((r) => setTimeout(r, backoffMs)); if (aborted) break; + pollCount++; try { const { data: prog } = await client.get<{ @@ -81,33 +89,51 @@ export function analyzeWithProgress( total: number; done: boolean; error: string | null; - }>(`/api/analyze/progress/${sessionId}`); + }>(`/api/analyze/progress/${sessionId}`, { + _suppressNetworkToast: true, + } as import("axios").AxiosRequestConfig & { _suppressNetworkToast?: boolean }); consecutiveErrors = 0; + backoffMs = 500; + + console.log( + `[poll:${sessionId.slice(0, 8)} #${pollCount}]`, + `stage=${prog.stage}`, + `${prog.current}/${prog.total}`, + `done=${prog.done}` + ); + onProgress(prog.stage, prog.current, prog.total); if (prog.error) { clearTimeout(timeoutId); + console.error(`[poll:${sessionId.slice(0, 8)}] Backend error: ${prog.error}`); reject(new Error(prog.error)); return; } if (prog.done) { clearTimeout(timeoutId); - const res = await client.post( - `/api/analyze/${sessionId}` + console.log(`[poll:${sessionId.slice(0, 8)}] Done — fetching results`); + const res = await client.post(`/api/analyze/${sessionId}`); + console.log( + `[poll:${sessionId.slice(0, 8)}] Results received:`, + `${res.data.total_files} files,`, + `${res.data.graph?.nodes?.length ?? 0} graph nodes` ); resolve(res.data); return; } } catch (pollErr: unknown) { consecutiveErrors++; - if (consecutiveErrors >= MAX_CONSECUTIVE_ERRORS) { - clearTimeout(timeoutId); - reject(new Error("Lost connection to analysis server.")); - return; - } - await new Promise((r) => setTimeout(r, 1_000)); + + backoffMs = Math.min(backoffMs * 2, 8_000); + console.warn( + `[poll:${sessionId.slice(0, 8)} #${pollCount}]`, + `Network error #${consecutiveErrors} (next retry in ${backoffMs}ms):`, + pollErr instanceof Error ? pollErr.message : pollErr + ); + await new Promise((r) => setTimeout(r, 500)); } } @@ -117,12 +143,19 @@ export function analyzeWithProgress( } } catch (err: unknown) { clearTimeout(timeoutId); + console.error(`[poll:${sessionId.slice(0, 8)}] Fatal error:`, err); reject(err); } })(); }); - return { promise, abort: () => { aborted = true; } }; + return { + promise, + abort: () => { + console.log(`[poll:${sessionId.slice(0, 8)}] Aborted`); + aborted = true; + }, + }; } diff --git a/frontend/src/api/client.ts b/frontend/src/api/client.ts index 3e5a88c..d29e9cb 100644 --- a/frontend/src/api/client.ts +++ b/frontend/src/api/client.ts @@ -5,14 +5,16 @@ import { useSessionStore } from "../store/sessionStore"; declare module "axios" { interface InternalAxiosRequestConfig { _retryCount?: number; + _suppressNetworkToast?: boolean; } } -const BASE_URL = - (import.meta.env.VITE_API_URL as string | undefined) ?? - (import.meta.env.VITE_API_BASE as string | undefined) ?? +export const API_BASE_URL = + (import.meta.env.VITE_API_URL as string | undefined) || "http://localhost:8000"; +const BASE_URL = API_BASE_URL; + export const client = axios.create({ baseURL: BASE_URL, timeout: 30_000, @@ -41,7 +43,6 @@ client.interceptors.response.use( const config = error.config; if (status === 401) { - try { const [{ useSettingsStore }, { useUiStore }] = await Promise.all([ import("../store/settingsStore"), @@ -50,7 +51,7 @@ client.interceptors.response.use( useSettingsStore.getState().clearApiKeys(); useUiStore.getState().setSettingsPanelOpen(true); } catch { - + // ignore } return Promise.reject(error); } @@ -73,7 +74,11 @@ client.interceptors.response.use( } if (!error.response) { - dispatchToast("Cannot reach backend — is it running?"); + if (!config?._suppressNetworkToast) { + dispatchToast("Cannot reach backend — is it running?"); + } else { + console.warn("[API] Silent network error on", config?.url, error.message); + } return Promise.reject(error); } diff --git a/frontend/src/components/DebugOverlay.tsx b/frontend/src/components/DebugOverlay.tsx new file mode 100644 index 0000000..c5d7b25 --- /dev/null +++ b/frontend/src/components/DebugOverlay.tsx @@ -0,0 +1,157 @@ +import React, { useState, useEffect, useRef } from "react"; + +export interface DebugState { + backendReachable: boolean | null; + sessionId: string | null; + pollingActive: boolean; + pollCount: number; + lastStage: string | null; + lastCurrent: number; + lastTotal: number; + lastPollMs: number | null; + apiBase: string; +} + +interface Props { + state: DebugState; +} + +export const DebugOverlay = React.memo(function DebugOverlay({ state }: Props) { + const [visible, setVisible] = useState(false); + const [elapsedMs, setElapsedMs] = useState(null); + const timerRef = useRef | null>(null); + + // Alt+D to toggle + useEffect(() => { + const handler = (e: KeyboardEvent) => { + if (e.altKey && e.key.toLowerCase() === "d") { + setVisible((v) => !v); + } + }; + window.addEventListener("keydown", handler); + return () => window.removeEventListener("keydown", handler); + }, []); + + // Update elapsed since last poll + useEffect(() => { + if (timerRef.current) clearInterval(timerRef.current); + if (state.lastPollMs !== null) { + timerRef.current = setInterval(() => { + setElapsedMs(Date.now() - (state.lastPollMs ?? Date.now())); + }, 250); + } + return () => { if (timerRef.current) clearInterval(timerRef.current); }; + }, [state.lastPollMs]); + + if (!visible) { + return ( +
setVisible(true)} + title="Alt+D — open debug overlay" + style={{ + position: "fixed", + bottom: "0.5rem", + left: "0.5rem", + zIndex: 99999, + width: "0.5rem", + height: "0.5rem", + borderRadius: "50%", + background: state.backendReachable === false + ? "#f87171" + : state.pollingActive + ? "#34d399" + : "#94a3b8", + cursor: "pointer", + opacity: 0.5, + }} + /> + ); + } + + const row = (label: string, value: React.ReactNode, ok?: boolean) => ( +
+ {label} + + {value} + +
+ ); + + const reachableColor = state.backendReachable === null ? "#94a3b8" + : state.backendReachable ? "#34d399" : "#f87171"; + const reachableLabel = state.backendReachable === null ? "checking…" + : state.backendReachable ? "reachable" : "UNREACHABLE"; + + return ( +
+
+ + ATLAS DEBUG (Alt+D) + + +
+ + {row( + "Backend", + {reachableLabel}, + state.backendReachable === null ? undefined : state.backendReachable, + )} + {row("API base", state.apiBase)} + {row("Session ID", state.sessionId ?? "(none)")} + {row( + "Polling", + state.pollingActive ? `active — ${state.pollCount} polls` : "idle", + state.pollingActive, + )} + {state.pollingActive && row( + "Stage", + state.lastStage + ? `${state.lastStage} (${state.lastCurrent}/${state.lastTotal})` + : "—", + )} + {row( + "Last poll", + state.lastPollMs + ? `${Math.round((elapsedMs ?? 0) / 100) / 10}s ago` + : "—", + )} + +
+ + localStorage: atlas-session-v1 + {" · "} + StrictMode: {import.meta.env.DEV ? "ON (dev)" : "off"} + +
+
+ ); +}); diff --git a/frontend/src/components/dashboard/Dashboard.tsx b/frontend/src/components/dashboard/Dashboard.tsx index 7f12a39..8872b10 100644 --- a/frontend/src/components/dashboard/Dashboard.tsx +++ b/frontend/src/components/dashboard/Dashboard.tsx @@ -18,6 +18,7 @@ import { useSessionStore } from "../../store/sessionStore"; import { useUiStore } from "../../store/uiStore"; import { useThemeStore } from "../../store/themeStore"; import { analyzeWithProgress, getAIStatus, getCommentCounts } from "../../api/api"; +import { API_BASE_URL } from "../../api/client"; import type { AIStatusResponse } from "../../types"; import { FileExplorer } from "./FileExplorer"; import { GraphView } from "./GraphView"; @@ -26,6 +27,42 @@ import { SettingsPanel } from "../settings/SettingsPanel"; import { CommandPalette } from "./CommandPalette"; import { EmptyState } from "./EmptyState"; import { SidebarEmptyState } from "./SidebarEmptyState"; +import { DebugOverlay } from "../DebugOverlay"; +import type { DebugState } from "../DebugOverlay"; + +const stageLabels: Record = { + pending: "Waiting to start…", + queued: "Queued for processing…", + starting: "Initializing…", + cloning: "Cloning repository…", + extracting: "Extracting archive…", + scanning: "Scanning files…", + parsing: "Parsing files", + scoring: "Scoring complexity", + graph: "Building dependency graph", + function_graph: "Building function graph", + saving: "Caching results", + done: "Complete!", + error: "Error", +}; + +const stageWeights: Record = { + pending: { base: 0, weight: 2 }, + queued: { base: 2, weight: 3 }, + starting: { base: 0, weight: 5 }, + cloning: { base: 5, weight: 20 }, + extracting: { base: 5, weight: 15 }, + scanning: { base: 20, weight: 10 }, + parsing: { base: 30, weight: 40 }, + scoring: { base: 70, weight: 10 }, + graph: { base: 80, weight: 8 }, + function_graph: { base: 88, weight: 4 }, + saving: { base: 92, weight: 5 }, + done: { base: 100, weight: 0 }, + error: { base: 0, weight: 0 }, +}; + +let _pollingSessionId: string | null = null; export function Dashboard() { const sessionId = useSessionStore((s) => s.sessionId); @@ -56,9 +93,7 @@ export function Dashboard() { useEffect(() => { let mx = 0, my = 0, cx = 0, cy = 0; let raf: number; - const onMove = (e: MouseEvent) => { mx = e.clientX; my = e.clientY; }; - const tick = () => { cx += (mx - cx) * 0.12; cy += (my - cy) * 0.12; @@ -68,7 +103,6 @@ export function Dashboard() { } raf = requestAnimationFrame(tick); }; - window.addEventListener("mousemove", onMove, { passive: true }); raf = requestAnimationFrame(tick); return () => { @@ -78,41 +112,60 @@ export function Dashboard() { }, []); const [backendToast, setBackendToast] = useState(null); + const [debugState, setDebugState] = useState({ + backendReachable: null, + sessionId: null, + pollingActive: false, + pollCount: 0, + lastStage: null, + lastCurrent: 0, + lastTotal: 0, + lastPollMs: null, + apiBase: API_BASE_URL, + }); const pollStatus = useCallback(async () => { try { const status = await getAIStatus(); setAIStatus(status); } catch { - + } }, [setAIStatus]); - useEffect(() => { let cancelled = false; - const MAX_RETRIES = 3; - const RETRY_DELAY_MS = 2_000; - const tryFetch = async (attempt: number): Promise => { - if (cancelled) return; - try { - const status = await getAIStatus(); - if (!cancelled) setAIStatus(status); - } catch { + const run = async () => { + + getAIStatus() + .then((s) => { if (!cancelled) setAIStatus(s); }) + .catch(() => { }); + + console.log(`[health] Checking ${API_BASE_URL}/api/health`); + for (let attempt = 1; attempt <= 3; attempt++) { if (cancelled) return; - if (attempt < MAX_RETRIES) { - await new Promise((r) => setTimeout(r, RETRY_DELAY_MS)); - await tryFetch(attempt + 1); - } else { - setBackendToast( - "Backend connection failed. Is the server running on port 8000?" - ); + try { + const res = await fetch(`${API_BASE_URL}/api/health`); + if (res.ok) { + console.log(`[health] Backend reachable (attempt ${attempt})`); + if (!cancelled) setDebugState((d) => ({ ...d, backendReachable: true })); + return; + } + console.warn(`[health] /api/health returned ${res.status} (attempt ${attempt})`); + } catch (err) { + console.warn(`[health] Fetch failed (attempt ${attempt}):`, err); } + if (attempt < 3) await new Promise((r) => setTimeout(r, 2_000)); + } + if (!cancelled) { + console.error("[health] Backend unreachable after 3 attempts"); + setDebugState((d) => ({ ...d, backendReachable: false })); + setBackendToast("Backend connection failed. Is the server running on port 8000?"); } }; - void tryFetch(1); + void run(); return () => { cancelled = true; }; }, [setAIStatus]); @@ -126,31 +179,56 @@ export function Dashboard() { const [progressTotal, setProgressTotal] = useState(0); useEffect(() => { - if (!sessionId || isAnalyzed) return; + if (!sessionId || isAnalyzed) { + + if (!sessionId) _pollingSessionId = null; + return; + } + + if (_pollingSessionId === sessionId) { + console.log(`[poll] Already polling ${sessionId.slice(0, 8)}, skipping duplicate mount`); + return; + } + _pollingSessionId = sessionId; + + console.log(`[poll] Starting for session ${sessionId.slice(0, 8)}`); setAnalyzing(true); setProgressStage("starting"); setProgressCurrent(0); setProgressTotal(0); + setDebugState((d) => ({ ...d, sessionId, pollingActive: true, pollCount: 0 })); let cancelled = false; + let localPollCount = 0; - const { promise, abort } = analyzeWithProgress( - sessionId, - (stage, current, total) => { - if (!cancelled) { - setProgressStage(stage); - setProgressCurrent(current); - setProgressTotal(total); - setAnalysisProgress({ stage, current, total }); - } - } - ); + const onProgress = (stage: string, current: number, total: number) => { + if (cancelled) return; + localPollCount++; + setProgressStage(stage); + setProgressCurrent(current); + setProgressTotal(total); + setAnalysisProgress({ stage, current, total }); + setDebugState((d) => ({ + ...d, + pollingActive: true, + pollCount: localPollCount, + lastStage: stage, + lastCurrent: current, + lastTotal: total, + lastPollMs: Date.now(), + })); + }; + + const { promise, abort } = analyzeWithProgress(sessionId, onProgress); promise .then((data) => { if (!cancelled) { + console.log(`[poll] setAnalysisResult — ${data.parsed_files.length} files`); setAnalysisResult(data.parsed_files, data.graph); + setDebugState((d) => ({ ...d, pollingActive: false, lastStage: "done" })); + if (repoName) { const repoUrl = sourceType === "github" @@ -167,19 +245,22 @@ export function Dashboard() { }) .catch((err: unknown) => { if (!cancelled) { - setError( - "Analysis failed. " + - (err instanceof Error ? err.message : String(err)) - ); + const msg = err instanceof Error ? err.message : String(err); + console.error(`[poll] Analysis failed: ${msg}`); + setError("Analysis failed. " + msg); setAnalyzing(false); + setDebugState((d) => ({ ...d, pollingActive: false })); } }); return () => { + console.log(`[poll] Cleanup for ${sessionId.slice(0, 8)}`); cancelled = true; abort(); + _pollingSessionId = null; + setDebugState((d) => ({ ...d, pollingActive: false })); }; - + }, [sessionId, isAnalyzed]); useEffect(() => { @@ -190,32 +271,12 @@ export function Dashboard() { const data = await getCommentCounts(sessionId); if (!cancelled) setCommentCounts(data.counts); } catch { - + } })(); return () => { cancelled = true; }; }, [sessionId, setCommentCounts]); - const stageLabels: Record = { - starting: "Initializing…", - parsing: "Parsing files", - scoring: "Scoring complexity", - graph: "Building dependency graph", - saving: "Caching results", - done: "Complete!", - error: "Error", - }; - - const stageWeights: Record = { - starting: { base: 0, weight: 5 }, - parsing: { base: 5, weight: 65 }, - scoring: { base: 70, weight: 10 }, - graph: { base: 80, weight: 10 }, - saving: { base: 90, weight: 5 }, - done: { base: 100, weight: 0 }, - error: { base: 0, weight: 0 }, - }; - const getOverallPct = (): number => { const w = stageWeights[progressStage] ?? { base: 0, weight: 0 }; if (progressStage === "done") return 100; @@ -230,9 +291,9 @@ export function Dashboard() { () => parsedFiles.length > 0 ? ( - parsedFiles.reduce((s, f) => s + f.complexity_score, 0) / - parsedFiles.length - ).toFixed(2) + parsedFiles.reduce((s, f) => s + f.complexity_score, 0) / + parsedFiles.length + ).toFixed(2) : "0", [parsedFiles] ); @@ -245,6 +306,7 @@ export function Dashboard() { if (isAnalyzing) { const overallPct = getOverallPct(); const sourceLabel = sourceType === "zip" ? "ZIP Archive" : "Repository"; + const stageLabel = stageLabels[progressStage] ?? progressStage; return (
- Analyzing {sourceLabel} + {`Analyzing ${sourceLabel}`} - {stageLabels[progressStage] ?? progressStage} + {stageLabel} {progressStage === "parsing" && progressTotal > 0 && ( {(progressStage === "starting" || + progressStage === "pending" || + progressStage === "queued" || (progressTotal === 0 && progressStage !== "done")) && ( -
- {[0, 1, 2].map((i) => ( -
- ))} -
- )} +
+ {[0, 1, 2].map((i) => ( +
+ ))} +
+ )}
+ + {/* Debug overlay is still available during analysis */} + {(import.meta.env.DEV || import.meta.env.VITE_DEBUG_OVERLAY === "true") && ( + + )}
); } @@ -347,7 +417,6 @@ export function Dashboard() {
- {} {backendToast && (
- - + + {parsedFiles.length}
- - + + {totalLoc.toLocaleString()} LOC
- - + + {complexAvg}
@@ -502,32 +544,20 @@ export function Dashboard() { window.dispatchEvent( - new KeyboardEvent("keydown", { - key: "k", - ctrlKey: true, - bubbles: true, - }) + new KeyboardEvent("keydown", { key: "k", ctrlKey: true, bubbles: true }) ) } whileHover={{ scale: 1.06 }} whileTap={{ scale: 0.95 }} className="flex items-center gap-1.5 px-2.5 py-1.5 rounded-lg text-[10px] font-medium transition-colors duration-200" - style={{ - color: "var(--text-tertiary)", - background: "var(--bg-input)", - border: "1px solid var(--border-light)", - }} + style={{ color: "var(--text-tertiary)", background: "var(--bg-input)", border: "1px solid var(--border-light)" }} title="Command Palette (Ctrl+K)" > Search ⌘K @@ -538,21 +568,11 @@ export function Dashboard() { whileHover={{ scale: 1.06 }} whileTap={{ scale: 0.95 }} className="flex items-center p-1.5 rounded-lg transition-colors duration-200" - style={{ - color: "var(--text-tertiary)", - background: "var(--bg-input)", - border: "1px solid var(--border-light)", - }} - title={ - theme === "dark" ? "Switch to Light Mode" : "Switch to Dark Mode" - } + style={{ color: "var(--text-tertiary)", background: "var(--bg-input)", border: "1px solid var(--border-light)" }} + title={theme === "dark" ? "Switch to Light Mode" : "Switch to Dark Mode"} aria-label="Toggle theme" > - {theme === "dark" ? ( - - ) : ( - - )} + {theme === "dark" ? : } - {isChatPanelOpen ? ( - - ) : ( - - )} + {isChatPanelOpen ? : } @@ -598,11 +604,7 @@ export function Dashboard() { whileHover={{ scale: 1.06 }} whileTap={{ scale: 0.95 }} className="flex items-center gap-1 px-2 py-1.5 rounded-lg text-[10px] font-medium transition-colors duration-200" - style={{ - color: "var(--text-tertiary)", - background: "var(--bg-input)", - border: "1px solid var(--border-light)", - }} + style={{ color: "var(--text-tertiary)", background: "var(--bg-input)", border: "1px solid var(--border-light)" }} > New @@ -611,13 +613,14 @@ export function Dashboard() {
- + + + {(import.meta.env.DEV || import.meta.env.VITE_DEBUG_OVERLAY === "true") && ( + + )}
); } @@ -665,12 +668,9 @@ function ResizablePanels({ if (side === "left") { setLeftWidth(Math.min(LEFT_MAX, Math.max(LEFT_MIN, startWidth + delta))); } else { - setRightWidth( - Math.min(RIGHT_MAX, Math.max(RIGHT_MIN, startWidth - delta)) - ); + setRightWidth(Math.min(RIGHT_MAX, Math.max(RIGHT_MIN, startWidth - delta))); } }; - const onMouseUp = () => { if (dragRef.current) { dragRef.current = null; @@ -678,7 +678,6 @@ function ResizablePanels({ document.body.style.userSelect = ""; } }; - window.addEventListener("mousemove", onMouseMove); window.addEventListener("mouseup", onMouseUp); return () => { @@ -705,11 +704,7 @@ function ResizablePanels({ className="panel panel-center" initial={{ opacity: 0, scale: 0.98 }} animate={{ opacity: 1, scale: 1 }} - transition={{ - duration: 0.5, - ease: [0.25, 0.46, 0.45, 0.94], - delay: 0.1, - }} + transition={{ duration: 0.5, ease: [0.25, 0.46, 0.45, 0.94], delay: 0.1 }} > {hasSession ? : } @@ -742,27 +737,15 @@ const AIStatusIndicator = React.memo(function AIStatusIndicator({ return (
-
- - AI - +
+ AI
); } - const anyAPI = - status.groq || status.gemini || status.mistral || status.huggingface; + const anyAPI = status.groq || status.gemini || status.mistral || status.huggingface; const isLocal = status.ollama; if (isLocal && anyAPI) { @@ -772,9 +755,7 @@ const AIStatusIndicator = React.memo(function AIStatusIndicator({
- - AI Ready - + AI Ready
); } diff --git a/frontend/src/components/graph/GitTimeline.tsx b/frontend/src/components/graph/GitTimeline.tsx index 34d8fc6..d2e1247 100644 --- a/frontend/src/components/graph/GitTimeline.tsx +++ b/frontend/src/components/graph/GitTimeline.tsx @@ -23,7 +23,7 @@ export function GitTimeline() { commitDiff, isCommitDiffLoading, coverageData, - isCoverageLoading, + isCoverageLoading: _isCoverageLoading, showCoverage, setTimelineData, setTimelineLoading, diff --git a/frontend/src/store/sessionStore.ts b/frontend/src/store/sessionStore.ts index 6a9563f..0a1ca96 100644 --- a/frontend/src/store/sessionStore.ts +++ b/frontend/src/store/sessionStore.ts @@ -1,4 +1,5 @@ import { create } from "zustand"; +import { persist, createJSONStorage } from "zustand/middleware"; import type { FileEntry, ParsedFile, @@ -8,7 +9,6 @@ import type { } from "../types"; export interface SessionState { - sessionId: string | null; status: SessionStatus; progress: number; @@ -93,61 +93,90 @@ const initialState: Omit< error: null, }; -export const useSessionStore = create((set) => ({ - ...initialState, +export const useSessionStore = create()( + persist( + (set) => ({ + ...initialState, - setSession: (data) => - set({ - sessionId: data.session_id, - repoName: data.repo_name, - repoUrl: data.repo_name, - files: data.files, - totalFiles: data.total_files, - sourceType: data.source_type, - ingestedAt: data.ingested_at, - status: "parsing", - error: null, - }), + setSession: (data) => + set({ + sessionId: data.session_id, + repoName: data.repo_name, + repoUrl: data.repo_name, + files: data.files, + totalFiles: data.total_files, + sourceType: data.source_type, + ingestedAt: data.ingested_at, + status: "parsing", + error: null, + isAnalyzed: false, + parsedFiles: [], + graphData: null, + }), - setSessionAndLoading: (data) => - set({ - sessionId: data.session_id, - repoName: data.repo_name, - repoUrl: data.repo_name, - files: data.files, - totalFiles: data.total_files, - sourceType: data.source_type, - ingestedAt: data.ingested_at, - status: "parsing", - error: null, - isLoading: false, - }), + setSessionAndLoading: (data) => + set({ + sessionId: data.session_id, + repoName: data.repo_name, + repoUrl: data.repo_name, + files: data.files, + totalFiles: data.total_files, + sourceType: data.source_type, + ingestedAt: data.ingested_at, + status: "parsing", + error: null, + isLoading: false, + isAnalyzed: false, + parsedFiles: [], + graphData: null, + }), - setLoading: (loading) => set({ isLoading: loading }), - setError: (error) => set({ error, isLoading: false }), + setLoading: (loading) => set({ isLoading: loading }), + setError: (error) => set({ error, isLoading: false }), - reset: () => set(initialState), + reset: () => set(initialState), - setAnalyzing: (analyzing) => set({ isAnalyzing: analyzing }), - setAnalysisProgress: (progress) => set({ analysisProgress: progress }), - setAnalysisResult: (parsed, graph) => - set({ - parsedFiles: parsed, - graphData: graph, - isAnalyzed: true, - isAnalyzing: false, - analysisProgress: null, - status: "done", - progress: 100, - }), + setAnalyzing: (analyzing) => set({ isAnalyzing: analyzing }), + setAnalysisProgress: (progress) => set({ analysisProgress: progress }), + setAnalysisResult: (parsed, graph) => + set({ + parsedFiles: parsed, + graphData: graph, + isAnalyzed: true, + isAnalyzing: false, + analysisProgress: null, + status: "done", + progress: 100, + }), - setSelectedFile: (path) => { - set({ selectedFile: path, fileContent: null }); - - import("./aiStore").then(({ useAiStore }) => { - useAiStore.getState().clearFileAI(); - }).catch(() => undefined); - }, + setSelectedFile: (path) => { + set({ selectedFile: path, fileContent: null }); + import("./aiStore").then(({ useAiStore }) => { + useAiStore.getState().clearFileAI(); + }).catch(() => undefined); + }, - setFileContent: (content) => set({ fileContent: content }), -})); + setFileContent: (content) => set({ fileContent: content }), + }), + { + name: "atlas-session-v1", + storage: createJSONStorage(() => localStorage), + partialize: (state) => ({ + sessionId: state.sessionId, + repoName: state.repoName, + repoUrl: state.repoUrl, + sourceType: state.sourceType, + files: state.files, + totalFiles: state.totalFiles, + ingestedAt: state.ingestedAt, + }), + onRehydrateStorage: () => (state, error) => { + if (error) { + console.warn("[sessionStore] Failed to hydrate from localStorage:", error); + } else if (state?.sessionId) { + console.log(`[sessionStore] Restored session ${state.sessionId} from localStorage`); + } + }, + } + ) +); From a0557d08e96e5daae396f6635103d4a984452a21 Mon Sep 17 00:00:00 2001 From: Aryan Bhati Date: Mon, 18 May 2026 02:26:50 +0530 Subject: [PATCH 4/4] add runtime E2E validation suite --- e2e/package.json | 15 + e2e/playwright.config.ts | 19 ++ e2e/test-results/.last-run.json | 4 + e2e/tests/runtime.spec.ts | 513 ++++++++++++++++++++++++++++++++ e2e/tsconfig.json | 11 + 5 files changed, 562 insertions(+) create mode 100644 e2e/package.json create mode 100644 e2e/playwright.config.ts create mode 100644 e2e/test-results/.last-run.json create mode 100644 e2e/tests/runtime.spec.ts create mode 100644 e2e/tsconfig.json diff --git a/e2e/package.json b/e2e/package.json new file mode 100644 index 0000000..5bd6c93 --- /dev/null +++ b/e2e/package.json @@ -0,0 +1,15 @@ +{ + "name": "e2e", + "version": "1.0.0", + "main": "index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "keywords": [], + "author": "", + "license": "ISC", + "description": "", + "devDependencies": { + "@playwright/test": "^1.60.0" + } +} diff --git a/e2e/playwright.config.ts b/e2e/playwright.config.ts new file mode 100644 index 0000000..9e62acb --- /dev/null +++ b/e2e/playwright.config.ts @@ -0,0 +1,19 @@ +import { defineConfig, devices } from "@playwright/test"; + +export default defineConfig({ + testDir: "./tests", + timeout: 600_000, + retries: 0, + reporter: [["list"]], + use: { + baseURL: "http://localhost:5173", + headless: true, + screenshot: "only-on-failure", + }, + projects: [ + { + name: "chromium", + use: { ...devices["Desktop Chrome"] }, + }, + ], +}); diff --git a/e2e/test-results/.last-run.json b/e2e/test-results/.last-run.json new file mode 100644 index 0000000..cbcc1fb --- /dev/null +++ b/e2e/test-results/.last-run.json @@ -0,0 +1,4 @@ +{ + "status": "passed", + "failedTests": [] +} \ No newline at end of file diff --git a/e2e/tests/runtime.spec.ts b/e2e/tests/runtime.spec.ts new file mode 100644 index 0000000..51f777f --- /dev/null +++ b/e2e/tests/runtime.spec.ts @@ -0,0 +1,513 @@ +import { test, expect, Page, ConsoleMessage } from "@playwright/test"; + +const FRONTEND = "http://localhost:5173"; +const BACKEND = "http://localhost:8000"; + +//console capture +interface ConsoleCapture { + errors: string[]; + warnings: string[]; + logs: string[]; + pollTicks: string[]; + duplicatePollWarnings: string[]; + healthLogs: string[]; + staleClosureWarnings: string[]; + networkErrorLogs: string[]; +} + +function attachConsoleCapture(page: Page): ConsoleCapture { + const cap: ConsoleCapture = { + errors: [], warnings: [], logs: [], + pollTicks: [], duplicatePollWarnings: [], + healthLogs: [], staleClosureWarnings: [], + networkErrorLogs: [], + }; + page.on("console", (msg: ConsoleMessage) => { + const text = msg.text(); + const type = msg.type(); + if (type === "error") cap.errors.push(text); + if (type === "warning") cap.warnings.push(text); + if (type === "log") cap.logs.push(text); + if (text.includes("[poll:") && text.includes("stage=")) cap.pollTicks.push(text); + if (text.includes("Already polling")) cap.duplicatePollWarnings.push(text); + if (text.includes("[health]")) cap.healthLogs.push(text); + if (text.includes("Can't perform a React state update")) cap.staleClosureWarnings.push(text); + if (text.includes("Network error #") && text.includes("[poll:")) cap.networkErrorLogs.push(text); + }); + return cap; +} + +//network capture +interface NetworkCapture { + startCalls: Map; // sessionId -> call count + progressPolls: number; +} + +function attachNetworkCapture(page: Page): NetworkCapture { + const net: NetworkCapture = { startCalls: new Map(), progressPolls: 0 }; + page.on("request", (req) => { + if (req.url().includes("/analyze/progress/")) { + net.progressPolls++; + } + if (req.url().includes("/analyze/start/")) { + const sid = req.url().split("/analyze/start/")[1] ?? "?"; + net.startCalls.set(sid, (net.startCalls.get(sid) ?? 0) + 1); + } + }); + return net; +} + +//page helpers +async function goFresh(page: Page) { + await page.emulateMedia({ reducedMotion: "reduce" }); + await page.addInitScript(() => { + if (!sessionStorage.getItem("__atlasTestInit")) { + localStorage.removeItem("atlas-session-v1"); + sessionStorage.setItem("__atlasTestInit", "1"); + } + }); + await page.goto(FRONTEND, { waitUntil: "domcontentloaded" }); + await page.waitForTimeout(600); +} + +async function openModal(page: Page) { + await page.waitForSelector("button[aria-label='Clone Git repository']", { timeout: 10_000 }); + await page.click("button[aria-label='Clone Git repository']"); + await page.waitForSelector("input[placeholder*='github.com']", { timeout: 10_000 }); +} + +async function submitRepo(page: Page, url: string, cloneTimeout = 300_000) { + await page.fill("input[placeholder*='github.com']", url); + await page.click("button[type='submit']"); + // Modal closes when setSessionAndLoading() fires + await page.waitForSelector("input[placeholder*='github.com']", { + state: "detached", + timeout: cloneTimeout, + }); +} + +/** Wait for the analyzing overlay to disappear (analysis complete). */ +async function waitForComplete(page: Page, timeout = 600_000) { + await page.waitForSelector(".analyzing-overlay", { state: "detached", timeout }); +} + +/** Read the persisted session state from localStorage. */ +async function readSession(page: Page): Promise<{ sessionId?: string } | null> { + const raw = await page.evaluate(() => localStorage.getItem("atlas-session-v1")); + if (!raw) return null; + try { + const parsed = JSON.parse(raw) as { state?: { sessionId?: string } }; + return parsed.state ?? null; + } catch { return null; } +} + +async function injectSession( + page: Page, sessionId: string, repoName: string, totalFiles: number +) { + await page.emulateMedia({ reducedMotion: "reduce" }); + await page.addInitScript( + ({ sid, name, files }) => { + localStorage.setItem("atlas-session-v1", JSON.stringify({ + state: { + sessionId: sid, + repoName: name, + repoUrl: name, + sourceType: "github", + files: [], + totalFiles: files, + ingestedAt: new Date().toISOString(), + }, + version: 0, + })); + }, + { sid: sessionId, name: repoName, files: totalFiles } + ); +} + +test.describe("01 — Backend and frontend connectivity", () => { + test("GET /api/health returns ok", async ({ request }) => { + const res = await request.get(`${BACKEND}/api/health`); + expect(res.ok()).toBeTruthy(); + expect((await res.json()).status).toBe("ok"); + }); + + test("GET /api/settings/status returns provider map", async ({ request }) => { + const res = await request.get(`${BACKEND}/api/settings/status`); + expect(res.ok()).toBeTruthy(); + expect(await res.json()).toHaveProperty("active_provider"); + }); + + test("Vite serves React app at port 5173", async ({ request }) => { + const res = await request.get(FRONTEND); + expect(res.ok()).toBeTruthy(); + expect(await res.text()).toContain(""); + }); + + test("HMR WebSocket connects to localhost — not internal container IP", async ({ page }) => { + const wsUrls: string[] = []; + page.on("websocket", (ws) => wsUrls.push(ws.url())); + await page.goto(FRONTEND, { waitUntil: "networkidle" }); + await page.waitForTimeout(4_000); + const leaked = wsUrls.filter( + (u) => u.startsWith("ws://") && + !u.includes("localhost") && + !u.includes("127.0.0.1") && + !u.includes("[::1]") + ); + expect(leaked, `HMR leaking to internal IP: ${leaked.join(", ")}`).toHaveLength(0); + console.log(`[e2e] HMR WS URLs: ${wsUrls.join(", ") || "(none)"}`); + }); +}); + +test.describe("02 — Health check false-positive prevention", () => { + test("AI status failure does NOT trigger backend-offline toast", async ({ page }) => { + await page.route("**/api/settings/status", (r) => r.abort("failed")); + await page.goto(FRONTEND); + await page.waitForTimeout(8_000); + const visible = await page.locator("text=Backend connection failed").isVisible().catch(() => false); + expect(visible).toBeFalsy(); + console.log("[e2e] ✓ AI status failure: backend-offline toast correctly suppressed"); + }); + + test("/api/health failure DOES trigger offline toast after retries", async ({ page }) => { + await page.route("**/api/health", (r) => r.abort("failed")); + await page.goto(FRONTEND); + await page.waitForTimeout(14_000); // 3 retries × 2s + buffer + const visible = await page.locator("text=Backend connection failed").isVisible().catch(() => false); + expect(visible).toBeTruthy(); + console.log("[e2e] ✓ /api/health failure: backend-offline toast correctly shown"); + }); +}); + +test.describe("03 — Flask (small repo): ingest + analysis", () => { + test("End-to-end: open modal → ingest → overlay → dashboard", async ({ page }) => { + const cap = attachConsoleCapture(page); + const net = attachNetworkCapture(page); + + await goFresh(page); + await openModal(page); + + console.log("[e2e] Submitting Flask repo..."); + await submitRepo(page, "https://github.com/pallets/flask"); + + // Analyzing overlay appears + await page.waitForSelector(".analyzing-overlay", { timeout: 15_000 }); + console.log("[e2e] Overlay visible — waiting for completion..."); + + await waitForComplete(page, 300_000); + + // Dashboard visible + await expect(page.locator(".dashboard-layout")).toBeVisible(); + console.log("[e2e] ✓ Dashboard visible after analysis"); + + // localStorage was written + const session = await readSession(page); + expect(session?.sessionId).toBeTruthy(); + console.log(`[e2e] ✓ Persisted sessionId: ${session?.sessionId}`); + + // No React stale-closure warnings + expect(cap.staleClosureWarnings, `Stale: ${cap.staleClosureWarnings.join("\n")}`).toHaveLength(0); + + // Backend health logged as reachable + expect(cap.healthLogs.some((m) => m.includes("reachable"))).toBeTruthy(); + + // Polling was active + expect(net.progressPolls).toBeGreaterThan(0); + console.log(`[e2e] Poll ticks: ${net.progressPolls}`); + + // No critical React errors + const reactErrors = cap.errors.filter( + (e) => e.startsWith("Error:") || e.includes("Minified React error") + ); + expect(reactErrors, `React errors: ${reactErrors.join("\n")}`).toHaveLength(0); + console.log(`[e2e] Console errors: ${cap.errors.length}, warnings: ${cap.warnings.length}`); + }); +}); + +test.describe("04 — Session persistence: page reload scenarios", () => { + test("Reload AFTER completion: instantly restores session", async ({ page }) => { + const cap = attachConsoleCapture(page); + const net = attachNetworkCapture(page); + + await goFresh(page); + await openModal(page); + await submitRepo(page, "https://github.com/pallets/flask"); + await waitForComplete(page, 300_000); + + const sessionBefore = await readSession(page); + expect(sessionBefore?.sessionId).toBeTruthy(); + console.log(`[e2e] Pre-reload sessionId: ${sessionBefore?.sessionId}`); + + // Reload — sessionStorage flag keeps the addInitScript from clearing localStorage + console.log("[e2e] Reloading page after completion..."); + await page.reload({ waitUntil: "domcontentloaded" }); + await page.waitForTimeout(600); + + const t0 = Date.now(); + await waitForComplete(page, 30_000); + const elapsed = Date.now() - t0; + console.log(`[e2e] Session restore took ${elapsed}ms`); + expect(elapsed, "Cached restore must be < 15s").toBeLessThan(15_000); + + // sessionId survived + const sessionAfter = await readSession(page); + expect(sessionAfter?.sessionId).toBe(sessionBefore?.sessionId); + console.log("[e2e] ✓ sessionId matches after reload"); + + // Dashboard visible + await expect(page.locator(".dashboard-layout")).toBeVisible(); + + // /analyze/start was called after reload and returned "cached" + const startCallsAfterReload = net.startCalls; + expect(startCallsAfterReload.size).toBeGreaterThan(0); + console.log(`[e2e] Start calls: ${JSON.stringify([...startCallsAfterReload])}`); + + // No stale closure warnings + expect(cap.staleClosureWarnings).toHaveLength(0); + console.log(`[e2e] ✓ Reload after completion: done in ${elapsed}ms`); + }); + + test("Reload MID-analysis: session survives and analysis completes", async ({ page }) => { + const cap = attachConsoleCapture(page); + + await goFresh(page); + await openModal(page); + + await page.fill("input[placeholder*='github.com']", "https://github.com/pallets/flask"); + await page.click("button[type='submit']"); + + // Wait for overlay (clone done, analysis starting) + await page.waitForSelector(".analyzing-overlay", { timeout: 180_000 }); + + const sessionBefore = await readSession(page); + expect(sessionBefore?.sessionId).toBeTruthy(); + console.log(`[e2e] Mid-analysis session: ${sessionBefore?.sessionId}`); + + // Reload mid-analysis — sessionStorage flag preserves localStorage + console.log("[e2e] Reloading mid-analysis..."); + await page.reload({ waitUntil: "domcontentloaded" }); + await page.waitForTimeout(600); + + const sessionAfter = await readSession(page); + expect(sessionAfter?.sessionId).toBe(sessionBefore?.sessionId); + console.log("[e2e] ✓ sessionId survived mid-analysis reload"); + + await waitForComplete(page, 300_000); + await expect(page.locator(".dashboard-layout")).toBeVisible(); + + // No abandoned interval warnings + const leakWarnings = cap.warnings.filter( + (w) => w.includes("memory leak") || w.includes("Cannot update a component") + ); + expect(leakWarnings).toHaveLength(0); + console.log("[e2e] ✓ Mid-analysis reload: completed successfully"); + }); + + test("Pre-completed session injected into localStorage: restores instantly", async ({ page }) => { + const net = attachNetworkCapture(page); + + await injectSession(page, "683eba6923b7", "pallets/flask", 208); + await page.goto(FRONTEND, { waitUntil: "domcontentloaded" }); + await page.waitForTimeout(600); + + const t0 = Date.now(); + await waitForComplete(page, 15_000); + const elapsed = Date.now() - t0; + console.log(`[e2e] Pre-completed session restore: ${elapsed}ms`); + expect(elapsed, "Must restore in < 12s").toBeLessThan(12_000); + + await expect(page.locator(".dashboard-layout")).toBeVisible(); + // Cached path = very few polls + console.log(`[e2e] Poll ticks for cached session: ${net.progressPolls}`); + expect(net.progressPolls, "Cached session should need < 10 polls").toBeLessThan(10); + console.log("[e2e] ✓ Pre-completed session: instant restore"); + }); +}); + +test.describe("05 — React StrictMode duplicate polling guard", () => { + test("At most 2 /analyze/start calls per session (StrictMode max)", async ({ page }) => { + const net = attachNetworkCapture(page); + const cap = attachConsoleCapture(page); + + await goFresh(page); + await openModal(page); + await submitRepo(page, "https://github.com/pallets/flask"); + await page.waitForSelector(".analyzing-overlay", { timeout: 15_000 }); + await page.waitForTimeout(5_000); // let polling run + + for (const [sid, count] of net.startCalls.entries()) { + console.log(`[e2e] Session ${sid.slice(0, 8)}: ${count} start call(s)`); + expect(count, `Session had ${count} start calls (StrictMode max = 2)`).toBeLessThanOrEqual(2); + } + + const guardHit = cap.duplicatePollWarnings.length > 0; + console.log(`[e2e] StrictMode guard triggered: ${guardHit} (${cap.duplicatePollWarnings.length} warnings)`); + + await waitForComplete(page, 300_000); + console.log("[e2e] ✓ StrictMode duplicate guard: OK"); + }); +}); + +test.describe("06 — Network failure recovery during polling", () => { + test("6 poll failures: loop recovers, no false toast, analysis completes", async ({ page }) => { + const cap = attachConsoleCapture(page); + + await goFresh(page); + await openModal(page); + await submitRepo(page, "https://github.com/pallets/flask"); + await page.waitForSelector(".analyzing-overlay", { timeout: 15_000 }); + + // Block 6 consecutive progress polls then unblock + let blocked = 0; + await page.route("**/api/analyze/progress/**", async (route) => { + if (blocked < 6) { blocked++; await route.abort("failed"); } + else await route.continue(); + }); + + await page.waitForTimeout(8_000); + + // Network errors must be logged (exponential backoff active) + console.log(`[e2e] Logged ${cap.networkErrorLogs.length} network error retries (${blocked} blocked)`); + expect(cap.networkErrorLogs.length, "Should log at least 1 retry").toBeGreaterThan(0); + + // No "Cannot reach backend" toast (suppressNetworkToast works) + const toasts = await page.locator("text=Cannot reach backend").count(); + expect(toasts, "No false backend-offline toasts").toBe(0); + console.log(`[e2e] ✓ Backend-offline toasts during poll failure: ${toasts}`); + + // Loop must not have stopped — analysis must complete + await waitForComplete(page, 300_000); + await expect(page.locator(".dashboard-layout")).toBeVisible(); + console.log("[e2e] ✓ Network failure recovery: loop survived and completed"); + }); +}); + +test.describe("07 — Django (medium repo): stage label coverage", () => { + test("All backend stages map to valid labels (no undefined)", async ({ page }) => { + const cap = attachConsoleCapture(page); + + await injectSession(page, "0a0b5201a5ee", "django/django", 6813); + await page.goto(FRONTEND, { waitUntil: "domcontentloaded" }); + await page.waitForTimeout(600); + + // Capture stage text while overlay briefly shows (cached = fast) + const stagesShown: string[] = []; + const obs = setInterval(async () => { + const t = await page.locator(".analyzing-overlay").textContent().catch(() => ""); + if (t) stagesShown.push(t.trim()); + }, 100); + + await waitForComplete(page, 30_000); + clearInterval(obs); + + // No undefined labels in captured snapshots + const undefinedLabels = stagesShown.filter((t) => t.includes("undefined")); + expect(undefinedLabels, `Undefined labels: ${undefinedLabels.join("|")}`).toHaveLength(0); + + // Poll ticks must only mention known stages + const knownStages = new Set([ + "pending", "queued", "starting", "cloning", "extracting", "scanning", + "parsing", "scoring", "graph", "function_graph", "saving", "done", "error", + ]); + const unknownInTicks = cap.pollTicks.filter((t) => { + const m = t.match(/stage=(\w+)/); + return m && !knownStages.has(m[1]); + }); + expect(unknownInTicks, `Unknown stage keys: ${unknownInTicks.join("\n")}`).toHaveLength(0); + + await expect(page.locator(".dashboard-layout")).toBeVisible(); + console.log(`[e2e] ✓ Django: all stage labels valid. Ticks: ${cap.pollTicks.slice(0, 3).join(" | ")}`); + }); +}); + +test.describe("08 — Memory and render hygiene", () => { + test("DOM mutations bounded during 5s of active polling", async ({ page }) => { + await page.addInitScript(() => { (window as any).__mutCount = 0; }); + + await goFresh(page); + await openModal(page); + await submitRepo(page, "https://github.com/pallets/flask"); + await page.waitForSelector(".analyzing-overlay", { timeout: 15_000 }); + + // Attach observer at peak activity + await page.evaluate(() => { + (window as any).__mutCount = 0; + const obs = new MutationObserver(() => { (window as any).__mutCount++; }); + obs.observe(document.getElementById("root")!, { + childList: true, subtree: true, attributes: false, characterData: false, + }); + (window as any).__obs = obs; + }); + + await page.waitForTimeout(5_000); + + const mutCount: number = await page.evaluate(() => { + (window as any).__obs?.disconnect(); + return (window as any).__mutCount ?? 0; + }); + console.log(`[e2e] DOM mutations in 5s: ${mutCount}`); + // ~10 polls × ~10 mutations each = ~100; >1000 = infinite re-render + expect(mutCount, `Possible infinite re-render: ${mutCount} mutations`).toBeLessThan(1000); + + await waitForComplete(page, 300_000); + console.log("[e2e] ✓ Render hygiene: bounded mutations"); + }); + + test("Intervals clean up after session resets — no accumulation", async ({ page }) => { + const cap = attachConsoleCapture(page); + + await page.addInitScript(() => { + const orig = { si: window.setInterval, ci: window.clearInterval }; + let net = 0; + (window as any).setInterval = function (fn: TimerHandler, ms?: number, ...a: unknown[]) { + net++; + return orig.si(fn, ms, ...(a as [])); + }; + (window as any).clearInterval = function (id?: number) { + net = Math.max(0, net - 1); + orig.ci(id); + }; + (window as any).__netIntervals = () => net; + }); + + await page.emulateMedia({ reducedMotion: "reduce" }); + await page.goto(FRONTEND, { waitUntil: "domcontentloaded" }); + + // 3 reloads without session + for (let i = 0; i < 3; i++) { + await page.evaluate(() => localStorage.removeItem("atlas-session-v1")); + await page.reload({ waitUntil: "domcontentloaded" }); + await page.waitForTimeout(800); + } + + const net: number = await page.evaluate(() => (window as any).__netIntervals?.() ?? 0); + console.log(`[e2e] Net active intervals after 3 resets: ${net}`); + expect(net, `Interval leak detected: ${net} active`).toBeLessThan(10); + + const leakWarnings = cap.warnings.filter( + (w) => w.includes("memory leak") || w.includes("Cannot update a component") + ); + expect(leakWarnings).toHaveLength(0); + console.log("[e2e] ✓ Interval cleanup hygiene: no leaks"); + }); +}); + +test.describe("09 — Debug overlay (Alt+D)", () => { + test("Alt+D shows overlay with API base in DEV mode", async ({ page }) => { + await injectSession(page, "683eba6923b7", "pallets/flask", 208); + await page.goto(FRONTEND, { waitUntil: "domcontentloaded" }); + await page.waitForTimeout(1_500); + + await page.keyboard.press("Alt+d"); + await page.waitForTimeout(500); + + const overlayVisible = await page.locator("text=ATLAS DEBUG").isVisible().catch(() => false); + if (!overlayVisible) { + console.log("[e2e] Debug overlay not visible — frontend not in DEV mode (expected in Docker)"); + return; + } + await expect(page.locator("text=API base")).toBeVisible(); + await expect(page.locator("text=localhost:8000")).toBeVisible(); + console.log("[e2e] ✓ Debug overlay: API base correct"); + }); +}); diff --git a/e2e/tsconfig.json b/e2e/tsconfig.json new file mode 100644 index 0000000..186c313 --- /dev/null +++ b/e2e/tsconfig.json @@ -0,0 +1,11 @@ +{ + "compilerOptions": { + "target": "ES2020", + "module": "commonjs", + "strict": true, + "esModuleInterop": true, + "outDir": "./dist", + "types": ["node"] + }, + "include": ["tests/**/*.ts", "playwright.config.ts"] +}