Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions backend/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
# NOTE: unlike .gitignore, .dockerignore patterns are anchored at the root of
# the build context. A bare `__pycache__/` therefore only matches the top-level
# directory; the `**/` prefix is required to match at any depth.

# Python caches (any depth — every package directory gets its own cache)
**/__pycache__/
**/*.py[cod]
**/*.pyo
**/*.pyd

# Virtual environments
.venv/
venv/
env/
.env/

# Local session data (runtime, not source)
sessions/

# Test / benchmark outputs
results/
**/.pytest_cache/

# Build artefacts
**/*.egg-info/
dist/
build/

# DB files
*.db
*.sqlite

# Editor / OS
.idea/
.vscode/
**/.DS_Store
**/Thumbs.db

# Git metadata
.git/
.gitignore

# Jupyter
*.ipynb
**/.ipynb_checkpoints/

# Docs / non-runtime assets
docs/
55 changes: 55 additions & 0 deletions backend/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# Backend Dockerfile
# Python 3.11 slim — small base, fast builds
FROM python:3.11-slim

# System packages
#   git             – GitPython binary requirement
#   build-essential – compiles C extensions (tree-sitter, scipy)
#   curl            – lightweight HTTP tool (optional local debugging)
#   libgomp1        – OpenMP runtime required by PyTorch / numpy
RUN apt-get update && apt-get install -y --no-install-recommends \
        git \
        build-essential \
        curl \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Python dependencies (layer-cache order: slowest-changing layers first).
# Steps 1-3 do not read requirements.txt, so it is copied only before step 4;
# editing it then leaves the expensive torch / PyG layers cached.

# 1. Upgrade pip
RUN pip install --no-cache-dir --upgrade pip

# 1b. Pin NumPy to 1.x BEFORE any compiled extension is installed.
#     torch 2.2.x and PyG wheels are built against NumPy 1.x ABI.
#     With NumPy already present, later installs reuse it rather than pulling
#     2.x; pip only replaces it if a requirement explicitly demands a newer one.
RUN pip install --no-cache-dir "numpy==1.26.4"

# 2. CPU-only PyTorch (avoids the ~2 GB CUDA wheel)
RUN pip install --no-cache-dir \
        torch==2.2.2 torchvision torchaudio \
        --index-url https://download.pytorch.org/whl/cpu

# 3. PyTorch Geometric + required sparse/scatter extensions (CPU wheels)
#    Must be installed AFTER torch and AGAINST the same torch version.
RUN pip install --no-cache-dir torch_geometric==2.5.3
RUN pip install --no-cache-dir \
        torch_scatter \
        torch_sparse \
        torch_cluster \
        torch_spline_conv \
        -f https://data.pyg.org/whl/torch-2.2.2+cpu.html

# 4. Everything else from requirements.txt
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Application source
COPY . .

EXPOSE 8000

# Uvicorn: bind to 0.0.0.0 so Docker port-mapping works.
# No --reload in production image; add a volume mount for dev hot-reload.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
4 changes: 2 additions & 2 deletions backend/api/mcp_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
logging.basicConfig(
level=logging.INFO,
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
stream=sys.stderr, # MCP uses stdout for the protocol; log to stderr
stream=sys.stderr,
)

mcp = FastMCP("Atlas — Behavioral Code Intelligence")
Expand Down Expand Up @@ -337,7 +337,7 @@ async def get_hot_paths(top_k: int = 10) -> str:
fan_in = graph.in_degree(node_id)
fan_out = graph.out_degree(node_id)
complexity = int(node_data.get("complexity", 0))
# Impact: heavy fan-in + high complexity = highest risk

impact = fan_in * 2 + complexity + fan_out * 0.5
scored.append(
{
Expand Down
50 changes: 49 additions & 1 deletion backend/api/routes/analyze.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import asyncio
import json
import logging
import time
import threading
from pathlib import Path
from typing import Protocol, cast
Expand All @@ -21,17 +22,64 @@

# Routes declared in this module are registered under the /analyze prefix.
router = APIRouter(prefix="/analyze", tags=["Analyze"])

# Sentinel files written by the ingest route — same constants as tasks.py
# Both names are resolved relative to the session directory.
_INGEST_READY = ".ingest_ready"
# The failure sentinel's text content is read back and reported as the reason.
_INGEST_FAILED = ".ingest_failed"
# How long the thread fallback keeps polling for a sentinel before giving up,
# and the pause between two polls (both in seconds).
_READY_WAIT_SECS = 30
_READY_POLL_INTERVAL = 1


class CeleryTask(Protocol):
    """Structural type for any object that exposes a ``delay`` method."""

    def delay(self, *args: object, **kwargs: object) -> object:
        """Accept arbitrary positional and keyword arguments and return an object."""
        ...


def _wait_for_ingest_sentinel(session_id: str, session_dir: Path) -> bool:
    """
    Block (in a thread) until .ingest_ready appears or .ingest_failed is found.

    The failure sentinel is checked before the ready sentinel on every poll.
    On failure or timeout the session is marked as errored in ``progress_store``
    before returning.

    Args:
        session_id: Session identifier; its first 8 characters name the logger.
        session_dir: Directory in which the sentinel files are looked up.

    Returns:
        True if the ready sentinel was found, False if ingestion failed or the
        wait exceeded ``_READY_WAIT_SECS`` seconds.
    """
    log = logging.getLogger(f"codebase-intel.thread.{session_id[:8]}")
    ready_file = session_dir / _INGEST_READY
    failed_file = session_dir / _INGEST_FAILED

    log.info("[INGEST_WAIT] Thread fallback waiting for sentinel %s", ready_file)

    # Measure real elapsed time rather than counting iterations, so the timeout
    # and the reported wait stay correct if the poll interval is ever changed.
    started = time.monotonic()
    deadline = started + _READY_WAIT_SECS

    while True:
        if failed_file.exists():
            try:
                reason = failed_file.read_text(encoding="utf-8").strip()
            except (OSError, UnicodeDecodeError):
                # The sentinel's presence already signals failure; an unreadable
                # body must not kill the thread before the error is recorded.
                reason = "<unreadable>"
            log.error(
                "[INGEST_FAILED] Ingestion failed before thread pipeline: %s", reason
            )
            progress_store.update_sync(
                session_id,
                status="error",
                error_message=f"Ingestion failed before analysis could start: {reason}",
            )
            return False

        if ready_file.exists():
            log.info(
                "[INGEST_READY] Sentinel found after %ds — starting thread pipeline",
                int(time.monotonic() - started),
            )
            return True

        # The sentinels are checked once more after the final sleep, so a file
        # written during the last interval is not misreported as a timeout.
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(_READY_POLL_INTERVAL, remaining))

    log.error("[INGEST_TIMEOUT] Repo not ready after %ss (thread)", _READY_WAIT_SECS)
    progress_store.update_sync(
        session_id,
        status="error",
        error_message=(
            f"Ingestion timed out — repository was not ready after "
            f"{_READY_WAIT_SECS}s. Please re-ingest the repository."
        ),
    )
    return False


def _run_pipeline_in_thread(session_id: str, session_dir: Path) -> None:
from core.pipeline import PipelineError, run_analysis_pipeline

log = logging.getLogger(f"codebase-intel.thread.{session_id[:8]}")
log.info("Starting pipeline in thread fallback mode")

if not _wait_for_ingest_sentinel(session_id, session_dir):
return

try:
asyncio.run(run_analysis_pipeline(session_id, session_dir))

Expand All @@ -52,7 +100,7 @@ def _run_pipeline_in_thread(session_id: str, session_dir: Path) -> None:
status="error",
error_message="Out of memory. Try a smaller repository.",
)
except Exception as exc:
except Exception as exc:
log.error(f"Unexpected error: {exc}", exc_info=True)
progress_store.update_sync(
session_id,
Expand Down
84 changes: 84 additions & 0 deletions backend/api/routes/debug.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
"""
GET /api/debug/session/{session_id}

Returns diagnostic information about a session directory so operators can
verify that:
1. The session directory exists and is on the correct volume mount.
2. The repo/ sub-directory was populated by git clone / ZIP extract.
3. The .ingest_ready sentinel was written by the ingest route.
4. The .ingest_failed sentinel was NOT written (i.e. no ingest error).

This endpoint is intentionally read-only and has no side-effects.
"""
import logging
from pathlib import Path

from fastapi import APIRouter, HTTPException
from config import SESSIONS_DIR

# Module-level logger for the debug routes.
logger = logging.getLogger("codebase-intel.routes.debug")

# Routes declared in this module are registered under the /debug prefix.
router = APIRouter(prefix="/debug", tags=["Debug"])


@router.get("/session/{session_id}")
async def debug_session(session_id: str):
    """
    Returns a JSON snapshot of the session directory state.
    Useful for diagnosing path / volume-mount / sentinel issues in Docker.

    Args:
        session_id: Name of the session directory directly under SESSIONS_DIR.

    Returns:
        A dict describing whether the session directory, its ``repo/``
        sub-directory, the ingest sentinels, ``meta.json`` and
        ``file_entries.json`` exist, plus the resolved absolute paths.
        ``repo_file_count`` is -1 if the repo tree could not be walked, and
        ``failed_reason`` is "<unreadable>" if the failure sentinel exists but
        could not be read.

    Raises:
        HTTPException: 400 if ``session_id`` is not a single plain path
            component (empty, ".", "..", contains a separator or a NUL byte).
    """
    # session_id is caller-supplied: accept only a single plain path component
    # so the lookup can never leave SESSIONS_DIR or fail inside resolve().
    if (
        not session_id
        or session_id in {".", ".."}
        or "\x00" in session_id
        or Path(session_id).name != session_id
    ):
        raise HTTPException(status_code=400, detail="Invalid session id")

    session_dir: Path = SESSIONS_DIR / session_id
    repo_dir = session_dir / "repo"
    ready_file = session_dir / ".ingest_ready"
    failed_file = session_dir / ".ingest_failed"

    # Start from the "session missing" answer and fill in details only if the
    # directory exists; both outcomes share one shape and one key order.
    snapshot = {
        "session_id": session_id,
        "sessions_dir": str(SESSIONS_DIR.resolve()),
        "session_exists": False,
        "repo_exists": False,
        "repo_file_count": 0,
        "ready_exists": False,
        "failed_exists": False,
        "failed_reason": None,
        "repo_absolute_path": str(repo_dir.resolve()),
        "ready_absolute_path": str(ready_file.resolve()),
        "failed_absolute_path": str(failed_file.resolve()),
        "meta_exists": False,
        "file_entries_exist": False,
    }

    if not session_dir.is_dir():
        return snapshot

    repo_exists = repo_dir.is_dir()
    repo_file_count = 0
    if repo_exists:
        try:
            repo_file_count = sum(
                1 for entry in repo_dir.rglob("*") if entry.is_file()
            )
        except Exception:
            # Best effort: report -1 rather than failing the whole diagnostic.
            logger.warning("Could not count files under %s", repo_dir, exc_info=True)
            repo_file_count = -1

    failed_exists = failed_file.exists()
    failed_reason: str | None = None
    if failed_exists:
        try:
            failed_reason = failed_file.read_text(encoding="utf-8").strip()
        except Exception:
            logger.warning("Could not read %s", failed_file, exc_info=True)
            failed_reason = "<unreadable>"

    snapshot.update(
        {
            "session_exists": True,
            "repo_exists": repo_exists,
            "repo_file_count": repo_file_count,
            "ready_exists": ready_file.exists(),
            "failed_exists": failed_exists,
            "failed_reason": failed_reason,
            "meta_exists": (session_dir / "meta.json").exists(),
            "file_entries_exist": (session_dir / "file_entries.json").exists(),
        }
    )
    return snapshot
Loading
Loading