diff --git a/.zenflow/tasks/new-task-7cad/plan.md b/.zenflow/tasks/new-task-7cad/plan.md new file mode 100644 index 0000000..47f0e3e --- /dev/null +++ b/.zenflow/tasks/new-task-7cad/plan.md @@ -0,0 +1,95 @@ +# Spec and build + +## Configuration +- **Artifacts Path**: {@artifacts_path} → `.zenflow/tasks/{task_id}` + +--- + +## Agent Instructions + +Ask the user questions when anything is unclear or needs their input. This includes: +- Ambiguous or incomplete requirements +- Technical decisions that affect architecture or user experience +- Trade-offs that require business context + +Do not make assumptions on important decisions — get clarification first. + +--- + +## Workflow Steps + +### [x] Step: Technical Specification + + +Assessed as **hard** — multiple interacting subsystems, new dependencies, cross-cutting header concerns. +Full spec saved to `.zenflow/tasks/new-task-7cad/spec.md`. + +--- + +### [x] Step: Fix cleanResponse and create browser inference engine + + +Create `lib/browser-engine.ts` (Transformers.js pipeline for in-browser WebGPU inference) and update `lib/clean-response.ts` to strip `` blocks from model output. + +- [x] Install `@huggingface/transformers` +- [x] Create `lib/browser-engine.ts` with singleton pipeline, lazy loading, progress callbacks, and status events +- [x] Update `lib/clean-response.ts` to strip `...` blocks before other cleaning +- [x] Exclude `relay/` from tsconfig (pre-existing build error) +- [x] Verify: `npx tsc --noEmit && npm run lint && npm run build` + +--- + +### [x] Step: Build WebContainer sandbox + + +Create the in-browser sandbox using WebContainers to replace the Docker relay for demo use. 
+ +- [x] Install `@webcontainer/api` +- [x] Create `lib/webcontainer-sandbox.ts` (boot, exec, teardown singleton) +- [x] Create `hooks/use-webcontainer.ts` (React hook: boot-on-first-run, exec, history tracking) +- [x] Update `types/sandbox.d.ts` to make `auditId` optional +- [x] Verify: `npx tsc --noEmit && npm run lint && npm run build` + +--- + +### [x] Step: Wire up UI — mode selector, browser translate, sandbox execution + + +Integrate browser inference and WebContainer sandbox into the main UI. + +- [x] Add inference mode selector (Cloud / Browser / Auto) to `components/shell-session.tsx` +- [x] Update `hooks/use-translate.ts` to accept mode parameter and call browser engine directly in browser mode +- [x] Replace `useSandbox()` with `useWebContainer()` in shell-session for WebContainer execution +- [x] Update `components/execution-output.tsx` for optional auditId and command history display +- [x] Verify: `npx tsc --noEmit && npm run lint && npm run build` + +--- + +### [x] Step: Configure COOP/COEP headers and CSP updates + + +Add required headers for WebContainers (SharedArrayBuffer) without breaking Vercel Analytics. + +- [x] Update `next.config.ts` with COOP/COEP headers (try `credentialless` first) +- [x] Update CSP `connect-src` for WebContainer and Transformers.js origins +- [x] Test that Vercel Analytics still loads; fall back to route-specific headers if broken +- [x] Verify: `npx tsc --noEmit && npm run lint && npm run build` + +--- + +### [ ] Step: Deploy to Vercel and configure nl2shell.com domain + + +Push to main, verify auto-deploy, configure domain and env vars. 
+ +- Merge feature branch to main +- Set Vercel env vars: `HF_TOKEN`, `NEXT_PUBLIC_SANDBOX_ENABLED` +- Configure nl2shell.com CNAME → cname.vercel-dns.com in Cloudflare (DNS-only) +- Verify: `curl -I https://nl2shell.com` returns 200 with correct headers +- Manual smoke test: Cloud mode, Browser mode, Sandbox execution + +--- + +### [ ] Step: Write implementation report + +- Write `{@artifacts_path}/report.md` describing what was implemented, how it was tested, and challenges encountered diff --git a/.zenflow/tasks/new-task-7cad/spec.md b/.zenflow/tasks/new-task-7cad/spec.md new file mode 100644 index 0000000..55fda81 --- /dev/null +++ b/.zenflow/tasks/new-task-7cad/spec.md @@ -0,0 +1,271 @@ +# Technical Specification: NL2Shell Web — Browser Inference, Sandbox, and Deployment + +**Difficulty:** Hard +**Rationale:** Multiple interacting subsystems (browser inference pipeline, WebContainer sandbox, COOP/COEP headers affecting third-party scripts, Vercel deployment), significant new code, and cross-cutting concerns (CSP headers, Vercel Analytics compatibility). 
+ +--- + +## Technical Context + +- **Framework:** Next.js 16.1.6 (App Router), React 19.2, TypeScript (strict) +- **Styling:** Tailwind CSS 4, shadcn/ui components +- **Current inference:** Cloud-only via `@gradio/client` → HuggingFace Space `AryaYT/nl2shell-demo` +- **Current sandbox:** Docker relay server (`relay/`) — not deployed, requires Railway +- **Deployment target:** Vercel (project linked in `.vercel/project.json`) +- **Domain:** nl2shell.com (Cloudflare DNS) +- **No test framework configured** — no test files exist + +--- + +## Current State Analysis + +### What exists +| Component | Status | Location | +|-----------|--------|----------| +| Cloud translation API | Working | `app/api/translate/route.ts` | +| `useTranslate` hook | Working | `hooks/use-translate.ts` | +| Docker relay sandbox | Implemented, not deployed | `relay/`, `hooks/use-sandbox.ts`, `app/api/execute/route.ts` | +| `cleanResponse()` | Working for cloud mode | `lib/clean-response.ts` | +| Safety checks | Working (22 patterns) | `lib/safety.ts` | +| MCP server | Working | `app/api/mcp/route.ts` | +| Vercel Analytics | Configured in layout | `app/layout.tsx` | + +### What's missing +| Component | Status | Notes | +|-----------|--------|-------| +| `lib/browser-engine.ts` | **Does not exist** | SPEC says "fix" but file is absent — must create from scratch | +| `` block stripping | Missing in `cleanResponse()` | Current regex handles markdown fences, not `` tags | +| WebContainer sandbox | Not started | Replaces Docker relay for demo use case | +| COOP/COEP headers | Not configured | Required by WebContainers for SharedArrayBuffer | +| Mode selector UI | Not implemented | No Cloud/Browser/Auto toggle in current UI | +| Vercel deployment | Not done | Domain not configured | + +--- + +## Implementation Approach + +### Task 1: Browser Inference Engine (Create `lib/browser-engine.ts`) + +**New dependency:** `@huggingface/transformers` (Transformers.js v3) + +**Architecture:** +- Singleton 
pipeline pattern — load model once, reuse across calls +- `"use client"` module (WebGPU is browser-only) +- Chat-template messages format for Qwen3.5 instruction model +- System prompt matches the Gradio Space's prompt for consistency + +**Pipeline output shape** (Transformers.js `text-generation` with chat messages): +```typescript +// Returns: Array<{ generated_text: Array<{ role: string; content: string }> }> +// The assistant's response is the last message in generated_text +``` + +**Key design decisions:** +- Use `onnx-community/Qwen2.5-0.5B-Instruct` as initial model (smaller, faster for demo; SPEC's `Qwen3.5-0.8B-ONNX` can replace later when converted) +- Model ID configurable via constant for easy swap +- Progress callback for download/load status reporting to UI +- Lazy loading — pipeline only created on first `generate()` call + +**Interface:** +```typescript +export interface BrowserEngineStatus { + stage: "idle" | "downloading" | "loading" | "ready" | "generating" | "error"; + progress?: number; // 0-100 for download + error?: string; +} + +export function generate(query: string): Promise<string> +export function getStatus(): BrowserEngineStatus +export function isReady(): boolean +export function onStatusChange(cb: (s: BrowserEngineStatus) => void): () => void +``` + +### Task 1b: Fix `cleanResponse()` for `<think>` blocks + +The Qwen model family (especially instruction-tuned variants) often wraps reasoning in `<think>...</think>` tags before the actual answer. Current `cleanResponse()` only handles markdown fences. 
+ +**Changes to `lib/clean-response.ts`:** +- Add `<think>` block stripping as the FIRST operation (before markdown fence removal) +- Regex: `/^<think>[\s\S]*?<\/think>\s*/` — matches `<think>` at start, any content (including newlines), closing tag, trailing whitespace +- Handle edge cases: empty think block, no think block, think block with no content after it + +### Task 2: WebContainer Sandbox + +**New dependency:** `@webcontainer/api` + +**Architecture:** +- `lib/webcontainer-sandbox.ts` — singleton WebContainer boot, exec, teardown +- `hooks/use-webcontainer.ts` — React state management, boot-on-first-run, command history +- Replaces Docker relay for the web demo (relay code stays for self-hosted/MCP use) + +**Key design decisions:** +- WebContainer boots lazily on first "Run" click, not on page load (saves resources) +- Command history persists in hook state (not localStorage — session-scoped) +- `ExecResult` interface mirrors existing `ExecutionResult` type but without `auditId` (no server-side audit for browser sandbox) +- The `useSandbox` hook is replaced by `useWebContainer` in `shell-session.tsx` when `NEXT_PUBLIC_SANDBOX_ENABLED` is `"webcontainer"` or `true` + +**WebContainer ExecResult:** +```typescript +export interface ExecResult { + stdout: string; + stderr: string; + exitCode: number; + durationMs: number; +} +``` + +### Task 3: COOP/COEP Headers + +**Problem:** WebContainers require `Cross-Origin-Embedder-Policy` and `Cross-Origin-Opener-Policy` headers for SharedArrayBuffer. These headers can break: +- Vercel Analytics (`@vercel/analytics`) — loads external script +- Gradio client connections to `huggingface.co` +- Any third-party iframe/script + +**Strategy:** +1. First try `credentialless` instead of `require-corp` for COEP (less restrictive, Chrome 96+) +2. If WebContainers work with `credentialless`, use that globally +3. If not, add strict COOP/COEP only to a `/sandbox` route segment and keep main page without them +4. 
Test Vercel Analytics compatibility in each configuration + +**Changes to `next.config.ts`:** +- Add COOP/COEP headers (strategy TBD based on testing) +- Update CSP `connect-src` to allow WebContainer origins if needed + +### Task 4: Wire Up UI + +**Changes to `components/shell-session.tsx`:** +- Add mode selector (Cloud / Browser / Auto) — simple button group or dropdown +- In Browser mode: use `generate()` from `lib/browser-engine.ts` instead of `POST /api/translate` +- Auto mode: try browser first, fall back to cloud on error +- Show model download progress bar when Browser mode first loads +- Replace `useSandbox()` with `useWebContainer()` for execution + +**Changes to `components/execution-output.tsx`:** +- Make `auditId` optional (WebContainer exec has no audit trail) +- Support rendering command history (multiple exec results in sequence) + +**Changes to `hooks/use-translate.ts`:** +- Accept a `mode` parameter or create a new `useBrowserTranslate` hook +- Browser mode calls `generate()` directly (no fetch) + +### Task 5: Deployment + +- Merge to main, push, Vercel auto-deploys +- Set env vars: `HF_TOKEN`, `NEXT_PUBLIC_SANDBOX_ENABLED=true` +- Configure domain: nl2shell.com CNAME → cname.vercel-dns.com (Cloudflare, DNS-only) +- Verify SSL, headers, all modes working + +### Task 6: Cloud Mode UX Improvements (if time permits) + +- Better loading states for 503 (Space sleeping) +- Auto-retry on 503 with "Model is waking up..." 
message +- Already partially handled in `route.ts` error responses + +--- + +## Source Code Structure Changes + +### New files +| File | Purpose | +|------|---------| +| `lib/browser-engine.ts` | Transformers.js pipeline, model loading, text generation | +| `lib/webcontainer-sandbox.ts` | WebContainer boot, exec, teardown singleton | +| `hooks/use-webcontainer.ts` | React hook for sandbox lifecycle + history | + +### Modified files +| File | Changes | +|------|---------| +| `lib/clean-response.ts` | Add `` block stripping | +| `components/shell-session.tsx` | Mode selector, browser inference path, WebContainer sandbox | +| `components/execution-output.tsx` | Optional `auditId`, history rendering | +| `hooks/use-translate.ts` | Support browser mode (or new hook) | +| `next.config.ts` | COOP/COEP headers, CSP updates | +| `package.json` | Add `@huggingface/transformers`, `@webcontainer/api` | +| `types/sandbox.d.ts` | Add `WebContainerExecResult` or make `auditId` optional | + +### Unchanged (no modifications needed) +| File | Reason | +|------|--------| +| `app/api/translate/route.ts` | Cloud mode stays as-is | +| `app/api/execute/route.ts` | Docker relay stays for future self-hosted use | +| `relay/*` | Docker relay untouched | +| `lib/safety.ts` | Already handles both modes' output | +| `app/layout.tsx` | Vercel Analytics stays; COOP/COEP handled in next.config.ts | + +--- + +## Interface Changes + +### `ExecutionResult` type update +```typescript +// types/sandbox.d.ts +export interface ExecutionResult { + stdout: string; + stderr: string; + exitCode: number; + durationMs: number; + auditId?: string; // Optional — absent for WebContainer exec +} +``` + +### New `InferenceMode` type +```typescript +type InferenceMode = "cloud" | "browser" | "auto"; +``` + +### `useTranslate` hook extension +```typescript +// Option A: mode parameter +export function useTranslate(mode?: InferenceMode) + +// Option B: separate hook for browser (cleaner separation) +export function 
useBrowserTranslate() +``` + +Decision: **Option A** — single hook with mode parameter, keeps `shell-session.tsx` simpler. + +--- + +## Verification Approach + +After each implementation step: +```bash +npx tsc --noEmit # Zero type errors +npm run lint # Zero lint errors +npm run build # Clean production build +``` + +Manual testing (dev server): +1. Cloud mode: query → shell command (existing flow, regression check) +2. Browser mode: model downloads → query → shell command (no `<think>` in output) +3. Sandbox: boot WebContainer → execute → see output → filesystem persists +4. Mode selector: all three modes switch correctly +5. Danger warning: `rm -rf /` shows red badge in all modes + +Production verification: +```bash +curl -I https://nl2shell.com # 200 OK, correct headers +``` + +--- + +## Risk Assessment + +| Risk | Impact | Mitigation | +|------|--------|------------| +| COOP/COEP breaks Vercel Analytics | Medium | Test `credentialless` first; fallback to route-specific headers | +| WebContainer boot slow (>5s) | Low | Lazy boot on first "Run", show spinner | +| Transformers.js model too large | Medium | Start with 0.5B model (~300MB ONNX); upgrade to fine-tuned later | +| CSP blocks WebContainer origins | Medium | Add required origins to `connect-src` incrementally | +| HF Space sleeping on first visit | Low | Already handled with 503 retry messages in translate API | + +--- + +## Dependencies to Install + +```bash +npm install @huggingface/transformers @webcontainer/api +``` + +- `@huggingface/transformers` — Transformers.js v3, ONNX Runtime Web, WebGPU inference +- `@webcontainer/api` — StackBlitz WebContainers for in-browser command execution diff --git a/SPEC.md b/SPEC.md deleted file mode 100644 index cc75f1f..0000000 --- a/SPEC.md +++ /dev/null @@ -1,494 +0,0 @@ -# SPEC: NL2Shell Web — Browser Inference Fix, Sandbox, and Deployment - -**Project:** nl2shell-web -**Branch:** `feat/webllm-browser-inference` -**Repository:** github.com/nl2shell/nl2shell-web 
-**Date:** 2026-04-04 -**Goal:** Ship a working NL2Shell web app with browser-side inference, persistent sandbox execution, and production deployment at nl2shell.com - ---- - -## Context - -NL2Shell translates natural language to shell commands using a fine-tuned Qwen3.5-0.8B model. The web app has two inference modes: - -- **Cloud:** Calls a HuggingFace Gradio Space (`AryaYT/nl2shell-demo`) via `/api/translate` -- **Browser:** Runs a Qwen3.5 ONNX model locally via WebGPU using `@huggingface/transformers` - -The Browser mode was recently added but has bugs. The sandbox (command execution) uses a Docker relay server that isn't deployed yet. The app needs to be deployed to nl2shell.com via Vercel. - ---- - -## Current Issues - -1. **Browser inference returns empty response** — The `lib/browser-engine.ts` Transformers.js pipeline output parsing is incorrect. The model generates text but `cleanResponse()` strips everything, leaving empty output. The `` block stripping and the output format from `pipeline("text-generation")` need debugging. - -2. **No working sandbox** — The relay server requires Docker and a separate deployment (Railway). For the demo use case, we need a lightweight sandbox where users can execute generated commands and see results, with filesystem persistence between commands. - -3. **Not deployed** — The app runs locally but isn't deployed to nl2shell.com yet. - ---- - -## Task Breakdown - -### Task 1: Fix Browser Inference Output Parsing - -**Priority:** Critical -**Files:** `lib/browser-engine.ts`, `lib/clean-response.ts` -**Estimated effort:** Small - -**Problem:** The `generate()` function in `lib/browser-engine.ts` calls `pipelineInstance(messages, ...)` but the return format from Transformers.js `text-generation` pipeline varies. The current code tries: -```typescript -const raw = result[0]?.generated_text?.at(-1)?.content ?? result[0]?.generated_text ?? ""; -``` -This may not match the actual output shape. 
Additionally, `cleanResponse()` may be too aggressive — the `` block regex could strip the entire output if the model puts the command inside or after a think block. - -**Steps:** -1. Read `lib/browser-engine.ts` fully to understand the current `generate()` function -2. Add `console.log(JSON.stringify(result, null, 2))` temporarily inside `generate()` to see the raw pipeline output shape -3. Run the dev server (`npm run dev`) and test with Browser mode — observe the console output -4. Fix the output extraction based on actual shape. The Transformers.js `text-generation` pipeline with chat messages typically returns: - ```javascript - [{ generated_text: [ - { role: "system", content: "..." }, - { role: "user", content: "..." }, - { role: "assistant", content: "THE COMMAND HERE" } - ]}] - ``` - So the extraction should be: - ```typescript - const messages = result[0]?.generated_text; - const lastMsg = Array.isArray(messages) ? messages.at(-1) : null; - const raw = typeof lastMsg === "object" && lastMsg?.content - ? lastMsg.content - : typeof messages === "string" - ? messages - : ""; - ``` -5. Update `lib/clean-response.ts` — ensure the `` regex handles edge cases: - - Empty think block: `\n` followed by the command - - Think block with content followed by command on next line - - No think block at all (just the command) -6. Remove the `console.log` debug line -7. Test with multiple queries: "list files", "create a branch called feature-auth", "find python files modified today" - -**Verification:** -```bash -npx tsc --noEmit && npm run lint && npm run build -``` -Then manually test in Chrome with Browser mode — each query should return a clean shell command. 
- -**Acceptance criteria:** -- Browser mode returns valid shell commands (not empty, not `` blocks) -- Cloud mode still works unchanged -- All build checks pass - ---- - -### Task 2: Build In-Browser Sandbox with WebContainers - -**Priority:** High -**Files to create:** -- `lib/webcontainer-sandbox.ts` — WebContainer boot, exec, file ops -- `hooks/use-webcontainer.ts` — React hook for sandbox lifecycle -- `components/sandbox-terminal.tsx` — Terminal-like output display - -**Files to modify:** -- `components/shell-session.tsx` — Wire sandbox execution -- `components/execution-output.tsx` — Update to show persistent session -- `package.json` — Add `@webcontainer/api` - -**Context:** Instead of requiring a Docker relay server, use WebContainers (StackBlitz) to run commands entirely in the browser. This eliminates infrastructure costs and works offline. The sandbox persists state between commands — users can create files, then list them, then modify them. - -**Steps:** - -#### 2a. Install WebContainers -```bash -cd /Users/aryateja/Projects/nl2shell-org/nl2shell-web -npm install @webcontainer/api -``` - -#### 2b. 
Create `lib/webcontainer-sandbox.ts` -```typescript -"use client"; - -import { WebContainer } from "@webcontainer/api"; - -let container: WebContainer | null = null; -let bootPromise: Promise | null = null; - -export async function bootSandbox(): Promise { - if (container) return; - if (bootPromise) { - await bootPromise; - return; - } - bootPromise = WebContainer.boot(); - try { - container = await bootPromise; - // Seed with a basic workspace - await container.mount({ - workspace: { - directory: {}, - }, - }); - } catch (err) { - bootPromise = null; - throw err; - } -} - -export function isSandboxReady(): boolean { - return container !== null; -} - -export interface ExecResult { - stdout: string; - stderr: string; - exitCode: number; - durationMs: number; -} - -export async function execCommand(command: string): Promise { - if (!container) throw new Error("Sandbox not booted"); - - const start = performance.now(); - const process = await container.spawn("bash", ["-c", command], { - cwd: "/workspace", - }); - - let stdout = ""; - let stderr = ""; - - process.output.pipeTo( - new WritableStream({ - write(chunk) { - stdout += chunk; - }, - }) - ); - - // WebContainers merge stderr into output in some cases - const exitCode = await process.exit; - const durationMs = Math.round(performance.now() - start); - - return { stdout: stdout.trim(), stderr: stderr.trim(), exitCode, durationMs }; -} - -export async function teardownSandbox(): Promise { - if (container) { - container.teardown(); - container = null; - bootPromise = null; - } -} -``` - -#### 2c. 
Create `hooks/use-webcontainer.ts` -```typescript -"use client"; - -import { useCallback, useState } from "react"; - -interface SandboxState { - isReady: boolean; - isBooting: boolean; - isExecuting: boolean; - output: { stdout: string; stderr: string; exitCode: number } | null; - error: string | null; - history: Array<{ - command: string; - stdout: string; - exitCode: number; - timestamp: number; - }>; -} - -export function useWebContainer() { - const [state, setState] = useState({ - isReady: false, - isBooting: false, - isExecuting: false, - output: null, - error: null, - history: [], - }); - - const sandboxRef = useRef(null); - - const getSandbox = useCallback(async () => { - if (!sandboxRef.current) { - sandboxRef.current = await import("@/lib/webcontainer-sandbox"); - } - return sandboxRef.current; - }, []); - - const boot = useCallback(async () => { - setState((s) => ({ ...s, isBooting: true, error: null })); - try { - const sb = await getSandbox(); - await sb.bootSandbox(); - setState((s) => ({ ...s, isReady: true, isBooting: false })); - } catch (err) { - setState((s) => ({ - ...s, - isBooting: false, - error: err instanceof Error ? err.message : "Failed to boot sandbox", - })); - } - }, [getSandbox]); - - const execute = useCallback( - async (command: string) => { - setState((s) => ({ ...s, isExecuting: true, output: null, error: null })); - try { - const sb = await getSandbox(); - if (!sb.isSandboxReady()) await sb.bootSandbox(); - const result = await sb.execCommand(command); - setState((s) => ({ - ...s, - isExecuting: false, - output: result, - history: [ - ...s.history, - { - command, - stdout: result.stdout, - exitCode: result.exitCode, - timestamp: Date.now(), - }, - ], - })); - } catch (err) { - setState((s) => ({ - ...s, - isExecuting: false, - error: err instanceof Error ? err.message : "Execution failed", - })); - } - }, - [getSandbox] - ); - - return { ...state, boot, execute }; -} -``` - -#### 2d. 
Update `components/shell-session.tsx` -- Replace `useSandbox()` with `useWebContainer()` (or make it a fallback) -- Auto-boot sandbox when user clicks "Run" for the first time -- Show sandbox history (all commands + outputs in sequence) -- The `NEXT_PUBLIC_SANDBOX_ENABLED` env var should default to `true` when using WebContainers (no relay needed) - -#### 2e. Update `next.config.ts` -Add required headers for WebContainers (SharedArrayBuffer): -```typescript -{ - key: "Cross-Origin-Embedder-Policy", - value: "require-corp", -}, -{ - key: "Cross-Origin-Opener-Policy", - value: "same-origin", -}, -``` -**IMPORTANT:** These headers may break Vercel Analytics and other third-party scripts. Test carefully. If they break, add them only to specific routes or make sandbox a separate page. - -#### 2f. Test the sandbox flow -1. User types "create 5 python files named app.py, utils.py, config.py, test.py, main.py" -2. Model generates: `touch app.py utils.py config.py test.py main.py` -3. User clicks "Run" — sandbox boots, executes, shows empty output (success) -4. User types "list all files sorted by size" -5. Model generates: `ls -lS` -6. User clicks "Run" — sandbox executes, shows the 5 files -7. Files persist because WebContainer is still alive - -**Verification:** -```bash -npx tsc --noEmit && npm run lint && npm run build -``` - -**Acceptance criteria:** -- Sandbox boots in <3 seconds -- Commands execute and show stdout/stderr -- Filesystem persists between commands -- History shows all previous commands + outputs -- All build checks pass - ---- - -### Task 3: Update COOP/COEP Headers Strategy - -**Priority:** Medium -**Files:** `next.config.ts` - -**Problem:** WebContainers require `Cross-Origin-Embedder-Policy: require-corp` and `Cross-Origin-Opener-Policy: same-origin`. These headers may break third-party scripts (Vercel Analytics, Supabase, Gradio client). - -**Steps:** -1. Test if adding COOP/COEP headers to ALL routes breaks Vercel Analytics -2. 
If it does, create a separate route `/sandbox` that has the headers, and keep the main page without them -3. Alternatively, use `credentialless` instead of `require-corp` for COEP (less restrictive) -4. If WebContainers work without COOP/COEP in Chrome (they sometimes do), skip the headers entirely - -**Verification:** Load the page, check that Vercel Analytics loads, and that WebContainers boot. - ---- - -### Task 4: Deploy to Vercel (nl2shell.com) - -**Priority:** High -**Files:** None (git + Vercel CLI operations) - -**Steps:** - -#### 4a. Merge feature branch to main -```bash -cd /Users/aryateja/Projects/nl2shell-org/nl2shell-web -git add -A -git commit -m "feat: add browser inference (Transformers.js) + WebContainer sandbox" -git push origin feat/webllm-browser-inference -# Create PR via GitHub CLI -gh pr create --title "feat: browser inference + sandbox" --body "..." -gh pr merge --squash -``` - -#### 4b. Verify Vercel auto-deployment -The `.vercel/project.json` links to Vercel project `prj_0mLK6SAEeGdDgSk1zPMhCXguX3RP`. Pushing to main should trigger auto-deploy. - -Check deployment status: -```bash -npx vercel ls -``` - -#### 4c. Set environment variables in Vercel -Go to Vercel dashboard or use CLI: -```bash -npx vercel env add HF_TOKEN production -npx vercel env add NEXT_PUBLIC_SANDBOX_ENABLED production # Set to "true" -``` - -#### 4d. Configure domain (nl2shell.com) -In Vercel dashboard: -1. Go to project settings > Domains -2. Add `nl2shell.com` and `www.nl2shell.com` -3. Vercel will show required DNS records - -In Cloudflare dashboard for nl2shell.com: -1. Add CNAME record: `@` -> `cname.vercel-dns.com` (DNS only, NOT proxied) -2. Add CNAME record: `www` -> `cname.vercel-dns.com` (DNS only, NOT proxied) -3. Wait for DNS propagation (usually <5 minutes) - -#### 4e. Verify production -```bash -curl -I https://nl2shell.com -# Should return 200 OK with correct headers -``` - -Open https://nl2shell.com in browser: -1. 
Cloud mode works (generates commands) -2. Browser mode loads model and generates commands -3. Sandbox executes commands (if WebContainers work with COOP/COEP) - -**Acceptance criteria:** -- https://nl2shell.com loads and shows the NL2Shell interface -- Cloud mode generates shell commands -- Browser mode loads the ONNX model and generates commands -- SSL certificate is valid -- All security headers present - ---- - -### Task 5: Fix Cloud Mode Performance - -**Priority:** Medium -**Files:** `app/api/translate/route.ts` - -**Problem:** The HuggingFace Gradio Space (`AryaYT/nl2shell-demo`) runs on free CPU tier and takes 30-40 seconds per request. It also sleeps after inactivity. - -**Steps:** -1. Check if the Gradio space is awake: `curl https://huggingface.co/spaces/AryaYT/nl2shell-demo` -2. If it's sleeping, consider upgrading to a paid GPU tier or using a different backend -3. For now, improve the UX by showing better loading states: - - Show "Model is waking up..." when 503 is returned - - Show estimated wait time - - Auto-retry after 5 seconds on 503 -4. Consider adding a FastAPI backend as an alternative to Gradio (faster cold starts, deployable on Vercel) - -**Verification:** Cloud mode should respond in <15 seconds for warm requests. - ---- - -### Task 6: Convert Fine-Tuned Model to ONNX (Follow-up) - -**Priority:** Low (post-launch) -**Files:** New Python project or script - -**Context:** The current browser mode uses the BASE Qwen3.5-0.8B-ONNX model (from onnx-community), not the fine-tuned NL2Shell model. The fine-tuned model (`AryaYT/nl2shell-0.8b`) produces much better results for shell commands. - -**Blocker:** ONNX export requires `transformers >= 5.x` (git main) + `optimum` from git main. These have dependency conflicts. The Transformers.js converter script (`scripts/convert.py` in the transformers.js repo) may handle this better. - -**Steps:** -1. Clone the Transformers.js repo: `git clone https://github.com/huggingface/transformers.js` -2. 
Use their conversion script: - ```bash - python scripts/convert.py --model_id AryaYT/nl2shell-0.8b --quantize --task text-generation - ``` -3. If conversion succeeds, upload to HuggingFace as `AryaYT/nl2shell-0.8b-ONNX` -4. Update `lib/browser-engine.ts` to point to the fine-tuned ONNX model -5. Test quality: the fine-tuned model should output clean commands without `` blocks - -**Alternative:** If ONNX conversion fails for Qwen3.5 architecture, wait for MLC-LLM to add official Qwen3.5 support (tracked at mlc-ai/web-llm#778). - ---- - -## Architecture Diagram - -``` - nl2shell.com (Vercel) - | - +--------------+--------------+ - | | | - [Cloud Mode] [Browser Mode] [Sandbox] - | | | - /api/translate Transformers.js WebContainers - | (WebGPU) (in-browser) - | | | - HuggingFace ONNX Model bash, node - Gradio Space (IndexedDB filesystem - (Qwen3.5) cached) persists -``` - -## File Change Summary - -| File | Action | Task | -|------|--------|------| -| `lib/browser-engine.ts` | Modify | Task 1 | -| `lib/clean-response.ts` | Modify | Task 1 | -| `lib/webcontainer-sandbox.ts` | Create | Task 2 | -| `hooks/use-webcontainer.ts` | Create | Task 2 | -| `components/shell-session.tsx` | Modify | Task 2 | -| `components/execution-output.tsx` | Modify | Task 2 | -| `next.config.ts` | Modify | Task 3 | -| `package.json` | Modify | Task 2 | - -## Build Verification (Run After Every Task) - -```bash -cd /Users/aryateja/Projects/nl2shell-org/nl2shell-web -npx tsc --noEmit # Zero type errors -npm run lint # Zero lint errors -npm run build # Clean production build -``` - -## Testing Checklist - -- [ ] Cloud mode: type query, get shell command, <15s response -- [ ] Browser mode: load model (~400MB), type query, get shell command -- [ ] Browser mode: no `` blocks in output -- [ ] Sandbox: boot WebContainer, execute command, see output -- [ ] Sandbox: create files, then list them (persistence between commands) -- [ ] Sandbox: history shows all previous commands -- [ ] Mode selector: 
Cloud/Browser/Auto all work correctly -- [ ] Danger warning: `rm -rf /` shows red warning badge -- [ ] Mobile: Cloud mode works, Browser/Sandbox disabled gracefully -- [ ] Production: nl2shell.com loads, SSL valid, all modes work diff --git a/components/execution-output.tsx b/components/execution-output.tsx index d245776..80fcfe5 100644 --- a/components/execution-output.tsx +++ b/components/execution-output.tsx @@ -2,12 +2,24 @@ import type { ExecutionResult } from "@/types/sandbox"; +interface HistoryEntry { + command: string; + stdout: string; + exitCode: number; + timestamp: number; +} + interface ExecutionOutputProps { result: ExecutionResult; command: string; + history?: HistoryEntry[]; } -export function ExecutionOutput({ result, command }: ExecutionOutputProps) { +export function ExecutionOutput({ + result, + command, + history, +}: ExecutionOutputProps) { const hasStdout = result.stdout.trim().length > 0; const hasStderr = result.stderr.trim().length > 0; const isSuccess = result.exitCode === 0; @@ -39,8 +51,26 @@ export function ExecutionOutput({ result, command }: ExecutionOutputProps) { {/* Output */} -
- {/* Command echo */} +
+ {/* Previous commands (history) */} + {history && history.length > 0 && ( +
+ {history.map((entry) => ( +
+
+ $ {entry.command} +
+ {entry.stdout.trim() && ( +
+                      {entry.stdout}
+                    
+ )} +
+ ))} +
+ )} + + {/* Current command echo */}
$ {command}
@@ -69,9 +99,11 @@ export function ExecutionOutput({ result, command }: ExecutionOutputProps) {
{/* Audit trail reference */} -

- audit: {result.auditId} -

+ {result.auditId && ( +

+ audit: {result.auditId} +

+ )}
); } diff --git a/components/inference-mode-selector.tsx b/components/inference-mode-selector.tsx deleted file mode 100644 index 3c6cb76..0000000 --- a/components/inference-mode-selector.tsx +++ /dev/null @@ -1,72 +0,0 @@ -"use client"; - -import { Cloud, Cpu, Zap } from "lucide-react"; -import { cn } from "@/lib/utils"; -import type { InferenceMode } from "@/hooks/use-inference"; -import type { ModelStatus } from "@/hooks/use-local-inference"; - -interface InferenceModeSelectorProps { - mode: InferenceMode; - onModeChange: (mode: InferenceMode) => void; - isWebGPUAvailable: boolean; - modelStatus: ModelStatus; -} - -const modes: { - value: InferenceMode; - label: string; - icon: typeof Cloud; -}[] = [ - { value: "cloud", label: "Cloud", icon: Cloud }, - { value: "browser", label: "Browser", icon: Cpu }, - { value: "auto", label: "Auto", icon: Zap }, -]; - -export function InferenceModeSelector({ - mode, - onModeChange, - isWebGPUAvailable, - modelStatus, -}: InferenceModeSelectorProps) { - return ( -
- {modes.map(({ value, label, icon: Icon }) => { - const isActive = mode === value; - const isDisabled = - (value === "browser" || value === "auto") && !isWebGPUAvailable; - - return ( - - ); - })} -
- ); -} diff --git a/components/model-loader.tsx b/components/model-loader.tsx deleted file mode 100644 index ea25f94..0000000 --- a/components/model-loader.tsx +++ /dev/null @@ -1,47 +0,0 @@ -"use client"; - -import { cn } from "@/lib/utils"; -import { motion } from "motion/react"; - -interface ModelLoaderProps { - progress: number; - progressText: string; - className?: string; -} - -export function ModelLoader({ - progress, - progressText, - className, -}: ModelLoaderProps) { - return ( -
- {/* Progress bar */} -
- -
- - {/* Status text */} -
- {progressText} - {progress}% -
- - {progress < 10 && ( -

- ~450MB download, cached for future visits -

- )} -
- ); -} diff --git a/components/shell-session.tsx b/components/shell-session.tsx index 5337c37..7095990 100644 --- a/components/shell-session.tsx +++ b/components/shell-session.tsx @@ -1,8 +1,7 @@ "use client"; import { useCallback, useState } from "react"; -import dynamic from "next/dynamic"; -import { Loader2, Terminal } from "lucide-react"; +import { Cloud, Loader2, Monitor, Terminal, Zap } from "lucide-react"; import { Card, CardContent } from "@/components/ui/card"; import { Button } from "@/components/ui/button"; import { Textarea } from "@/components/ui/textarea"; @@ -11,17 +10,16 @@ import { CommandOutput } from "@/components/command-output"; import { ExecutionOutput } from "@/components/execution-output"; import { ExamplePrompts } from "@/components/example-prompts"; import { AILoader } from "@/components/ai-loader"; -import { ModelLoader } from "@/components/model-loader"; -import { useInference } from "@/hooks/use-inference"; -import { useSandbox } from "@/hooks/use-sandbox"; +import { useTranslate, type InferenceMode } from "@/hooks/use-translate"; +import { useWebContainer } from "@/hooks/use-webcontainer"; -// Dynamic import with ssr: false to avoid hydration mismatch from WebGPU detection -const InferenceModeSelector = dynamic( - () => import("@/components/inference-mode-selector").then((m) => m.InferenceModeSelector), - { ssr: false }, -); +const SANDBOX_ENABLED = process.env.NEXT_PUBLIC_SANDBOX_ENABLED !== "false"; -const SANDBOX_ENABLED = process.env.NEXT_PUBLIC_SANDBOX_ENABLED === "true"; +const MODES: { value: InferenceMode; label: string; icon: typeof Cloud }[] = [ + { value: "cloud", label: "Cloud", icon: Cloud }, + { value: "browser", label: "Browser", icon: Monitor }, + { value: "auto", label: "Auto", icon: Zap }, +]; interface HistoryEntry { query: string; @@ -34,21 +32,10 @@ export function ShellSession() { const [input, setInput] = useState(""); const [lastQuery, setLastQuery] = useState(""); const [history, setHistory] = useState([]); - 
const { - result, - isLoading, - error, - translate, - reset, - mode, - setMode, - isWebGPUAvailable, - modelStatus, - loadProgress, - loadProgressText, - inferenceSource, - } = useInference(); - const sandbox = useSandbox(); + const [mode, setMode] = useState("cloud"); + const { result, isLoading, error, browserStatus, translate, reset } = + useTranslate(mode); + const sandbox = useWebContainer(); const handleSubmit = useCallback(() => { const trimmed = input.trim(); @@ -64,7 +51,7 @@ export function ShellSession() { setLastQuery(text.trim()); translate(text); }, - [translate] + [translate], ); const handleExampleSelect = useCallback( @@ -73,15 +60,22 @@ export function ShellSession() { setLastQuery(example.trim()); translate(example); }, - [translate] + [translate], ); const handleClear = useCallback(() => { if (result && lastQuery) { - setHistory((prev) => [ - { query: lastQuery, command: result.command, meta: result.meta, timestamp: Date.now() }, - ...prev, - ].slice(0, 20)); + setHistory((prev) => + [ + { + query: lastQuery, + command: result.command, + meta: result.meta, + timestamp: Date.now(), + }, + ...prev, + ].slice(0, 20), + ); } setInput(""); setLastQuery(""); @@ -95,18 +89,46 @@ export function ShellSession() { } }; + const showBrowserProgress = + isLoading && + mode !== "cloud" && + (browserStatus.stage === "downloading" || + browserStatus.stage === "loading"); + return (
{/* Input card */}
- +
+ + + {/* Mode selector */} +
+ {MODES.map(({ value, label, icon: Icon }) => ( + + ))} +
+
+