diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 368067f5e..b9aa40c1a 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -163,6 +163,7 @@ jobs:
APPKIT_E2E_TEST: 'true'
DATABRICKS_WAREHOUSE_ID: e2e-mock
DATABRICKS_WORKSPACE_ID: e2e-mock
+ DATABRICKS_SERVING_ENDPOINT_NAME: e2e-mock
pr-template-artifact:
name: PR Template Artifact
diff --git a/apps/dev-playground/client/package-lock.json b/apps/dev-playground/client/package-lock.json
index 80bd5ad40..b24fd982d 100644
--- a/apps/dev-playground/client/package-lock.json
+++ b/apps/dev-playground/client/package-lock.json
@@ -18,6 +18,8 @@
"@tanstack/router-plugin": "1.133.22",
"class-variance-authority": "0.7.1",
"clsx": "2.1.1",
+ "html2canvas": "1.4.1",
+ "html2canvas-pro": "2.0.2",
"lucide-react": "0.546.0",
"react": "19.2.0",
"react-dom": "19.2.0",
@@ -3559,6 +3561,15 @@
"dev": true,
"license": "MIT"
},
+ "node_modules/base64-arraybuffer": {
+ "version": "1.0.2",
+ "resolved": "https://registry.npmjs.org/base64-arraybuffer/-/base64-arraybuffer-1.0.2.tgz",
+ "integrity": "sha512-I3yl4r9QB5ZRY3XuJVEPfc2XhZO6YweFPI+UovAzn+8/hb3oJ6lnysaFcjVpkCPfVWFUDvoZ8kmVDP7WyRtYtQ==",
+ "license": "MIT",
+ "engines": {
+ "node": ">= 0.6.0"
+ }
+ },
"node_modules/baseline-browser-mapping": {
"version": "2.8.16",
"resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.8.16.tgz",
@@ -3916,6 +3927,15 @@
"node": ">= 8"
}
},
+ "node_modules/css-line-break": {
+ "version": "2.1.0",
+ "resolved": "https://registry.npmjs.org/css-line-break/-/css-line-break-2.1.0.tgz",
+ "integrity": "sha512-FHcKFCZcAha3LwfVBhCQbW2nCNbkZXn7KVUJcsT5/P8YmfsVja0FMPJr0B903j/E69HUphKiV9iQArX8SDYA4w==",
+ "license": "MIT",
+ "dependencies": {
+ "utrie": "^1.0.2"
+ }
+ },
"node_modules/csstype": {
"version": "3.1.3",
"resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz",
@@ -4711,6 +4731,32 @@
"url": "https://github.com/sponsors/wooorm"
}
},
+ "node_modules/html2canvas": {
+ "version": "1.4.1",
+ "resolved": "https://registry.npmjs.org/html2canvas/-/html2canvas-1.4.1.tgz",
+ "integrity": "sha512-fPU6BHNpsyIhr8yyMpTLLxAbkaK8ArIBcmZIRiBLiDhjeqvXolaEmDGmELFuX9I4xDcaKKcJl+TKZLqruBbmWA==",
+ "license": "MIT",
+ "dependencies": {
+ "css-line-break": "^2.1.0",
+ "text-segmentation": "^1.0.3"
+ },
+ "engines": {
+ "node": ">=8.0.0"
+ }
+ },
+ "node_modules/html2canvas-pro": {
+ "version": "2.0.2",
+ "resolved": "https://registry.npmjs.org/html2canvas-pro/-/html2canvas-pro-2.0.2.tgz",
+ "integrity": "sha512-9G/t0XgCZWonLwL0JwI7su6NdbOPUY7Ur4Ihpp8+XMaW9ibA2nDXF181Jr6tm94k8lX6sthpaXB3XqEnsMd5Cw==",
+ "license": "MIT",
+ "dependencies": {
+ "css-line-break": "^2.1.0",
+ "text-segmentation": "^1.0.3"
+ },
+ "engines": {
+ "node": ">=16.0.0"
+ }
+ },
"node_modules/ignore": {
"version": "5.3.2",
"resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz",
@@ -6215,6 +6261,15 @@
"url": "https://opencollective.com/webpack"
}
},
+ "node_modules/text-segmentation": {
+ "version": "1.0.3",
+ "resolved": "https://registry.npmjs.org/text-segmentation/-/text-segmentation-1.0.3.tgz",
+ "integrity": "sha512-iOiPUo/BGnZ6+54OsWxZidGCsdU8YbE4PSpdPinp7DeMtUJNJBoJ/ouUSTJjHkh1KntHaltHl/gDs2FC4i5+Nw==",
+ "license": "MIT",
+ "dependencies": {
+ "utrie": "^1.0.2"
+ }
+ },
"node_modules/tiny-invariant": {
"version": "1.3.3",
"resolved": "https://registry.npmjs.org/tiny-invariant/-/tiny-invariant-1.3.3.tgz",
@@ -6592,6 +6647,15 @@
"react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0"
}
},
+ "node_modules/utrie": {
+ "version": "1.0.2",
+ "resolved": "https://registry.npmjs.org/utrie/-/utrie-1.0.2.tgz",
+ "integrity": "sha512-1MLa5ouZiOmQzUbjbu9VmjLzn1QLXBhwpUa7kdLUQK+KQ5KA9I1vk5U4YHe/X2Ch7PYnJfWuWT+VbuxbGwljhw==",
+ "license": "MIT",
+ "dependencies": {
+ "base64-arraybuffer": "^1.0.2"
+ }
+ },
"node_modules/vfile": {
"version": "6.0.3",
"resolved": "https://registry.npmjs.org/vfile/-/vfile-6.0.3.tgz",
diff --git a/apps/dev-playground/client/package.json b/apps/dev-playground/client/package.json
index 9bf90c3fd..e69a49a3c 100644
--- a/apps/dev-playground/client/package.json
+++ b/apps/dev-playground/client/package.json
@@ -20,6 +20,8 @@
"@tanstack/router-plugin": "1.133.22",
"class-variance-authority": "0.7.1",
"clsx": "2.1.1",
+ "html2canvas": "1.4.1",
+ "html2canvas-pro": "2.0.2",
"lucide-react": "0.546.0",
"react": "19.2.0",
"react-dom": "19.2.0",
@@ -30,6 +32,7 @@
},
"devDependencies": {
"@eslint/js": "9.36.0",
+ "@tailwindcss/postcss": "4.1.17",
"@tanstack/router-cli": "1.133.20",
"@types/node": "24.6.0",
"@types/react": "19.2.2",
@@ -43,7 +46,6 @@
"postcss": "8.5.6",
"shiki": "3.15.0",
"tailwindcss": "4.1.17",
- "@tailwindcss/postcss": "4.1.17",
"typescript": "5.9.3",
"typescript-eslint": "8.45.0",
"vite": "npm:rolldown-vite@7.1.14"
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/components/action-toast.tsx b/apps/dev-playground/client/src/features/smart-dashboard/components/action-toast.tsx
new file mode 100644
index 000000000..311ecdc23
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/components/action-toast.tsx
@@ -0,0 +1,48 @@
+import { CheckCircle2Icon } from "lucide-react";
+import { useEffect, useState } from "react";
+
+interface ActionToastProps {
+ /**
+ * Latest dispatcher-surfaced action summary, or null when there is nothing to show.
+ * NOTE(review): ActionToast's effect is keyed on [message, durationMs], so an identical
+ * consecutive string does NOT replay the toast; callers needing a replay must vary the value.
+ */
+ message: string | null;
+ durationMs?: number; // how long the toast stays visible, in ms; ActionToast defaults it to 2800
+}
+
+/**
+ * Non-intrusive bottom-left toast that confirms every agent-driven UI
+ * action. Silent success was the worst failure mode before: an action
+ * silently not-applied looked identical to one that worked but didn't
+ * show its effect. Renders nothing until a non-empty `message` arrives.
+ */
+export function ActionToast({ message, durationMs = 2800 }: ActionToastProps) {
+ const [visible, setVisible] = useState<{ key: number; text: string } | null>(
+ null,
+ );
+
+ useEffect(() => {
+ if (!message) return; // NOTE(review): if message is cleared while a toast is showing, the cleanup below has already cancelled the hide timer and nothing hides the toast — confirm callers never null it early
+ const key = Date.now(); // timestamp identifies this showing so a stale timer cannot hide a newer toast
+ setVisible({ key, text: message });
+ const t = setTimeout(() => {
+ setVisible((v) => (v?.key === key ? null : v)); // hide only if this toast is still the one on screen
+ }, durationMs);
+ return () => {
+ clearTimeout(t); // runs on unmount and before every re-run of the effect
+ };
+ }, [message, durationMs]);
+
+ if (!visible) return null;
+
+ return (
+
+
+ {visible.text}
+
+ );
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/components/actionable-card.tsx b/apps/dev-playground/client/src/features/smart-dashboard/components/actionable-card.tsx
new file mode 100644
index 000000000..db6a42f5f
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/components/actionable-card.tsx
@@ -0,0 +1,191 @@
+import {
+ AlertTriangleIcon,
+ ArrowRightIcon,
+ CalendarIcon,
+ CrosshairIcon,
+ DollarSignIcon,
+ HighlighterIcon,
+ LightbulbIcon,
+ MapPinIcon,
+ MessageSquareIcon,
+} from "lucide-react";
+import type { FeedAction } from "../lib/feed-actions";
+
+type Variant = "insight" | "anomaly";
+type Severity = "low" | "medium" | "high";
+
+interface ActionableCardProps {
+ variant: Variant;
+ severity?: Severity;
+ title: string;
+ description: string;
+ actions: FeedAction[];
+ /** Fired for non-ask actions. Route applies them to dashboard state. */
+ onAction: (action: FeedAction) => void;
+ /** Fired for `ask` actions. Route forwards the prompt to the chat drawer. */
+ onAsk: (prompt: string) => void;
+}
+
+// Backgrounds are written as arbitrary 8-digit hex (e.g. `bg-[#eff6ff80]`)
+// instead of Tailwind's `/N` alpha shorthand. Rationale: `bg-blue-50/50`
+// compiles in Tailwind v4 to a pair — an sRGB hex fallback and a
+// `@supports (color-mix)` override that re-mixes in oklab over the oklch
+// palette token. Browsers that support `color-mix` (recent Chrome/Arc) take
+// the oklab path; older embedded Chromiums (e.g. Cursor's built-in browser
+// at the time of writing) fall through to the sRGB hex. Because oklab and
+// sRGB interpolation produce visibly different tints — especially against
+// the dark `--card` token — the same card ends up looking different in each
+// browser. Pinning the colour to a literal hex (no `/N`, no @supports
+// override) keeps all browsers on the same sRGB path and therefore the same
+// visual result.
+const INSIGHT_STYLES = {
+ border: "border-blue-200 dark:border-blue-900",
+ bg: "bg-[#eff6ff80] dark:bg-[#1624564d]",
+ icon: "text-blue-500",
+};
+
+const ANOMALY_STYLES: Record<
+ Severity,
+ { border: string; bg: string; icon: string; badge: string }
+> = {
+ low: {
+ border: "border-yellow-200 dark:border-yellow-900",
+ bg: "bg-[#fefce880] dark:bg-[#4320044d]",
+ icon: "text-yellow-500",
+ badge:
+ "bg-yellow-100 text-yellow-700 dark:bg-yellow-900/50 dark:text-yellow-400",
+ },
+ medium: {
+ border: "border-orange-200 dark:border-orange-900",
+ bg: "bg-[#fff7ed80] dark:bg-[#4413064d]",
+ icon: "text-orange-500",
+ badge:
+ "bg-orange-100 text-orange-700 dark:bg-orange-900/50 dark:text-orange-400",
+ },
+ high: {
+ border: "border-red-200 dark:border-red-900",
+ bg: "bg-[#fef2f280] dark:bg-[#4608094d]",
+ icon: "text-red-500",
+ badge: "bg-red-100 text-red-700 dark:bg-red-900/50 dark:text-red-400",
+ },
+};
+
+function iconForAction(kind: FeedAction["kind"]): React.ReactNode {
+ const cls = "h-3 w-3";
+ switch (kind) {
+ case "filter_date":
+ return ;
+ case "filter_zip":
+ return ;
+ case "filter_fare":
+ return ;
+ case "highlight_period":
+ return ;
+ case "highlight_zone":
+ return ;
+ case "focus_chart":
+ return ;
+ case "ask":
+ return ;
+ }
+}
+
+/**
+ * Action chip for a single feed suggestion. The chip's visual weight depends
+ * on its kind: structural mutations (filter/highlight/focus) use the primary
+ * tint, `ask` uses a neutral outline so the user can tell "this opens the
+ * chat" from "this changes the dashboard" without reading the label.
+ */
+function ActionChip({
+ action,
+ onAction,
+ onAsk,
+}: {
+ action: FeedAction;
+ onAction: (a: FeedAction) => void;
+ onAsk: (prompt: string) => void;
+}) {
+ const isAsk = action.kind === "ask";
+ const isHighlight =
+ action.kind === "highlight_period" || action.kind === "highlight_zone";
+
+ return (
+ {
+ if (isAsk) onAsk(action.prompt);
+ else onAction(action);
+ }}
+ className={`inline-flex items-center gap-1 text-[11px] font-medium px-2 py-1 rounded-md transition-colors ${
+ isAsk
+ ? "border border-border bg-background text-foreground/80 hover:bg-muted hover:text-foreground"
+ : isHighlight
+ ? "bg-amber-100 text-amber-800 hover:bg-amber-200 dark:bg-amber-900/40 dark:text-amber-200 dark:hover:bg-amber-900/60"
+ : "bg-primary/10 text-primary hover:bg-primary/20"
+ }`}
+ >
+ {iconForAction(action.kind)}
+ {action.label}
+ {isAsk && }
+
+ );
+}
+
+export function ActionableCard({
+ variant,
+ severity,
+ title,
+ description,
+ actions,
+ onAction,
+ onAsk,
+}: ActionableCardProps) {
+ const isAnomaly = variant === "anomaly";
+ const styles = isAnomaly
+ ? ANOMALY_STYLES[severity ?? "low"]
+ : { ...INSIGHT_STYLES, badge: "" };
+
+ return (
+
+
+ {isAnomaly ? (
+
+ ) : (
+
+ )}
+
+
+
+ {title}
+
+ {isAnomaly && severity && (
+
+ {severity}
+
+ )}
+
+
+ {description}
+
+
+
+
+ {actions.length > 0 && (
+
+ {actions.map((action, i) => (
+
+ ))}
+
+ )}
+
+ );
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/components/active-filters.tsx b/apps/dev-playground/client/src/features/smart-dashboard/components/active-filters.tsx
new file mode 100644
index 000000000..f5fe96a2a
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/components/active-filters.tsx
@@ -0,0 +1,63 @@
+import { FilterIcon, XIcon } from "lucide-react";
+import type { DashboardFilters } from "../hooks/use-dashboard-data";
+
+interface ActiveFiltersProps {
+ filters: DashboardFilters;
+ onClear: (key: keyof DashboardFilters) => void;
+ onClearAll: () => void;
+}
+
+/**
+ * Renders one active filter as `"<label>: <value>"` for the filter chip row.
+ *
+ * Known filter keys are mapped to a human-readable label; any other key is
+ * shown verbatim so an unexpected filter is still visible to the user.
+ *
+ * @param key - Filter field name (e.g. `date_from`).
+ * @param value - Current filter value as a string.
+ * @returns The text shown inside the chip.
+ */
+function formatFilterEntry(key: string, value: string): string {
+  // `Record` needs both type arguments; a bare `Record` does not type-check.
+  const labels: Record<string, string> = {
+    date_from: "From",
+    date_to: "To",
+    pickup_zip: "Zone",
+    fare_min: "Min fare",
+    fare_max: "Max fare",
+  };
+  // Own-property check so keys such as "constructor" or "toString" fall back
+  // to the raw key instead of resolving to an inherited Object member.
+  const label = Object.prototype.hasOwnProperty.call(labels, key)
+    ? labels[key]
+    : undefined;
+  return `${label ?? key}: ${value}`;
+}
+
+export function ActiveFilters({
+ filters,
+ onClear,
+ onClearAll,
+}: ActiveFiltersProps) {
+ const entries = Object.entries(filters).filter(
+ ([, v]) => v !== undefined && v !== "",
+ );
+
+ if (entries.length === 0) return null;
+
+ return (
+
+
+
+ Active Filters:
+
+ {entries.map(([key, value]) => (
+
+ {formatFilterEntry(key, value ?? "")}
+ onClear(key as keyof DashboardFilters)}
+ className="hover:text-primary/70 transition-colors"
+ aria-label={`Remove ${key} filter`}
+ >
+
+
+
+ ))}
+
+ Clear all
+
+
+ );
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/components/agent-sidebar.tsx b/apps/dev-playground/client/src/features/smart-dashboard/components/agent-sidebar.tsx
new file mode 100644
index 000000000..0c14b501c
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/components/agent-sidebar.tsx
@@ -0,0 +1,266 @@
+import { BrainIcon, Loader2Icon, RefreshCwIcon } from "lucide-react";
+import { useCallback, useEffect, useMemo, useRef, useState } from "react";
+import type { Highlight } from "../hooks/use-action-dispatcher";
+import { useAgentStream } from "../hooks/use-agent-stream";
+import type { DashboardFilters, KPIData } from "../hooks/use-dashboard-data";
+import {
+ type FeedAction,
+ type FeedAnomaly,
+ type FeedInsight,
+ parseFeedAnomalies,
+ parseFeedInsights,
+} from "../lib/feed-actions";
+import { ActionableCard } from "./actionable-card";
+
+interface AgentSidebarProps {
+ kpis: KPIData | null;
+ kpisLoaded: boolean;
+ filters: DashboardFilters;
+ highlights: Highlight[];
+ /** Dispatches a structured action back to the dashboard without an LLM round-trip. */
+ onAction: (action: FeedAction) => void;
+ /** Fires when the user clicks an `ask` chip — routes to the main chat drawer. */
+ onAsk: (prompt: string) => void;
+}
+
+/**
+ * Flattens the current KPI snapshot, the active filters and the highlights
+ * into a single ". "-separated string that is embedded in the agent prompts.
+ *
+ * @param kpis - KPI values to report.
+ * @param filters - Dashboard filters; only non-empty string values are listed.
+ * @param highlights - Highlights to describe; the section is omitted when empty.
+ * @returns The summary text.
+ */
+function buildKPISummary(
+  kpis: KPIData,
+  filters: DashboardFilters,
+  highlights: Highlight[],
+): string {
+  const sentences = [
+    `Total trips: ${kpis.total_trips.toLocaleString()}`,
+    `Avg fare: $${kpis.avg_fare}`,
+    `Avg distance: ${kpis.avg_distance} mi`,
+    `Fare range: $${kpis.min_fare}–$${kpis.max_fare}`,
+    `Top pickup zone: ${kpis.top_pickup_zone} (${kpis.top_zone_trips.toLocaleString()} trips)`,
+  ];
+
+  // Collect "key=value" pairs for every filter that carries a non-empty string.
+  const filterPairs: string[] = [];
+  for (const [name, value] of Object.entries(filters)) {
+    if (typeof value === "string" && value) {
+      filterPairs.push(`${name}=${value}`);
+    }
+  }
+  sentences.push(
+    filterPairs.length > 0
+      ? `Active filters: ${filterPairs.join(", ")}`
+      : "Active filters: none (full 2016 dataset)",
+  );
+
+  if (highlights.length > 0) {
+    const described = highlights.map((h) => {
+      const labelPart = h.label ? ` (${h.label})` : "";
+      return `${h.start}→${h.end}${labelPart} [${h.color}]`;
+    });
+    sentences.push(`Highlights: ${described.join(", ")}`);
+  }
+
+  return sentences.join(". ");
+}
+
+/**
+ * Returns `dep`, but only after it has stayed unchanged for `delayMs`.
+ * Used so a burst of filter/highlight changes triggers a single agent
+ * re-run instead of one per change; every new `dep` restarts the wait.
+ *
+ * @param dep - The value to debounce.
+ * @param delayMs - Quiet period, in milliseconds, before `dep` is adopted.
+ * @returns The most recent value that survived the quiet period.
+ */
+function useDebouncedSignal(dep: string, delayMs: number): string {
+  const [settled, setSettled] = useState(dep);
+
+  useEffect(() => {
+    const timer = setTimeout(() => {
+      setSettled(dep);
+    }, delayMs);
+    // A newer value (or unmount) cancels the pending adoption.
+    return () => {
+      clearTimeout(timer);
+    };
+  }, [dep, delayMs]);
+
+  return settled;
+}
+
+const SUGGESTED_FOLLOWUPS = [
+ "Compare this slice to the prior month.",
+ "What ZIPs show the highest fare-per-mile?",
+ "Were there any days with abnormal trip counts?",
+];
+
+export function AgentSidebar({
+ kpis,
+ kpisLoaded,
+ filters,
+ highlights,
+ onAction,
+ onAsk,
+}: AgentSidebarProps) {
+ const [insights, setInsights] = useState([]);
+ const [anomalies, setAnomalies] = useState([]);
+
+ const insightsStream = useAgentStream({ agentName: "insights" });
+ const anomalyStream = useAgentStream({ agentName: "anomaly" });
+
+ // Hold the latest stream handles + context refs so `analyze()` is stable
+ // but still reads current state.
+ const insightsRef = useRef(insightsStream);
+ insightsRef.current = insightsStream;
+ const anomalyRef = useRef(anomalyStream);
+ anomalyRef.current = anomalyStream;
+ const ctxRef = useRef({ kpis, filters, highlights });
+ ctxRef.current = { kpis, filters, highlights };
+
+ const analyze = useCallback(() => {
+ const { kpis: currentKpis, filters: f, highlights: h } = ctxRef.current;
+ if (!currentKpis) return;
+ const summary = buildKPISummary(currentKpis, f, h);
+ setInsights([]);
+ setAnomalies([]);
+ insightsRef.current.reset();
+ anomalyRef.current.reset();
+ insightsRef.current.send(
+ `Current NYC taxi dashboard state: ${summary}. Surface the most interesting patterns and insights with actionable chips.`,
+ );
+ anomalyRef.current.send(
+ `Current NYC taxi dashboard state: ${summary}. Identify anomalies, outliers, or suspicious patterns with actionable chips.`,
+ );
+ }, []);
+
+ // Initial fire once KPIs load.
+ const hasFired = useRef(false);
+ useEffect(() => {
+ if (kpisLoaded && kpis && !hasFired.current) {
+ hasFired.current = true;
+ analyze();
+ }
+ }, [kpisLoaded, kpis, analyze]);
+
+ // Re-run whenever filters or highlights settle into a new value. Encoded as
+ // a string so useEffect gets a primitive dep and the debounce works off
+ // structural equality, not object identity.
+ const stateSignal = useMemo(
+ () =>
+ JSON.stringify({
+ f: filters,
+ h: highlights.map((hh) => `${hh.start}-${hh.end}-${hh.color}`), // NOTE(review): label is not part of the signal, so a label-only change never re-triggers analysis even though buildKPISummary reports labels
+ }),
+ [filters, highlights],
+ );
+ const debouncedSignal = useDebouncedSignal(stateSignal, 700); // adopts a new signal only after 700ms without further changes
+ const lastAnalyzedSignal = useRef(stateSignal); // seeded with the mount-time signal so the unchanged initial state is not re-analyzed
+ useEffect(() => {
+ if (!kpisLoaded || !kpis) return; // nothing to summarise yet; the effect re-runs when these change
+ if (!hasFired.current) return; // initial fire is in the other effect
+ if (debouncedSignal === lastAnalyzedSignal.current) return; // same settled state as the last run — a kpis/kpisLoaded change alone does not re-analyze
+ lastAnalyzedSignal.current = debouncedSignal;
+ analyze(); // NOTE(review): analyze() reads kpis from a ref; if the KPIs for the new filters have not arrived when the debounce elapses, the prompt may pair new filters with old KPI values — confirm against use-dashboard-data
+ }, [debouncedSignal, kpisLoaded, kpis, analyze]);
+
+ useEffect(() => {
+ if (!insightsStream.isLoading && insightsStream.content) {
+ setInsights(parseFeedInsights(insightsStream.content));
+ }
+ }, [insightsStream.isLoading, insightsStream.content]);
+
+ useEffect(() => {
+ if (!anomalyStream.isLoading && anomalyStream.content) {
+ setAnomalies(parseFeedAnomalies(anomalyStream.content));
+ }
+ }, [anomalyStream.isLoading, anomalyStream.content]);
+
+ const isAnalyzing = insightsStream.isLoading || anomalyStream.isLoading;
+ const totalFindings = insights.length + anomalies.length;
+
+ return (
+
+
+
+
+
+ Agent Feed
+
+ {isAnalyzing ? (
+
+
+ analyzing
+
+ ) : totalFindings > 0 ? (
+
+ {insights.length}
+ insights ·
+ {anomalies.length}
+ anomalies
+
+ ) : null}
+
+
+
+
+
+
+
+ {isAnalyzing && totalFindings === 0 && (
+
+ )}
+
+ {!isAnalyzing && totalFindings === 0 && !kpisLoaded && (
+
+ Loading dashboard data…
+
+ )}
+
+ {!isAnalyzing && totalFindings === 0 && kpisLoaded && (
+
+ No findings for this slice — try widening the filters.
+
+ )}
+
+ {insights.map((insight, i) => (
+
+ ))}
+
+ {anomalies.map((anomaly, i) => (
+
+ ))}
+
+
+ {kpisLoaded && (
+
+
+ Try asking
+
+
+ {SUGGESTED_FOLLOWUPS.map((prompt) => (
+ onAsk(prompt)}
+ className="text-[11px] px-2 py-1 rounded-md border border-border bg-background hover:bg-muted text-foreground/80 hover:text-foreground transition-colors"
+ >
+ {prompt}
+
+ ))}
+
+
+ )}
+
+ );
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/components/anomaly-card.tsx b/apps/dev-playground/client/src/features/smart-dashboard/components/anomaly-card.tsx
new file mode 100644
index 000000000..72c8f0ef2
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/components/anomaly-card.tsx
@@ -0,0 +1,68 @@
+import { AlertTriangleIcon } from "lucide-react";
+
+type Severity = "low" | "medium" | "high";
+
+interface AnomalyCardProps {
+ title: string;
+ description: string;
+ severity: Severity;
+}
+
+/**
+ * Tailwind class sets for each anomaly severity: card border, card
+ * background, icon colour and severity badge.
+ *
+ * Card backgrounds are literal 8-digit hex values rather than Tailwind's
+ * `/N` alpha shorthand, matching `ANOMALY_STYLES` in actionable-card.tsx:
+ * the shorthand compiles to an sRGB fallback plus a `color-mix` override,
+ * which renders a visibly different tint depending on browser support.
+ */
+const SEVERITY_STYLES: Record<
+  Severity,
+  { border: string; bg: string; icon: string; badge: string }
+> = {
+  low: {
+    border: "border-yellow-200 dark:border-yellow-900",
+    bg: "bg-[#fefce880] dark:bg-[#4320044d]",
+    icon: "text-yellow-500",
+    badge:
+      "bg-yellow-100 text-yellow-700 dark:bg-yellow-900/50 dark:text-yellow-400",
+  },
+  medium: {
+    border: "border-orange-200 dark:border-orange-900",
+    bg: "bg-[#fff7ed80] dark:bg-[#4413064d]",
+    icon: "text-orange-500",
+    badge:
+      "bg-orange-100 text-orange-700 dark:bg-orange-900/50 dark:text-orange-400",
+  },
+  high: {
+    border: "border-red-200 dark:border-red-900",
+    bg: "bg-[#fef2f280] dark:bg-[#4608094d]",
+    icon: "text-red-500",
+    badge: "bg-red-100 text-red-700 dark:bg-red-900/50 dark:text-red-400",
+  },
+};
+
+export function AnomalyCard({
+ title,
+ description,
+ severity,
+}: AnomalyCardProps) {
+ const styles = SEVERITY_STYLES[severity];
+
+ return (
+
+
+
+
+
+
+ {title}
+
+
+ {severity}
+
+
+
+ {description}
+
+
+
+
+ );
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/components/approval-card.tsx b/apps/dev-playground/client/src/features/smart-dashboard/components/approval-card.tsx
new file mode 100644
index 000000000..47f24b338
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/components/approval-card.tsx
@@ -0,0 +1,337 @@
+import {
+ CheckCircle2Icon,
+ PencilIcon,
+ PlusCircleIcon,
+ ShieldAlertIcon,
+} from "lucide-react";
+import { useCallback, useState } from "react";
+import type { Highlight } from "../hooks/use-action-dispatcher";
+import type { DashboardFilters } from "../hooks/use-dashboard-data";
+import { captureDashboardAsDataUrl } from "../lib/capture-dashboard";
+
+type ToolEffect = "read" | "write" | "update" | "destructive";
+
+export interface PendingApproval {
+ approvalId: string;
+ streamId: string;
+ toolName: string;
+ args: unknown;
+ annotations?: {
+ effect?: ToolEffect;
+ readOnly?: boolean;
+ destructive?: boolean;
+ idempotent?: boolean;
+ };
+}
+
+/**
+ * Resolve the semantic tier we should render for this approval. Prefers
+ * the explicit `effect` label; falls back to the legacy `destructive` flag
+ * so tools that haven't migrated yet keep their red treatment. Anything
+ * with no mutation hint at all falls through as `write` — the approval
+ * gate fired for a reason, and `write` is the lowest-severity default.
+ *
+ * @param ann - Tool annotations from the pending approval; may be undefined.
+ * @returns `ann.effect` when it is set and not `"read"`, otherwise
+ *   `"destructive"` when `ann.destructive` is true, otherwise `"write"`.
+ */
+function resolveEffect(
+  ann: PendingApproval["annotations"],
+): Exclude<ToolEffect, "read"> {
+  // A "read" effect is ignored here: it has no theme, so it is treated the
+  // same as a missing label and resolved from the fallbacks below.
+  if (ann?.effect && ann.effect !== "read") return ann.effect;
+  if (ann?.destructive === true) return "destructive";
+  return "write";
+}
+
+interface EffectTheme {
+ icon: typeof ShieldAlertIcon;
+ container: string;
+ iconColor: string;
+ badge: string;
+ badgeLabel: string;
+ verb: string;
+}
+
+/**
+ * Visual treatment for each effect tier that can reach the approval card.
+ * Keyed by every `ToolEffect` except `"read"`, which `resolveEffect` never
+ * returns, so a lookup with its result always finds a theme.
+ */
+const EFFECT_THEMES: Record<Exclude<ToolEffect, "read">, EffectTheme> = {
+  write: {
+    icon: PlusCircleIcon,
+    container: "border-blue-500/40 bg-blue-500/[0.06]",
+    iconColor: "text-blue-500",
+    badge: "bg-blue-500/20 text-blue-600 dark:text-blue-400",
+    badgeLabel: "writes",
+    verb: "Approving creates new state in Databricks.",
+  },
+  update: {
+    icon: PencilIcon,
+    container: "border-amber-500/40 bg-amber-500/[0.06]",
+    iconColor: "text-amber-500",
+    badge: "bg-amber-500/20 text-amber-700 dark:text-amber-400",
+    badgeLabel: "updates",
+    verb: "Approving modifies existing state in Databricks.",
+  },
+  destructive: {
+    icon: ShieldAlertIcon,
+    container: "border-red-500/40 bg-red-500/[0.06]",
+    iconColor: "text-red-500",
+    badge: "bg-red-500/20 text-red-600 dark:text-red-400",
+    badgeLabel: "destructive",
+    verb: "Approving deletes or irreversibly changes state. Double-check first.",
+  },
+};
+
+interface ApprovalCardProps {
+ approval: PendingApproval;
+ filters: DashboardFilters;
+ highlights: Highlight[];
+ /** Root element to capture when the approved tool is `save_view`. */
+ dashboardRef: React.RefObject;
+ onDecide: (approvalId: string, decision: "approve" | "deny") => void;
+ /** Notification surfaced back to the route for the toast. */
+ onSaved?: (info: { name: string; volumePath: string }) => void;
+}
+
+/**
+ * Renders the active filters as a comma-separated `key=value` list for the
+ * approval card. Entries that are undefined or empty strings are skipped.
+ *
+ * @param filters - Current dashboard filters.
+ * @returns The list, or `"(none)"` when no filter is set.
+ */
+function formatFilters(filters: DashboardFilters): string {
+  const pairs: string[] = [];
+  for (const [field, value] of Object.entries(filters)) {
+    if (value === undefined || value === "") continue;
+    pairs.push(`${field}=${value}`);
+  }
+  return pairs.length > 0 ? pairs.join(", ") : "(none)";
+}
+
+/**
+ * Renders the highlights as a `"; "`-separated list for the approval card,
+ * each as `start..end (label) [color]` with the label part omitted when the
+ * highlight has none.
+ *
+ * @param highlights - Current dashboard highlights.
+ * @returns The list, or `"(none)"` when there are no highlights.
+ */
+function formatHighlights(highlights: Highlight[]): string {
+  if (highlights.length === 0) return "(none)";
+
+  const rendered: string[] = [];
+  for (const h of highlights) {
+    const labelPart = h.label ? ` (${h.label})` : "";
+    rendered.push(`${h.start}..${h.end}${labelPart} [${h.color}]`);
+  }
+  return rendered.join("; ");
+}
+
+export function ApprovalCard({
+ approval,
+ filters,
+ highlights,
+ dashboardRef,
+ onDecide,
+ onSaved,
+}: ApprovalCardProps) {
+ const args =
+ typeof approval.args === "object" && approval.args !== null
+ ? (approval.args as Record)
+ : {};
+ const effect = resolveEffect(approval.annotations);
+ const theme = EFFECT_THEMES[effect];
+ const EffectIcon = theme.icon;
+ const isSaveView = approval.toolName === "save_view";
+
+ const [phase, setPhase] = useState<
+ | { kind: "idle" }
+ | { kind: "capturing" }
+ | { kind: "uploading"; previewUrl: string }
+ | { kind: "done"; volumePath: string }
+ | { kind: "error"; message: string }
+ >({ kind: "idle" });
+
+ const handleApprove = useCallback(async () => { // Approve handler: plain tools are approved immediately; save_view first captures and uploads the dashboard
+ if (!isSaveView) {
+ onDecide(approval.approvalId, "approve"); // no client-side work needed for other tools
+ return;
+ }
+
+ const root = dashboardRef.current;
+ if (!root) {
+ setPhase({
+ kind: "error",
+ message:
+ "Cannot locate the dashboard element to capture. Contact support.",
+ });
+ return; // no decision is sent; the approval stays pending
+ }
+
+ try {
+ setPhase({ kind: "capturing" });
+ // Conservative capture settings: AppKit's server plugin caps
+ // JSON bodies at 100kb by default. JPEG @ quality 0.75 + scale
+ // 0.6 keeps base64 payloads in the 25-60kb range for typical
+ // dashboard viewports with room for metadata.
+ const { dataUrl } = await captureDashboardAsDataUrl(root, {
+ quality: 0.75,
+ scale: 0.6,
+ });
+ setPhase({ kind: "uploading", previewUrl: dataUrl }); // the capture doubles as the preview while the upload runs
+
+ const name =
+ typeof args.name === "string" && args.name.trim() !== ""
+ ? (args.name as string) // NOTE(review): validated with trim() but sent untrimmed
+ : "Untitled view";
+ const description =
+ typeof args.description === "string" ? args.description : undefined;
+
+ const uploadRes = await fetch("/api/dashboard/save-view", {
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ body: JSON.stringify({
+ name,
+ description,
+ filters,
+ highlights,
+ pngBase64: dataUrl, // NOTE(review): field is named pngBase64 but carries the whole dataUrl of what the comment above describes as a JPEG — confirm the server expects this
+ }),
+ });
+
+ if (!uploadRes.ok) {
+ const err = await uploadRes.text();
+ throw new Error(`Upload failed (${uploadRes.status}): ${err}`); // handled by the catch below
+ }
+
+ const uploadJson = (await uploadRes.json()) as { // response shape is asserted, not validated
+ volumePath: string;
+ };
+
+ setPhase({ kind: "done", volumePath: uploadJson.volumePath });
+ onSaved?.({ name, volumePath: uploadJson.volumePath });
+ onDecide(approval.approvalId, "approve"); // the approval is sent only after the upload succeeded
+ } catch (err) { // capture, network, non-2xx and JSON-parse failures all end up here; no decision is sent
+ const msg = err instanceof Error ? err.message : String(err);
+ setPhase({
+ kind: "error",
+ message: msg,
+ });
+ }
+ }, [
+ isSaveView,
+ args, // NOTE(review): when approval.args is not an object, args is a fresh {} on every render, so this callback is re-created each render
+ filters,
+ highlights,
+ dashboardRef,
+ onDecide,
+ onSaved,
+ approval.approvalId,
+ ]);
+
+ const busy = phase.kind === "capturing" || phase.kind === "uploading";
+
+ return (
+
+
+
+
+
+
+ Approval required
+
+
+ {theme.badgeLabel}
+
+
+
+ The agent wants to call{" "}
+
+ {approval.toolName}
+
+ {isSaveView
+ ? ". Approving captures the current dashboard and uploads it as a saved view."
+ : `. ${theme.verb}`}
+
+
+
+
+ {Object.keys(args).length > 0 && (
+
+
+ Arguments
+
+
+
+ {Object.entries(args).map(([key, value]) => (
+
+
+ {key}
+
+
+ {typeof value === "string"
+ ? value
+ : JSON.stringify(value, null, 2)}
+
+
+ ))}
+
+
+
+ )}
+
+
+
+ Current dashboard state
+
+
+ filters : {formatFilters(filters)}
+
+
+ highlights :{" "}
+ {formatHighlights(highlights)}
+
+
+
+ {phase.kind === "uploading" && (
+
+
+ Captured preview (uploading…)
+
+
+
+ )}
+
+ {phase.kind === "done" && (
+
+
+
+ Saved to {phase.volumePath}
+
+
+ )}
+
+ {phase.kind === "error" && (
+
+ {phase.message}
+
+ )}
+
+
+ onDecide(approval.approvalId, "deny")}
+ disabled={busy}
+ className="px-3 py-1.5 text-xs border border-border rounded-md hover:bg-muted transition-colors disabled:opacity-50"
+ >
+ Deny
+
+
+ {phase.kind === "capturing"
+ ? "Capturing…"
+ : phase.kind === "uploading"
+ ? "Uploading…"
+ : phase.kind === "done"
+ ? "Approved"
+ : isSaveView
+ ? "Approve & save"
+ : "Approve"}
+
+
+
+ );
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/components/chat-drawer.tsx b/apps/dev-playground/client/src/features/smart-dashboard/components/chat-drawer.tsx
new file mode 100644
index 000000000..559f5950d
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/components/chat-drawer.tsx
@@ -0,0 +1,248 @@
+import {
+ FilterIcon,
+ Loader2Icon,
+ MessageSquareIcon,
+ SendIcon,
+ SparklesIcon,
+ XIcon,
+} from "lucide-react";
+import { useCallback, useEffect, useRef, useState } from "react";
+import type { PendingApproval } from "./approval-card";
+
+export interface ChatMessage {
+ id: string;
+ role: "user" | "assistant" | "system";
+ content: string;
+ /** When true, this is the in-progress assistant turn being streamed. */
+ streaming?: boolean;
+}
+
+interface ChatDrawerProps {
+ messages: ChatMessage[];
+ isLoading: boolean;
+ onSend: (message: string) => void;
+ /** Rendered inline in the message list for the turn that triggered it. */
+ approvalCardForMessage: (messageId: string) => React.ReactNode | null;
+ pendingApprovals: PendingApproval[];
+ /** Floating affordance: the toggle button also shows a pending-approval dot. */
+ unreadCount?: number;
+ /** Controlled open state so the parent can auto-open the drawer when a
+ * dashboard interaction (chips, heatmap cells, quick actions, follow-ups)
+ * dispatches a turn the user needs to see. */
+ open: boolean;
+ onOpenChange: (open: boolean) => void;
+}
+
+const EXAMPLE_QUERIES = [
+ "Filter to November 2016",
+ "Highlight the first week of Jan 2016 in red",
+ "Save this view as Peak Week",
+ "Focus on the fare distribution",
+ "Clear all filters and highlights",
+];
+
+/**
+ * Floating chat drawer. Toggled by the ⌘J / Ctrl+J keyboard shortcut or the
+ * floating message-square button in the bottom-right; Escape closes it.
+ * The multi-turn conversation history is supplied through the `messages`
+ * prop, so previous turns remain visible as the user iterates.
+ */
+export function ChatDrawer({
+ messages,
+ isLoading,
+ onSend,
+ approvalCardForMessage,
+ pendingApprovals,
+ unreadCount,
+ open,
+ onOpenChange,
+}: ChatDrawerProps) {
+ const [input, setInput] = useState("");
+ const [showTips, setShowTips] = useState(true);
+ const bottomRef = useRef(null);
+
+ // Global shortcuts: ⌘J / Ctrl+J toggles the drawer, Escape closes it while
+ // it is open. The listener is re-bound whenever `open` or `onOpenChange`
+ // changes so the toggle always inverts the current state.
+ useEffect(() => {
+   const onKey = (e: KeyboardEvent) => {
+     // Compare case-insensitively: with Caps Lock on the browser reports
+     // "J", and some synthetic keydown events (e.g. autofill) carry no
+     // `key` at all, hence the optional chaining.
+     const isToggleCombo =
+       e.key?.toLowerCase() === "j" &&
+       (e.metaKey || e.ctrlKey) &&
+       !e.altKey &&
+       !e.shiftKey;
+     if (isToggleCombo) {
+       // Keep the browser's own ⌘/Ctrl+J binding from firing as well.
+       e.preventDefault();
+       onOpenChange(!open);
+     } else if (e.key === "Escape" && open) {
+       onOpenChange(false);
+     }
+   };
+   window.addEventListener("keydown", onKey);
+   return () => {
+     window.removeEventListener("keydown", onKey);
+   };
+ }, [open, onOpenChange]);
+
+ // Auto-open whenever the pending-approval count changes and is still non-zero, so users don't miss one.
+ useEffect(() => {
+ if (pendingApprovals.length > 0) onOpenChange(true); // also fires when the count drops (e.g. 2 → 1), not only when an approval arrives
+ }, [pendingApprovals.length, onOpenChange]); // NOTE(review): assumes the parent passes a stable onOpenChange; an unstable one would re-open the drawer on every parent render while approvals are pending — confirm
+
+ // biome-ignore lint/correctness/useExhaustiveDependencies: scroll on new messages
+ useEffect(() => {
+ bottomRef.current?.scrollIntoView({ behavior: "smooth" });
+ }, [messages.length, messages[messages.length - 1]?.content]);
+
+ const handleSubmit = useCallback(
+ (e: React.FormEvent) => {
+ e.preventDefault();
+ const msg = input.trim();
+ if (!msg || isLoading) return;
+ setInput("");
+ setShowTips(false);
+ onSend(msg);
+ },
+ [input, isLoading, onSend],
+ );
+
+ const handleExample = useCallback(
+ (q: string) => {
+ if (isLoading) return;
+ setShowTips(false);
+ onSend(q);
+ },
+ [isLoading, onSend],
+ );
+
+ return (
+ <>
+ onOpenChange(!open)}
+ aria-label="Toggle chat (⌘J)"
+ title="Chat with the agent (⌘J)"
+ className="fixed bottom-4 right-20 z-30 rounded-full bg-primary text-primary-foreground shadow-lg hover:bg-primary/90 transition-colors p-3 flex items-center gap-1.5"
+ >
+
+ Chat
+ {(unreadCount ?? 0) > 0 && (
+
+ {unreadCount}
+
+ )}
+
+
+ {open && (
+
+
+
+
+ {messages.length === 0 && (
+
+ Ask the agent to filter, highlight, focus, or save the
+ dashboard.
+
+ )}
+
+ {messages.map((m) => (
+
+
+ {approvalCardForMessage(m.id)}
+
+ ))}
+
+ {isLoading &&
+ messages[messages.length - 1]?.role !== "assistant" && (
+
+
+ Thinking…
+
+ )}
+
+
+
+
+ {showTips && messages.length === 0 && (
+
+
+
+ Try one of these
+
+
+ {EXAMPLE_QUERIES.map((q) => (
+ handleExample(q)}
+ disabled={isLoading}
+ className="rounded-md border border-border bg-background px-2 py-0.5 text-[11px] text-muted-foreground hover:text-foreground hover:border-primary/40 transition-colors disabled:opacity-50"
+ >
+ {q}
+
+ ))}
+
+
+ )}
+
+
+
+ )}
+ >
+ );
+}
+
+function MessageBubble({ message }: { message: ChatMessage }) {
+ const isUser = message.role === "user";
+ return (
+
+
+ {message.content || (message.streaming ? "…" : "")}
+
+
+ );
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/components/fare-chart.tsx b/apps/dev-playground/client/src/features/smart-dashboard/components/fare-chart.tsx
new file mode 100644
index 000000000..383174aec
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/components/fare-chart.tsx
@@ -0,0 +1,78 @@
+import {
+ Bar,
+ BarChart,
+ CartesianGrid,
+ ResponsiveContainer,
+ Tooltip,
+ XAxis,
+ YAxis,
+} from "recharts";
+import { useChartColors } from "../hooks/use-chart-colors";
+import type { FareBucket } from "../hooks/use-dashboard-data";
+
+/** Props for {@link FareChart}. */
+interface FareChartProps {
+ /** Fare buckets passed in by the caller. */
+ data: FareBucket[];
+ /** When true, `FareChart` returns its loading branch instead of the chart. */
+ isLoading: boolean;
+}
+
+/**
+ * "Fare Distribution" card. Returns a separate loading branch while
+ * `isLoading` is true; otherwise returns the chart markup.
+ */
+export function FareChart({ data, isLoading }: FareChartProps) {
+ // Color values from the `useChartColors` hook.
+ const c = useChartColors();
+
+ if (isLoading) {
+ return (
+
+
+ Fare Distribution
+
+
+
+ );
+ }
+
+ return (
+
+
+ Fare Distribution
+
+
+
+
+
+
+ v >= 1000 ? `${(v / 1000).toFixed(0)}K` : String(v)
+ }
+ />
+ {
+ if (name === "trip_count")
+ return [value.toLocaleString(), "Trips"];
+ return [value, name];
+ }}
+ />
+
+
+
+
+ );
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/components/focusable-chart.tsx b/apps/dev-playground/client/src/features/smart-dashboard/components/focusable-chart.tsx
new file mode 100644
index 000000000..689145a4b
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/components/focusable-chart.tsx
@@ -0,0 +1,36 @@
+import type { ReactNode } from "react";
+import {
+ type FocusableChartId,
+ useFocusable,
+} from "../hooks/use-focus-registry";
+
+/** Props for {@link FocusableChart}. */
+interface FocusableChartProps {
+ /** Focus-registry key for the wrapped chart; passed to `useFocusable`. */
+ chartId: FocusableChartId;
+ /** Content rendered inside the wrapper. */
+ children: ReactNode;
+}
+
+/**
+ * Wraps a chart with a focus-ring pulse effect. Pairs with `focusChart(id)`
+ * — when the `dashboard_pilot` agent emits a `focus_chart({ chart_id })`
+ * tool call, the dispatcher invokes the registered callback here, which
+ * scrolls into view and flips `focused` true for 1.2s.
+ * NOTE(review): the scroll and the 1.2s duration live in `useFocusable`,
+ * not in this file — confirm against the hook.
+ *
+ * Named `chartId` (not `id`) because this is a logical focus-registry key,
+ * not a DOM id attribute.
+ */
+export function FocusableChart({ chartId, children }: FocusableChartProps) {
+ // `setRef` and `focused` both come from the focus-registry hook for this key.
+ const { setRef, focused } = useFocusable(chartId);
+
+ return (
+
+ {children}
+
+ );
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/components/hourly-heatmap.tsx b/apps/dev-playground/client/src/features/smart-dashboard/components/hourly-heatmap.tsx
new file mode 100644
index 000000000..51ce98ed8
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/components/hourly-heatmap.tsx
@@ -0,0 +1,186 @@
+import { useMemo } from "react";
+import type { HeatmapCell } from "../hooks/use-dashboard-data";
+
+/** Props for {@link HourlyHeatmap}. */
+interface HourlyHeatmapProps {
+ /** Heatmap cells; the component indexes them by `day_of_week` and `hour_of_day`. */
+ data: HeatmapCell[];
+ /** When true, the component returns its loading placeholder instead of the grid. */
+ isLoading: boolean;
+ /** Fires when the user clicks a cell that has data. Receives a human-readable
+ * slot label (for example "Monday at 3p") and the cell itself; the route
+ * typically forwards the label to `dispatchToAgent` so the agent can narrate. */
+ onCellClick?: (label: string, cell: HeatmapCell) => void;
+}
+
+// Spark's DAYOFWEEK returns 1..7 (Sunday=1, Saturday=7). We render Mon–Sun
+// for commuter intuition, so the row order is shifted.
+const DAY_ROW_ORDER: Array<{ label: string; dayOfWeek: number }> = [
+  { label: "Mon", dayOfWeek: 2 },
+  { label: "Tue", dayOfWeek: 3 },
+  { label: "Wed", dayOfWeek: 4 },
+  { label: "Thu", dayOfWeek: 5 },
+  { label: "Fri", dayOfWeek: 6 },
+  { label: "Sat", dayOfWeek: 7 },
+  { label: "Sun", dayOfWeek: 1 },
+];
+
+// Full weekday names keyed by the same 1..7 (Sunday=1) numbering; used to
+// build the human-readable slot label for a cell.
+const FULL_DAY_LABEL: Record<number, string> = {
+  1: "Sunday",
+  2: "Monday",
+  3: "Tuesday",
+  4: "Wednesday",
+  5: "Thursday",
+  6: "Friday",
+  7: "Saturday",
+};
+
+// Hours of the day, 0..23, one per heatmap column.
+const HOURS = Array.from({ length: 24 }, (_, i) => i);
+
+/** Compact 12-hour label for an hour of day, e.g. 0 → "12a", 9 → "9a", 15 → "3p". */
+function formatHour(h: number): string {
+  switch (h) {
+    case 0:
+      return "12a";
+    case 12:
+      return "12p";
+    default:
+      return h < 12 ? `${h}a` : `${h - 12}p`;
+  }
+}
+
+/**
+ * Picks the background color for a heatmap cell from its trip count.
+ * A zero count (or an all-zero grid) yields a neutral muted tile, so missing
+ * cells still read as part of the matrix. Otherwise only the HSL lightness
+ * varies, scaled by `value / max` and capped at 1: lighter with volume on the
+ * dark theme, darker with volume on the light theme. Lightness is used
+ * instead of alpha so cells stay legible on both themes.
+ */
+function cellColor(value: number, max: number, isDark: boolean): string {
+  const isEmpty = max === 0 || value === 0;
+  if (isEmpty) {
+    return isDark ? "hsl(215, 14%, 22%)" : "hsl(220, 13%, 94%)";
+  }
+  const ratio = Math.min(1, value / max);
+  return isDark
+    ? `hsl(217, 80%, ${18 + ratio * 42}%)`
+    : `hsl(221, 83%, ${90 - ratio * 50}%)`;
+}
+
+/** True when the root element carries the `dark` class; false when no `document` exists. */
+function isDarkTheme(): boolean {
+  return (
+    typeof document !== "undefined" &&
+    document.documentElement.classList.contains("dark")
+  );
+}
+
+/**
+ * Day × hour pickup heatmap. Cells are looked up by weekday and hour, colored
+ * relative to the busiest slot, and labelled with trip count and average fare.
+ * Clicking a cell that has data calls `onCellClick` with a readable slot label.
+ */
+export function HourlyHeatmap({
+ data,
+ isLoading,
+ onCellClick,
+}: HourlyHeatmapProps) {
+ // Read on every render; this does not subscribe to later theme changes.
+ const dark = isDarkTheme();
+
+ // Index cells by "<day_of_week>-<hour_of_day>" and track the largest
+ // trip_count so colors can be scaled against the peak.
+ const { cellByKey, maxCount } = useMemo(() => {
+ const map = new Map<string, HeatmapCell>();
+ let max = 0;
+ for (const c of data) {
+ map.set(`${c.day_of_week}-${c.hour_of_day}`, c);
+ if (c.trip_count > max) max = c.trip_count;
+ }
+ return { cellByKey: map, maxCount: max };
+ }, [data]);
+
+ if (isLoading) {
+ return (
+
+ );
+ }
+
+ return (
+
+
+
+ Pickup Heatmap
+
+ day × hour
+
+
+
+ click a cell to investigate
+
+
+
+
+
+
+ {HOURS.map((h) => (
+
+ {h % 3 === 0 ? formatHour(h) : ""}
+
+ ))}
+
+ {DAY_ROW_ORDER.map((row) => (
+
+
+ {row.label}
+
+ {HOURS.map((h) => {
+ const cell = cellByKey.get(`${row.dayOfWeek}-${h}`);
+ const count = cell?.trip_count ?? 0;
+ const bg = cellColor(count, maxCount, dark);
+ const label = `${FULL_DAY_LABEL[row.dayOfWeek]} at ${formatHour(h)}`;
+ const title = `${label}: ${count.toLocaleString()} trips${
+ cell ? ` · $${cell.avg_fare} avg fare` : ""
+ }`;
+ return (
+
{
+ if (!cell) return;
+ onCellClick?.(label, cell);
+ }}
+ className="h-6 rounded-[3px] transition-all hover:ring-2 hover:ring-primary/50 hover:scale-[1.08] disabled:cursor-default disabled:hover:ring-0 disabled:hover:scale-100"
+ style={{ backgroundColor: bg }}
+ />
+ );
+ })}
+
+ ))}
+
+
+
+
+
fewer
+
+ {[0, 0.25, 0.5, 0.75, 1].map((t) => (
+
+ ))}
+
+
more
+ {maxCount > 0 && (
+
+ peak {maxCount.toLocaleString()} trips/slot
+
+ )}
+
+
+ );
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/components/insight-card.tsx b/apps/dev-playground/client/src/features/smart-dashboard/components/insight-card.tsx
new file mode 100644
index 000000000..b17b44d70
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/components/insight-card.tsx
@@ -0,0 +1,24 @@
+import { LightbulbIcon } from "lucide-react";
+
+/** Props for {@link InsightCard}. */
+interface InsightCardProps {
+ /** Heading text rendered by the card. */
+ title: string;
+ /** Body text rendered below the heading. */
+ description: string;
+}
+
+/** Presentational card that renders a `title` followed by a `description`. */
+export function InsightCard({ title, description }: InsightCardProps) {
+ return (
+
+
+
+
+
+ {title}
+
+
+ {description}
+
+
+
+
+ );
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/components/inspector-toggle.tsx b/apps/dev-playground/client/src/features/smart-dashboard/components/inspector-toggle.tsx
new file mode 100644
index 000000000..4a0388b6b
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/components/inspector-toggle.tsx
@@ -0,0 +1,31 @@
+import { ActivityIcon } from "lucide-react";
+import {
+ toggleInspector,
+ useStreamInspector,
+} from "../hooks/use-stream-inspector";
+
+/**
+ * Floating icon in the bottom-right that opens the Stream Inspector.
+ * Complements the ⌘K keyboard shortcut with a discoverable affordance.
+ */
+export function InspectorToggle() {
+ const { records } = useStreamInspector();
+ // Event count of the first record (0 when there are none); the badge is
+ // rendered only when it is positive.
+ // NOTE(review): assumes records[0] is the current run — confirm the
+ // ordering in `useStreamInspector`.
+ const currentRunEvents = records[0]?.events.length ?? 0;
+
+ return (
+
+
+ {currentRunEvents > 0 && (
+
+ {currentRunEvents}
+
+ )}
+
+ );
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/components/kpi-cards.tsx b/apps/dev-playground/client/src/features/smart-dashboard/components/kpi-cards.tsx
new file mode 100644
index 000000000..8d6ddd843
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/components/kpi-cards.tsx
@@ -0,0 +1,258 @@
+import { CarIcon, DollarSignIcon, MapPinIcon, RulerIcon } from "lucide-react";
+import { useId, useMemo } from "react";
+import { useChartColors } from "../hooks/use-chart-colors";
+import type { KPIData, SparklineRow } from "../hooks/use-dashboard-data";
+
+/** Props for {@link KPICards}. */
+interface KPICardsProps {
+ /** Headline KPI values, or null when none are available. */
+ data: KPIData | null;
+ /** Rows supplying the per-card series (trip_count, avg_fare, avg_distance, total_revenue). */
+ sparklines: SparklineRow[];
+ /** Forwarded to every card as its loading flag. */
+ isLoading: boolean;
+}
+
+/** Props for a single {@link KPICard}. */
+interface CardProps {
+ /** Card heading. */
+ title: string;
+ /** Pre-formatted headline value. */
+ value: string;
+ /** Optional secondary line, rendered only when present. */
+ subtitle?: string;
+ /** Icon node rendered next to the title. */
+ icon: React.ReactNode;
+ /** When true, the card renders its loading branch instead of the value. */
+ isLoading: boolean;
+ /** 30-bar trailing series (or empty → no sparkline). Values are normalized inside. */
+ series: number[];
+ /** Percent change to display; omitted when no trend can be computed. */
+ trend?: number;
+}
+
+/**
+ * Fixed-size inline sparkline. Using a hand-rolled SVG rather than recharts
+ * because:
+ * - recharts inside a grid of 4 cards would mount 4× chart engines with
+ * ResponsiveContainer observers — heavy for a decorative element;
+ * - we want sub-pixel control over the baseline tint + end-cap dot.
+ *
+ * `values` are min/max-normalized into a 120×36 box. The memo produces the
+ * line path, a closed area path, and the final point of the series.
+ */
+function Sparkline({
+ values,
+ color,
+ isLoading,
+}: {
+ values: number[];
+ color: string;
+ isLoading: boolean;
+}) {
+ const gradientId = useId();
+ const width = 120;
+ const height = 36;
+
+ const { pathD, areaD, lastPoint } = useMemo(() => {
+ if (values.length === 0) {
+ return { pathD: "", areaD: "", lastPoint: null };
+ }
+ const min = Math.min(...values);
+ const max = Math.max(...values);
+ // A flat series (max === min) would divide by zero; fall back to a span of 1.
+ const span = max - min || 1;
+ // Spread points evenly across the width; a single point sits at x = 0.
+ const step = values.length > 1 ? width / (values.length - 1) : width;
+ const points = values.map((v, i) => {
+ const x = i * step;
+ // SVG y grows downward, so invert; keep a 4px margin top and bottom.
+ const y = height - 4 - ((v - min) / span) * (height - 8);
+ return { x, y };
+ });
+ const d = points
+ .map(
+ (p, i) => `${i === 0 ? "M" : "L"} ${p.x.toFixed(2)} ${p.y.toFixed(2)}`,
+ )
+ .join(" ");
+ // Extend the line down to the bottom edge and close it to form the area.
+ const area = `${d} L ${width} ${height} L 0 ${height} Z`;
+ return { pathD: d, areaD: area, lastPoint: points[points.length - 1] };
+ }, [values]);
+
+ if (isLoading) {
+ return
;
+ }
+ // Intentionally-empty series (e.g. categorical KPI like "Top Pickup Zone"):
+ // keep the slot reserved so the four cards stay the same height, but render
+ // nothing inside — otherwise the muted placeholder looks like a ghost
+ // "still loading" spinner.
+ if (values.length === 0) {
+ return
;
+ }
+
+ return (
+
+
+
+
+
+
+
+
+
+ {lastPoint && (
+
+ )}
+
+ );
+}
+
+/**
+ * One KPI tile: title, icon, headline value, optional trend badge, optional
+ * subtitle, and the sparkline slot. While `isLoading` is true the loading
+ * branch is rendered instead of the value.
+ */
+function KPICard({
+ title,
+ value,
+ subtitle,
+ icon,
+ isLoading,
+ series,
+ trend,
+}: CardProps) {
+ const c = useChartColors();
+ // Round once and derive both the label and the color from the rounded
+ // value, so a change that rounds to zero reads as a neutral "0%" instead of
+ // a red "-0%" or a green "+0%". Adding 0 turns -0 into 0.
+ const roundedTrend =
+ trend === undefined ? undefined : Number(trend.toFixed(0)) + 0;
+ const trendLabel =
+ roundedTrend === undefined
+ ? null
+ : roundedTrend > 0
+ ? `+${roundedTrend}%`
+ : `${roundedTrend}%`;
+ const trendColor =
+ roundedTrend === undefined
+ ? ""
+ : roundedTrend > 0
+ ? "text-emerald-600 dark:text-emerald-400"
+ : roundedTrend < 0
+ ? "text-rose-600 dark:text-rose-400"
+ : "text-muted-foreground";
+
+ return (
+
+
+
+ {title}
+
+ {icon}
+
+ {isLoading ? (
+
+ ) : (
+ <>
+
+
{value}
+ {trendLabel && (
+
+ {trendLabel}
+
+ )}
+
+ {subtitle && (
+
+ {subtitle}
+
+ )}
+
+
+
+ >
+ )}
+
+ );
+}
+
+/**
+ * Abbreviates a count for display: millions as "1.2M", thousands as "3.4K",
+ * anything smaller via `toLocaleString()`.
+ *
+ * Values just under a million round up to "1000.0" in the thousands branch;
+ * those are promoted to "1.0M" so the output never reads "1000.0K".
+ */
+function formatNumber(n: number): string {
+  if (n >= 1_000_000) return `${(n / 1_000_000).toFixed(1)}M`;
+  if (n >= 1_000) {
+    const thousands = (n / 1_000).toFixed(1);
+    return thousands === "1000.0" ? "1.0M" : `${thousands}K`;
+  }
+  return n.toLocaleString();
+}
+
+/**
+ * Percent delta between the last `tail` window and the previous window.
+ *
+ * @param values Series in chronological order. Typed as numbers, but
+ *   tolerated at runtime as null, undefined, empty strings, or numeric strings.
+ * @param tail Size of each comparison window.
+ * @returns The percent change of the recent window's mean against the prior
+ *   window's mean, or undefined when there are fewer than `tail * 2` usable
+ *   points, `tail` is below 1, or the prior mean is zero.
+ */
+function windowedTrend(values: number[], tail: number): number | undefined {
+  // A window smaller than one point has no mean to compare.
+  if (!(tail >= 1)) return undefined;
+  // Drop missing entries before coercing: Number(null) and Number("") are both
+  // 0, so coercing first would count a missing day as a zero. Numeric strings
+  // (some drivers return DECIMAL columns that way) are still converted, and
+  // anything that does not become a finite number is dropped.
+  const clean = (values as readonly unknown[])
+    .filter((v) => v !== null && v !== undefined && v !== "")
+    .map((v) => Number(v))
+    .filter(Number.isFinite);
+  if (clean.length < tail * 2) return undefined;
+  const recent = clean.slice(-tail);
+  const prior = clean.slice(-tail * 2, -tail);
+  const recentAvg = recent.reduce((a, b) => a + b, 0) / recent.length;
+  const priorAvg = prior.reduce((a, b) => a + b, 0) / prior.length;
+  if (!Number.isFinite(recentAvg) || !Number.isFinite(priorAvg))
+    return undefined;
+  // A zero baseline has no meaningful percent change.
+  if (priorAvg === 0) return undefined;
+  return ((recentAvg - priorAvg) / priorAvg) * 100;
+}
+
+/**
+ * Row of KPI cards. Derives one numeric series per metric from `sparklines`
+ * and passes each card its series plus a 7-point windowed trend. The last
+ * card receives an empty series and no trend.
+ */
+export function KPICards({ data, sparklines, isLoading }: KPICardsProps) {
+ // Coerce on intake so downstream sparkline paths and trend math stay purely
+ // numeric — avoids surprises if a driver ever hands back DECIMAL-as-string.
+ const toNum = (v: unknown) => {
+ const n = Number(v);
+ return Number.isFinite(n) ? n : 0;
+ };
+ const tripSeries = sparklines.map((r) => toNum(r.trip_count));
+ const fareSeries = sparklines.map((r) => toNum(r.avg_fare));
+ const distSeries = sparklines.map((r) => toNum(r.avg_distance));
+ const revenueSeries = sparklines.map((r) => toNum(r.total_revenue));
+
+ // Window size handed to `windowedTrend` for every card that shows a trend.
+ const TREND_WINDOW = 7;
+
+ return (
+
+ }
+ isLoading={isLoading}
+ series={tripSeries}
+ trend={windowedTrend(tripSeries, TREND_WINDOW)}
+ />
+ }
+ isLoading={isLoading}
+ series={fareSeries}
+ trend={windowedTrend(fareSeries, TREND_WINDOW)}
+ />
+ 0
+ ? `$${formatNumber(
+ // Explicit Number() wrap on each accumulator step defends
+ // against a single stray string in the series silently
+ // turning the whole sum into a concatenated blob.
+ revenueSeries.reduce(
+ (a, b) => a + (Number.isFinite(b) ? Number(b) : 0),
+ 0,
+ ),
+ )} revenue`
+ : undefined
+ }
+ icon={ }
+ isLoading={isLoading}
+ series={distSeries}
+ trend={windowedTrend(distSeries, TREND_WINDOW)}
+ />
+ }
+ isLoading={isLoading}
+ series={[]}
+ />
+
+ );
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/components/quick-actions-bar.tsx b/apps/dev-playground/client/src/features/smart-dashboard/components/quick-actions-bar.tsx
new file mode 100644
index 000000000..c67aa0662
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/components/quick-actions-bar.tsx
@@ -0,0 +1,119 @@
+import { BookmarkPlusIcon, EraserIcon, FilterXIcon, XIcon } from "lucide-react";
+import { useCallback, useRef, useState } from "react";
+
+/** Props for {@link QuickActionsBar}. */
+interface QuickActionsBarProps {
+ /**
+ * Dispatches a message through the chat pipeline (same `useAgentStream`
+ * the text input uses). Keeps the demo narrative honest: clicks are just
+ * prefilled prompts — the agent still reasons and the approval gate
+ * still fires for destructive actions.
+ */
+ onSend: (message: string) => void;
+ /** Disables the bar's controls when true; defaults to false. */
+ disabled?: boolean;
+}
+
+/**
+ * Row of one-click prompts: save the current view under a name, clear
+ * filters, clear highlights. Every action is sent as a chat message through
+ * `onSend`.
+ */
+export function QuickActionsBar({
+ onSend,
+ disabled = false,
+}: QuickActionsBarProps) {
+ // null = the name editor is closed; a string (possibly empty) = the editor
+ // is open and holds the draft name.
+ const [savingName, setSavingName] = useState<string | null>(null);
+ const saveInputRef = useRef<HTMLInputElement>(null);
+
+ const startSave = useCallback(() => {
+ setSavingName("");
+ // Defer the focus call so it runs after the input has been rendered.
+ setTimeout(() => saveInputRef.current?.focus(), 0);
+ }, []);
+
+ const cancelSave = useCallback(() => {
+ setSavingName(null);
+ }, []);
+
+ // A blank name closes the editor without sending anything.
+ const submitSave = useCallback(() => {
+ const name = savingName?.trim();
+ if (!name) {
+ setSavingName(null);
+ return;
+ }
+ onSend(`Save the current view as "${name}"`);
+ setSavingName(null);
+ }, [savingName, onSend]);
+
+ return (
+
+
+ Quick actions
+
+
+ {savingName === null ? (
+
+
+ Save view…
+
+ ) : (
+
+
+ setSavingName(e.target.value)}
+ onKeyDown={(e) => {
+ if (e.key === "Enter") {
+ e.preventDefault();
+ submitSave();
+ } else if (e.key === "Escape") {
+ e.preventDefault();
+ cancelSave();
+ }
+ }}
+ placeholder="Name this view…"
+ disabled={disabled}
+ className="w-44 bg-transparent border-0 outline-none text-xs text-foreground placeholder:text-muted-foreground"
+ />
+
+ Save
+
+
+
+
+
+ )}
+
+
onSend("Clear all filters on the dashboard.")}
+ disabled={disabled}
+ className="inline-flex items-center gap-1.5 rounded-md border border-border bg-background px-2.5 py-1 text-xs text-foreground hover:bg-muted transition-colors disabled:opacity-50"
+ >
+
+ Clear filters
+
+
+
onSend("Clear all highlights from the charts.")}
+ disabled={disabled}
+ className="inline-flex items-center gap-1.5 rounded-md border border-border bg-background px-2.5 py-1 text-xs text-foreground hover:bg-muted transition-colors disabled:opacity-50"
+ >
+
+ Clear highlights
+
+
+ );
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/components/saved-views-panel.tsx b/apps/dev-playground/client/src/features/smart-dashboard/components/saved-views-panel.tsx
new file mode 100644
index 000000000..b1eb184bf
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/components/saved-views-panel.tsx
@@ -0,0 +1,193 @@
+import {
+ BookmarkIcon,
+ ChevronDownIcon,
+ ChevronUpIcon,
+ Loader2Icon,
+ RefreshCwIcon,
+} from "lucide-react";
+import { useCallback, useEffect, useState } from "react";
+
+/** One saved dashboard view as returned by the saved-views endpoint. */
+export interface SavedView {
+  /** Path of the saved PNG for this view. */
+  pngPath: string;
+  /** Path of the metadata file for this view. */
+  metaPath: string;
+  /** Metadata payload; every field is optional. */
+  metadata: {
+    name?: string;
+    description?: string | null;
+    /** Filter values keyed by filter name; the values are left untyped here. */
+    filters?: Record<string, unknown>;
+    highlights?: unknown[];
+    /** Timestamp string; the card parses it with `new Date(...)`. */
+    savedAt?: string;
+    savedBy?: string;
+    pngPath?: string;
+  };
+}
+
+interface SavedViewsPanelProps {
+ /**
+ * Send-to-chat callback. Clicking a saved view dispatches a load request
+ * through the agent so the approval/action trail stays consistent.
+ */
+ onLoad: (view: SavedView) => void;
+ /** Incrementing counter bumped by the route after each successful save. */
+ refreshToken: number;
+}
+
+/**
+ * Collapsible list of saved views. Fetches `/api/dashboard/saved-views` on
+ * mount and whenever `refreshToken` changes, shows loading, error, and empty
+ * states, and calls `onLoad` with the clicked view.
+ */
+export function SavedViewsPanel({
+ onLoad,
+ refreshToken,
+}: SavedViewsPanelProps) {
+ const [open, setOpen] = useState(true);
+ const [views, setViews] = useState<SavedView[]>([]);
+ const [loading, setLoading] = useState(false);
+ const [error, setError] = useState<string | null>(null);
+
+ const load = useCallback(async () => {
+ setLoading(true);
+ setError(null);
+ try {
+ const res = await fetch("/api/dashboard/saved-views");
+ if (!res.ok) {
+ const txt = await res.text();
+ throw new Error(`${res.status}: ${txt}`);
+ }
+ const data = (await res.json()) as { views?: SavedView[] };
+ // Guard against a payload without a `views` array so the render below
+ // never reads `.length` of undefined.
+ setViews(Array.isArray(data.views) ? data.views : []);
+ } catch (err) {
+ setError(err instanceof Error ? err.message : String(err));
+ } finally {
+ setLoading(false);
+ }
+ }, []);
+
+ // Load on mount + whenever the parent bumps refreshToken. The dep on
+ // refreshToken is intentional — biome flags it because it's an opaque
+ // number with no direct read inside the effect body, but the whole
+ // point is that changing it in the parent invalidates the cached list.
+ // biome-ignore lint/correctness/useExhaustiveDependencies: see above
+ useEffect(() => {
+ load();
+ }, [load, refreshToken]);
+
+ const toggle = () => setOpen((v) => !v);
+
+ return (
+
+ {/*
+ Header row is a flex container rather than a single big button so
+ the refresh action can sit beside the collapse toggle without
+ being nested inside it (`<button>` inside `<button>` is invalid
+ HTML and trips React's hydration warning). The title area and the
+ chevron each open/close the panel; refresh is its own button and
+ no longer needs `e.stopPropagation`.
+ */}
+
+
+
+
+ Saved views
+
+
+ {views.length > 0 ? `(${views.length})` : ""}
+
+
+
+ {loading && (
+
+ )}
+ {!loading && (
+
+
+
+ )}
+
+ {open ? (
+
+ ) : (
+
+ )}
+
+
+
+
+ {open && (
+
+ {error && (
+
+ Failed to load: {error}
+
+ )}
+
+ {!error && views.length === 0 && !loading && (
+
+ No saved views yet. Use the Save view… quick action or
+ ask the agent to save the current configuration.
+
+ )}
+
+ {views.length > 0 && (
+
+ {views.map((view) => (
+ onLoad(view)}
+ />
+ ))}
+
+ )}
+
+ )}
+
+ );
+}
+
+/**
+ * Card for one saved view: shows the view name (or "Untitled view") and the
+ * time it was saved.
+ */
+function SavedViewCard({
+ view,
+ onLoad,
+}: {
+ view: SavedView;
+ onLoad: () => void;
+}) {
+ // Locale-formatted save time, or an empty string when `savedAt` is missing.
+ const savedAt = view.metadata.savedAt
+ ? new Date(view.metadata.savedAt).toLocaleString()
+ : "";
+
+ return (
+
+
+
+
+ {view.metadata.name ?? "Untitled view"}
+
+
+ {savedAt}
+
+
+
+ );
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/components/stream-inspector.tsx b/apps/dev-playground/client/src/features/smart-dashboard/components/stream-inspector.tsx
new file mode 100644
index 000000000..adf15dccc
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/components/stream-inspector.tsx
@@ -0,0 +1,246 @@
+import { ChevronDownIcon, ChevronRightIcon, XIcon } from "lucide-react";
+import { useMemo, useState } from "react";
+import {
+ clearInspectorHistory,
+ closeInspector,
+ type StreamEventRecord,
+ type StreamRecord,
+ useStreamInspector,
+} from "../hooks/use-stream-inspector";
+
+/** Event categories the inspector can be narrowed to. */
+type FilterMode =
+ | "all"
+ | "tool_calls"
+ | "messages"
+ | "approvals"
+ | "sub_agents";
+
+// Filter chips in display order; `id` is the mode passed to `matchesFilter`.
+const FILTER_OPTIONS: Array<{ id: FilterMode; label: string }> = [
+ { id: "all", label: "All" },
+ { id: "tool_calls", label: "Tool calls" },
+ { id: "messages", label: "Messages" },
+ { id: "approvals", label: "Approvals" },
+ { id: "sub_agents", label: "Sub-agents" },
+];
+
+/**
+ * Decides whether a stream event is shown under the selected filter mode.
+ *
+ * - "all": every event.
+ * - "messages": text deltas, output-item added/done, and response completed.
+ * - "tool_calls": output-item added/done events whose item is a function call.
+ * - "approvals": `appkit.approval_pending` events.
+ * - "sub_agents": function-call items whose name starts with "agent-".
+ */
+function matchesFilter(
+  event: StreamEventRecord["event"],
+  mode: FilterMode,
+): boolean {
+  const isFunctionCallItem = event.item?.type === "function_call";
+  switch (mode) {
+    case "all":
+      return true;
+    case "messages":
+      return (
+        event.type === "response.output_text.delta" ||
+        event.type === "response.output_item.added" ||
+        event.type === "response.output_item.done" ||
+        event.type === "response.completed"
+      );
+    case "tool_calls": {
+      const isItemLifecycle =
+        event.type === "response.output_item.added" ||
+        event.type === "response.output_item.done";
+      return isItemLifecycle && isFunctionCallItem;
+    }
+    case "approvals":
+      return event.type === "appkit.approval_pending";
+    case "sub_agents":
+      // Sub-agent invocations surface as `agent-` function_calls. No other
+      // event type matches this mode.
+      return isFunctionCallItem
+        ? (event.item?.name?.startsWith("agent-") ?? false)
+        : false;
+    default:
+      return true;
+  }
+}
+
+/**
+ * Shortens an event type for display by stripping a leading `response.`
+ * and then a leading `appkit.`, in that order.
+ */
+function shortType(type: string): string {
+  let label = type;
+  for (const prefix of ["response.", "appkit."]) {
+    if (label.startsWith(prefix)) {
+      label = label.slice(prefix.length);
+    }
+  }
+  return label;
+}
+
+/** Formats a relative time: whole milliseconds below one second, otherwise seconds with two decimals. */
+function formatTimestamp(relMs: number): string {
+  return relMs < 1000
+    ? `${Math.round(relMs)}ms`
+    : `${(relMs / 1000).toFixed(2)}s`;
+}
+
+/**
+ * One expandable row in the inspector: time since the run started, the
+ * shortened event type, a one-line summary, and (when expanded) the full
+ * event as pretty-printed JSON.
+ */
+function EventRow({
+ event,
+ receivedAt,
+ startedAt,
+}: StreamEventRecord & { startedAt: number }) {
+ const [expanded, setExpanded] = useState(false);
+ // Time elapsed between the run's start and this event's arrival.
+ const rel = receivedAt - startedAt;
+
+ const isFunctionCall = event.item?.type === "function_call";
+ const isApproval = event.type === "appkit.approval_pending";
+
+ // Summary text: the tool awaiting approval, the function-call name, or the
+ // text delta; empty for every other event.
+ let summary: string;
+ if (isApproval) {
+ summary = `approval: ${event.tool_name}`;
+ } else if (isFunctionCall) {
+ summary = `${event.item?.name ?? "(unnamed)"}`;
+ } else if (event.type === "response.output_text.delta") {
+ summary = event.delta ?? "";
+ } else {
+ summary = "";
+ }
+
+ return (
+
+
setExpanded((v) => !v)}
+ className="w-full px-3 py-2 flex items-start gap-2 text-left hover:bg-muted/40 transition-colors"
+ >
+ {expanded ? (
+
+ ) : (
+
+ )}
+
+
+
+ {formatTimestamp(rel)}
+
+
+ {shortType(event.type)}
+
+ {summary && (
+
+ {summary}
+
+ )}
+
+ {expanded && (
+
+ {JSON.stringify(event, null, 2)}
+
+ )}
+
+
+
+ );
+}
+
+/**
+ * One run in the inspector: its label, event count, and start time, followed
+ * by a row per event.
+ *
+ * The start time is derived as `Date.now() - (performance.now() - startedAt)`.
+ * NOTE(review): this implies `startedAt` is a `performance.now()` reading —
+ * confirm in `use-stream-inspector`.
+ */
+function RunBlock({ record }: { record: StreamRecord }) {
+ return (
+
+
+
+ {record.label}
+
+
+ {record.events.length} events · started{" "}
+ {new Date(
+ Date.now() - (performance.now() - record.startedAt),
+ ).toLocaleTimeString()}
+
+
+
+ {record.events.map((er, idx) => (
+
+ ))}
+
+
+ );
+}
+
+/**
+ * Stream Inspector panel. Renders nothing while closed; when open it lists
+ * the recorded runs and their events, narrowed by the selected filter chip.
+ */
+export function StreamInspector() {
+ const { isOpen, records } = useStreamInspector();
+ const [filter, setFilter] = useState<FilterMode>("all");
+
+ // With a filter active, every run is kept but its events are narrowed to
+ // those matching the filter.
+ const filteredRecords = useMemo(() => {
+ if (filter === "all") return records;
+ return records.map((r) => ({
+ ...r,
+ events: r.events.filter((er) => matchesFilter(er.event, filter)),
+ }));
+ }, [records, filter]);
+
+ if (!isOpen) return null;
+
+ return (
+ <>
+ {/* biome-ignore lint/a11y/noStaticElementInteractions: backdrop dismiss */}
+ {/* biome-ignore lint/a11y/useKeyWithClickEvents: backdrop dismiss handled globally via Esc */}
+
+
+
+
+
+ {FILTER_OPTIONS.map((opt) => (
+
setFilter(opt.id)}
+ className={`text-[11px] px-2 py-1 rounded-full transition-colors ${
+ filter === opt.id
+ ? "bg-primary text-primary-foreground"
+ : "bg-muted text-muted-foreground hover:bg-muted/70"
+ }`}
+ >
+ {opt.label}
+
+ ))}
+
+ {records.length > 0 && (
+
+ Clear
+
+ )}
+
+
+
+ {filteredRecords.length === 0 ? (
+
+ No events yet. Ask the agent something to see the SSE stream here.
+
+ ) : (
+ filteredRecords.map((r) =>
)
+ )}
+
+
+ >
+ );
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/components/top-zones-chart.tsx b/apps/dev-playground/client/src/features/smart-dashboard/components/top-zones-chart.tsx
new file mode 100644
index 000000000..5a4666008
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/components/top-zones-chart.tsx
@@ -0,0 +1,164 @@
+import { useMemo, useState } from "react";
+import { useChartColors } from "../hooks/use-chart-colors";
+import type { TopZoneRow } from "../hooks/use-dashboard-data";
+
+/** A pickup ZIP to emphasize in the chart, with an optional label. */
+export interface HighlightedZone {
+ /** ZIP code, matched against each row's `pickup_zip`. */
+ zip: string;
+ /** Optional text shown for the highlighted row. */
+ label?: string;
+}
+
+interface TopZonesChartProps {
+ data: TopZoneRow[];
+ isLoading: boolean;
+ /** Zones with a visible emphasis ring — driven by the `highlight_zone` tool. */
+ highlightedZones: HighlightedZone[];
+ /** Click on a bar → filter the dashboard to that zip. */
+ onZipClick?: (zip: string) => void;
+}
+
+type Metric = "trips" | "revenue";
+
+/**
+ * Horizontal leaderboard chart for pickup ZIPs. Hand-rolled divs rather than
+ * recharts' BarChart because:
+ * - we want per-row click handlers and a distinct ring for highlighted zones;
+ * - the bars need a stable text overlay (ZIP + value) that doesn't fight with
+ * recharts' label positioning logic;
+ * - 10 rows max means flexbox is trivially faster than a full chart engine.
+ *
+ * Rows are sorted by the selected metric (trips or revenue), and each bar is
+ * sized as a percentage of the largest value for that metric.
+ */
+export function TopZonesChart({
+ data,
+ isLoading,
+ highlightedZones,
+ onZipClick,
+}: TopZonesChartProps) {
+ const c = useChartColors();
+ const [metric, setMetric] = useState<Metric>("trips");
+
+ // Sort a copy (never the prop) descending by the active metric and record
+ // the largest value for bar scaling.
+ const { rows, max } = useMemo(() => {
+ const sorted = [...data].sort((a, b) =>
+ metric === "trips"
+ ? b.trip_count - a.trip_count
+ : b.total_revenue - a.total_revenue,
+ );
+ const m = sorted.reduce(
+ (acc, r) =>
+ Math.max(acc, metric === "trips" ? r.trip_count : r.total_revenue),
+ 0,
+ );
+ return { rows: sorted, max: m };
+ }, [data, metric]);
+
+ // zip → label lookup; a zone without a label maps to "" so it still counts
+ // as highlighted but renders no label.
+ const highlightSet = useMemo(
+ () => new Map(highlightedZones.map((h) => [h.zip, h.label ?? ""])),
+ [highlightedZones],
+ );
+
+ if (isLoading) {
+ return (
+
+
+ Top Pickup Zones
+
+
+
+ );
+ }
+
+ return (
+
+
+
+ Top Pickup Zones
+
+
+ setMetric("trips")}
+ className={`px-2 py-0.5 rounded transition-colors ${
+ metric === "trips"
+ ? "bg-card text-foreground shadow-sm"
+ : "text-muted-foreground hover:text-foreground"
+ }`}
+ >
+ Trips
+
+ setMetric("revenue")}
+ className={`px-2 py-0.5 rounded transition-colors ${
+ metric === "revenue"
+ ? "bg-card text-foreground shadow-sm"
+ : "text-muted-foreground hover:text-foreground"
+ }`}
+ >
+ Revenue
+
+
+
+
+ {rows.length === 0 ? (
+
+ No zones in range
+
+ ) : (
+
+ {rows.map((row) => {
+ const value =
+ metric === "trips" ? row.trip_count : row.total_revenue;
+ const pct = max > 0 ? (value / max) * 100 : 0;
+ const isHighlighted = highlightSet.has(row.pickup_zip);
+ const highlightLabel = highlightSet.get(row.pickup_zip);
+
+ return (
+
onZipClick?.(row.pickup_zip)}
+ disabled={!onZipClick}
+ className={`w-full text-left group relative rounded-md transition-all ${
+ isHighlighted
+ ? "ring-2 ring-amber-400/70 dark:ring-amber-300/70"
+ : ""
+ } ${onZipClick ? "hover:bg-muted/40" : ""}`}
+ title={
+ onZipClick
+ ? `Filter dashboard to pickup ZIP ${row.pickup_zip}`
+ : row.pickup_zip
+ }
+ >
+
+
+ {row.pickup_zip}
+
+
+
+ {highlightLabel && (
+
+ {highlightLabel}
+
+ )}
+
+
+ {metric === "trips"
+ ? value.toLocaleString()
+ : `$${Math.round(value).toLocaleString()}`}
+
+
+
+ );
+ })}
+
+ )}
+
+ );
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/components/trip-chart.tsx b/apps/dev-playground/client/src/features/smart-dashboard/components/trip-chart.tsx
new file mode 100644
index 000000000..89f365f10
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/components/trip-chart.tsx
@@ -0,0 +1,144 @@
+import { useId } from "react";
+import {
+ Area,
+ AreaChart,
+ CartesianGrid,
+ ReferenceArea,
+ ResponsiveContainer,
+ Tooltip,
+ XAxis,
+ YAxis,
+} from "recharts";
+import type { Highlight } from "../hooks/use-action-dispatcher";
+import { useChartColors } from "../hooks/use-chart-colors";
+import type { TripOverTime } from "../hooks/use-dashboard-data";
+
+/** Props for {@link TripChart}. */
+interface TripChartProps {
+ /** Time-series rows; each row's `trip_date` is used to place highlights. */
+ data: TripOverTime[];
+ /** Date ranges to emphasize; each is snapped to the nearest available dates. */
+ highlights: Highlight[];
+ /** When true, the component returns its loading branch instead of the chart. */
+ isLoading: boolean;
+}
+
+// Translucent fill for each highlight color name. Keyed by the `color` union
+// of `Highlight`, so a color added there without an entry here fails to compile.
+const HIGHLIGHT_COLORS: Record<Highlight["color"], string> = {
+  blue: "rgba(96, 165, 250, 0.25)",
+  red: "rgba(248, 113, 113, 0.25)",
+  yellow: "rgba(250, 204, 21, 0.25)",
+};
+
+/**
+ * Formats a date string as a short "Mon D" label (en-US), e.g. "Jan 15".
+ *
+ * A date-only ISO string ("YYYY-MM-DD") is parsed as UTC midnight, so
+ * formatting it in the viewer's local zone would show the previous day
+ * anywhere west of UTC. Such strings are formatted in UTC to keep the
+ * calendar day as written; strings carrying a time keep local formatting.
+ */
+function formatDate(dateStr: string): string {
+  const d = new Date(dateStr);
+  const isDateOnly = /^\d{4}-\d{2}-\d{2}$/.test(dateStr);
+  return d.toLocaleDateString("en-US", {
+    month: "short",
+    day: "numeric",
+    ...(isDateOnly ? { timeZone: "UTC" } : {}),
+  });
+}
+
+/**
+ * Snaps `target` to the nearest entry of `dates` on the requested side.
+ *
+ * For "start" only dates at or before the target qualify; for "end" only
+ * dates at or after it. When nothing qualifies, the first date ("start") or
+ * the last date ("end") is returned. Returns undefined for an empty list.
+ */
+function findClosestDate(
+  target: string,
+  dates: string[],
+  direction: "start" | "end",
+): string | undefined {
+  if (dates.length === 0) return undefined;
+  const targetMs = new Date(target).getTime();
+  const wantStart = direction === "start";
+  let winner: string | undefined;
+  let smallestGap = Number.POSITIVE_INFINITY;
+  for (const candidate of dates) {
+    const candidateMs = new Date(candidate).getTime();
+    const onRequestedSide = wantStart
+      ? candidateMs <= targetMs
+      : candidateMs >= targetMs;
+    if (!onRequestedSide) continue;
+    const gap = Math.abs(candidateMs - targetMs);
+    if (gap < smallestGap) {
+      winner = candidate;
+      smallestGap = gap;
+    }
+  }
+  if (winner !== undefined) return winner;
+  return wantStart ? dates[0] : dates[dates.length - 1];
+}
+
+/**
+ * "Trips Over Time" card. Returns a separate loading branch while `isLoading`
+ * is true. Each highlight is snapped to the closest available trip dates and
+ * skipped when either end cannot be resolved.
+ */
+export function TripChart({ data, highlights, isLoading }: TripChartProps) {
+ const gradientId = useId();
+ const c = useChartColors();
+ // Dates present in the series; highlight ranges are snapped onto these.
+ const dates = data.map((d) => d.trip_date);
+
+ if (isLoading) {
+ return (
+
+
+ Trips Over Time
+
+
+
+ );
+ }
+
+ return (
+
+
+ Trips Over Time
+
+
+
+
+
+
+
+
+
+
+
+
+ v >= 1000 ? `${(v / 1000).toFixed(0)}K` : String(v)
+ }
+ />
+ [value.toLocaleString(), "Trips"]}
+ />
+ {highlights.map((h, i) => {
+ const x1 = findClosestDate(h.start, dates, "start");
+ const x2 = findClosestDate(h.end, dates, "end");
+ if (!x1 || !x2) return null;
+ return (
+
+ );
+ })}
+
+
+
+
+ );
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/hooks/use-action-dispatcher.ts b/apps/dev-playground/client/src/features/smart-dashboard/hooks/use-action-dispatcher.ts
new file mode 100644
index 000000000..c133847ca
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/hooks/use-action-dispatcher.ts
@@ -0,0 +1,310 @@
+import { useCallback, useMemo, useRef } from "react";
+import type { SSEEvent } from "./use-agent-stream";
+import type { DashboardFilters } from "./use-dashboard-data";
+import { focusChart, isFocusableChartId } from "./use-focus-registry";
+
+export interface Highlight { // A highlighted period, as produced by the `highlight_period` and `load_view` tools.
+ start: string;
+ end: string;
+ color: "blue" | "red" | "yellow"; // the dispatcher falls back to "blue" for any other value
+ label?: string;
+}
+
+export interface HighlightedZone { // A highlighted pickup ZIP, as produced by the `highlight_zone` tool.
+ zip: string; // the dispatcher rejects an empty string
+ label?: string;
+}
+
+const DASHBOARD_TOOLS = new Set([ // Tool names this dispatcher applies; handleEvent ignores other tools, dispatch reports them via onUnknownTool.
+ "filter_by_date_range",
+ "filter_by_pickup_zip",
+ "filter_by_fare",
+ "clear_filters",
+ "highlight_period",
+ "clear_highlights",
+ "highlight_zone",
+ "clear_zone_highlights",
+ "focus_chart",
+ "load_view",
+]);
+
+interface UseActionDispatcherOptions { // Callbacks through which useActionDispatcher applies tool calls to the route's state.
+ /** Receives an updater fn; avoids stale-closure bugs when the agent fires multiple tool calls back-to-back. */
+ onFilterUpdate: (
+ updater: (prev: DashboardFilters) => DashboardFilters,
+ ) => void;
+ onAddHighlight: (highlight: Highlight) => void; // called for `highlight_period` and once per highlight restored by `load_view`
+ onClearFilters: () => void; // called for `clear_filters` and at the start of `load_view`
+ onClearHighlights: () => void; // called for `clear_highlights` and at the start of `load_view`
+ onAddZoneHighlight: (zone: HighlightedZone) => void; // called for `highlight_zone`
+ onClearZoneHighlights: () => void; // called for `clear_zone_highlights` and at the start of `load_view`
+ /** Called once per applied action with a short human-readable summary. Route surfaces it as a toast. */
+ onAction?: (summary: string) => void;
+ /** Called when the dispatcher receives a tool it doesn't know how to handle. Lets the route warn visibly. */
+ onUnknownTool?: (name: string, args: unknown) => void; // also called when a known tool's arguments fail validation
+}
+
+/**
+ * Parses the raw JSON `arguments` string of a tool call.
+ *
+ * @param raw JSON text from the tool call; `undefined` or `""` means "no arguments".
+ * @returns `{}` when `raw` is empty, the parsed object when `raw` is a JSON
+ *   object, and `null` when `raw` is malformed JSON or parses to anything
+ *   else (primitive, `null`, or an array — an array is not a keyed
+ *   argument record and must not be treated as one).
+ */
+function parseArgs(raw: string | undefined): Record<string, unknown> | null {
+  if (!raw) return {};
+  try {
+    const parsed: unknown = JSON.parse(raw);
+    const isPlainObject =
+      typeof parsed === "object" && parsed !== null && !Array.isArray(parsed);
+    return isPlainObject ? (parsed as Record<string, unknown>) : null;
+  } catch {
+    // Malformed JSON: the caller reports it instead of dispatching.
+    return null;
+  }
+}
+
+/** Maximum number of handled `call_id`s remembered for de-duplication. */
+const CALL_ID_LRU_CAP = 128;
+
+/**
+ * Translates `function_call` tool events from the agent's SSE stream into
+ * dashboard state mutations. Exposes the same per-tool mutations as a
+ * synchronous `dispatch` function so the agent-feed action chips can
+ * reuse the identical code path without going through an LLM round-trip.
+ *
+ * Correctness rules (learned the hard way):
+ *
+ * - Only acts on `response.output_item.done`, never `.added`. `.added` fires
+ *   with incomplete `arguments`, causing spurious JSON parse failures and,
+ *   worse, double-firing: `highlight_period` used to append the same band
+ *   twice because both events passed.
+ * - Dedupes by `call_id`. Keeps a bounded list (oldest ids evicted first) so
+ *   memory stays finite across a long session. A new run clears the list on
+ *   `appkit.metadata` (the first event of every stream carries the new
+ *   threadId).
+ * - Uses updater callbacks (`onFilterUpdate(prev => ...)`) instead of reading
+ *   `currentFilters` from props. Multi-tool-call runs within a single
+ *   render cycle would otherwise see stale filter state.
+ * - Emits a summary for every applied action via `onAction`. Silent success
+ *   is the worst failure mode here — if the user can't see what changed,
+ *   they can't tell whether the agent misfired.
+ * - Arguments that fail validation are reported through `onUnknownTool`
+ *   and nothing is applied. `load_view` is lenient per entry: a highlight
+ *   that is not an object, or lacks string `start`/`end`, is skipped
+ *   instead of aborting the whole restore.
+ *
+ * @param opts Callbacks that apply each mutation to the route's state.
+ * @returns A memoised `{ handleEvent, dispatch }` pair.
+ */
+export function useActionDispatcher(opts: UseActionDispatcherOptions) {
+  const {
+    onFilterUpdate,
+    onAddHighlight,
+    onClearFilters,
+    onClearHighlights,
+    onAddZoneHighlight,
+    onClearZoneHighlights,
+    onAction,
+    onUnknownTool,
+  } = opts;
+
+  // Recently handled call ids, oldest first.
+  const seen = useRef<string[]>([]);
+
+  // Returns true when `callId` was already handled; otherwise remembers it
+  // and trims the list back to CALL_ID_LRU_CAP entries.
+  const markSeen = useCallback((callId: string): boolean => {
+    if (seen.current.includes(callId)) return true;
+    seen.current.push(callId);
+    if (seen.current.length > CALL_ID_LRU_CAP) {
+      seen.current.splice(0, seen.current.length - CALL_ID_LRU_CAP);
+    }
+    return false;
+  }, []);
+
+  const dispatch = useCallback(
+    (name: string, args: Record<string, unknown>): void => {
+      if (!DASHBOARD_TOOLS.has(name)) {
+        onUnknownTool?.(name, args);
+        return;
+      }
+
+      switch (name) {
+        case "filter_by_date_range": {
+          const start = args.start;
+          const end = args.end;
+          if (typeof start !== "string" || typeof end !== "string") {
+            onUnknownTool?.(name, args);
+            return;
+          }
+          onFilterUpdate((prev) => ({
+            ...prev,
+            date_from: start,
+            date_to: end,
+          }));
+          onAction?.(`Filtered to ${start} → ${end}`);
+          return;
+        }
+        case "filter_by_pickup_zip": {
+          const zip = args.zip;
+          if (typeof zip !== "string") {
+            onUnknownTool?.(name, args);
+            return;
+          }
+          onFilterUpdate((prev) => ({ ...prev, pickup_zip: zip }));
+          onAction?.(`Filtered to pickup ZIP ${zip}`);
+          return;
+        }
+        case "filter_by_fare": {
+          const min = typeof args.min === "number" ? args.min : undefined;
+          const max = typeof args.max === "number" ? args.max : undefined;
+          if (min === undefined && max === undefined) {
+            onUnknownTool?.(name, args);
+            return;
+          }
+          // Only the bounds that were supplied overwrite the previous ones.
+          onFilterUpdate((prev) => ({
+            ...prev,
+            ...(min !== undefined ? { fare_min: String(min) } : {}),
+            ...(max !== undefined ? { fare_max: String(max) } : {}),
+          }));
+          const parts: string[] = [];
+          if (min !== undefined) parts.push(`≥ $${min}`);
+          if (max !== undefined) parts.push(`≤ $${max}`);
+          onAction?.(`Filtered by fare ${parts.join(" and ")}`);
+          return;
+        }
+        case "clear_filters": {
+          onClearFilters();
+          onAction?.("Filters cleared");
+          return;
+        }
+        case "highlight_period": {
+          const start = args.start;
+          const end = args.end;
+          if (typeof start !== "string" || typeof end !== "string") {
+            onUnknownTool?.(name, args);
+            return;
+          }
+          const color = toHighlightColor(args.color);
+          const label = toOptionalLabel(args.label);
+          onAddHighlight({ start, end, color, label });
+          onAction?.(
+            `Highlighted ${start} → ${end}${label ? ` (${label})` : ""}`,
+          );
+          return;
+        }
+        case "clear_highlights": {
+          onClearHighlights();
+          onAction?.("Highlights cleared");
+          return;
+        }
+        case "highlight_zone": {
+          const zip = args.zip;
+          if (typeof zip !== "string" || zip === "") {
+            onUnknownTool?.(name, args);
+            return;
+          }
+          const label = toOptionalLabel(args.label);
+          onAddZoneHighlight({ zip, label });
+          onAction?.(`Highlighted ZIP ${zip}${label ? ` (${label})` : ""}`);
+          return;
+        }
+        case "clear_zone_highlights": {
+          onClearZoneHighlights();
+          onAction?.("Zone highlights cleared");
+          return;
+        }
+        case "focus_chart": {
+          const id = args.chart_id;
+          if (!isFocusableChartId(id)) {
+            onUnknownTool?.(name, args);
+            return;
+          }
+          focusChart(id);
+          onAction?.(`Focused ${String(id).replace(/_/g, " ")}`);
+          return;
+        }
+        case "load_view": {
+          // Anything that is not a keyed object contributes no filters.
+          const rawFilters = isArgRecord(args.filters) ? args.filters : {};
+          const nextFilters: DashboardFilters = {};
+          if (typeof rawFilters.date_from === "string")
+            nextFilters.date_from = rawFilters.date_from;
+          if (typeof rawFilters.date_to === "string")
+            nextFilters.date_to = rawFilters.date_to;
+          if (typeof rawFilters.pickup_zip === "string")
+            nextFilters.pickup_zip = rawFilters.pickup_zip;
+          if (typeof rawFilters.fare_min === "string")
+            nextFilters.fare_min = rawFilters.fare_min;
+          if (typeof rawFilters.fare_max === "string")
+            nextFilters.fare_max = rawFilters.fare_max;
+
+          const rawHighlights: unknown[] = Array.isArray(args.highlights)
+            ? args.highlights
+            : [];
+          const nextHighlights: Highlight[] = rawHighlights.flatMap((h) => {
+            // `null` or primitive entries would throw on property access.
+            if (!isArgRecord(h)) return [];
+            const start = h.start;
+            const end = h.end;
+            if (typeof start !== "string" || typeof end !== "string") return [];
+            return [
+              {
+                start,
+                end,
+                color: toHighlightColor(h.color),
+                label: toOptionalLabel(h.label),
+              },
+            ];
+          });
+
+          // Restore: clear then re-apply both filters and highlights in one
+          // shot so partial states don't linger.
+          onClearFilters();
+          onClearHighlights();
+          onClearZoneHighlights();
+          if (Object.keys(nextFilters).length > 0) {
+            onFilterUpdate(() => nextFilters);
+          }
+          for (const h of nextHighlights) {
+            onAddHighlight(h);
+          }
+          const viewName =
+            typeof args.name === "string" ? args.name : "saved view";
+          onAction?.(`Loaded "${viewName}"`);
+          return;
+        }
+        default: {
+          onUnknownTool?.(name, args);
+          return;
+        }
+      }
+    },
+    [
+      onFilterUpdate,
+      onAddHighlight,
+      onClearFilters,
+      onClearHighlights,
+      onAddZoneHighlight,
+      onClearZoneHighlights,
+      onAction,
+      onUnknownTool,
+    ],
+  );
+
+  const handleEvent = useCallback(
+    (event: SSEEvent) => {
+      // First event of every stream: start de-duplication afresh.
+      if (event.type === "appkit.metadata") {
+        seen.current = [];
+        return;
+      }
+
+      if (event.type !== "response.output_item.done") return;
+      if (event.item?.type !== "function_call") return;
+
+      const name = event.item.name;
+      if (!name) return;
+
+      // Tools not owned by the dashboard (e.g. `analytics.query`, sub-agent
+      // `agent-sql_analyst`) flow through without a dispatcher side-effect.
+      if (!DASHBOARD_TOOLS.has(name)) return;
+
+      const callId = event.item.call_id;
+      if (callId && markSeen(callId)) return;
+
+      const args = parseArgs(event.item.arguments);
+      if (args === null) {
+        onUnknownTool?.(name, event.item.arguments);
+        return;
+      }
+
+      dispatch(name, args);
+    },
+    [dispatch, markSeen, onUnknownTool],
+  );
+
+  return useMemo(() => ({ handleEvent, dispatch }), [handleEvent, dispatch]);
+}
+
+/** True for non-null, non-array objects, i.e. values safe to read keys from. */
+function isArgRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null && !Array.isArray(value);
+}
+
+/** Maps an untrusted colour value onto the supported set, defaulting to "blue". */
+function toHighlightColor(value: unknown): Highlight["color"] {
+  return value === "red" || value === "yellow" ? value : "blue";
+}
+
+/** Returns `value` when it is a non-empty string, otherwise `undefined`. */
+function toOptionalLabel(value: unknown): string | undefined {
+  return typeof value === "string" && value !== "" ? value : undefined;
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/hooks/use-agent-stream.ts b/apps/dev-playground/client/src/features/smart-dashboard/hooks/use-agent-stream.ts
new file mode 100644
index 000000000..b1d2eeb9b
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/hooks/use-agent-stream.ts
@@ -0,0 +1,123 @@
+import { type AgentChatEvent, useAgentChat } from "@databricks/appkit-ui/react";
+import { useCallback, useMemo, useRef } from "react";
+import { beginStreamRun, recordStreamEvent } from "./use-stream-inspector";
+
+/**
+ * Backwards-compatible alias for the SSE event shape that the rest of
+ * the smart-dashboard code (stream inspector, chat section, action
+ * dispatcher) already knows about. Identical to {@link AgentChatEvent}
+ * from `@databricks/appkit-ui/react` — keeping the name in this module
+ * means downstream callers don't need to be touched.
+ */
+export type SSEEvent = AgentChatEvent;
+
+interface UseAgentStreamOptions { // Options accepted by useAgentStream.
+ agentName: string; // forwarded to useAgentChat as `agent`; also prefixes each run's inspector label
+ onEvent?: (event: SSEEvent) => void; // invoked for every stream event, after it is recorded for the inspector
+}
+
+interface SendOptions { // Per-call options for `send()`.
+ /**
+ * Text prepended to the user's message on the wire. Used by the Smart
+ * Dashboard route to inject active filters / highlights into the system
+ * prompt so the agent always knows what the user is looking at.
+ */
+ contextPrefix?: string;
+}
+
+/** Value returned by `useAgentStream`. */
+interface UseAgentStreamReturn {
+  /** Streamed assistant text, or `Error: …` when the chat hook reports an error. */
+  content: string;
+  /** Events exposed by the underlying chat hook. */
+  events: SSEEvent[];
+  /** Legacy name for the chat hook's `isStreaming` flag. */
+  isLoading: boolean;
+  threadId: string | null;
+  /**
+   * Sends `message`, prefixed with `opts.contextPrefix` when given. The
+   * promise settles when the underlying chat send settles.
+   */
+  send: (message: string, opts?: SendOptions) => Promise<void>;
+  reset: () => void;
+}
+
+/**
+ * Smart-Dashboard wrapper around `useAgentChat` from
+ * `@databricks/appkit-ui/react`. The shared hook owns the fetch + SSE
+ * parsing + state plumbing; this wrapper adds two playground-specific
+ * concerns:
+ *
+ * 1. **`contextPrefix`** on `send()` — the dashboard injects active
+ *    filters / highlights into the user message so the agent always
+ *    sees the UI state. The shared hook stays narrow and lets us
+ *    compose the message here.
+ * 2. **Stream inspector wiring** — every send opens a `StreamRecord`
+ *    via {@link beginStreamRun} and forwards every event to
+ *    {@link recordStreamEvent} so the inspector drawer can render a
+ *    human-legible timeline. None of that belongs in the shared hook.
+ *
+ * Aside from those two layers this hook is a re-export: the SSE parsing
+ * code that used to live here moved into `useAgentChat`, and the API
+ * surface is preserved so existing callers (`smart-dashboard.route`,
+ * `agent-sidebar`) keep working.
+ *
+ * @param options `agentName` selects the agent; `onEvent` is called for
+ *   every stream event after it has been recorded for the inspector.
+ * @returns Chat state plus `send` / `reset`; see `UseAgentStreamReturn`.
+ */
+export function useAgentStream({
+  agentName,
+  onEvent,
+}: UseAgentStreamOptions): UseAgentStreamReturn {
+  // `runId` is captured at `send()` time so every event of the same run
+  // lands in the same StreamRecord. Stored as a ref to avoid re-mounting
+  // the chat hook every time the inspector dispatches.
+  const runIdRef = useRef<string | null>(null);
+  // Latest `onEvent` without making `handleEvent` change identity.
+  const onEventRef = useRef(onEvent);
+  onEventRef.current = onEvent;
+
+  const handleEvent = useCallback((event: AgentChatEvent) => {
+    if (runIdRef.current) {
+      recordStreamEvent(runIdRef.current, event);
+    }
+    onEventRef.current?.(event);
+  }, []);
+
+  const {
+    content: chatContent,
+    events,
+    isStreaming,
+    threadId,
+    error,
+    send: chatSend,
+    reset,
+  } = useAgentChat({ agent: agentName, onEvent: handleEvent });
+
+  const send = useCallback(
+    async (message: string, opts?: SendOptions): Promise<void> => {
+      const runId = beginStreamRun(`${agentName}: ${message.slice(0, 80)}`);
+      runIdRef.current = runId;
+      const wire = opts?.contextPrefix
+        ? `${opts.contextPrefix}${message}`
+        : message;
+      try {
+        await chatSend(wire);
+      } finally {
+        // Only clear our own id: if a newer `send()` started while this one
+        // was in flight, its run must keep receiving events.
+        if (runIdRef.current === runId) {
+          runIdRef.current = null;
+        }
+      }
+    },
+    [agentName, chatSend],
+  );
+
+  // Surface fetch-level failures in the displayed content so the
+  // dashboard's assistant message turns into a visible error row,
+  // mirroring the prior hook's UX (it wrote "Error: ..." into the
+  // streamed content on `!res.ok`). `useAgentChat` exposes the error
+  // via a dedicated `error` field; we project it into `content` only
+  // when the stream actually failed, otherwise pass `chat.content`
+  // through verbatim.
+  const content = useMemo(() => {
+    if (error) return `Error: ${error}`;
+    return chatContent;
+  }, [error, chatContent]);
+
+  return {
+    content,
+    events,
+    // `isLoading` is the legacy name; the shared hook uses `isStreaming`.
+    isLoading: isStreaming,
+    threadId,
+    send,
+    reset,
+  };
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/hooks/use-chart-colors.ts b/apps/dev-playground/client/src/features/smart-dashboard/hooks/use-chart-colors.ts
new file mode 100644
index 000000000..3403b1c1e
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/hooks/use-chart-colors.ts
@@ -0,0 +1,51 @@
+import { useEffect, useState } from "react";
+
+interface ChartColors { // Colour strings handed to charts by useChartColors.
+ primary: string;
+ secondary: string;
+ grid: string;
+ axis: string;
+ tooltipBg: string;
+ tooltipFg: string;
+}
+
+const LIGHT: ChartColors = { // returned while the root element has no `dark` class
+ primary: "hsl(221, 83%, 53%)",
+ secondary: "hsl(142, 71%, 45%)",
+ grid: "hsl(220, 13%, 91%)",
+ axis: "hsl(215, 16%, 47%)",
+ tooltipBg: "hsl(0, 0%, 100%)",
+ tooltipFg: "hsl(222, 47%, 11%)",
+};
+
+const DARK: ChartColors = { // returned while the root element carries the `dark` class
+ primary: "hsl(217, 91%, 70%)",
+ secondary: "hsl(152, 69%, 55%)",
+ grid: "hsl(215, 14%, 25%)",
+ axis: "hsl(217, 20%, 70%)",
+ tooltipBg: "hsl(224, 71%, 4%)",
+ tooltipFg: "hsl(210, 40%, 96%)",
+};
+
+/** Reports whether the document's root element currently carries the `dark` class. */
+function isDark(): boolean {
+  const { classList } = document.documentElement;
+  return classList.contains("dark");
+}
+
+/**
+ * Returns the chart palette matching the current theme and keeps it in sync.
+ *
+ * The initial value is chosen from the root element's `dark` class; a
+ * `MutationObserver` on that element's `class` attribute re-evaluates the
+ * choice on every change and is disconnected on unmount.
+ *
+ * @returns `DARK` while the root element has the `dark` class, else `LIGHT`.
+ */
+export function useChartColors(): ChartColors {
+  const [colors, setColors] = useState<ChartColors>(() => {
+    return isDark() ? DARK : LIGHT;
+  });
+
+  useEffect(() => {
+    const root = document.documentElement;
+    const onClassChange = (): void => {
+      setColors(isDark() ? DARK : LIGHT);
+    };
+    const observer = new MutationObserver(onClassChange);
+    observer.observe(root, { attributes: true, attributeFilter: ["class"] });
+    return () => {
+      observer.disconnect();
+    };
+  }, []);
+
+  return colors;
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/hooks/use-dashboard-data.ts b/apps/dev-playground/client/src/features/smart-dashboard/hooks/use-dashboard-data.ts
new file mode 100644
index 000000000..c4e9f5d35
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/hooks/use-dashboard-data.ts
@@ -0,0 +1,196 @@
+import { sql } from "@databricks/appkit-ui/js";
+import { useAnalyticsQuery } from "@databricks/appkit-ui/react";
+import { useMemo } from "react";
+
+interface KPIRawRow { // row shape the `dashboard_kpis` result is cast to
+ total_trips: number;
+ avg_fare: number;
+ avg_distance: number;
+ max_fare: number;
+ min_fare: number;
+}
+
+interface TopZoneData { // row shape the `dashboard_top_zone` result is cast to
+ pickup_zip: string;
+ trip_count: number;
+}
+
+export type KPIData = KPIRawRow & { // first KPI row merged with the first top-zone row
+ top_pickup_zone: string; // "N/A" when there is no top-zone row
+ top_zone_trips: number; // 0 when there is no top-zone row
+};
+
+export interface TripOverTime { // row shape the `dashboard_trips_over_time` result is cast to
+ trip_date: string;
+ trip_count: number;
+ avg_fare: number;
+ total_revenue: number;
+}
+
+export interface FareBucket { // row shape the `dashboard_fare_distribution` result is cast to
+ fare_bucket: string;
+ trip_count: number;
+ avg_distance: number;
+}
+
+export interface HeatmapCell { // row shape the `dashboard_hourly_heatmap` result is cast to
+ day_of_week: number;
+ hour_of_day: number;
+ trip_count: number;
+ avg_fare: number;
+}
+
+export interface TopZoneRow { // row shape the `dashboard_top_zones` result is cast to
+ pickup_zip: string;
+ trip_count: number;
+ total_revenue: number;
+ avg_fare: number;
+}
+
+export interface SparklineRow { // row shape the `dashboard_kpi_sparklines` result is cast to
+ trip_date: string;
+ trip_count: number;
+ total_revenue: number;
+ avg_fare: number;
+ avg_distance: number;
+}
+
+export interface DashboardFilters { // every field is optional; an unset field is sent to the queries as "all"
+ date_from?: string;
+ date_to?: string;
+ pickup_zip?: string;
+ fare_min?: string;
+ fare_max?: string;
+}
+
+/**
+ * Converts dashboard filters into the named string parameters passed to the
+ * analytics queries. A filter that is unset is sent as the literal "all".
+ *
+ * @param filters Current dashboard filters.
+ * @returns One `sql.string` parameter per filter.
+ */
+function buildParams(filters: DashboardFilters) {
+  const orAll = (value: string | undefined) => sql.string(value ?? "all");
+  const { date_from, date_to, pickup_zip, fare_min, fare_max } = filters;
+  return {
+    dateFrom: orAll(date_from),
+    dateTo: orAll(date_to),
+    pickupZip: orAll(pickup_zip),
+    fareMin: orAll(fare_min),
+    fareMax: orAll(fare_max),
+  };
+}
+
+/**
+ * Runs every Smart Dashboard analytics query for the given filters and
+ * gathers the results into one object.
+ *
+ * The trips-over-time and fare-distribution queries receive only the date
+ * and pickup-ZIP parameters; all other queries receive the full set.
+ *
+ * @param filters Current dashboard filters.
+ * @returns The merged KPI row (or `null` when the KPI query has no rows),
+ *   one array per chart (empty while data is missing), `isLoading` (true
+ *   while any query is loading) and `error` (the first truthy query error).
+ */
+export function useDashboardData(filters: DashboardFilters) {
+  /** Shape each `useAnalyticsQuery` result below is cast to. */
+  type QueryResult<Row> = {
+    data: Row[] | null;
+    loading: boolean;
+    error: string | null;
+  };
+
+  const params = useMemo(() => buildParams(filters), [filters]);
+
+  const kpisQuery = useAnalyticsQuery(
+    "dashboard_kpis",
+    params,
+  ) as QueryResult<KPIRawRow>;
+  const topZoneQuery = useAnalyticsQuery(
+    "dashboard_top_zone",
+    params,
+  ) as QueryResult<TopZoneData>;
+
+  // Subset of `params` shared by the two queries that take no fare bounds.
+  const tripsParams = useMemo(
+    () => ({
+      dateFrom: params.dateFrom,
+      dateTo: params.dateTo,
+      pickupZip: params.pickupZip,
+    }),
+    [params.dateFrom, params.dateTo, params.pickupZip],
+  );
+
+  const tripsQuery = useAnalyticsQuery(
+    "dashboard_trips_over_time",
+    tripsParams,
+  ) as QueryResult<TripOverTime>;
+  const fareQuery = useAnalyticsQuery(
+    "dashboard_fare_distribution",
+    tripsParams,
+  ) as QueryResult<FareBucket>;
+  const heatmapQuery = useAnalyticsQuery(
+    "dashboard_hourly_heatmap",
+    params,
+  ) as QueryResult<HeatmapCell>;
+  const topZonesQuery = useAnalyticsQuery(
+    "dashboard_top_zones",
+    params,
+  ) as QueryResult<TopZoneRow>;
+  const sparklinesQuery = useAnalyticsQuery(
+    "dashboard_kpi_sparklines",
+    params,
+  ) as QueryResult<SparklineRow>;
+
+  const kpisRaw = kpisQuery.data;
+  const topZoneRaw = topZoneQuery.data;
+  const kpis = useMemo(() => {
+    if (!kpisRaw || kpisRaw.length === 0) return null;
+    const [row] = kpisRaw;
+    const topZone = topZoneRaw?.[0];
+    return {
+      ...row,
+      top_pickup_zone: topZone?.pickup_zip ?? "N/A",
+      top_zone_trips: topZone?.trip_count ?? 0,
+    };
+  }, [kpisRaw, topZoneRaw]);
+
+  const isLoading =
+    kpisQuery.loading ||
+    topZoneQuery.loading ||
+    tripsQuery.loading ||
+    fareQuery.loading ||
+    heatmapQuery.loading ||
+    topZonesQuery.loading ||
+    sparklinesQuery.loading;
+  const error =
+    kpisQuery.error ||
+    topZoneQuery.error ||
+    tripsQuery.error ||
+    fareQuery.error ||
+    heatmapQuery.error ||
+    topZonesQuery.error ||
+    sparklinesQuery.error;
+
+  return {
+    kpis,
+    tripsOverTime: tripsQuery.data ?? [],
+    fareDistribution: fareQuery.data ?? [],
+    heatmap: heatmapQuery.data ?? [],
+    topZones: topZonesQuery.data ?? [],
+    sparklines: sparklinesQuery.data ?? [],
+    isLoading,
+    error,
+  };
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/hooks/use-focus-registry.ts b/apps/dev-playground/client/src/features/smart-dashboard/hooks/use-focus-registry.ts
new file mode 100644
index 000000000..54784fc83
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/hooks/use-focus-registry.ts
@@ -0,0 +1,71 @@
+import { useEffect, useRef, useState } from "react";
+
+/**
+ * Module-level focus registry. Chart wrappers register a callback under a
+ * stable id; `focusChart(id)` looks up the callback and invokes it to
+ * scroll the user's viewport to the chart and pulse a ring around it.
+ *
+ * Registrations live outside React state so the agent's SSE stream (which
+ * hands off to `focusChart` via `use-action-dispatcher`) never needs to
+ * thread a ref through the component tree.
+ *
+ * Holds at most one callback per id: registering again under the same id
+ * replaces the previous callback.
+ */
+const registry = new Map<FocusableChartId, () => void>();
+
+export type FocusableChartId = // ids under which chart wrappers can register a focus callback
+ | "kpis"
+ | "trips_over_time"
+ | "fare_distribution"
+ | "hourly_heatmap"
+ | "top_zones";
+
+export const FOCUSABLE_CHART_IDS: FocusableChartId[] = [ // runtime list of the ids above; isFocusableChartId checks membership in it
+ "kpis",
+ "trips_over_time",
+ "fare_distribution",
+ "hourly_heatmap",
+ "top_zones",
+];
+
+/**
+ * Type guard for untrusted chart ids (e.g. tool-call arguments).
+ *
+ * @param id Value to check.
+ * @returns True when `id` is a string listed in `FOCUSABLE_CHART_IDS`.
+ */
+export function isFocusableChartId(id: unknown): id is FocusableChartId {
+  if (typeof id !== "string") return false;
+  return FOCUSABLE_CHART_IDS.some((known) => known === id);
+}
+
+/**
+ * Invokes the focus callback registered under `id`, if there is one.
+ * Does nothing when no chart is currently registered under that id.
+ */
+export function focusChart(id: FocusableChartId): void {
+  const handler = registry.get(id);
+  if (handler !== undefined) {
+    handler();
+  }
+}
+
+/**
+ * Registers `id` as a focusable chart. Returns a `setRef` callback for the
+ * wrapping element and a `focused` boolean that flips true for 1.2s when
+ * `focusChart(id)` is called from elsewhere.
+ *
+ * Each `focusChart(id)` call restarts the 1.2s window, so a second call
+ * during a pulse extends it instead of being cut short by the first
+ * call's timer. The pending timer is cancelled (and `focused` reset) when
+ * `id` changes or the component unmounts.
+ *
+ * @param id Registry key; a later registration under the same id replaces
+ *   an earlier one.
+ * @returns `setRef` to attach to the wrapping element, and `focused`.
+ */
+export function useFocusable(id: FocusableChartId): {
+  setRef: (el: HTMLElement | null) => void;
+  focused: boolean;
+} {
+  const elRef = useRef<HTMLElement | null>(null);
+  const [focused, setFocused] = useState(false);
+
+  useEffect(() => {
+    let pulseTimer: ReturnType<typeof setTimeout> | undefined;
+
+    const onFocus = (): void => {
+      const el = elRef.current;
+      if (!el) return;
+      el.scrollIntoView({ behavior: "smooth", block: "center" });
+      setFocused(true);
+      // Restart the window so an earlier timer cannot end this pulse early.
+      if (pulseTimer !== undefined) clearTimeout(pulseTimer);
+      pulseTimer = setTimeout(() => {
+        pulseTimer = undefined;
+        setFocused(false);
+      }, 1200);
+    };
+
+    registry.set(id, onFocus);
+    return () => {
+      if (pulseTimer !== undefined) {
+        clearTimeout(pulseTimer);
+        pulseTimer = undefined;
+        // The timer would have reset the flag; do it now that it is cancelled.
+        setFocused(false);
+      }
+      // Leave the entry alone if another component has taken over this id.
+      if (registry.get(id) === onFocus) registry.delete(id);
+    };
+  }, [id]);
+
+  const setRef = (el: HTMLElement | null): void => {
+    elRef.current = el;
+  };
+
+  return { setRef, focused };
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/hooks/use-stream-inspector.ts b/apps/dev-playground/client/src/features/smart-dashboard/hooks/use-stream-inspector.ts
new file mode 100644
index 000000000..38b672138
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/hooks/use-stream-inspector.ts
@@ -0,0 +1,141 @@
+import { useCallback, useEffect, useSyncExternalStore } from "react";
+import type { SSEEvent } from "./use-agent-stream";
+
+/**
+ * Observability store for the agent SSE stream. Every chat message the
+ * dashboard sends gets a `StreamRecord`; each event the adapter yields is
+ * appended to that record with a relative timestamp. The Stream Inspector
+ * drawer reads from here to render a human-legible timeline.
+ *
+ * State is module-level on purpose — multiple components (the chat section,
+ * the agent sidebar, the inspector drawer itself) feed and read from the
+ * same store without wiring props or context. React only re-renders when
+ * `version` changes.
+ */
+
+export interface StreamEventRecord { // one stream event as stored by recordStreamEvent
+ event: SSEEvent;
+ receivedAt: number; // performance.now() reading taken when the event was recorded
+}
+
+export interface StreamRecord { // one run opened by beginStreamRun
+ id: string; // crypto.randomUUID() when available, otherwise a generated `run_…` id
+ label: string; // caller-supplied label
+ startedAt: number; // performance.now() reading taken when the run was opened
+ events: StreamEventRecord[]; // appended to in place by recordStreamEvent
+}
+
+/** Number of most recent runs kept in the store. */
+const MAX_RECORDS = 5;
+
+/** Module-level store contents; mutated only by the functions in this file. */
+const state: { isOpen: boolean; records: StreamRecord[] } = {
+  isOpen: false,
+  records: [],
+};
+
+/** Callbacks registered through `subscribe`. */
+const listeners = new Set<() => void>();
+
+/** Change counter used as the external-store snapshot. */
+let version = 0;
+
+/** Bumps `version` and calls every subscribed listener. */
+function notify(): void {
+  version += 1;
+  listeners.forEach((listener) => {
+    listener();
+  });
+}
+
+/** Registers `fn` as a listener and returns the function that removes it. */
+function subscribe(fn: () => void): () => void {
+  listeners.add(fn);
+  const unsubscribe = (): void => {
+    listeners.delete(fn);
+  };
+  return unsubscribe;
+}
+
+/** Returns the current change counter. */
+function getVersion(): number {
+  return version;
+}
+
+/**
+ * Subscribes the calling component to the inspector store.
+ *
+ * The component re-renders whenever the store's version counter changes;
+ * the returned values are read from the module-level state at render time.
+ *
+ * @returns Whether the inspector is open, and the stored runs.
+ */
+export function useStreamInspector(): {
+  isOpen: boolean;
+  records: StreamRecord[];
+} {
+  // Same getter for the client and server snapshots.
+  useSyncExternalStore(subscribe, getVersion, getVersion);
+  const { isOpen, records } = state;
+  return { isOpen, records };
+}
+
+/**
+ * Opens a new run, stores it ahead of the existing runs and drops the runs
+ * beyond `MAX_RECORDS`.
+ *
+ * @param label Text stored on the run.
+ * @returns The new run's id: `crypto.randomUUID()` when available, otherwise
+ *   a `run_…` id built from the current time and a random suffix.
+ */
+export function beginStreamRun(label: string): string {
+  const uuid = globalThis.crypto?.randomUUID?.() as string | undefined;
+  const id =
+    uuid ??
+    `run_${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 8)}`;
+
+  const run: StreamRecord = {
+    id,
+    label,
+    startedAt: performance.now(),
+    events: [],
+  };
+
+  // Newest first; keep room for the new run within the cap.
+  const survivors = state.records.slice(0, MAX_RECORDS - 1);
+  state.records = [run, ...survivors];
+  notify();
+  return id;
+}
+
+/**
+ * Appends `event`, stamped with `performance.now()`, to the run whose id is
+ * `runId` and notifies subscribers. Does nothing when no stored run has
+ * that id (for example because it was already dropped from the store).
+ */
+export function recordStreamEvent(runId: string, event: SSEEvent): void {
+  for (const run of state.records) {
+    if (run.id !== runId) continue;
+    run.events.push({ event, receivedAt: performance.now() });
+    notify();
+    return;
+  }
+}
+
+/** Marks the inspector as open and notifies subscribers. */
+export function openInspector(): void {
+  state.isOpen = true;
+  notify();
+}
+
+/** Marks the inspector as closed and notifies subscribers. */
+export function closeInspector(): void {
+  state.isOpen = false;
+  notify();
+}
+
+/** Flips the inspector between open and closed and notifies subscribers. */
+export function toggleInspector(): void {
+  const next = !state.isOpen;
+  state.isOpen = next;
+  notify();
+}
+
+/** Discards every stored run and notifies subscribers. */
+export function clearInspectorHistory(): void {
+  state.records = [];
+  notify();
+}
+
+/**
+ * Binds ⌘K / Ctrl+K to open-toggle and `Esc` to close. Mount once inside
+ * the route.
+ *
+ * The K match is case-insensitive so the shortcut also works with Caps Lock
+ * on, and events that carry no `key` are ignored instead of throwing. The
+ * listener is attached to `window` on mount and removed on unmount.
+ */
+export function useInspectorShortcuts(): void {
+  useEffect(() => {
+    const onKey = (e: KeyboardEvent): void => {
+      // Some synthetic keydown events arrive without a `key`.
+      const key = typeof e.key === "string" ? e.key : "";
+      const isToggleChord =
+        key.toLowerCase() === "k" &&
+        (e.metaKey || e.ctrlKey) &&
+        !e.altKey &&
+        !e.shiftKey;
+
+      if (isToggleChord) {
+        e.preventDefault();
+        toggleInspector();
+      } else if (key === "Escape" && state.isOpen) {
+        closeInspector();
+      }
+    };
+    window.addEventListener("keydown", onKey);
+    return () => {
+      window.removeEventListener("keydown", onKey);
+    };
+  }, []);
+}
+
+/**
+ * Convenience hook returning the first stored run, which is the most
+ * recently opened one. Used by the agent sidebar's tiny "pulse" indicator
+ * next to each agent.
+ *
+ * @returns The newest run, or `null` when the store holds none.
+ */
+export function useCurrentRun(): StreamRecord | null {
+  const [latest] = useStreamInspector().records;
+  return latest ?? null;
+}
+
+// Hook form of `toggleInspector`: returns a stable callback for callers that
+// prefer the "callback" shape over the module-level function (none do today).
+export const useStreamInspectorToggle: () => () => void = () => {
+  return useCallback(() => {
+    toggleInspector();
+  }, []);
+};
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/lib/capture-dashboard.ts b/apps/dev-playground/client/src/features/smart-dashboard/lib/capture-dashboard.ts
new file mode 100644
index 000000000..a00f8799b
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/lib/capture-dashboard.ts
@@ -0,0 +1,48 @@
+import html2canvas from "html2canvas-pro";
+
+/**
+ * Renders an element with html2canvas and returns it as a JPEG data URL.
+ *
+ * JPEG at a modest scale is used on purpose instead of PNG:
+ *
+ * - AppKit's server plugin applies `express.json({ limit: default = 100kb })`
+ *   globally. A full-fidelity dashboard PNG encoded in base64 is typically
+ *   200-600kb — over the limit.
+ * - JPEG @ quality 0.85 + pixelRatio 1 keeps payloads to ~40-80kb base64
+ *   for the Smart Dashboard viewport, comfortably under the limit.
+ *
+ * If the payload ever needs to grow (higher fidelity, larger viewports),
+ * switch to a raw body route (`express.raw`) with an explicit larger limit.
+ *
+ * `html2canvas-pro` (drop-in fork of html2canvas) is required because
+ * Tailwind v4 emits `oklch()` colors throughout the computed styles of
+ * every node, which the original html2canvas 1.x cannot parse.
+ *
+ * @param el Element to capture; its `--background` CSS variable (falling
+ *   back to white) is used as the canvas background.
+ * @param opts `quality` is the JPEG quality (default 0.85); `scale` is the
+ *   html2canvas render scale (default 1).
+ * @returns The data URL plus the rendered canvas width and height.
+ */
+export async function captureDashboardAsDataUrl(
+  el: HTMLElement,
+  opts: { quality?: number; scale?: number } = {},
+): Promise<{ dataUrl: string; widthPx: number; heightPx: number }> {
+  const jpegQuality = opts.quality ?? 0.85;
+  const renderScale = opts.scale ?? 1;
+
+  const canvas = await html2canvas(el, {
+    backgroundColor: readCssVar(el, "--background") ?? "#ffffff",
+    scale: renderScale,
+    useCORS: true,
+    allowTaint: false,
+    logging: false,
+  });
+
+  return {
+    dataUrl: canvas.toDataURL("image/jpeg", jpegQuality),
+    widthPx: canvas.width,
+    heightPx: canvas.height,
+  };
+}
+
+/**
+ * Reads a CSS custom property from an element's computed style.
+ *
+ * @param el Element whose computed style is inspected.
+ * @param name Property name, e.g. `--background`.
+ * @returns `null` when the property is empty; otherwise the trimmed value,
+ *   wrapped in `hsl(...)` when it starts with a digit.
+ */
+function readCssVar(el: HTMLElement, name: string): string | null {
+  const value = getComputedStyle(el).getPropertyValue(name).trim();
+  if (value === "") return null;
+  // CSS var values may be raw HSL triplets ("0 0% 100%") or full hsl(...).
+  // Wrap naked triplets so html2canvas' painter treats them as colors.
+  const looksLikeBareTriplet = /^\d/.test(value);
+  return looksLikeBareTriplet ? `hsl(${value})` : value;
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/lib/dashboard-context.ts b/apps/dev-playground/client/src/features/smart-dashboard/lib/dashboard-context.ts
new file mode 100644
index 000000000..2348f6fef
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/lib/dashboard-context.ts
@@ -0,0 +1,41 @@
+import type { Highlight } from "../hooks/use-action-dispatcher";
+import type { DashboardFilters } from "../hooks/use-dashboard-data";
+
+/**
+ * Turns the current dashboard state into a short natural-language preamble
+ * that is prepended to every chat turn, so the `query` dispatcher and its
+ * specialists stay grounded in what the user is looking at — e.g. "user
+ * asked 'is this unusual?' with filters {date_from: 2016-11-01}".
+ *
+ * Filters that are `undefined` or empty strings are left out. A highlight's
+ * colour is mentioned only when it is not "blue", and its label only when
+ * it is non-empty.
+ *
+ * @param filters Active dashboard filters.
+ * @param highlights Highlighted periods.
+ * @returns The preamble ending in "User question: ", or `""` when there is
+ *   nothing to report; callers should skip prepending in that case.
+ */
+export function buildDashboardContext(
+  filters: DashboardFilters,
+  highlights: Highlight[],
+): string {
+  const sections: string[] = [];
+
+  const activeFilters: string[] = [];
+  for (const [key, value] of Object.entries(filters)) {
+    if (value === undefined || value === "") continue;
+    activeFilters.push(`${key}=${value}`);
+  }
+  if (activeFilters.length > 0) {
+    sections.push(`active filters: ${activeFilters.join(", ")}`);
+  }
+
+  if (highlights.length > 0) {
+    const described = highlights.map(({ start, end, color, label }) => {
+      const colorTag = color !== "blue" ? ` [${color}]` : "";
+      const labelTag = label ? ` (${label})` : "";
+      return `${start}..${end}${colorTag}${labelTag}`;
+    });
+    sections.push(`highlighted periods: ${described.join("; ")}`);
+  }
+
+  if (sections.length === 0) return "";
+
+  return `[Dashboard state] ${sections.join(". ")}.\n\nUser question: `;
+}
diff --git a/apps/dev-playground/client/src/features/smart-dashboard/lib/feed-actions.ts b/apps/dev-playground/client/src/features/smart-dashboard/lib/feed-actions.ts
new file mode 100644
index 000000000..a48a641c1
--- /dev/null
+++ b/apps/dev-playground/client/src/features/smart-dashboard/lib/feed-actions.ts
@@ -0,0 +1,206 @@
+import type { FocusableChartId } from "../hooks/use-focus-registry";
+
+/**
+ * Structured actions emitted by the `insights` and `anomaly` ephemeral
+ * agents. Each kind maps 1:1 to a dispatcher tool (`filter_by_*`,
+ * `highlight_*`, `focus_chart`) except `ask`, which flows through the main
+ * chat dispatcher with a preloaded prompt.
+ *
+ * Kept in a neutral shape (not the wire tool-call format) so the agent can
+ * hand-author JSON without memorising `call_id` / `arguments` envelopes,
+ * and so the UI can render distinct copy per action kind.
+ */
+
+/**
+ * Action of kind `filter_date`. Carries a `start`/`end` pair; `parseAction`
+ * only checks that both are strings, so their format is not validated here.
+ */
+export interface FilterDateAction {
+ kind: "filter_date";
+ label: string;
+ start: string;
+ end: string;
+}
+
+/** Action of kind `filter_zip`. `parseAction` requires `zip` to be a non-empty string. */
+export interface FilterZipAction {
+ kind: "filter_zip";
+ label: string;
+ zip: string;
+}
+
+/**
+ * Action of kind `filter_fare`. Both bounds are optional in the type, but
+ * `parseAction` rejects the action when neither `min` nor `max` is a number.
+ */
+export interface FilterFareAction {
+ kind: "filter_fare";
+ label: string;
+ min?: number;
+ max?: number;
+}
+
+/**
+ * Action of kind `highlight_period`. `parseAction` fills `color` with
+ * `"blue"` when the incoming value is missing or not one of the three colours.
+ */
+export interface HighlightPeriodAction {
+ kind: "highlight_period";
+ label: string;
+ start: string;
+ end: string;
+ color?: "blue" | "red" | "yellow";
+}
+
+/** Action of kind `highlight_zone`: a non-empty `zip` plus an optional free-text `note`. */
+export interface HighlightZoneAction {
+ kind: "highlight_zone";
+ label: string;
+ zip: string;
+ note?: string;
+}
+
+/** Action of kind `focus_chart`; `chart_id` must be a `FocusableChartId`. */
+export interface FocusChartAction {
+ kind: "focus_chart";
+ label: string;
+ chart_id: FocusableChartId;
+}
+
+/** Action of kind `ask`: carries the `prompt` text; `parseAction` requires it to be non-empty. */
+export interface AskAction {
+ kind: "ask";
+ label: string;
+ prompt: string;
+}
+
+/** Union of every supported action, discriminated on the `kind` literal. */
+export type FeedAction =
+ | FilterDateAction
+ | FilterZipAction
+ | FilterFareAction
+ | HighlightPeriodAction
+ | HighlightZoneAction
+ | FocusChartAction
+ | AskAction;
+
+/**
+ * One parsed feed entry. `parseFeedPayload` drops entries whose `title` is
+ * empty, so a parsed insight always has a non-empty title.
+ */
+export interface FeedInsight {
+ title: string;
+ description: string;
+ actions?: FeedAction[];
+}
+
+/** A feed entry with a severity level; `parseFeedAnomalies` defaults it to `"low"`. */
+export interface FeedAnomaly extends FeedInsight {
+ severity: "low" | "medium" | "high";
+}
+
+/** Type guard: true only for the highlight colours `"blue"`, `"red"` and `"yellow"`. */
+function isValidColor(v: unknown): v is "blue" | "red" | "yellow" {
+  switch (v) {
+    case "blue":
+    case "red":
+    case "yellow":
+      return true;
+    default:
+      return false;
+  }
+}
+
+/** Type guard: true only when `v` is one of the five chart ids listed below. */
+function isValidChartId(v: unknown): v is FocusableChartId {
+  const knownIds: readonly unknown[] = [
+    "kpis",
+    "trips_over_time",
+    "fare_distribution",
+    "hourly_heatmap",
+    "top_zones",
+  ];
+  return knownIds.includes(v);
+}
+
+/**
+ * Validates one untrusted action value and converts it into a
+ * {@link FeedAction}.
+ *
+ * Every kind requires a non-empty string `label`; the remaining
+ * requirements are checked per kind in the switch below.
+ *
+ * @param raw - Any value; non-objects and `null` are rejected.
+ * @returns The typed action, or `null` when `raw` is not an object, has no
+ *   usable `label`, has an unknown `kind`, or lacks a field its kind requires.
+ */
+function parseAction(raw: unknown): FeedAction | null {
+  if (typeof raw !== "object" || raw === null) return null;
+  // Index the object as a string-keyed bag of unknowns so each field has to
+  // be narrowed before it is copied into the result.
+  const r = raw as Record<string, unknown>;
+  const kind = r.kind;
+  const label = typeof r.label === "string" ? r.label : "";
+  if (!label) return null;
+
+  switch (kind) {
+    case "filter_date":
+      if (typeof r.start === "string" && typeof r.end === "string") {
+        return { kind, label, start: r.start, end: r.end };
+      }
+      return null;
+    case "filter_zip":
+      if (typeof r.zip === "string" && r.zip) {
+        return { kind, label, zip: r.zip };
+      }
+      return null;
+    case "filter_fare": {
+      const min = typeof r.min === "number" ? r.min : undefined;
+      const max = typeof r.max === "number" ? r.max : undefined;
+      // A fare filter with neither bound would filter nothing.
+      if (min === undefined && max === undefined) return null;
+      return { kind, label, min, max };
+    }
+    case "highlight_period":
+      if (typeof r.start === "string" && typeof r.end === "string") {
+        return {
+          kind,
+          label,
+          start: r.start,
+          end: r.end,
+          // Unknown or missing colours fall back to blue instead of rejecting.
+          color: isValidColor(r.color) ? r.color : "blue",
+        };
+      }
+      return null;
+    case "highlight_zone":
+      if (typeof r.zip === "string" && r.zip) {
+        return {
+          kind,
+          label,
+          zip: r.zip,
+          // Only attach `note` when it is a non-empty string.
+          ...(typeof r.note === "string" && r.note ? { note: r.note } : {}),
+        };
+      }
+      return null;
+    case "focus_chart":
+      if (isValidChartId(r.chart_id)) {
+        return { kind, label, chart_id: r.chart_id };
+      }
+      return null;
+    case "ask":
+      if (typeof r.prompt === "string" && r.prompt) {
+        return { kind, label, prompt: r.prompt };
+      }
+      return null;
+    default:
+      return null;
+  }
+}
+
+/**
+ * Parses an agent response into {@link FeedInsight} entries.
+ *
+ * Delegates JSON extraction to `parseFeedPayload` and maps each element
+ * field by field: non-string `title`/`description` become `""`, and
+ * `actions` is kept only when it is an array, with invalid actions removed.
+ * Malformed entries are ignored rather than thrown — the agent is a Gemini
+ * flash model and occasionally wraps the output in fences or adds an extra
+ * element with a different shape.
+ */
+export function parseFeedInsights(content: string): FeedInsight[] {
+  return parseFeedPayload(content, (obj) => {
+    const title = typeof obj.title === "string" ? obj.title : "";
+    const description =
+      typeof obj.description === "string" ? obj.description : "";
+    let actions: FeedAction[] | undefined;
+    if (Array.isArray(obj.actions)) {
+      actions = obj.actions.map(parseAction).filter(Boolean) as FeedAction[];
+    }
+    return { title, description, actions };
+  });
+}
+
+/**
+ * Parses an agent response into {@link FeedAnomaly} entries.
+ *
+ * Same field handling as {@link parseFeedInsights}, plus `severity`, which
+ * falls back to `"low"` unless it is exactly `"low"`, `"medium"` or `"high"`.
+ */
+export function parseFeedAnomalies(content: string): FeedAnomaly[] {
+  return parseFeedPayload(content, (obj) => {
+    const rawSeverity = obj.severity;
+    let severity: FeedAnomaly["severity"] = "low";
+    if (
+      rawSeverity === "low" ||
+      rawSeverity === "medium" ||
+      rawSeverity === "high"
+    ) {
+      severity = rawSeverity;
+    }
+    const actions = Array.isArray(obj.actions)
+      ? (obj.actions.map(parseAction).filter(Boolean) as FeedAction[])
+      : undefined;
+    return {
+      title: typeof obj.title === "string" ? obj.title : "",
+      description: typeof obj.description === "string" ? obj.description : "",
+      severity,
+      actions,
+    };
+  });
+}
+
+/**
+ * Shared extraction step for the feed parsers.
+ *
+ * Takes the span from the first `[` to the last `]` in `content`, parses it
+ * as JSON, and runs `builder` on every element that is a non-null object.
+ * Items whose built `title` is empty are dropped.
+ *
+ * @param content - Raw agent response; may contain text around the array.
+ * @param builder - Converts one untyped element into the target item type.
+ * @returns The built items in source order, or `[]` when no bracketed span
+ *   exists, the span is not valid JSON, or the parsed value is not an array.
+ */
+function parseFeedPayload<T extends { title: string }>(
+  content: string,
+  builder: (obj: Record<string, unknown>) => T,
+): T[] {
+  const jsonMatch = content.match(/\[[\s\S]*\]/);
+  if (!jsonMatch) return [];
+  try {
+    const parsed: unknown = JSON.parse(jsonMatch[0]);
+    if (!Array.isArray(parsed)) return [];
+    return parsed.flatMap((el) => {
+      if (typeof el !== "object" || el === null) return [];
+      const item = builder(el as Record<string, unknown>);
+      return item.title ? [item] : [];
+    });
+  } catch {
+    // Deliberate best-effort: unparseable agent output yields an empty feed.
+    return [];
+  }
+}
diff --git a/apps/dev-playground/client/src/index.css b/apps/dev-playground/client/src/index.css
index 5dcc4cf86..b5389ab8c 100644
--- a/apps/dev-playground/client/src/index.css
+++ b/apps/dev-playground/client/src/index.css
@@ -1 +1,40 @@
@import "@databricks/appkit-ui/styles.css";
+
+/**
+ * Realign Tailwind v4's `dark:` variant with appkit-ui's theme tokens.
+ *
+ * `packages/appkit-ui/.../globals.css` defines two paths into dark theme:
+ * - An explicit `.dark` class on `<html>` (wins unconditionally).
+ * - `@media (prefers-color-scheme: dark)` on `:root:not(.light)` — i.e.
+ * the media query is ignored when the user has explicitly opted into
+ * light via the `.light` class.
+ *
+ * Tailwind v4's default `dark:` variant, however, is purely media-query
+ * driven. That mismatch produces a split-personality theme in exactly one
+ * scenario, which is the one we hit: OS set to dark, user forces light
+ * via the theme selector (bootstrap script in index.html sets
+ * `<html class="light">`). `--card`, `--background`, etc. correctly
+ * resolve to light, but every `dark:*` utility keeps firing under the
+ * media query — cards end up with dark-mode backgrounds layered under
+ * light-mode text and chrome.
+ *
+ * This `@custom-variant dark` rebinds the variant to mirror the token
+ * logic exactly:
+ * - Element is (or descends from) `.dark` → dark utilities fire.
+ * - `prefers-color-scheme: dark` AND no `.light` ancestor → also fire.
+ * - Everything else → no-op.
+ *
+ * Scoped to the playground because the bootstrap script in index.html is
+ * what makes the `.light` / `.dark` classes meaningful here; other
+ * appkit-ui consumers may rely on the current media-only behaviour.
+ */
+@custom-variant dark {
+ /* Path 1: the element is `.dark` or has a `.dark` ancestor. */
+ &:where(.dark, .dark *) {
+ @slot;
+ }
+ /* Path 2: OS prefers dark, and neither the element nor any ancestor is `.light`. */
+ @media (prefers-color-scheme: dark) {
+ &:where(:not(.light):not(.light *)) {
+ @slot;
+ }
+ }
+}
diff --git a/apps/dev-playground/client/src/lib/nav.ts b/apps/dev-playground/client/src/lib/nav.ts
new file mode 100644
index 000000000..4b391cb71
--- /dev/null
+++ b/apps/dev-playground/client/src/lib/nav.ts
@@ -0,0 +1,183 @@
+import {
+ BarChart3Icon,
+ BotIcon,
+ DatabaseIcon,
+ FileCode2Icon,
+ FolderIcon,
+ GaugeIcon,
+ LayoutDashboardIcon,
+ LineChartIcon,
+ type LucideIcon,
+ MessageCircleIcon,
+ RadioIcon,
+ SearchIcon,
+ ServerIcon,
+ ShieldIcon,
+ ZapIcon,
+} from "lucide-react";
+
+/**
+ * Metadata for a single demo route in the dev playground.
+ *
+ * `description` is used on the home page card. `icon` is used both on the
+ * home page card and (optionally) in the nav dropdown. Keep `description`
+ * to a single sentence — the home grid treats it as a one-line tagline.
+ */
+export interface NavItem {
+ /** Route path of the demo, e.g. `/analytics`; compared against the current pathname. */
+ to: string;
+ /** Short display name of the demo. */
+ label: string;
+ /** One-sentence tagline shown on the home page card. */
+ description: string;
+ /** Lucide icon component shown alongside the label. */
+ icon: LucideIcon;
+}
+
+/** A labelled section of the demo catalog and the demos it contains. */
+export interface NavGroup {
+  /** Identifier of the section; one of the three known categories. */
+  id: "data" | "ai" | "platform";
+  /** Display name of the section. */
+  label: string;
+  /** Short tagline shown under the section heading on the home page. */
+  tagline: string;
+  /** Demos that belong to this section, in display order. */
+  items: ReadonlyArray<NavItem>;
+}
+
+/**
+ * Canonical demo catalog. Both the navigation dropdown in `__root.tsx` and
+ * the landing grid in `index.tsx` render from this list, so adding a new
+ * demo is a one-line change here and both surfaces pick it up.
+ */
+export const NAV_GROUPS: ReadonlyArray<NavGroup> = [
+  {
+    id: "data",
+    label: "Data",
+    tagline: "Query, stream, and transform data with AppKit's data plugins.",
+    items: [
+      {
+        to: "/analytics",
+        label: "Analytics",
+        description:
+          "Query execution, charts, and interactive components against live SQL.",
+        icon: BarChart3Icon,
+      },
+      {
+        to: "/arrow-analytics",
+        label: "Arrow Analytics",
+        description:
+          "Same dashboard — served over Apache Arrow streaming for zero-copy speed.",
+        icon: ZapIcon,
+      },
+      {
+        to: "/lakebase",
+        label: "Lakebase",
+        description:
+          "Four takes on Postgres: raw driver, Drizzle, TypeORM, Sequelize with OAuth refresh.",
+        icon: DatabaseIcon,
+      },
+      {
+        to: "/sql-helpers",
+        label: "SQL Helpers",
+        description:
+          "Type-safe parameter builders and query generators for Databricks SQL.",
+        icon: FileCode2Icon,
+      },
+    ],
+  },
+  {
+    id: "ai",
+    label: "AI",
+    tagline: "Agents, RAG, and LLM-powered UI built on AppKit primitives.",
+    items: [
+      {
+        to: "/smart-dashboard",
+        label: "Smart Dashboard",
+        description:
+          "Multi-agent NYC Taxi dashboard with live filters, highlights, approvals, and saved views.",
+        icon: LayoutDashboardIcon,
+      },
+      {
+        to: "/agent",
+        label: "Custom Agent",
+        description:
+          "Chat agent over Databricks Model Serving with tools auto-discovered from AppKit plugins.",
+        icon: BotIcon,
+      },
+      {
+        to: "/genie",
+        label: "Genie",
+        description:
+          "Natural-language Q&A against your data with SSE streaming and conversation persistence.",
+        icon: MessageCircleIcon,
+      },
+      {
+        to: "/chart-inference",
+        label: "Chart Inference",
+        description:
+          "Let the agent pick the right chart type for a query result on the fly.",
+        icon: LineChartIcon,
+      },
+      {
+        to: "/vector-search",
+        label: "Vector Search",
+        description:
+          "Semantic search backed by Databricks vector indexes, wired into AppKit's retrieval API.",
+        icon: SearchIcon,
+      },
+      {
+        to: "/serving",
+        label: "Serving",
+        description:
+          "Call model-serving endpoints directly with the typed serving client.",
+        icon: ServerIcon,
+      },
+    ],
+  },
+  {
+    id: "platform",
+    label: "Platform",
+    tagline:
+      "Infrastructure demos: storage, policy, observability, resilience.",
+    items: [
+      {
+        to: "/files",
+        label: "Files",
+        description:
+          "Browse, preview, and download from Unity Catalog Volumes via the Files plugin.",
+        icon: FolderIcon,
+      },
+      {
+        to: "/policy-matrix",
+        label: "Policy Matrix",
+        description:
+          "Resource policies, requested claims, and per-user authorisation flows.",
+        icon: ShieldIcon,
+      },
+      {
+        to: "/telemetry",
+        label: "Telemetry",
+        description:
+          "OpenTelemetry traces and metrics with a drop-in AppKit provider.",
+        icon: GaugeIcon,
+      },
+      {
+        to: "/reconnect",
+        label: "Reconnect",
+        description:
+          "Resilient SSE streams: automatic Last-Event-ID tracking and reconnection.",
+        icon: RadioIcon,
+      },
+    ],
+  },
+];
+
+/**
+ * All items flattened — useful for a search index or breadcrumb lookup.
+ * Order follows `NAV_GROUPS`: groups in catalog order, items in group order.
+ */
+export const ALL_NAV_ITEMS: ReadonlyArray<NavItem> = NAV_GROUPS.flatMap(
+  (g) => g.items,
+);
+
+/**
+ * Resolve a pathname back to its nav item (for breadcrumbs, titles, etc).
+ *
+ * A path matches an item when it equals `item.to` exactly or continues it
+ * with a `/` segment, so nested routes like `/smart-dashboard/saved` match
+ * while a sibling such as `/smart-dashboard-v2` does not.
+ *
+ * @param pathname - Location pathname to look up, e.g. `/lakebase`.
+ * @param items - Catalog to search; defaults to `ALL_NAV_ITEMS`.
+ * @returns The first matching item in catalog order, or `null` if none match.
+ */
+export function findNavItemForPath(
+  pathname: string,
+  items: ReadonlyArray<NavItem> = ALL_NAV_ITEMS,
+): NavItem | null {
+  for (const item of items) {
+    // Require a segment boundary: a bare prefix test would let `/agent`
+    // claim `/agents` or `/agent-v2`.
+    if (pathname === item.to || pathname.startsWith(`${item.to}/`)) {
+      return item;
+    }
+  }
+  return null;
+}
diff --git a/apps/dev-playground/client/src/routeTree.gen.ts b/apps/dev-playground/client/src/routeTree.gen.ts
index 45e280700..8b953ed98 100644
--- a/apps/dev-playground/client/src/routeTree.gen.ts
+++ b/apps/dev-playground/client/src/routeTree.gen.ts
@@ -13,6 +13,7 @@ import { Route as VectorSearchRouteRouteImport } from './routes/vector-search.ro
import { Route as TypeSafetyRouteRouteImport } from './routes/type-safety.route'
import { Route as TelemetryRouteRouteImport } from './routes/telemetry.route'
import { Route as SqlHelpersRouteRouteImport } from './routes/sql-helpers.route'
+import { Route as SmartDashboardRouteRouteImport } from './routes/smart-dashboard.route'
import { Route as ServingRouteRouteImport } from './routes/serving.route'
import { Route as ReconnectRouteRouteImport } from './routes/reconnect.route'
import { Route as PolicyMatrixRouteRouteImport } from './routes/policy-matrix.route'
@@ -24,6 +25,7 @@ import { Route as DataVisualizationRouteRouteImport } from './routes/data-visual
import { Route as ChartInferenceRouteRouteImport } from './routes/chart-inference.route'
import { Route as ArrowAnalyticsRouteRouteImport } from './routes/arrow-analytics.route'
import { Route as AnalyticsRouteRouteImport } from './routes/analytics.route'
+import { Route as AgentRouteRouteImport } from './routes/agent.route'
import { Route as IndexRouteImport } from './routes/index'
const VectorSearchRouteRoute = VectorSearchRouteRouteImport.update({
@@ -46,6 +48,11 @@ const SqlHelpersRouteRoute = SqlHelpersRouteRouteImport.update({
path: '/sql-helpers',
getParentRoute: () => rootRouteImport,
} as any)
+const SmartDashboardRouteRoute = SmartDashboardRouteRouteImport.update({
+ id: '/smart-dashboard',
+ path: '/smart-dashboard',
+ getParentRoute: () => rootRouteImport,
+} as any)
const ServingRouteRoute = ServingRouteRouteImport.update({
id: '/serving',
path: '/serving',
@@ -101,6 +108,11 @@ const AnalyticsRouteRoute = AnalyticsRouteRouteImport.update({
path: '/analytics',
getParentRoute: () => rootRouteImport,
} as any)
+const AgentRouteRoute = AgentRouteRouteImport.update({
+ id: '/agent',
+ path: '/agent',
+ getParentRoute: () => rootRouteImport,
+} as any)
const IndexRoute = IndexRouteImport.update({
id: '/',
path: '/',
@@ -109,6 +121,7 @@ const IndexRoute = IndexRouteImport.update({
export interface FileRoutesByFullPath {
'/': typeof IndexRoute
+ '/agent': typeof AgentRouteRoute
'/analytics': typeof AnalyticsRouteRoute
'/arrow-analytics': typeof ArrowAnalyticsRouteRoute
'/chart-inference': typeof ChartInferenceRouteRoute
@@ -120,6 +133,7 @@ export interface FileRoutesByFullPath {
'/policy-matrix': typeof PolicyMatrixRouteRoute
'/reconnect': typeof ReconnectRouteRoute
'/serving': typeof ServingRouteRoute
+ '/smart-dashboard': typeof SmartDashboardRouteRoute
'/sql-helpers': typeof SqlHelpersRouteRoute
'/telemetry': typeof TelemetryRouteRoute
'/type-safety': typeof TypeSafetyRouteRoute
@@ -127,6 +141,7 @@ export interface FileRoutesByFullPath {
}
export interface FileRoutesByTo {
'/': typeof IndexRoute
+ '/agent': typeof AgentRouteRoute
'/analytics': typeof AnalyticsRouteRoute
'/arrow-analytics': typeof ArrowAnalyticsRouteRoute
'/chart-inference': typeof ChartInferenceRouteRoute
@@ -138,6 +153,7 @@ export interface FileRoutesByTo {
'/policy-matrix': typeof PolicyMatrixRouteRoute
'/reconnect': typeof ReconnectRouteRoute
'/serving': typeof ServingRouteRoute
+ '/smart-dashboard': typeof SmartDashboardRouteRoute
'/sql-helpers': typeof SqlHelpersRouteRoute
'/telemetry': typeof TelemetryRouteRoute
'/type-safety': typeof TypeSafetyRouteRoute
@@ -146,6 +162,7 @@ export interface FileRoutesByTo {
export interface FileRoutesById {
__root__: typeof rootRouteImport
'/': typeof IndexRoute
+ '/agent': typeof AgentRouteRoute
'/analytics': typeof AnalyticsRouteRoute
'/arrow-analytics': typeof ArrowAnalyticsRouteRoute
'/chart-inference': typeof ChartInferenceRouteRoute
@@ -157,6 +174,7 @@ export interface FileRoutesById {
'/policy-matrix': typeof PolicyMatrixRouteRoute
'/reconnect': typeof ReconnectRouteRoute
'/serving': typeof ServingRouteRoute
+ '/smart-dashboard': typeof SmartDashboardRouteRoute
'/sql-helpers': typeof SqlHelpersRouteRoute
'/telemetry': typeof TelemetryRouteRoute
'/type-safety': typeof TypeSafetyRouteRoute
@@ -166,6 +184,7 @@ export interface FileRouteTypes {
fileRoutesByFullPath: FileRoutesByFullPath
fullPaths:
| '/'
+ | '/agent'
| '/analytics'
| '/arrow-analytics'
| '/chart-inference'
@@ -177,6 +196,7 @@ export interface FileRouteTypes {
| '/policy-matrix'
| '/reconnect'
| '/serving'
+ | '/smart-dashboard'
| '/sql-helpers'
| '/telemetry'
| '/type-safety'
@@ -184,6 +204,7 @@ export interface FileRouteTypes {
fileRoutesByTo: FileRoutesByTo
to:
| '/'
+ | '/agent'
| '/analytics'
| '/arrow-analytics'
| '/chart-inference'
@@ -195,6 +216,7 @@ export interface FileRouteTypes {
| '/policy-matrix'
| '/reconnect'
| '/serving'
+ | '/smart-dashboard'
| '/sql-helpers'
| '/telemetry'
| '/type-safety'
@@ -202,6 +224,7 @@ export interface FileRouteTypes {
id:
| '__root__'
| '/'
+ | '/agent'
| '/analytics'
| '/arrow-analytics'
| '/chart-inference'
@@ -213,6 +236,7 @@ export interface FileRouteTypes {
| '/policy-matrix'
| '/reconnect'
| '/serving'
+ | '/smart-dashboard'
| '/sql-helpers'
| '/telemetry'
| '/type-safety'
@@ -221,6 +245,7 @@ export interface FileRouteTypes {
}
export interface RootRouteChildren {
IndexRoute: typeof IndexRoute
+ AgentRouteRoute: typeof AgentRouteRoute
AnalyticsRouteRoute: typeof AnalyticsRouteRoute
ArrowAnalyticsRouteRoute: typeof ArrowAnalyticsRouteRoute
ChartInferenceRouteRoute: typeof ChartInferenceRouteRoute
@@ -232,6 +257,7 @@ export interface RootRouteChildren {
PolicyMatrixRouteRoute: typeof PolicyMatrixRouteRoute
ReconnectRouteRoute: typeof ReconnectRouteRoute
ServingRouteRoute: typeof ServingRouteRoute
+ SmartDashboardRouteRoute: typeof SmartDashboardRouteRoute
SqlHelpersRouteRoute: typeof SqlHelpersRouteRoute
TelemetryRouteRoute: typeof TelemetryRouteRoute
TypeSafetyRouteRoute: typeof TypeSafetyRouteRoute
@@ -268,6 +294,13 @@ declare module '@tanstack/react-router' {
preLoaderRoute: typeof SqlHelpersRouteRouteImport
parentRoute: typeof rootRouteImport
}
+ '/smart-dashboard': {
+ id: '/smart-dashboard'
+ path: '/smart-dashboard'
+ fullPath: '/smart-dashboard'
+ preLoaderRoute: typeof SmartDashboardRouteRouteImport
+ parentRoute: typeof rootRouteImport
+ }
'/serving': {
id: '/serving'
path: '/serving'
@@ -345,6 +378,13 @@ declare module '@tanstack/react-router' {
preLoaderRoute: typeof AnalyticsRouteRouteImport
parentRoute: typeof rootRouteImport
}
+ '/agent': {
+ id: '/agent'
+ path: '/agent'
+ fullPath: '/agent'
+ preLoaderRoute: typeof AgentRouteRouteImport
+ parentRoute: typeof rootRouteImport
+ }
'/': {
id: '/'
path: '/'
@@ -357,6 +397,7 @@ declare module '@tanstack/react-router' {
const rootRouteChildren: RootRouteChildren = {
IndexRoute: IndexRoute,
+ AgentRouteRoute: AgentRouteRoute,
AnalyticsRouteRoute: AnalyticsRouteRoute,
ArrowAnalyticsRouteRoute: ArrowAnalyticsRouteRoute,
ChartInferenceRouteRoute: ChartInferenceRouteRoute,
@@ -368,6 +409,7 @@ const rootRouteChildren: RootRouteChildren = {
PolicyMatrixRouteRoute: PolicyMatrixRouteRoute,
ReconnectRouteRoute: ReconnectRouteRoute,
ServingRouteRoute: ServingRouteRoute,
+ SmartDashboardRouteRoute: SmartDashboardRouteRoute,
SqlHelpersRouteRoute: SqlHelpersRouteRoute,
TelemetryRouteRoute: TelemetryRouteRoute,
TypeSafetyRouteRoute: TypeSafetyRouteRoute,
diff --git a/apps/dev-playground/client/src/routes/__root.tsx b/apps/dev-playground/client/src/routes/__root.tsx
index db42fdafb..f6479ff3b 100644
--- a/apps/dev-playground/client/src/routes/__root.tsx
+++ b/apps/dev-playground/client/src/routes/__root.tsx
@@ -1,13 +1,26 @@
-import { Button, TooltipProvider } from "@databricks/appkit-ui/react";
+import {
+ Button,
+ DropdownMenu,
+ DropdownMenuContent,
+ DropdownMenuGroup,
+ DropdownMenuItem,
+ DropdownMenuLabel,
+ DropdownMenuSeparator,
+ DropdownMenuTrigger,
+ TooltipProvider,
+} from "@databricks/appkit-ui/react";
import {
CatchBoundary,
createRootRoute,
Link,
Outlet,
useLocation,
+ useNavigate,
} from "@tanstack/react-router";
+import { MenuIcon } from "lucide-react";
import { ErrorComponent } from "@/components/error-component";
import { ThemeSelector } from "@/components/theme-selector";
+import { findNavItemForPath, NAV_GROUPS } from "@/lib/nav";
export const Route = createRootRoute({
component: RootComponent,
@@ -15,127 +28,88 @@ export const Route = createRootRoute({
function RootComponent() {
const location = useLocation();
+ const navigate = useNavigate();
const isHomePage = location.pathname === "/";
+ const currentPage = findNavItemForPath(location.pathname);
+
return (
{!isHomePage && (
-
-
- AppKit Playground
-
-
-
-
-
- Analytics
-
-
-
-
- Arrow Analytics
-
-
-
-
- Lakebase
-
+
+
+
+ AppKit Playground
+
-
-
- Reconnect
-
-
-
-
- Telemetry
-
-
-
-
- SQL Helpers
-
-
-
-
- Genie
-
-
-
-
- Chart Inference
-
-
-
-
- Files
-
-
-
-
- Policy Matrix
-
-
-
-
- Jobs
-
-
-
-
- Serving
-
-
-
-
+
+ /
+
+
+ {currentPage.label}
+
+ >
+ )}
+
+
+
+
+
+
+ Menu
+
+
+
- Vector Search
-
-
+ {NAV_GROUPS.map((group, groupIdx) => (
+
+ {groupIdx > 0 && }
+
+ {group.label}
+
+ {group.items.map((item) => {
+ const Icon = item.icon;
+ const isActive = location.pathname.startsWith(
+ item.to,
+ );
+ return (
+ {
+ void navigate({ to: item.to });
+ }}
+ className={
+ isActive
+ ? "bg-accent text-accent-foreground font-medium"
+ : ""
+ }
+ >
+
+ {item.label}
+
+ );
+ })}
+
+ ))}
+
+
diff --git a/apps/dev-playground/client/src/routes/agent.route.tsx b/apps/dev-playground/client/src/routes/agent.route.tsx
new file mode 100644
index 000000000..6762a1a38
--- /dev/null
+++ b/apps/dev-playground/client/src/routes/agent.route.tsx
@@ -0,0 +1,567 @@
+import { getPluginClientConfig } from "@databricks/appkit-ui/js";
+import { Button } from "@databricks/appkit-ui/react";
+import { createFileRoute } from "@tanstack/react-router";
+import { useCallback, useEffect, useRef, useState } from "react";
+
+// File-route registration for the `/agent` path; renders `AgentRoute`.
+export const Route = createFileRoute("/agent")({
+ component: AgentRoute,
+});
+
+/**
+ * Shape of one JSON payload read from a `data:` line of the chat stream.
+ *
+ * Only `type` is required; which other fields are present depends on the
+ * event type. The fields this file reads are noted inline.
+ */
+interface SSEEvent {
+  /** Event discriminator, e.g. `response.output_text.delta` or `appkit.metadata`. */
+  type: string;
+  /** Text chunk carried by `response.output_text.delta` events. */
+  delta?: string;
+  item_id?: string;
+  /** Output item of `response.output_item.added` / `.done` events. */
+  item?: {
+    type?: string;
+    id?: string;
+    call_id?: string;
+    name?: string;
+    arguments?: string;
+    output?: string;
+    status?: string;
+  };
+  /** Text of `appkit.thinking` events. */
+  content?: string;
+  /** Payload of `appkit.metadata` events; `threadId` is read from it. */
+  data?: Record<string, unknown>;
+  /** Message of `error` events. */
+  error?: string;
+  sequence_number?: number;
+  output_index?: number;
+  /** Set, together with `stream_id` and `tool_name`, on `appkit.approval_pending`. */
+  approval_id?: string;
+  stream_id?: string;
+  tool_name?: string;
+  /** Arguments of the tool call awaiting approval; shape is tool-specific. */
+  args?: unknown;
+  annotations?: {
+    readOnly?: boolean;
+    destructive?: boolean;
+    idempotent?: boolean;
+  };
+}
+
+/** One entry in the rendered conversation. */
+interface ChatMessage {
+ /** Client-side id taken from an incrementing counter. */
+ id: number;
+ role: "user" | "assistant";
+ content: string;
+}
+
+/** A tool call awaiting a user decision, built from an `appkit.approval_pending` event. */
+interface PendingApproval {
+ approvalId: string;
+ /** Sent back with the decision in the `/api/agents/approve` request. */
+ streamId: string;
+ toolName: string;
+ /** Tool arguments as received; rendered as JSON for the user. */
+ args: unknown;
+}
+
+/**
+ * Debounced inline-suggestion hook backed by the `autocomplete` agent.
+ *
+ * Each `requestSuggestion` call clears the current suggestion and cancels
+ * any pending timer and in-flight request. When the text is non-blank, at
+ * least 3 characters long, and the hook is enabled, it schedules a POST to
+ * `/api/agents/chat` 500 ms later and accumulates the streamed text deltas
+ * into `suggestion` as they arrive.
+ *
+ * @param enabled - When `false`, `requestSuggestion` never issues a request.
+ * @returns `suggestion` (text received so far), `isLoading` (true only while
+ *   a request is running and nothing has been received yet),
+ *   `requestSuggestion`, and `clear` (drops the suggestion and cancels
+ *   pending work).
+ */
+function useAutocomplete(enabled: boolean) {
+  const [suggestion, setSuggestion] = useState("");
+  const [isLoading, setIsLoading] = useState(false);
+  // Explicit type arguments: a bare `useRef(null)` would infer `null` and
+  // reject the controller / timer handle assigned below.
+  const abortRef = useRef<AbortController | null>(null);
+  const timerRef = useRef<ReturnType<typeof setTimeout> | null>(null);
+
+  const requestSuggestion = useCallback(
+    (text: string) => {
+      setSuggestion("");
+
+      // Cancel whatever the previous keystroke scheduled or started.
+      if (timerRef.current) clearTimeout(timerRef.current);
+      if (abortRef.current) abortRef.current.abort();
+
+      if (!text.trim() || text.length < 3 || !enabled) {
+        return;
+      }
+
+      timerRef.current = setTimeout(async () => {
+        const controller = new AbortController();
+        abortRef.current = controller;
+        setIsLoading(true);
+
+        try {
+          const response = await fetch("/api/agents/chat", {
+            method: "POST",
+            headers: { "Content-Type": "application/json" },
+            body: JSON.stringify({ message: text, agent: "autocomplete" }),
+            signal: controller.signal,
+          });
+
+          if (!response.ok || !response.body) return;
+
+          const reader = response.body.getReader();
+          const decoder = new TextDecoder();
+          let result = "";
+          let buffer = "";
+
+          while (true) {
+            const { done, value } = await reader.read();
+            if (done) break;
+
+            buffer += decoder.decode(value, { stream: true });
+            const lines = buffer.split("\n");
+            // The last piece may be an incomplete line; keep it for the
+            // next chunk.
+            buffer = lines.pop() ?? "";
+
+            for (const line of lines) {
+              if (!line.startsWith("data: ")) continue;
+              const data = line.slice(6).trim();
+              if (!data || data === "[DONE]") continue;
+              try {
+                const event = JSON.parse(data);
+                if (
+                  event.type === "response.output_text.delta" &&
+                  event.delta
+                ) {
+                  result += event.delta;
+                  setSuggestion(result);
+                }
+              } catch {
+                /* skip */
+              }
+            }
+          }
+        } catch {
+          /* aborted or failed */
+        } finally {
+          setIsLoading(false);
+        }
+      }, 500);
+    },
+    [enabled],
+  );
+
+  const clear = useCallback(() => {
+    setSuggestion("");
+    if (timerRef.current) clearTimeout(timerRef.current);
+    if (abortRef.current) abortRef.current.abort();
+  }, []);
+
+  return {
+    suggestion,
+    // Hide the spinner as soon as the first characters are visible.
+    isLoading: isLoading && !suggestion,
+    requestSuggestion,
+    clear,
+  };
+}
+
+function AgentRoute() {
+ const [messages, setMessages] = useState([]);
+ const [events, setEvents] = useState([]);
+ const [input, setInput] = useState("");
+ const [isLoading, setIsLoading] = useState(false);
+ const [threadId, setThreadId] = useState(null);
+ const [pendingApprovals, setPendingApprovals] = useState(
+ [],
+ );
+
+ // Sends the user's approve/deny decision for a pending tool call to
+ // `/api/agents/approve`, then removes that approval from the pending list
+ // whether or not the request succeeded.
+ const decideApproval = useCallback(
+   async (approvalId: string, decision: "approve" | "deny") => {
+     const target = pendingApprovals.find(
+       (candidate) => candidate.approvalId === approvalId,
+     );
+     if (!target) return;
+     const { streamId } = target;
+     try {
+       await fetch("/api/agents/approve", {
+         method: "POST",
+         headers: { "Content-Type": "application/json" },
+         body: JSON.stringify({ streamId, approvalId, decision }),
+       });
+     } finally {
+       setPendingApprovals((current) =>
+         current.filter((entry) => entry.approvalId !== approvalId),
+       );
+     }
+   },
+   [pendingApprovals],
+ );
+ const messagesEndRef = useRef(null);
+ const inputRef = useRef(null);
+ const msgIdCounter = useRef(0);
+
+ const agentConfig = getPluginClientConfig<{
+ agents?: string[];
+ defaultAgent?: string;
+ }>("agents");
+ const hasAutocomplete = (agentConfig.agents ?? []).includes("autocomplete");
+
+ const {
+ suggestion,
+ isLoading: isAutocompleting,
+ requestSuggestion,
+ clear: clearSuggestion,
+ } = useAutocomplete(hasAutocomplete);
+
+ // biome-ignore lint/correctness/useExhaustiveDependencies: scroll on new messages
+ useEffect(() => {
+ messagesEndRef.current?.scrollIntoView({ behavior: "smooth" });
+ }, [messages]);
+
+ // Sends the trimmed input to `/api/agents/chat` and folds the streamed
+ // response into component state: raw events, pending approvals, the thread
+ // id, and the assistant message text. No-op while a send is in progress or
+ // when the input is blank.
+ const sendMessage = useCallback(async () => {
+ if (!input.trim() || isLoading) return;
+
+ clearSuggestion();
+ const userMessage = input.trim();
+ setInput("");
+ setMessages((prev) => [
+ ...prev,
+ { id: ++msgIdCounter.current, role: "user", content: userMessage },
+ ]);
+ // The event panel shows one turn at a time.
+ setEvents([]);
+ setIsLoading(true);
+
+ try {
+ const response = await fetch("/api/agents/chat", {
+ method: "POST",
+ headers: { "Content-Type": "application/json" },
+ body: JSON.stringify({
+ message: userMessage,
+ // Only include `threadId` once one has been received.
+ ...(threadId && { threadId }),
+ }),
+ });
+
+ if (!response.ok) {
+ // Surface the server's error as an assistant message.
+ const error = await response.json();
+ setMessages((prev) => [
+ ...prev,
+ {
+ id: ++msgIdCounter.current,
+ role: "assistant",
+ content: `Error: ${error.error}`,
+ },
+ ]);
+ return;
+ }
+
+ const reader = response.body?.getReader();
+ if (!reader) return;
+
+ const decoder = new TextDecoder();
+ // Full assistant text for this turn; grows with every delta.
+ let assistantContent = "";
+ let buffer = "";
+
+ while (true) {
+ const { done, value } = await reader.read();
+ if (done) break;
+
+ buffer += decoder.decode(value, { stream: true });
+ const lines = buffer.split("\n");
+ // The last piece may be an incomplete line; keep it for the next chunk.
+ buffer = lines.pop() ?? "";
+
+ for (const line of lines) {
+ if (!line.startsWith("data: ")) continue;
+ const data = line.slice(6).trim();
+ if (!data || data === "[DONE]") continue;
+
+ try {
+ const event: SSEEvent = JSON.parse(data);
+ if (!event.type) continue;
+ setEvents((prev) => [...prev, event]);
+
+ // Queue an approval prompt only when all identifying fields are present.
+ if (
+ event.type === "appkit.approval_pending" &&
+ event.approval_id &&
+ event.stream_id &&
+ event.tool_name
+ ) {
+ setPendingApprovals((prev) => [
+ ...prev,
+ {
+ approvalId: event.approval_id as string,
+ streamId: event.stream_id as string,
+ toolName: event.tool_name as string,
+ args: event.args,
+ },
+ ]);
+ }
+ // Remember the thread id so later sends include it in the request body.
+ if (event.type === "appkit.metadata" && event.data?.threadId) {
+ setThreadId(event.data.threadId as string);
+ }
+
+ if (event.type === "response.output_text.delta" && event.delta) {
+ assistantContent += event.delta;
+ setMessages((prev) => {
+ const updated = [...prev];
+ const last = updated[updated.length - 1];
+ // Update the trailing assistant message in place, or append one
+ // if the last message is not from the assistant.
+ if (last?.role === "assistant") {
+ updated[updated.length - 1] = {
+ ...last,
+ content: assistantContent,
+ };
+ } else {
+ updated.push({
+ id: ++msgIdCounter.current,
+ role: "assistant",
+ content: assistantContent,
+ });
+ }
+ return updated;
+ });
+ }
+ } catch {
+ // skip malformed events
+ }
+ }
+ }
+ } catch (err) {
+ // Network or stream failures are shown in the conversation.
+ setMessages((prev) => [
+ ...prev,
+ {
+ id: ++msgIdCounter.current,
+ role: "assistant",
+ content: `Error: ${err instanceof Error ? err.message : "Unknown error"}`,
+ },
+ ]);
+ } finally {
+ setIsLoading(false);
+ }
+ }, [input, isLoading, threadId, clearSuggestion]);
+
+ // Stores the edited text, then requests a suggestion for that same text.
+ const handleInputChange = (nextText: string) => {
+   setInput(nextText);
+   requestSuggestion(nextText);
+ };
+
+ // Appends the current suggestion to the input, clears it, and returns
+ // focus to the input element. Does nothing when there is no suggestion.
+ const acceptSuggestion = () => {
+   if (!suggestion) return;
+   setInput(input + suggestion);
+   clearSuggestion();
+   inputRef.current?.focus();
+ };
+
+ return (
+
+
+
+
+
Agent Chat
+
+ AI agent with auto-discovered tools from all AppKit plugins.
+ {threadId && (
+
+ Thread: {threadId.slice(0, 8)}...
+
+ )}
+
+
+ {hasAutocomplete && (
+
+ Autocomplete enabled
+
+ )}
+
+
+
+
+
+ {messages.length === 0 && (
+
+
+ Send a message to start a conversation
+
+
+ The agent can use analytics, files, genie, and lakebase
+ tools.
+ {hasAutocomplete && " Start typing for inline suggestions."}
+
+
+ )}
+
+ {messages.map((msg) => (
+
+ ))}
+
+ {pendingApprovals.map((approval) => (
+
+
+
+
+ Destructive tool — approval required
+
+
+
+
{approval.toolName}
+
+ {JSON.stringify(approval.args, null, 2)}
+
+
+
+
+ decideApproval(approval.approvalId, "deny")
+ }
+ >
+ Deny
+
+
+ decideApproval(approval.approvalId, "approve")
+ }
+ >
+ Approve
+
+
+
+
+ ))}
+
+ {isLoading &&
+ pendingApprovals.length === 0 &&
+ messages[messages.length - 1]?.role === "user" && (
+
+ )}
+
+
+
+
+
+ {hasAutocomplete && (suggestion || isAutocompleting) && (
+
+ {isAutocompleting && (
+ Thinking...
+ )}
+ {suggestion && (
+
+ Press{" "}
+
+ Tab
+ {" "}
+ to accept suggestion
+
+ )}
+
+ )}
+
+
+
+
+
+
+
+ Event Stream
+
+
+
+ {events.length === 0 && (
+
+ Events will appear here
+
+ )}
+ {events.map((event, i) => {
+ let detail: string;
+ switch (event.type) {
+ case "response.output_text.delta":
+ detail = event.delta?.slice(0, 60) ?? "";
+ break;
+ case "response.output_item.added":
+ case "response.output_item.done":
+ detail =
+ event.item?.type === "function_call"
+ ? `${event.item.name}(${(event.item.arguments ?? "").slice(0, 40)})`
+ : event.item?.type === "function_call_output"
+ ? (event.item.output?.slice(0, 60) ?? "")
+ : (event.item?.status ?? event.item?.type ?? "");
+ break;
+ case "response.completed":
+ detail = "done";
+ break;
+ case "error":
+ detail = event.error ?? "unknown";
+ break;
+ case "appkit.metadata":
+ detail = JSON.stringify(event.data).slice(0, 60);
+ break;
+ case "appkit.thinking":
+ detail = event.content?.slice(0, 60) ?? "";
+ break;
+ default:
+ detail = JSON.stringify(event).slice(0, 60);
+ }
+ return (
+
+
+ {event.type
+ .replace("response.", "")
+ .replace("appkit.", "")}
+
+ {detail}
+
+ );
+ })}
+
+
+
+
+
+ );
+}
diff --git a/apps/dev-playground/client/src/routes/index.tsx b/apps/dev-playground/client/src/routes/index.tsx
index ec2d9a50a..6114c477a 100644
--- a/apps/dev-playground/client/src/routes/index.tsx
+++ b/apps/dev-playground/client/src/routes/index.tsx
@@ -1,10 +1,12 @@
-import { Button, Card } from "@databricks/appkit-ui/react";
+import { Badge, Card } from "@databricks/appkit-ui/react";
import {
createFileRoute,
+ Link,
retainSearchParams,
- useNavigate,
} from "@tanstack/react-router";
+import { ArrowRightIcon, SparklesIcon } from "lucide-react";
import { ThemeSelector } from "@/components/theme-selector";
+import { ALL_NAV_ITEMS, NAV_GROUPS, type NavItem } from "@/lib/nav";
export const Route = createFileRoute("/")({
component: IndexRoute,
@@ -13,255 +15,120 @@ export const Route = createFileRoute("/")({
},
});
+/**
+ * Landing page for the dev playground. Renders a hero and the canonical
+ * demo catalog grouped by category (Data / AI / Platform).
+ *
+ * The catalog itself lives in `@/lib/nav.ts` and is shared with the nav
+ * dropdown in `__root.tsx`, so adding a new demo is a one-line change that
+ * updates both surfaces at once.
+ */
function IndexRoute() {
- const navigate = useNavigate();
-
return (
-
+
-
-
-
- AppKit Playground
-
-
- Explore the capabilities of the AppKit with interactive examples and
- demos
-
-
-
-
-
-
-
- Analytics Dashboard
-
-
- Explore real-time analytics with query execution, data
- visualization, and interactive components using the Design
- System.
-
-
navigate({ to: "/analytics" })}
- className="w-full"
- >
- Explore real-time analytics
-
-
-
-
-
-
-
- Arrow Analytics Dashboard
-
-
- Explore real-time analytics with query execution, data
- visualization, and interactive components using Apache Arrow
- streaming.
-
-
navigate({ to: "/arrow-analytics" })}
- className="w-full"
- >
- Explore real-time analytics
-
-
-
-
-
-
-
- Stream Reconnection
-
-
- Explore Server-Sent Events (SSE) stream reconnection with
- automatic Last-Event-ID tracking and resilient connection
- handling.
-
-
navigate({ to: "/reconnect" })}
- className="w-full"
- >
- View Reconnect Demo
-
-
-
-
-
-
-
- Data Visualization
-
-
- Explore powerful and customizable chart components from the Apps
- SDK.
-
-
navigate({ to: "/data-visualization" })}
- className="w-full"
- >
- Explore data visualization
-
-
-
-
-
-
-
- Telemetry
-
-
- Explore OpenTelemetry-compatible tracing and metrics examples
- with interactive demos showcasing custom observability patterns.
-
-
navigate({ to: "/telemetry" })}
- className="w-full"
- >
- Try Telemetry Examples
-
-
-
-
-
-
-
- File Browser
-
-
- Browse, preview, and download files from Databricks Volumes
- using the Files plugin and Unity Catalog Files API.
-
-
navigate({ to: "/files" })}
- className="w-full"
- >
- Browse Files
-
-
-
-
-
-
-
- SQL Helpers
-
-
- Type-safe parameter helpers for Databricks SQL queries. Test
- each helper interactively and see the generated parameter
- objects.
-
-
navigate({ to: "/sql-helpers" })}
- className="w-full"
- >
- Try SQL Helpers
-
-
-
-
-
-
-
- Type-Safe SQL
-
-
- Generate TypeScript types from SQL files at build time. Full
- IntelliSense for query names, parameters, and results.
-
-
navigate({ to: "/type-safety" })}
- className="w-full"
- >
- Explore Type Safety
-
-
-
-
-
-
-
- Genie Chat
-
-
- Ask natural language questions about your data using AI/BI
- Genie. Features SSE streaming, markdown rendering, and
- conversation persistence.
-
-
navigate({ to: "/genie" })}
- className="w-full"
- >
- Try Genie Chat
-
-
-
-
-
-
-
- Lakebase Examples
-
-
- Four approaches to PostgreSQL database integration with
- Databricks Lakebase: Raw driver, Drizzle ORM, TypeORM, and
- Sequelize with OAuth token refresh.
-
-
navigate({ to: "/lakebase" })}
- className="w-full"
- >
- Explore Lakebase Integration
-
-
-
-
-
-
-
- Lakeflow Jobs
-
-
- Trigger and monitor Databricks Lakeflow Jobs. View run history,
- stream live status updates, and cancel in-flight runs.
-
-
navigate({ to: "/jobs" })}
- className="w-full"
- >
- Manage Jobs
-
-
-
-
-
-
- Model Serving
-
-
- Chat with a Databricks Model Serving endpoint using streaming
- completions with real-time SSE responses.
-
-
navigate({ to: "/serving" })}
- className="w-full"
- >
- Try Model Serving
-
-
-
+
+
+
+
+ {NAV_GROUPS.map((group) => (
+
+
+
+ {group.label}
+
+
+ {group.tagline}
+
+
+
+ {group.items.map((item) => (
+
+ ))}
+
+
+ ))}
-
-
- built by databricks using appkit
-
-
+
+ Built by Databricks with AppKit.
+
+ {ALL_NAV_ITEMS.length} demos · {NAV_GROUPS.length} categories
+
+
+
+
+ );
+}
+
+/**
+ * Hero section at the top of the playground landing page.
+ *
+ * Shows a "{demoCount} interactive demos" label, the "AppKit Playground"
+ * title, and a short tagline describing the catalog.
+ *
+ * @param demoCount - number displayed in the "interactive demos" label
+ */
+function Hero({ demoCount }: { demoCount: number }) {
+ return (
+
+ {/*
+ Soft radial wash behind the hero. Two layered gradients (primary +
+ accent) at ~10% opacity give depth without the "AI slop" look of a
+ full-saturation banner. `pointer-events-none` keeps the theme selector
+ above clickable.
+ */}
+
+
+
+
+ {demoCount} interactive demos
+
+
+ AppKit Playground
+
+
+ A living catalog of what AppKit can do — data, agents, and platform
+ primitives, each wired up as a single-click demo you can poke at,
+ copy, or break.
+
);
}
+
+/**
+ * Card for a single entry of the demo catalog.
+ *
+ * Displays the item's label and description.
+ *
+ * @param item - the nav/catalog entry to render
+ */
+function DemoCard({ item }: { item: NavItem }) {
+ // Aliased to a capitalised local, presumably so the icon can be rendered
+ // as a JSX component.
+ const Icon = item.icon;
+ return (
+
+
+
+
+
+
+
+
+ {item.label}
+
+
+
+
+
+ {item.description}
+
+
+
+ );
+}
diff --git a/apps/dev-playground/client/src/routes/smart-dashboard.route.tsx b/apps/dev-playground/client/src/routes/smart-dashboard.route.tsx
new file mode 100644
index 000000000..3817d70f9
--- /dev/null
+++ b/apps/dev-playground/client/src/routes/smart-dashboard.route.tsx
@@ -0,0 +1,599 @@
+import { createFileRoute } from "@tanstack/react-router";
+import { LayoutDashboardIcon } from "lucide-react";
+import { useCallback, useMemo, useRef, useState } from "react";
+import { ActionToast } from "@/features/smart-dashboard/components/action-toast";
+import { ActiveFilters } from "@/features/smart-dashboard/components/active-filters";
+import { AgentSidebar } from "@/features/smart-dashboard/components/agent-sidebar";
+import {
+ ApprovalCard,
+ type PendingApproval,
+} from "@/features/smart-dashboard/components/approval-card";
+import {
+ ChatDrawer,
+ type ChatMessage,
+} from "@/features/smart-dashboard/components/chat-drawer";
+import { FareChart } from "@/features/smart-dashboard/components/fare-chart";
+import { FocusableChart } from "@/features/smart-dashboard/components/focusable-chart";
+import { HourlyHeatmap } from "@/features/smart-dashboard/components/hourly-heatmap";
+import { InspectorToggle } from "@/features/smart-dashboard/components/inspector-toggle";
+import { KPICards } from "@/features/smart-dashboard/components/kpi-cards";
+import { QuickActionsBar } from "@/features/smart-dashboard/components/quick-actions-bar";
+import {
+ type SavedView,
+ SavedViewsPanel,
+} from "@/features/smart-dashboard/components/saved-views-panel";
+import { StreamInspector } from "@/features/smart-dashboard/components/stream-inspector";
+import { TopZonesChart } from "@/features/smart-dashboard/components/top-zones-chart";
+import { TripChart } from "@/features/smart-dashboard/components/trip-chart";
+import type {
+ Highlight,
+ HighlightedZone,
+} from "@/features/smart-dashboard/hooks/use-action-dispatcher";
+import { useActionDispatcher } from "@/features/smart-dashboard/hooks/use-action-dispatcher";
+import type { SSEEvent } from "@/features/smart-dashboard/hooks/use-agent-stream";
+import { useAgentStream } from "@/features/smart-dashboard/hooks/use-agent-stream";
+import type { DashboardFilters } from "@/features/smart-dashboard/hooks/use-dashboard-data";
+import { useDashboardData } from "@/features/smart-dashboard/hooks/use-dashboard-data";
+import { focusChart } from "@/features/smart-dashboard/hooks/use-focus-registry";
+import { useInspectorShortcuts } from "@/features/smart-dashboard/hooks/use-stream-inspector";
+import { buildDashboardContext } from "@/features/smart-dashboard/lib/dashboard-context";
+import type { FeedAction } from "@/features/smart-dashboard/lib/feed-actions";
+
+export const Route = createFileRoute("/smart-dashboard")({
+ component: SmartDashboardRoute,
+});
+
+// Lightweight id factory for chat messages. Not using crypto.randomUUID
+// because the value is only meaningful for React keys + approval lookup
+// inside a single session.
+let messageIdCounter = 0;
+
+/**
+ * Produce a fresh chat-message id of the form `msg_<n>_<suffix>`, where
+ * `<n>` is a module-wide counter that starts at 1 and `<suffix>` is up to
+ * six random base-36 characters.
+ *
+ * @returns the newly generated id
+ */
+function nextMessageId(): string {
+  messageIdCounter += 1;
+  const randomSuffix = Math.random().toString(36).slice(2, 8);
+  return `msg_${messageIdCounter}_${randomSuffix}`;
+}
+
+function SmartDashboardRoute() {
+ const [filters, setFilters] = useState
({});
+ const [highlights, setHighlights] = useState([]);
+ const [highlightedZones, setHighlightedZones] = useState(
+ [],
+ );
+ const [pendingApprovals, setPendingApprovals] = useState(
+ [],
+ );
+ const [lastAction, setLastAction] = useState(null);
+ const [error, setError] = useState(null);
+
+ // Multi-turn chat history. Messages accumulate across sends so the user
+ // can scroll back through the conversation rather than having the UI
+ // wipe itself after every reply.
+ const [messages, setMessages] = useState([]);
+ const activeAssistantIdRef = useRef(null);
+ const lastUserMessageIdRef = useRef(null);
+
+ // Saved-views panel bumps this token after an upload to force a list
+ // refresh without pushing props down through ApprovalCard manually.
+ const [savedViewsVersion, setSavedViewsVersion] = useState(0);
+
+ // Chat-drawer open state is hoisted up so agent-dispatching UI actions
+ // (feed action chips, heatmap cells, quick actions, sidebar follow-ups)
+ // can auto-open the drawer — otherwise an async agent run would stream
+ // out of sight and the user would think nothing happened.
+ const [isChatOpen, setIsChatOpen] = useState(false);
+
+ useInspectorShortcuts();
+
+ const {
+ kpis,
+ tripsOverTime,
+ fareDistribution,
+ heatmap,
+ topZones,
+ sparklines,
+ isLoading: dataLoading,
+ error: dataError,
+ } = useDashboardData(filters);
+
+ const pushAction = useCallback((summary: string) => {
+ setLastAction(summary);
+ }, []);
+
+ const pushUnknown = useCallback((name: string, args: unknown) => {
+ const argsPreview = typeof args === "string" ? args : JSON.stringify(args);
+ setError(
+ `Agent emitted an unhandled tool call '${name}' with args ${argsPreview}. Ignoring.`,
+ );
+ console.warn(`[dispatcher] unknown/invalid tool '${name}':`, args);
+ }, []);
+
+ const handleFilterUpdate = useCallback(
+ (updater: (prev: DashboardFilters) => DashboardFilters) => {
+ setFilters(updater);
+ },
+ [],
+ );
+ const handleAddHighlight = useCallback((h: Highlight) => {
+ setHighlights((prev) => [...prev, h]);
+ }, []);
+ const handleClearFilters = useCallback(() => setFilters({}), []);
+ const handleClearHighlights = useCallback(() => setHighlights([]), []);
+ const handleAddZoneHighlight = useCallback((z: HighlightedZone) => {
+ setHighlightedZones((prev) => {
+ const without = prev.filter((p) => p.zip !== z.zip);
+ return [...without, z];
+ });
+ }, []);
+ const handleClearZoneHighlights = useCallback(
+ () => setHighlightedZones([]),
+ [],
+ );
+
+ const { handleEvent: handleDispatcherEvent, dispatch } = useActionDispatcher({
+ onFilterUpdate: handleFilterUpdate,
+ onAddHighlight: handleAddHighlight,
+ onClearFilters: handleClearFilters,
+ onClearHighlights: handleClearHighlights,
+ onAddZoneHighlight: handleAddZoneHighlight,
+ onClearZoneHighlights: handleClearZoneHighlights,
+ onAction: pushAction,
+ onUnknownTool: pushUnknown,
+ });
+
+  /**
+   * Send the user's decision for a pending tool approval to the server,
+   * then remove that approval from local state.
+   *
+   * `fetch` only rejects on network-level failures, so a non-2xx response
+   * is converted into an error here. Without that check a request the
+   * server refused would look exactly like a successful one.
+   *
+   * @param approvalId - id of the pending approval being resolved
+   * @param decision - whether the tool call is approved or denied
+   */
+  const decideApproval = useCallback(
+    async (approvalId: string, decision: "approve" | "deny") => {
+      const approval = pendingApprovals.find(
+        (a) => a.approvalId === approvalId,
+      );
+      // No matching pending approval: nothing to send.
+      if (!approval) return;
+      try {
+        const response = await fetch("/api/agents/approve", {
+          method: "POST",
+          headers: { "Content-Type": "application/json" },
+          body: JSON.stringify({
+            streamId: approval.streamId,
+            approvalId,
+            decision,
+          }),
+        });
+        if (!response.ok) {
+          throw new Error(`server responded with HTTP ${response.status}`);
+        }
+      } catch (e) {
+        setError(
+          `Approval failed: ${e instanceof Error ? e.message : "unknown"}`,
+        );
+      } finally {
+        // The approval is dropped whether or not the request succeeded.
+        setPendingApprovals((prev) =>
+          prev.filter((a) => a.approvalId !== approvalId),
+        );
+      }
+    },
+    [pendingApprovals],
+  );
+
+ const contextPrefix = useMemo(
+ () => buildDashboardContext(filters, highlights),
+ [filters, highlights],
+ );
+ const contextPrefixRef = useRef(contextPrefix);
+ contextPrefixRef.current = contextPrefix;
+
+  /**
+   * Route a single SSE event from the agent stream into dashboard state.
+   *
+   * Every event is first forwarded to the action dispatcher. In addition
+   * this handler:
+   * - records `appkit.approval_pending` events as pending approvals, pinned
+   *   to the most recent user message;
+   * - appends `response.output_text.delta` text to the in-progress
+   *   assistant message;
+   * - marks that message as finished on `response.completed`;
+   * - stores the message of an `error` event in the error state and clears
+   *   the streaming flag, because a failed run may never emit
+   *   `response.completed`.
+   *
+   * @param event - the SSE event to process
+   */
+  const handleStreamEvent = useCallback(
+    (event: SSEEvent) => {
+      // Clears the streaming flag on one message, leaving the rest untouched.
+      const markNotStreaming = (messageId: string) => {
+        setMessages((prev) =>
+          prev.map((m) =>
+            m.id === messageId ? { ...m, streaming: false } : m,
+          ),
+        );
+      };
+
+      handleDispatcherEvent(event);
+
+      // Capture pending approvals and pin them to the user turn that
+      // triggered them so the ChatDrawer can render the card inline.
+      if (event.type === "appkit.approval_pending") {
+        const e = event as SSEEvent & {
+          streamId?: string;
+          toolName?: string;
+        };
+        const approvalId = e.approval_id;
+        // Accept both snake_case and camelCase spellings of these fields.
+        const streamId = e.stream_id ?? e.streamId;
+        const toolName = e.tool_name ?? e.toolName;
+        // All three identifiers must be non-empty strings; anything else is
+        // ignored rather than stored as a malformed approval.
+        if (
+          typeof approvalId === "string" &&
+          approvalId &&
+          typeof toolName === "string" &&
+          toolName &&
+          typeof streamId === "string" &&
+          streamId
+        ) {
+          const pinnedToMessageId = lastUserMessageIdRef.current;
+          setPendingApprovals((prev) => [
+            ...prev,
+            {
+              approvalId,
+              streamId,
+              toolName,
+              args: e.args,
+              annotations: e.annotations,
+              ...(pinnedToMessageId
+                ? { _pinnedToMessageId: pinnedToMessageId }
+                : {}),
+            } as PendingApproval & { _pinnedToMessageId?: string },
+          ]);
+        }
+      }
+
+      // Stream assistant text into the in-progress assistant message.
+      if (event.type === "response.output_text.delta" && event.delta) {
+        const id = activeAssistantIdRef.current;
+        if (id) {
+          setMessages((prev) =>
+            prev.map((m) =>
+              m.id === id
+                ? { ...m, content: m.content + (event.delta ?? "") }
+                : m,
+            ),
+          );
+        }
+      }
+
+      // Finalize the streaming assistant message when the run completes.
+      if (event.type === "response.completed") {
+        const id = activeAssistantIdRef.current;
+        if (id) {
+          markNotStreaming(id);
+          activeAssistantIdRef.current = null;
+        }
+      }
+
+      if (event.type === "error") {
+        if (event.error) {
+          setError(event.error);
+        }
+        // Stop the streaming indicator on failure. The ref is left in place
+        // so any late delta still lands on the same message; a later
+        // `response.completed` or the next dispatch resets it.
+        const id = activeAssistantIdRef.current;
+        if (id) {
+          markNotStreaming(id);
+        }
+      }
+    },
+    [handleDispatcherEvent],
+  );
+
+ const { isLoading: agentLoading, send } = useAgentStream({
+ agentName: "query",
+ onEvent: handleStreamEvent,
+ });
+
+ const dispatchToAgent = useCallback(
+ (message: string) => {
+ const userMsgId = nextMessageId();
+ const assistantMsgId = nextMessageId();
+ lastUserMessageIdRef.current = userMsgId;
+ activeAssistantIdRef.current = assistantMsgId;
+ setMessages((prev) => [
+ ...prev,
+ { id: userMsgId, role: "user", content: message },
+ { id: assistantMsgId, role: "assistant", content: "", streaming: true },
+ ]);
+ // Every agent dispatch auto-opens the drawer so the streaming
+ // response is visible. A closed drawer would silently swallow the
+ // turn — the user sees a filter change appear later with no context.
+ setIsChatOpen(true);
+ send(message, { contextPrefix: contextPrefixRef.current });
+ },
+ [send],
+ );
+
+ /**
+ * Apply a feed action directly (no LLM round-trip). Each structured
+ * {@link FeedAction} from the ephemeral agents maps to a dashboard tool
+ * we already implement, so we translate the shape and re-enter the same
+ * `dispatch` code path that the SSE stream uses.
+ */
+ const applyFeedAction = useCallback(
+ (action: FeedAction) => {
+ switch (action.kind) {
+ case "filter_date":
+ dispatch("filter_by_date_range", {
+ start: action.start,
+ end: action.end,
+ });
+ return;
+ case "filter_zip":
+ dispatch("filter_by_pickup_zip", { zip: action.zip });
+ return;
+ case "filter_fare":
+ dispatch("filter_by_fare", {
+ ...(action.min !== undefined ? { min: action.min } : {}),
+ ...(action.max !== undefined ? { max: action.max } : {}),
+ });
+ return;
+ case "highlight_period":
+ dispatch("highlight_period", {
+ start: action.start,
+ end: action.end,
+ color: action.color ?? "blue",
+ label: action.label,
+ });
+ return;
+ case "highlight_zone":
+ dispatch("highlight_zone", {
+ zip: action.zip,
+ label: action.note ?? action.label,
+ });
+ focusChart("top_zones");
+ return;
+ case "focus_chart":
+ dispatch("focus_chart", { chart_id: action.chart_id });
+ return;
+ // `ask` is handled by onAsk -> dispatchToAgent, never lands here.
+ }
+ },
+ [dispatch],
+ );
+
+ /**
+ * Apply a saved view directly from its stored metadata. We don't round-trip
+ * through the agent here because the agent has no tool to fetch saved-view
+ * metadata — it would have to guess filters/highlights from the name alone.
+ * The client already holds the full authoritative state, so just apply it.
+ */
+ const handleLoadSavedView = useCallback((view: SavedView) => {
+ const meta = view.metadata;
+ const rawFilters = (meta.filters ?? {}) as Record;
+ const nextFilters: DashboardFilters = {};
+ if (typeof rawFilters.date_from === "string")
+ nextFilters.date_from = rawFilters.date_from;
+ if (typeof rawFilters.date_to === "string")
+ nextFilters.date_to = rawFilters.date_to;
+ if (typeof rawFilters.pickup_zip === "string")
+ nextFilters.pickup_zip = rawFilters.pickup_zip;
+ if (typeof rawFilters.fare_min === "string")
+ nextFilters.fare_min = rawFilters.fare_min;
+ if (typeof rawFilters.fare_max === "string")
+ nextFilters.fare_max = rawFilters.fare_max;
+
+ const rawHighlights = Array.isArray(meta.highlights) ? meta.highlights : [];
+ const nextHighlights: Highlight[] = rawHighlights.flatMap((h) => {
+ if (typeof h !== "object" || h === null) return [];
+ const entry = h as Record;
+ const start = entry.start;
+ const end = entry.end;
+ if (typeof start !== "string" || typeof end !== "string") return [];
+ const color: Highlight["color"] =
+ entry.color === "red" || entry.color === "yellow"
+ ? entry.color
+ : "blue";
+ const label = typeof entry.label === "string" ? entry.label : undefined;
+ return [{ start, end, color, label }];
+ });
+
+ setFilters(nextFilters);
+ setHighlights(nextHighlights);
+ setHighlightedZones([]);
+
+ const viewName = meta.name ?? "saved view";
+ const summary = [
+ Object.keys(nextFilters).length > 0
+ ? `${Object.keys(nextFilters).length} filter(s)`
+ : null,
+ nextHighlights.length > 0
+ ? `${nextHighlights.length} highlight(s)`
+ : null,
+ ]
+ .filter(Boolean)
+ .join(" + ");
+ setLastAction(`Loaded "${viewName}"${summary ? ` (${summary})` : ""}`);
+ }, []);
+
+ const handleSavedNotification = useCallback(
+ (info: { name: string; volumePath: string }) => {
+ setLastAction(`Saved "${info.name}" to volume`);
+ setSavedViewsVersion((v) => v + 1);
+ },
+ [],
+ );
+
+ const handleClearFilter = useCallback((key: keyof DashboardFilters) => {
+ setFilters((prev) => {
+ const next = { ...prev };
+ delete next[key];
+ return next;
+ });
+ }, []);
+
+ const handleClearAllFilters = useCallback(() => {
+ setFilters({});
+ setHighlights([]);
+ setHighlightedZones([]);
+ }, []);
+
+ const handleHeatmapCellClick = useCallback(
+ (label: string) => {
+ dispatchToAgent(
+ `Investigate pickups on ${label} in the current dashboard slice. Why is this slot notable?`,
+ );
+ },
+ [dispatchToAgent],
+ );
+
+ const handleZipClick = useCallback(
+ (zip: string) => {
+ dispatch("filter_by_pickup_zip", { zip });
+ },
+ [dispatch],
+ );
+
+ // Ref to the captured region for save_view. Kept on the dashboard body
+ // (not the header/chat) so the screenshot is the analytics surface only.
+ const dashboardRef = useRef(null);
+
+ // ApprovalCards render inline in the chat drawer, pinned to the user
+ // turn that triggered them. Builds a lookup per render.
+ const approvalsByMessage = useMemo(() => {
+ const map = new Map();
+ for (const a of pendingApprovals) {
+ const pinId =
+ (a as PendingApproval & { _pinnedToMessageId?: string })
+ ._pinnedToMessageId ?? "__loose";
+ const arr = map.get(pinId) ?? [];
+ arr.push(a);
+ map.set(pinId, arr);
+ }
+ return map;
+ }, [pendingApprovals]);
+
+ const approvalCardForMessage = useCallback(
+ (messageId: string): React.ReactNode | null => {
+ const bucket = approvalsByMessage.get(messageId);
+ if (!bucket || bucket.length === 0) return null;
+ return (
+
+ {bucket.map((approval) => (
+
+ ))}
+
+ );
+ },
+ [
+ approvalsByMessage,
+ filters,
+ highlights,
+ decideApproval,
+ handleSavedNotification,
+ ],
+ );
+
+ const looseApprovals = approvalsByMessage.get("__loose") ?? [];
+
+ return (
+
+
+
+
+
+
+
+
+ {(error || dataError) && (
+
+
+
{error ?? dataError}
+
setError(null)}
+ className="text-red-600/70 hover:text-red-700 font-medium"
+ >
+ dismiss
+
+
+
+ )}
+
+
+
+
+
+
+
+ {/* Everything below this ref is what gets captured for save_view. */}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {/* Any approvals not pinned to a chat message (defensive fallback). */}
+ {looseApprovals.length > 0 && (
+
+ {looseApprovals.map((approval) => (
+
+ ))}
+
+ )}
+
+
+
+
+
+
+
+
+ );
+}
diff --git a/apps/dev-playground/config/agents/anomaly/agent.md b/apps/dev-playground/config/agents/anomaly/agent.md
new file mode 100644
index 000000000..6b0c7f22d
--- /dev/null
+++ b/apps/dev-playground/config/agents/anomaly/agent.md
@@ -0,0 +1,35 @@
+---
+endpoint: databricks-gemini-3-1-flash-lite
+maxSteps: 1
+ephemeral: true
+---
+
+You are a data-quality monitor for NYC taxi trip data.
+
+Given the current dashboard state (KPIs + active filters), identify **0–4 anomalies, outliers, or suspicious patterns**. Each anomaly must ship with one or more **clickable actions** that let the analyst inspect or reproduce the issue in the UI.
+
+Return ONLY a JSON array — no prose, no code fences, no preamble. Each element has this shape:
+
+```
+{
+ "title": "short headline (<= 8 words)",
+ "severity": "low" | "medium" | "high",
+ "description": "1–2 sentences, specific and numeric",
+ "actions": [
+    // one or more; always include at least one "ask" action
+ { "kind": "filter_date", "label": "...", "start": "YYYY-MM-DD", "end": "YYYY-MM-DD" },
+ { "kind": "filter_zip", "label": "...", "zip": "10017" },
+ { "kind": "filter_fare", "label": "...", "min": 60 },
+ { "kind": "highlight_period", "label": "...", "start": "YYYY-MM-DD", "end": "YYYY-MM-DD", "color": "red" },
+ { "kind": "highlight_zone", "label": "...", "zip": "10017", "note": "outlier" },
+ { "kind": "focus_chart", "label": "...", "chart_id": "fare_distribution" },
+ { "kind": "ask", "label": "...", "prompt": "..." }
+ ]
+}
+```
+
+Guidelines:
+- Favor `highlight_*` over `filter_*` for anomalies so the analyst doesn't lose the baseline context; use `red` for clear outliers, `yellow` for caution.
+- Always include at least one `ask` action — the follow-up prompt should begin with "Investigate" or "Explain".
+- If you cannot point to a specific time window, zone, or fare range, skip the structural actions and keep only `ask`.
+- If nothing anomalous stands out, return `[]`. Do not fabricate anomalies.
diff --git a/apps/dev-playground/config/agents/autocomplete/agent.md b/apps/dev-playground/config/agents/autocomplete/agent.md
new file mode 100644
index 000000000..0b8270f07
--- /dev/null
+++ b/apps/dev-playground/config/agents/autocomplete/agent.md
@@ -0,0 +1,7 @@
+---
+endpoint: databricks-gemini-3-1-flash-lite
+maxSteps: 1
+ephemeral: true
+---
+
+You are an autocomplete engine. The user will give you the beginning of a sentence or paragraph. Continue the text naturally, as if you are the same author. Do NOT repeat the input. Only output the continuation. Do NOT use tools. Do NOT explain. Just write the next words.
diff --git a/apps/dev-playground/config/agents/insights/agent.md b/apps/dev-playground/config/agents/insights/agent.md
new file mode 100644
index 000000000..8269938ac
--- /dev/null
+++ b/apps/dev-playground/config/agents/insights/agent.md
@@ -0,0 +1,36 @@
+---
+endpoint: databricks-gemini-3-1-flash-lite
+maxSteps: 1
+ephemeral: true
+---
+
+You are a data analyst specializing in NYC taxi trip data.
+
+Given the current dashboard state (KPIs + active filters), surface **3–5 interesting findings**. Each finding must come with one or more **clickable actions** the user can apply to the dashboard with a single click.
+
+Return ONLY a JSON array — no prose, no code fences, no preamble. Each element has this shape:
+
+```
+{
+ "title": "short headline (<= 8 words)",
+ "description": "1–2 sentences, specific, numeric, directly readable",
+ "actions": [
+    // one or more; always include at least one "ask" action
+ { "kind": "filter_date", "label": "...", "start": "YYYY-MM-DD", "end": "YYYY-MM-DD" },
+ { "kind": "filter_zip", "label": "...", "zip": "10017" },
+ { "kind": "filter_fare", "label": "...", "min": 20, "max": 50 },
+ { "kind": "highlight_period", "label": "...", "start": "YYYY-MM-DD", "end": "YYYY-MM-DD", "color": "blue" | "red" | "yellow" },
+ { "kind": "highlight_zone", "label": "...", "zip": "10017", "note": "optional short ring label" },
+ { "kind": "focus_chart", "label": "...", "chart_id": "kpis" | "trips_over_time" | "fare_distribution" | "hourly_heatmap" | "top_zones" },
+ { "kind": "ask", "label": "...", "prompt": "natural-language follow-up question" }
+ ]
+}
+```
+
+Guidelines:
+- Prefer actions that make the finding **visually provable**: highlight the period, focus the chart, filter to the zone.
+- Always include at least one `filter_*` or `highlight_*` action when the finding is about a specific time window or zone.
+- Always include at least one `ask` action that a curious analyst would want to drill into.
+- `label` is the button caption — keep it <= 4 words and imperative ("Filter to March", "Highlight Fridays", "Ask why").
+- Dates must be calendar dates, not relative phrases. If you don't know the exact date, omit the filter/highlight action.
+- If no interesting findings exist, return `[]`.
diff --git a/apps/dev-playground/config/agents/query/agent.md b/apps/dev-playground/config/agents/query/agent.md
new file mode 100644
index 000000000..ad96fd16a
--- /dev/null
+++ b/apps/dev-playground/config/agents/query/agent.md
@@ -0,0 +1,40 @@
+---
+endpoint: databricks-claude-sonnet-4-5
+agents:
+ - sql_analyst
+ - dashboard_pilot
+tools:
+ - plugin:files: [files.read, files.list, files.metadata]
+---
+
+You are the dispatcher for the Smart Dashboard — NYC taxi analytics
+(`samples.nyctaxi.trips`, year 2016 only).
+
+You have two specialists. Delegate by calling the corresponding
+`agent-<name>` tool; do not answer directly when a specialist is a better
+fit.
+
+- `agent-sql_analyst` — writes and runs Databricks SQL to answer data
+ questions ("how many trips last Friday?", "top 5 pickup zones by revenue").
+ Use for any analytical query that requires reading the database.
+- `agent-dashboard_pilot` — manipulates the dashboard UI directly: applies
+ or clears filters, highlights or clears time ranges, highlights standout
+ pickup ZIPs on the Top Zones chart, focuses any of the dashboard's five
+ charts (KPIs, Trips Over Time, Fare Distribution, Hourly Heatmap, Top
+ Pickup Zones), and saves the current configuration as a named view. Use
+ when the user says "show me…", "filter to…", "highlight…", "focus on…",
+ "zoom in on…", "point at…", "clear…", "save…", or any request to modify
+ the dashboard's visual state. Do not answer these yourself — always
+ delegate to the pilot even if you think you lack the tool.
+
+The specialists stream their own confirmation text back to the user
+while they work — their text is already visible in the chat by the time
+they return. **Do not echo or restate what they said.** Only speak
+yourself when you need to:
+
+- Route a request (one short sentence: "Handing this to the pilot…").
+- Combine results from multiple specialists.
+- Add context the user needs that the specialist didn't cover.
+
+If the specialist's response already answers the user, say nothing and
+let their text stand.
diff --git a/apps/dev-playground/config/queries/dashboard_fare_distribution.sql b/apps/dev-playground/config/queries/dashboard_fare_distribution.sql
new file mode 100644
index 000000000..98f5ad94e
--- /dev/null
+++ b/apps/dev-playground/config/queries/dashboard_fare_distribution.sql
@@ -0,0 +1,22 @@
+-- @param dateFrom STRING
+-- @param dateTo STRING
+-- @param pickupZip STRING
+--
+-- Buckets trips into seven fare ranges and returns, per bucket, the trip
+-- count and the average trip distance (rounded to 2 decimals).
+-- Each filter is skipped when its parameter is NULL or the literal 'all'.
+-- Unlike the sibling dashboard queries, this one declares no fareMin /
+-- fareMax parameters, so a fare filter does not narrow it.
+-- NOTE(review): `<= :dateTo` compares the pickup timestamp with the raw
+-- parameter; if callers pass a date-only string, trips later on the end
+-- day are presumably excluded. Confirm the intended boundary.
+SELECT
+ CASE
+ -- Everything below 5 (including any negative fare) lands in the first bucket.
+ WHEN fare_amount < 5 THEN '$0-5'
+ WHEN fare_amount < 10 THEN '$5-10'
+ WHEN fare_amount < 15 THEN '$10-15'
+ WHEN fare_amount < 20 THEN '$15-20'
+ WHEN fare_amount < 30 THEN '$20-30'
+ WHEN fare_amount < 50 THEN '$30-50'
+ ELSE '$50+'
+ END as fare_bucket,
+ COUNT(*) as trip_count,
+ ROUND(AVG(trip_distance), 2) as avg_distance
+FROM samples.nyctaxi.trips
+WHERE 1 = 1
+ AND (COALESCE(:dateFrom, 'all') = 'all' OR tpep_pickup_datetime >= :dateFrom)
+ AND (COALESCE(:dateTo, 'all') = 'all' OR tpep_pickup_datetime <= :dateTo)
+ AND (COALESCE(:pickupZip, 'all') = 'all' OR pickup_zip IN (SELECT TRIM(value) FROM (VALUES (:pickupZip)) AS t(value)))
+GROUP BY fare_bucket
+-- Sort by the smallest fare in each bucket so buckets come back in ascending fare order.
+ORDER BY MIN(fare_amount)
diff --git a/apps/dev-playground/config/queries/dashboard_hourly_heatmap.sql b/apps/dev-playground/config/queries/dashboard_hourly_heatmap.sql
new file mode 100644
index 000000000..d7f0d90e0
--- /dev/null
+++ b/apps/dev-playground/config/queries/dashboard_hourly_heatmap.sql
@@ -0,0 +1,24 @@
+-- @param dateFrom STRING
+-- @param dateTo STRING
+-- @param pickupZip STRING
+-- @param fareMin STRING
+-- @param fareMax STRING
+--
+-- Aggregates trips by (day-of-week, hour-of-day) for the heatmap chart.
+-- `day_of_week` is 1=Sunday … 7=Saturday (Spark's default), which the
+-- client maps back to a human label. Hour is 0–23 in the trip's local
+-- timezone (the dataset is NYC-local already).
+--
+-- Returns one row per (day_of_week, hour_of_day) pair that has trips, with
+-- the trip count and the average fare rounded to 2 decimals.
+-- Each filter is skipped when its parameter is NULL or the literal 'all';
+-- fareMin / fareMax arrive as strings and are cast to DOUBLE for comparison.
+-- NOTE(review): `<= :dateTo` compares the pickup timestamp with the raw
+-- parameter; if callers pass a date-only string, trips later on the end
+-- day are presumably excluded. Confirm the intended boundary.
+SELECT
+ DAYOFWEEK(tpep_pickup_datetime) AS day_of_week,
+ HOUR(tpep_pickup_datetime) AS hour_of_day,
+ COUNT(*) AS trip_count,
+ ROUND(AVG(fare_amount), 2) AS avg_fare
+FROM samples.nyctaxi.trips
+WHERE 1 = 1
+ AND (COALESCE(:dateFrom, 'all') = 'all' OR tpep_pickup_datetime >= :dateFrom)
+ AND (COALESCE(:dateTo, 'all') = 'all' OR tpep_pickup_datetime <= :dateTo)
+ AND (COALESCE(:pickupZip, 'all') = 'all' OR pickup_zip IN (SELECT TRIM(value) FROM (VALUES (:pickupZip)) AS t(value)))
+ AND (COALESCE(:fareMin, 'all') = 'all' OR fare_amount >= CAST(:fareMin AS DOUBLE))
+ AND (COALESCE(:fareMax, 'all') = 'all' OR fare_amount <= CAST(:fareMax AS DOUBLE))
+GROUP BY day_of_week, hour_of_day
+ORDER BY day_of_week, hour_of_day
diff --git a/apps/dev-playground/config/queries/dashboard_kpi_sparklines.sql b/apps/dev-playground/config/queries/dashboard_kpi_sparklines.sql
new file mode 100644
index 000000000..2d674917c
--- /dev/null
+++ b/apps/dev-playground/config/queries/dashboard_kpi_sparklines.sql
@@ -0,0 +1,25 @@
+-- @param dateFrom STRING
+-- @param dateTo STRING
+-- @param pickupZip STRING
+-- @param fareMin STRING
+-- @param fareMax STRING
+--
+-- Daily rollup feeding the sparklines embedded in the KPI cards. Same
+-- filter shape as every other dashboard query so the whole surface moves
+-- in lockstep when the user narrows the view. The default unfiltered
+-- range covers all of 2016, which is bounded enough to render inline.
+--
+-- Returns one row per pickup date, oldest first, with the trip count,
+-- summed fare revenue, average fare and average distance (each rounded to
+-- 2 decimals).
+-- Each filter is skipped when its parameter is NULL or the literal 'all';
+-- fareMin / fareMax arrive as strings and are cast to DOUBLE for comparison.
+-- NOTE(review): `<= :dateTo` compares the pickup timestamp with the raw
+-- parameter; if callers pass a date-only string, the end day itself is
+-- presumably missing from the series. Confirm the intended boundary.
+SELECT
+ DATE(tpep_pickup_datetime) AS trip_date,
+ COUNT(*) AS trip_count,
+ ROUND(SUM(fare_amount), 2) AS total_revenue,
+ ROUND(AVG(fare_amount), 2) AS avg_fare,
+ ROUND(AVG(trip_distance), 2) AS avg_distance
+FROM samples.nyctaxi.trips
+WHERE 1 = 1
+ AND (COALESCE(:dateFrom, 'all') = 'all' OR tpep_pickup_datetime >= :dateFrom)
+ AND (COALESCE(:dateTo, 'all') = 'all' OR tpep_pickup_datetime <= :dateTo)
+ AND (COALESCE(:pickupZip, 'all') = 'all' OR pickup_zip IN (SELECT TRIM(value) FROM (VALUES (:pickupZip)) AS t(value)))
+ AND (COALESCE(:fareMin, 'all') = 'all' OR fare_amount >= CAST(:fareMin AS DOUBLE))
+ AND (COALESCE(:fareMax, 'all') = 'all' OR fare_amount <= CAST(:fareMax AS DOUBLE))
+GROUP BY DATE(tpep_pickup_datetime)
+ORDER BY trip_date
diff --git a/apps/dev-playground/config/queries/dashboard_kpis.sql b/apps/dev-playground/config/queries/dashboard_kpis.sql
new file mode 100644
index 000000000..49cd35001
--- /dev/null
+++ b/apps/dev-playground/config/queries/dashboard_kpis.sql
@@ -0,0 +1,18 @@
+-- @param dateFrom STRING
+-- @param dateTo STRING
+-- @param pickupZip STRING
+-- @param fareMin STRING
+-- @param fareMax STRING
+--
+-- Headline KPIs for the current dashboard slice. Returns a single row with
+-- the trip count, average fare, average distance, maximum fare and minimum
+-- fare (all but the count rounded to 2 decimals).
+-- Each filter is skipped when its parameter is NULL or the literal 'all';
+-- fareMin / fareMax arrive as strings and are cast to DOUBLE for comparison.
+-- NOTE(review): `<= :dateTo` compares the pickup timestamp with the raw
+-- parameter; if callers pass a date-only string, trips later on the end
+-- day are presumably excluded. Confirm the intended boundary.
+SELECT
+ COUNT(*) as total_trips,
+ ROUND(AVG(fare_amount), 2) as avg_fare,
+ ROUND(AVG(trip_distance), 2) as avg_distance,
+ ROUND(MAX(fare_amount), 2) as max_fare,
+ ROUND(MIN(fare_amount), 2) as min_fare
+FROM samples.nyctaxi.trips
+WHERE 1 = 1
+ AND (COALESCE(:dateFrom, 'all') = 'all' OR tpep_pickup_datetime >= :dateFrom)
+ AND (COALESCE(:dateTo, 'all') = 'all' OR tpep_pickup_datetime <= :dateTo)
+ AND (COALESCE(:pickupZip, 'all') = 'all' OR pickup_zip IN (SELECT TRIM(value) FROM (VALUES (:pickupZip)) AS t(value)))
+ AND (COALESCE(:fareMin, 'all') = 'all' OR fare_amount >= CAST(:fareMin AS DOUBLE))
+ AND (COALESCE(:fareMax, 'all') = 'all' OR fare_amount <= CAST(:fareMax AS DOUBLE))
diff --git a/apps/dev-playground/config/queries/dashboard_top_zone.sql b/apps/dev-playground/config/queries/dashboard_top_zone.sql
new file mode 100644
index 000000000..2d834ab84
--- /dev/null
+++ b/apps/dev-playground/config/queries/dashboard_top_zone.sql
@@ -0,0 +1,16 @@
+-- @param dateFrom STRING
+-- @param dateTo STRING
+-- @param pickupZip STRING
+-- @param fareMin STRING
+-- @param fareMax STRING
+--
+-- The single busiest pickup ZIP (by trip count) within the filtered trip
+-- set. Each filter is skipped when its parameter is NULL or 'all'.
+SELECT
+  pickup_zip,
+  COUNT(*) AS trip_count
+FROM samples.nyctaxi.trips
+WHERE 1 = 1
+  AND (COALESCE(:dateFrom, 'all') = 'all' OR tpep_pickup_datetime >= :dateFrom)
+  AND (COALESCE(:dateTo, 'all') = 'all' OR tpep_pickup_datetime <= :dateTo)
+  AND (COALESCE(:pickupZip, 'all') = 'all' OR pickup_zip IN (SELECT TRIM(value) FROM (VALUES (:pickupZip)) AS t(value)))
+  AND (COALESCE(:fareMin, 'all') = 'all' OR fare_amount >= CAST(:fareMin AS DOUBLE))
+  AND (COALESCE(:fareMax, 'all') = 'all' OR fare_amount <= CAST(:fareMax AS DOUBLE))
+GROUP BY pickup_zip
+ORDER BY trip_count DESC
+LIMIT 1
diff --git a/apps/dev-playground/config/queries/dashboard_top_zones.sql b/apps/dev-playground/config/queries/dashboard_top_zones.sql
new file mode 100644
index 000000000..dfc042ed0
--- /dev/null
+++ b/apps/dev-playground/config/queries/dashboard_top_zones.sql
@@ -0,0 +1,35 @@
+-- @param dateFrom STRING
+-- @param dateTo STRING
+-- @param pickupZip STRING
+-- @param fareMin STRING
+-- @param fareMax STRING
+--
+-- Leaderboard query: the 10 pickup ZIPs with the most trips. Revenue and
+-- average fare are returned in the same row so the horizontal-bar chart
+-- can switch metrics without re-querying. The KPI card keeps using
+-- `dashboard_top_zone.sql` (LIMIT 1); this query feeds the chart.
+--
+-- Note: `pickup_zip` in samples.nyctaxi.trips is an INT column. A guard
+-- such as `pickup_zip != ''` would drop every row, because Spark casts ''
+-- to NULL and `pickup_zip != NULL` evaluates to UNKNOWN (treated as
+-- false). The single-zone query shows that no null-guard is required.
+SELECT
+  -- The ZIP is an identifier, not a quantity: emit it as STRING so the
+  -- client, the agent's `highlight_zone` tool and the
+  -- `filter_by_pickup_zip` parameter all use one type. Otherwise the
+  -- Map.has() lookups in TopZonesChart silently miss when the agent asks
+  -- to ring a specific ZIP.
+  CAST(pickup_zip AS STRING)  AS pickup_zip,
+  COUNT(*)                    AS trip_count,
+  ROUND(SUM(fare_amount), 2)  AS total_revenue,
+  ROUND(AVG(fare_amount), 2)  AS avg_fare
+FROM samples.nyctaxi.trips
+WHERE 1 = 1
+  AND (COALESCE(:dateFrom, 'all') = 'all' OR tpep_pickup_datetime >= :dateFrom)
+  AND (COALESCE(:dateTo, 'all') = 'all' OR tpep_pickup_datetime <= :dateTo)
+  AND (COALESCE(:pickupZip, 'all') = 'all' OR pickup_zip IN (SELECT TRIM(value) FROM (VALUES (:pickupZip)) AS t(value)))
+  AND (COALESCE(:fareMin, 'all') = 'all' OR fare_amount >= CAST(:fareMin AS DOUBLE))
+  AND (COALESCE(:fareMax, 'all') = 'all' OR fare_amount <= CAST(:fareMax AS DOUBLE))
+GROUP BY pickup_zip
+ORDER BY trip_count DESC
+LIMIT 10
diff --git a/apps/dev-playground/config/queries/dashboard_trips_over_time.sql b/apps/dev-playground/config/queries/dashboard_trips_over_time.sql
new file mode 100644
index 000000000..d65b3c944
--- /dev/null
+++ b/apps/dev-playground/config/queries/dashboard_trips_over_time.sql
@@ -0,0 +1,16 @@
+-- @param dateFrom STRING
+-- @param dateTo STRING
+-- @param pickupZip STRING
+--
+-- Daily trip count, average fare and revenue for the Trips Over Time
+-- chart. Rows are ordered by date ascending and capped at 60, so only the
+-- earliest 60 days of the filtered range are returned. Each filter is
+-- skipped when its parameter is NULL or 'all'.
+SELECT
+  DATE(tpep_pickup_datetime)  AS trip_date,
+  COUNT(*)                    AS trip_count,
+  ROUND(AVG(fare_amount), 2)  AS avg_fare,
+  ROUND(SUM(fare_amount), 2)  AS total_revenue
+FROM samples.nyctaxi.trips
+WHERE 1 = 1
+  AND (COALESCE(:dateFrom, 'all') = 'all' OR tpep_pickup_datetime >= :dateFrom)
+  AND (COALESCE(:dateTo, 'all') = 'all' OR tpep_pickup_datetime <= :dateTo)
+  AND (COALESCE(:pickupZip, 'all') = 'all' OR pickup_zip IN (SELECT TRIM(value) FROM (VALUES (:pickupZip)) AS t(value)))
+GROUP BY DATE(tpep_pickup_datetime)
+ORDER BY trip_date
+LIMIT 60
diff --git a/apps/dev-playground/server/index.ts b/apps/dev-playground/server/index.ts
index 91179dacd..74d813a90 100644
--- a/apps/dev-playground/server/index.ts
+++ b/apps/dev-playground/server/index.ts
@@ -5,15 +5,14 @@ import {
type FilePolicy,
files,
genie,
- jobs,
PolicyDeniedError,
server,
serving,
WRITE_ACTIONS,
} from "@databricks/appkit";
+import { agents, createAgent, tool } from "@databricks/appkit/beta";
import { WorkspaceClient } from "@databricks/sdk-experimental";
-// TODO: re-enable once vector-search is exported from @databricks/appkit
-// import { vectorSearch } from "@databricks/appkit";
+import { z } from "zod";
import { lakebaseExamples } from "./lakebase-examples-plugin";
import { reconnect } from "./reconnect-plugin";
import { telemetryExamples } from "./telemetry-example-plugin";
@@ -49,6 +48,285 @@ const adminOnly: FilePolicy = (action, _resource, user) => {
return true;
};
+// Demo agent declared in code rather than as markdown under
+// config/agents/. It shows the function form of `tools`, which receives
+// the plugin map and returns the tool set.
+const helper = createAgent({
+  instructions:
+    "You are a demo helper. Use analytics tools to answer data questions, " +
+    "or get_weather for light small-talk.",
+  tools: (plugins) => ({
+    ...plugins.analytics.toolkit(),
+    get_weather: tool({
+      name: "get_weather",
+      description: "Get the current weather for a city",
+      schema: z.object({ city: z.string().describe("City name") }),
+      // Canned reply; no weather service is called.
+      execute: async ({ city }) => {
+        return `The weather in ${city} is sunny, 22°C`;
+      },
+    }),
+  }),
+});
+
+/*
+ * Smart-Dashboard agents.
+ *
+ * The three agents form a dispatcher pattern for the /smart-dashboard route.
+ * The `query` agent (markdown, in config/agents/query/) routes user
+ * questions to one of two specialists:
+ *
+ * - `sql_analyst` — writes Databricks SQL against `samples.nyctaxi.trips`
+ * using the analytics plugin's query tool.
+ *   - `dashboard_pilot` — emits UI-action tool calls (e.g. `filter_by_date_range`,
+ *     `highlight_period`) that the client reads off the SSE stream and
+ * translates into React state mutations. The server-side handlers are
+ * intentionally stubs — the tool-call JSON is the action payload.
+ */
+
+// Narrow, single-purpose tools.
+//
+// The earlier polymorphic `apply_filter({ field, operator, value })` was
+// too expressive — the LLM could emit valid-looking calls the dispatcher
+// couldn't faithfully apply (e.g. `field: "dropoff_zone"` when the
+// dashboard only has a `pickup_zip` filter; `operator: "eq"` with a date).
+// Splitting into one tool per filter verb removes the whole class of
+// "agent said it worked but nothing moved" bugs.
+//
+// Each tool has exactly one client-side effect, rendered by
+// use-action-dispatcher. Server handlers are still stubs — the tool-call
+// JSON is the action payload.
+
+/** UI-action tool: date-window filter. The handler only returns a confirmation string. */
+const filter_by_date_range = tool({
+  name: "filter_by_date_range",
+  description:
+    "Filter the dashboard to trips within a date range. Both start and end are required and must be ISO dates (YYYY-MM-DD) within 2016.",
+  schema: z.object({
+    start: z.string().describe("Start date in ISO format, e.g. 2016-03-01"),
+    end: z.string().describe("End date in ISO format, e.g. 2016-03-31"),
+  }),
+  execute: async (args) => {
+    const { start, end } = args;
+    return `Filtered dashboard to trips between ${start} and ${end}.`;
+  },
+});
+
+/** UI-action tool: pickup-ZIP filter. The handler only returns a confirmation string. */
+const filter_by_pickup_zip = tool({
+  name: "filter_by_pickup_zip",
+  description:
+    "Filter the dashboard to trips originating from a specific pickup ZIP code. Use when the user asks about a specific pickup zone or ZIP.",
+  schema: z.object({
+    zip: z.string().describe("Pickup ZIP code, e.g. 10001"),
+  }),
+  execute: async (args) => {
+    const { zip } = args;
+    return `Filtered dashboard to trips picked up in ${zip}.`;
+  },
+});
+
+/**
+ * UI-action tool: fare-range filter. The schema rejects calls that supply
+ * neither bound; the handler only formats a confirmation string.
+ */
+const filter_by_fare = tool({
+  name: "filter_by_fare",
+  description:
+    "Filter the dashboard to trips within a fare range. At least one of min or max must be provided.",
+  schema: z
+    .object({
+      min: z.number().optional().describe("Minimum fare in USD"),
+      max: z.number().optional().describe("Maximum fare in USD"),
+    })
+    .refine((bounds) => !(bounds.min === undefined && bounds.max === undefined), {
+      message: "Provide at least one of min or max.",
+    }),
+  execute: async ({ min, max }) => {
+    // Keep only the bounds that were actually supplied, lower bound first.
+    const clauses = [
+      min !== undefined ? `>= $${min}` : undefined,
+      max !== undefined ? `<= $${max}` : undefined,
+    ].filter((clause): clause is string => clause !== undefined);
+    return `Filtered dashboard to trips with fare ${clauses.join(" and ")}.`;
+  },
+});
+
+/** UI-action tool: takes no arguments and returns a fixed confirmation. */
+const clear_filters = tool({
+  name: "clear_filters",
+  description:
+    "Remove all active filters from the dashboard. Use when the user asks to reset, clear, or remove filters.",
+  schema: z.object({}),
+  execute: async () => {
+    return "All filters cleared.";
+  },
+});
+
+/**
+ * UI-action tool: shades a date range on the Trips Over Time chart.
+ * `color` is accepted by the schema but not used by the handler, which
+ * only formats a confirmation string.
+ */
+const highlight_period = tool({
+  name: "highlight_period",
+  description:
+    "Highlight a time period on the Trips Over Time chart to draw attention to a specific date range.",
+  schema: z.object({
+    start: z.string().describe("Start date in ISO format (YYYY-MM-DD)"),
+    end: z.string().describe("End date in ISO format (YYYY-MM-DD)"),
+    color: z
+      .enum(["blue", "red", "yellow"])
+      .optional()
+      .describe("Highlight color. Defaults to blue."),
+    label: z
+      .string()
+      .optional()
+      .describe("Optional label for the highlighted period"),
+  }),
+  execute: async ({ start, end, label }) => {
+    let suffix = "";
+    if (label) {
+      suffix = ` (${label})`;
+    }
+    return `Highlighted period ${start} to ${end}${suffix} on the dashboard.`;
+  },
+});
+
+/** UI-action tool: takes no arguments and returns a fixed confirmation. */
+const clear_highlights = tool({
+  name: "clear_highlights",
+  description:
+    "Remove all highlight overlays from the charts. Use when the user asks to clear, reset, or remove highlights.",
+  schema: z.object({}),
+  execute: async () => {
+    return "All highlights cleared.";
+  },
+});
+
+// Restores a saved view. The tool-call arguments themselves carry the
+// state to apply: the client watches the SSE stream for this
+// function_call and applies the filters and highlights from it, so no
+// second round trip is needed for metadata. The agent is expected to
+// pass the already-resolved state of the saved view; the handler below
+// only echoes the view name.
+const load_view = tool({
+  name: "load_view",
+  description:
+    "Restore a previously saved dashboard view by applying its filters and highlights. The caller supplies the already-resolved state so the client can apply it from this tool call without a second round trip.",
+  schema: z.object({
+    name: z.string().describe("The saved view's name (for UI feedback)"),
+    filters: z
+      .object({
+        date_from: z.string().optional(),
+        date_to: z.string().optional(),
+        pickup_zip: z.string().optional(),
+        fare_min: z.string().optional(),
+        fare_max: z.string().optional(),
+      })
+      // Unknown filter keys are kept rather than stripped.
+      .passthrough()
+      .describe("Filters to restore. Omit fields that should not be set."),
+    highlights: z
+      .array(
+        z.object({
+          start: z.string(),
+          end: z.string(),
+          color: z.enum(["blue", "red", "yellow"]).optional(),
+          label: z.string().optional(),
+        }),
+      )
+      .describe("Highlight ranges to restore."),
+  }),
+  execute: async (args) => {
+    const { name } = args;
+    return `Restored saved view "${name}".`;
+  },
+});
+
+/** UI-action tool: names one of the known charts to focus. The handler only returns a confirmation string. */
+const focus_chart = tool({
+  name: "focus_chart",
+  description:
+    "Scroll the user's viewport to a specific chart on the dashboard and briefly pulse it to draw attention. Use when the user asks to 'look at' or 'focus on' a specific visualization.",
+  schema: z.object({
+    chart_id: z
+      .enum([
+        "kpis",
+        "trips_over_time",
+        "fare_distribution",
+        "hourly_heatmap",
+        "top_zones",
+      ])
+      .describe("Which chart to focus on"),
+  }),
+  execute: async (args) => {
+    const { chart_id } = args;
+    return `Focused on ${chart_id}.`;
+  },
+});
+
+/** UI-action tool: emphasises one ZIP on the Top Pickup Zones chart. The handler only returns a confirmation string. */
+const highlight_zone = tool({
+  name: "highlight_zone",
+  description:
+    "Draw an emphasis ring around a specific pickup ZIP on the Top Pickup Zones chart. Use this to call attention to a standout zone without filtering the whole dashboard to that ZIP.",
+  schema: z.object({
+    zip: z.string().describe("Pickup ZIP code to highlight (e.g. '10017')"),
+    label: z
+      .string()
+      .optional()
+      .describe("Optional short label shown inside the highlighted bar"),
+  }),
+  execute: async ({ zip, label }) => {
+    const suffix = label ? ` (${label})` : "";
+    return `Highlighted pickup ZIP ${zip}${suffix}.`;
+  },
+});
+
+/** UI-action tool: takes no arguments and returns a fixed confirmation. */
+const clear_zone_highlights = tool({
+  name: "clear_zone_highlights",
+  description: "Remove all emphasis rings from the Top Pickup Zones chart.",
+  schema: z.object({}),
+  execute: async () => {
+    return "Zone highlights cleared.";
+  },
+});
+
+// Write tool used to exercise the approval gate. The handler is a stub
+// (it persists nothing), but `effect: "write"` forces the
+// human-in-the-loop flow before the agent can call it. `write` is chosen
+// over `destructive` because capturing a view CREATES a new file and
+// deletes or overwrites nothing, so the approval card renders the
+// low-severity blue "writes" treatment instead of the alarming red
+// "destructive" one.
+const save_view = tool({
+  name: "save_view",
+  description:
+    "Persist the current dashboard configuration (filters + highlights) as a named view the user can recall later. Always surfaces the approval gate as a write action.",
+  annotations: { effect: "write" },
+  schema: z.object({
+    name: z.string().describe("Short human-readable name for the saved view"),
+    description: z
+      .string()
+      .optional()
+      .describe("Optional longer description for the saved view"),
+  }),
+  execute: async ({ name, description }) => {
+    if (description) {
+      return `Saved view "${name}": ${description}.`;
+    }
+    return `Saved view "${name}".`;
+  },
+});
+
+/**
+ * SQL specialist agent: its only tools are the analytics plugin's
+ * toolkit. The instruction sentences are joined with single spaces.
+ */
+const sql_analyst = createAgent({
+  instructions: [
+    "You are a SQL expert for NYC taxi trip data (`samples.nyctaxi.trips`).",
+    "Write Databricks SQL to answer the user's question and summarize the results clearly.",
+    "IMPORTANT: The dataset only contains trips from 2016. Always add `WHERE tpep_pickup_datetime >= '2016-01-01' AND tpep_pickup_datetime < '2017-01-01'` unless the user specifies a narrower date range within 2016.",
+    "If the user asks about dates outside 2016, say the dataset only covers 2016.",
+    "Available columns: tpep_pickup_datetime, tpep_dropoff_datetime, trip_distance, fare_amount, pickup_zip, dropoff_zip.",
+  ].join(" "),
+  tools: (plugins) => ({ ...plugins.analytics.toolkit() }),
+});
+
+// UI-manipulation agent. Its instructions list every tool in the `tools`
+// map below with its call shape, followed by four usage rules; the lines
+// are joined with newlines. It is given no analytics/query tools.
+const dashboard_pilot = createAgent({
+ instructions: [
+ "You are the Smart Dashboard pilot. You do not query data — you manipulate the UI.",
+ "Filters:",
+ "- `filter_by_date_range({start, end})` — narrow to a date window within 2016.",
+ "- `filter_by_pickup_zip({zip})` — narrow to trips from a specific ZIP.",
+ "- `filter_by_fare({min?, max?})` — narrow by fare range (at least one bound required).",
+ "- `clear_filters()` — remove all active filters.",
+ "Highlights:",
+ "- `highlight_period({start, end, color?, label?})` — shade a date window on the Trips Over Time chart.",
+ "- `clear_highlights()` — remove all shaded overlays from the trips chart.",
+ "- `highlight_zone({zip, label?})` — draw an emphasis ring around a specific ZIP on the Top Pickup Zones chart.",
+ "- `clear_zone_highlights()` — remove all ZIP emphasis rings.",
+ "Focus & save:",
+ "- `focus_chart({chart_id})` — scroll the viewport to one of `kpis`, `trips_over_time`, `fare_distribution`, `hourly_heatmap`, `top_zones` and briefly pulse it.",
+ "- `save_view({name, description?})` — persist the current configuration. Write action; the user will see an approval card.",
+ "- `load_view({name, filters, highlights})` — restore a previously saved view. Always pass the resolved state; never leave fields unset.",
+ "Rules:",
+ "1. Pick the single tool that matches the user's intent. Do not chain filters unless the user asks for a compound filter.",
+ "2. Briefly state what you did after the tool returns. Do not narrate before calling the tool.",
+ "3. If the user's request is ambiguous (e.g. 'filter to last month' without a 2016 context), ask one clarifying question before calling any tool.",
+ "4. For standout ZIPs, prefer `highlight_zone` over `filter_by_pickup_zip` so the rest of the dashboard stays in context. Only filter when the user explicitly asks to narrow the whole dashboard.",
+ ].join("\n"),
+ // Static tool map (object form), unlike the function form used by `sql_analyst`.
+ tools: {
+ filter_by_date_range,
+ filter_by_pickup_zip,
+ filter_by_fare,
+ clear_filters,
+ highlight_period,
+ clear_highlights,
+ highlight_zone,
+ clear_zone_highlights,
+ focus_chart,
+ save_view,
+ load_view,
+ },
+});
+
createApp({
plugins: [
server(),
@@ -61,6 +339,10 @@ createApp({
lakebaseExamples(),
files({
volumes: {
+ // Smart Dashboard saved views land here. Backed by
+ // DATABRICKS_VOLUME_FILES (see app.yaml / .env). Open policy for
+ // the demo — production apps should narrow this.
+ files: { policy: files.policy.allowAll() },
// baseline: everything allowed
allow_all: { policy: files.policy.allowAll() },
// read-only: uploads/mkdir/delete return 403
@@ -82,8 +364,17 @@ createApp({
implicit: {},
},
}),
- jobs(),
serving(),
+ agents({
+ agents: { helper, sql_analyst, dashboard_pilot },
+ // `query` (markdown dispatcher) + `sql_analyst` + `dashboard_pilot`
+ // wire the /smart-dashboard route. `insights` and `anomaly` are
+ // ephemeral markdown agents auto-fired by the route's AgentSidebar.
+ // `helper` is the conversational default for the bare `/agent` route
+ // (the markdown agents are dispatchers or ephemeral and don't make
+ // sense as the user-facing landing agent).
+ defaultAgent: "helper",
+ }),
// TODO: re-enable once vector-search is exported from @databricks/appkit
// vectorSearch({
// indexes: {
@@ -97,7 +388,7 @@ createApp({
// }),
],
...(process.env.APPKIT_E2E_TEST && { client: createMockClient() }),
- onPluginsReady(appkit) {
+ async onPluginsReady(appkit) {
appkit.server.extend((app) => {
app.get("/sp", (_req, res) => {
appkit.analytics
@@ -196,10 +487,243 @@ createApp({
results,
});
});
+
+      /**
+       * Smart-Dashboard saved-view storage.
+       *
+       * Writes a PNG snapshot of the dashboard plus a sidecar JSON of the
+       * filter/highlight state into the `files` volume
+       * (`DATABRICKS_VOLUME_FILES` — `/Volumes/<catalog>/<schema>/<volume>/...`).
+       * Body is JSON with a base64-encoded PNG (bare base64 or a `data:`
+       * URL) so we avoid adding a multipart library just for this route.
+       * The ~33% size overhead is fine for demo payloads.
+       *
+       * This endpoint is only reachable AFTER the `save_view` approval
+       * gate has resolved client-side — the agent's text confirmation
+       * depends on the client first uploading the screenshot, then POSTing
+       * the approval.
+       *
+       * Responses:
+       *   - 400 when `name` or `pngBase64` is missing/non-string, or the
+       *     image payload cannot be decoded.
+       *   - 500 (`Upload failed: …`) when either volume upload throws.
+       *   - 200 with `{ volumePath, metaPath, bytes, metadata }` otherwise.
+       */
+      app.post("/api/dashboard/save-view", async (req, res) => {
+        const body = req.body as {
+          name?: string;
+          description?: string;
+          filters?: Record<string, unknown>;
+          highlights?: unknown[];
+          pngBase64?: string;
+        } | null;
+
+        if (
+          !body?.name ||
+          typeof body.name !== "string" ||
+          !body.pngBase64 ||
+          typeof body.pngBase64 !== "string"
+        ) {
+          res
+            .status(400)
+            .json({ error: "Missing required fields: name, pngBase64." });
+          return;
+        }
+
+        // Read the clock once so the file-name timestamp and the `savedAt`
+        // field in the sidecar always describe the same instant.
+        const savedAt = new Date().toISOString();
+        const slug = toSlug(body.name);
+        // ':' and '.' are replaced so the timestamp is safe in a file name.
+        const timestamp = savedAt.replace(/[:.]/g, "-");
+        const baseName = `saved-views/${timestamp}_${slug}`;
+        const pngPath = `${baseName}.png`;
+        const metaPath = `${baseName}.json`;
+
+        const pngBytes = decodeDataUrlOrBase64(body.pngBase64);
+        if (!pngBytes) {
+          res.status(400).json({ error: "pngBase64 is not valid base64." });
+          return;
+        }
+
+        const metadata = {
+          name: body.name,
+          description: body.description ?? null,
+          filters: body.filters ?? {},
+          highlights: body.highlights ?? [],
+          savedAt,
+          savedBy: req.header("x-forwarded-user") ?? "unknown",
+          pngPath,
+        };
+
+        try {
+          // Uploads run as the requesting user. The image goes first, then
+          // the sidecar: the list endpoint only reports views that have
+          // both files.
+          const volume = appkit.files("files").asUser(req);
+          await volume.upload(pngPath, pngBytes, { overwrite: true });
+          await volume.upload(
+            metaPath,
+            Buffer.from(JSON.stringify(metadata, null, 2), "utf8"),
+            { overwrite: true },
+          );
+          res.json({
+            volumePath: pngPath,
+            metaPath,
+            bytes: pngBytes.length,
+            metadata,
+          });
+        } catch (err) {
+          const msg = err instanceof Error ? err.message : String(err);
+          res.status(500).json({ error: `Upload failed: ${msg}` });
+        }
+      });
+
+      /**
+       * Lists saved views in the `files` volume.
+       *
+       * Pairs the `.png` and `.json` entries under `saved-views/` into a
+       * single record per saved view (files are written as
+       * `<timestamp>_<slug>.(png|json)`). Entries lacking either half of
+       * the pair, or whose sidecar cannot be read/parsed, are left out.
+       *
+       * Responds `{ views: [...] }`, sorted by base path descending, or
+       * 500 with `{ error }` when listing fails for a reason other than a
+       * missing directory.
+       */
+      app.get("/api/dashboard/saved-views", async (req, res) => {
+        try {
+          const volume = appkit.files("files").asUser(req);
+          let entries: Awaited<ReturnType<typeof volume.list>>;
+          try {
+            entries = await volume.list("saved-views");
+          } catch (err) {
+            // Fresh volume — the `saved-views/` subdirectory only exists
+            // after the first save. Treat "not found" as an empty list so
+            // the panel renders cleanly instead of showing a 500.
+            if (isNotFoundError(err)) {
+              res.json({ views: [] });
+              return;
+            }
+            throw err;
+          }
+          // Index both halves by path-without-extension so they can be paired.
+          const pngs = new Map<string, (typeof entries)[number]>();
+          const metas = new Map<string, (typeof entries)[number]>();
+          for (const e of entries) {
+            if (e.path.endsWith(".png")) {
+              pngs.set(e.path.replace(/\.png$/, ""), e);
+            } else if (e.path.endsWith(".json")) {
+              metas.set(e.path.replace(/\.json$/, ""), e);
+            }
+          }
+          const views = await Promise.all(
+            Array.from(pngs.entries())
+              .filter(([base]) => metas.has(base))
+              // Descending by base path; Map keys are unique, so the
+              // comparator never has to report equality.
+              .sort(([a], [b]) => (a < b ? 1 : -1))
+              .map(async ([base, pngEntry]) => {
+                try {
+                  const metaText = await volume.read(`${base}.json`);
+                  const metaJson =
+                    typeof metaText === "string"
+                      ? metaText
+                      : new TextDecoder().decode(metaText);
+                  const parsed = JSON.parse(metaJson) as Record<
+                    string,
+                    unknown
+                  >;
+                  return {
+                    pngPath: pngEntry.path,
+                    metaPath: `${base}.json`,
+                    metadata: parsed,
+                  };
+                } catch {
+                  // Best effort: one unreadable or corrupt sidecar must
+                  // not fail the whole listing.
+                  return null;
+                }
+              }),
+          );
+          res.json({ views: views.filter((v) => v !== null) });
+        } catch (err) {
+          console.error("[saved-views] list failed:", err);
+          const msg = err instanceof Error ? err.message : String(err);
+          res.status(500).json({ error: msg });
+        }
+      });
+
+      /**
+       * Streams the PNG bytes of a saved view so `<img>` tags in the
+       * UI can render thumbnails without exposing a general-purpose file
+       * download endpoint. `path` is the key returned by
+       * /api/dashboard/saved-views.
+       *
+       * To keep that promise the path must name a `.png` file sitting
+       * directly inside a `saved-views/` directory and must not contain a
+       * `..` segment; anything else is rejected with 400 before the
+       * volume is touched. Download failures are reported as 404.
+       */
+      app.get("/api/dashboard/saved-view-png", async (req, res) => {
+        const path = req.query.path;
+        if (
+          typeof path !== "string" ||
+          // Direct child of `saved-views/` only; the leading `(?:^|\/)`
+          // admits both volume-relative and absolute keys.
+          !/(?:^|\/)saved-views\/[^/\\]+\.png$/.test(path) ||
+          // Refuse traversal such as `x/../saved-views/y.png`.
+          path.split(/[\\/]/).includes("..")
+        ) {
+          res.status(400).json({
+            error: "path query param required, saved-views/*.png only",
+          });
+          return;
+        }
+        try {
+          const volume = appkit.files("files").asUser(req);
+          /**
+           * Databricks `FilesAPI.download` returns a wrapper:
+           *   { contents: ReadableStream, "content-type": string, ... }
+           * NOT the stream itself. We must unwrap `.contents` and drain it
+           * before writing to the Express response. Using the server-reported
+           * content-type (our captures are JPEG under a `.png` key, historical).
+           */
+          const response = (await volume.download(path)) as unknown as {
+            contents?: ReadableStream<Uint8Array>;
+            "content-type"?: string;
+          };
+          const stream = response.contents;
+          if (!stream) {
+            res.status(404).json({ error: "empty download response" });
+            return;
+          }
+          const chunks: Uint8Array[] = [];
+          const reader = stream.getReader();
+          try {
+            while (true) {
+              const { done, value } = await reader.read();
+              if (done) break;
+              if (value) chunks.push(value);
+            }
+          } finally {
+            // Always give the stream back, including when a read rejects.
+            reader.releaseLock();
+          }
+          const body = Buffer.concat(chunks);
+          res.setHeader(
+            "Content-Type",
+            response["content-type"] ?? "image/png",
+          );
+          res.setHeader("Cache-Control", "private, max-age=60");
+          res.end(body);
+        } catch (err) {
+          console.error("[saved-view-png] fetch failed:", err);
+          const msg = err instanceof Error ? err.message : String(err);
+          res.status(404).json({ error: msg });
+        }
+      });
});
},
}).catch(console.error);
+/**
+ * Best-effort detection of the Databricks Files API "directory not found"
+ * failure. There is no typed error class to test against here, so the
+ * check is textual: the message must carry the
+ * `FILES_API_DIRECTORY_IS_NOT_FOUND` reason, the matching human-readable
+ * phrase, or a standalone `NOT_FOUND` error code.
+ *
+ * @param err - Value caught from a failed volume call.
+ * @returns `true` only for `Error` instances whose message matches.
+ */
+function isNotFoundError(err: unknown): boolean {
+  if (err instanceof Error) {
+    const text = err.message;
+    if (text.includes("FILES_API_DIRECTORY_IS_NOT_FOUND")) return true;
+    if (text.includes("directory being accessed is not found")) return true;
+    return /\bNOT_FOUND\b/.test(text);
+  }
+  return false;
+}
+
+/**
+ * Turns a free-form view name into a file-name-safe slug.
+ *
+ * Lower-cases the input, collapses every run of characters outside
+ * `[a-z0-9-]` into a single hyphen, strips leading/trailing hyphens and
+ * caps the result at 60 characters. The cap can cut right after a hyphen,
+ * so trailing hyphens are stripped once more after truncation.
+ *
+ * @param s - Arbitrary user-supplied name.
+ * @returns The slug, or `"view"` when nothing usable remains.
+ */
+function toSlug(s: string): string {
+  const slug = s
+    .toLowerCase()
+    .replace(/[^a-z0-9-]+/g, "-")
+    .replace(/^-+|-+$/g, "")
+    .slice(0, 60)
+    .replace(/-+$/, "");
+  return slug || "view";
+}
+
+/**
+ * Decodes an image payload supplied either as bare base64 or as a
+ * `data:<mime>;base64,<payload>` URL.
+ *
+ * `Buffer.from(text, "base64")` never throws: it silently skips or stops
+ * at characters outside the alphabet. The payload is therefore validated
+ * explicitly, so malformed input yields `null` instead of garbage bytes.
+ * Whitespace is ignored, and both the standard and URL-safe alphabets are
+ * accepted (Node decodes either), with optional `=` padding.
+ *
+ * @param input - Bare base64 text or a `data:` URL.
+ * @returns The decoded bytes, or `null` when the payload is empty or is
+ *   not well-formed base64.
+ */
+function decodeDataUrlOrBase64(input: string): Buffer | null {
+  // For a data URL the payload is everything after the first comma. A
+  // `data:` string with no comma keeps its prefix and fails validation.
+  const payload = input.startsWith("data:")
+    ? input.substring(input.indexOf(",") + 1)
+    : input;
+  const compact = payload.replace(/\s+/g, "");
+  if (!/^[A-Za-z0-9+/_-]+={0,2}$/.test(compact)) {
+    return null;
+  }
+  const bytes = Buffer.from(compact, "base64");
+  return bytes.length > 0 ? bytes : null;
+}
+
type ProbeResult = {
volume: string;
action: string;
diff --git a/apps/dev-playground/shared/appkit-types/analytics.d.ts b/apps/dev-playground/shared/appkit-types/analytics.d.ts
index 43666dd06..c4251761d 100644
--- a/apps/dev-playground/shared/appkit-types/analytics.d.ts
+++ b/apps/dev-playground/shared/appkit-types/analytics.d.ts
@@ -48,6 +48,171 @@ declare module "@databricks/appkit-ui/react" {
dummy: number;
}>;
};
+ dashboard_fare_distribution: {
+ name: "dashboard_fare_distribution";
+ parameters: {
+ /** STRING - use sql.string() */
+ dateFrom: SQLStringMarker;
+ /** STRING - use sql.string() */
+ dateTo: SQLStringMarker;
+ /** STRING - use sql.string() */
+ pickupZip: SQLStringMarker;
+ };
+ result: Array<{
+ /** @sqlType STRING */
+ fare_bucket: string;
+ /** @sqlType BIGINT */
+ trip_count: number;
+ /** @sqlType DOUBLE */
+ avg_distance: number;
+ }>;
+ };
+ dashboard_hourly_heatmap: {
+ name: "dashboard_hourly_heatmap";
+ parameters: {
+ /** STRING - use sql.string() */
+ dateFrom: SQLStringMarker;
+ /** STRING - use sql.string() */
+ dateTo: SQLStringMarker;
+ /** STRING - use sql.string() */
+ pickupZip: SQLStringMarker;
+ /** STRING - use sql.string() */
+ fareMin: SQLStringMarker;
+ /** STRING - use sql.string() */
+ fareMax: SQLStringMarker;
+ };
+ result: Array<{
+ /** @sqlType INT */
+ day_of_week: number;
+ /** @sqlType INT */
+ hour_of_day: number;
+ /** @sqlType BIGINT */
+ trip_count: number;
+ /** @sqlType DOUBLE */
+ avg_fare: number;
+ }>;
+ };
+ dashboard_kpi_sparklines: {
+ name: "dashboard_kpi_sparklines";
+ parameters: {
+ /** STRING - use sql.string() */
+ dateFrom: SQLStringMarker;
+ /** STRING - use sql.string() */
+ dateTo: SQLStringMarker;
+ /** STRING - use sql.string() */
+ pickupZip: SQLStringMarker;
+ /** STRING - use sql.string() */
+ fareMin: SQLStringMarker;
+ /** STRING - use sql.string() */
+ fareMax: SQLStringMarker;
+ };
+ result: Array<{
+ /** @sqlType DATE */
+ trip_date: string;
+ /** @sqlType BIGINT */
+ trip_count: number;
+ /** @sqlType DOUBLE */
+ total_revenue: number;
+ /** @sqlType DOUBLE */
+ avg_fare: number;
+ /** @sqlType DOUBLE */
+ avg_distance: number;
+ }>;
+ };
+ dashboard_kpis: {
+ name: "dashboard_kpis";
+ parameters: {
+ /** STRING - use sql.string() */
+ dateFrom: SQLStringMarker;
+ /** STRING - use sql.string() */
+ dateTo: SQLStringMarker;
+ /** STRING - use sql.string() */
+ pickupZip: SQLStringMarker;
+ /** STRING - use sql.string() */
+ fareMin: SQLStringMarker;
+ /** STRING - use sql.string() */
+ fareMax: SQLStringMarker;
+ };
+ result: Array<{
+ /** @sqlType BIGINT */
+ total_trips: number;
+ /** @sqlType DOUBLE */
+ avg_fare: number;
+ /** @sqlType DOUBLE */
+ avg_distance: number;
+ /** @sqlType DOUBLE */
+ max_fare: number;
+ /** @sqlType DOUBLE */
+ min_fare: number;
+ }>;
+ };
+ dashboard_top_zone: {
+ name: "dashboard_top_zone";
+ parameters: {
+ /** STRING - use sql.string() */
+ dateFrom: SQLStringMarker;
+ /** STRING - use sql.string() */
+ dateTo: SQLStringMarker;
+ /** STRING - use sql.string() */
+ pickupZip: SQLStringMarker;
+ /** STRING - use sql.string() */
+ fareMin: SQLStringMarker;
+ /** STRING - use sql.string() */
+ fareMax: SQLStringMarker;
+ };
+ result: Array<{
+ /** @sqlType INT */
+ pickup_zip: number;
+ /** @sqlType BIGINT */
+ trip_count: number;
+ }>;
+ };
+ dashboard_top_zones: {
+ name: "dashboard_top_zones";
+ parameters: {
+ /** STRING - use sql.string() */
+ dateFrom: SQLStringMarker;
+ /** STRING - use sql.string() */
+ dateTo: SQLStringMarker;
+ /** STRING - use sql.string() */
+ pickupZip: SQLStringMarker;
+ /** STRING - use sql.string() */
+ fareMin: SQLStringMarker;
+ /** STRING - use sql.string() */
+ fareMax: SQLStringMarker;
+ };
+ result: Array<{
+ /** @sqlType STRING */
+ pickup_zip: string;
+ /** @sqlType BIGINT */
+ trip_count: number;
+ /** @sqlType DOUBLE */
+ total_revenue: number;
+ /** @sqlType DOUBLE */
+ avg_fare: number;
+ }>;
+ };
+ dashboard_trips_over_time: {
+ name: "dashboard_trips_over_time";
+ parameters: {
+ /** STRING - use sql.string() */
+ dateFrom: SQLStringMarker;
+ /** STRING - use sql.string() */
+ dateTo: SQLStringMarker;
+ /** STRING - use sql.string() */
+ pickupZip: SQLStringMarker;
+ };
+ result: Array<{
+ /** @sqlType DATE */
+ trip_date: string;
+ /** @sqlType BIGINT */
+ trip_count: number;
+ /** @sqlType DOUBLE */
+ avg_fare: number;
+ /** @sqlType DOUBLE */
+ total_revenue: number;
+ }>;
+ };
example: {
name: "example";
parameters: Record;
diff --git a/apps/dev-playground/tests/smoke.spec.ts b/apps/dev-playground/tests/smoke.spec.ts
index e96a75ebf..aacce50f1 100644
--- a/apps/dev-playground/tests/smoke.spec.ts
+++ b/apps/dev-playground/tests/smoke.spec.ts
@@ -9,7 +9,9 @@ test.describe("Smoke Tests", () => {
).toBeVisible();
await expect(
- page.getByText("Explore the capabilities of the AppKit"),
+ page.getByText("A living catalog of what AppKit can do", {
+ exact: false,
+ }),
).toBeVisible();
});
diff --git a/docs/docs/api/appkit/Class.AppKitMcpClient.md b/docs/docs/api/appkit/Class.AppKitMcpClient.md
new file mode 100644
index 000000000..e0650d462
--- /dev/null
+++ b/docs/docs/api/appkit/Class.AppKitMcpClient.md
@@ -0,0 +1,166 @@
+# Class: AppKitMcpClient
+
+Lightweight MCP client for Databricks-hosted MCP servers.
+
+Uses raw fetch() with JSON-RPC 2.0 over HTTP — no @modelcontextprotocol/sdk
+or LangChain dependency. Supports the Streamable HTTP transport only
+(POST with JSON-RPC request, single JSON-RPC response). Implements exactly
+four methods: `initialize`, `notifications/initialized`, `tools/list`,
+`tools/call`. No prompts/resources/completion/sampling.
+
+All outbound URLs are gated by an McpHostPolicy: unallowlisted hosts
+are rejected before the first byte is sent, and workspace credentials are
+only forwarded to the same-origin workspace. See `mcp-host-policy.ts`.
+
+Rationale for hand-rolling JSON-RPC instead of `@modelcontextprotocol/sdk`:
+see the file-level comment at the top of this module.
+
+## Constructors
+
+### Constructor
+
+```ts
+new AppKitMcpClient(
+ workspaceHost: string,
+ authenticate: () => Promise<Record<string, string>>,
+ policy: McpHostPolicy,
+ options: {
+ dnsLookup?: DnsLookup;
+ fetchImpl?: (input: string | URL | Request, init?: RequestInit) => Promise<Response>;
+}): AppKitMcpClient;
+```
+
+#### Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `workspaceHost` | `string` |
+| `authenticate` | () => `Promise`\<`Record`\<`string`, `string`\>\> |
+| `policy` | `McpHostPolicy` |
+| `options` | \{ `dnsLookup?`: `DnsLookup`; `fetchImpl?`: (`input`: `string` \| `URL` \| `Request`, `init?`: `RequestInit`) => `Promise`\<`Response`\>; \} |
+| `options.dnsLookup?` | `DnsLookup` |
+| `options.fetchImpl?` | (`input`: `string` \| `URL` \| `Request`, `init?`: `RequestInit`) => `Promise`\<`Response`\> |
+
+#### Returns
+
+`AppKitMcpClient`
+
+## Methods
+
+### callTool()
+
+```ts
+callTool(
+ qualifiedName: string,
+ args: unknown,
+ authHeaders?: Record<string, string>,
+callerSignal?: AbortSignal): Promise<string>;
+```
+
+#### Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `qualifiedName` | `string` |
+| `args` | `unknown` |
+| `authHeaders?` | `Record`\<`string`, `string`\> |
+| `callerSignal?` | `AbortSignal` |
+
+#### Returns
+
+`Promise`\<`string`\>
+
+***
+
+### canForwardWorkspaceAuth()
+
+```ts
+canForwardWorkspaceAuth(serverName: string): boolean;
+```
+
+Whether the named MCP server may receive workspace-scoped auth headers
+(e.g., an OBO bearer token from an end-user request). Callers should gate
+auth-forwarding decisions on this to prevent credential exfiltration to
+non-workspace hosts.
+
+#### Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `serverName` | `string` |
+
+#### Returns
+
+`boolean`
+
+***
+
+### close()
+
+```ts
+close(): Promise<void>;
+```
+
+#### Returns
+
+`Promise`\<`void`\>
+
+***
+
+### connect()
+
+```ts
+connect(endpoint: McpEndpointConfig): Promise<void>;
+```
+
+#### Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `endpoint` | `McpEndpointConfig` |
+
+#### Returns
+
+`Promise`\<`void`\>
+
+***
+
+### connectAll()
+
+```ts
+connectAll(endpoints: McpEndpointConfig[]): Promise<McpConnectAllResult>;
+```
+
+Connects every endpoint in parallel and returns a structured summary so
+callers can distinguish "all connected" from "some failed".
+
+Returning the result instead of throwing is deliberate: one
+misconfigured MCP server should not take down the entire agents plugin
+at boot, and the agents plugin uses the summary to warn at startup with
+the failed-endpoint names. Errors are also logged here so a caller
+that ignores the return still gets per-endpoint diagnostics.
+
+#### Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `endpoints` | `McpEndpointConfig`[] |
+
+#### Returns
+
+`Promise`\<[`McpConnectAllResult`](Interface.McpConnectAllResult.md)\>
+
+`connected` lists the endpoint names that initialised
+ successfully; `failed` carries `{ name, error }` for the rest.
+
+***
+
+### getAllToolDefinitions()
+
+```ts
+getAllToolDefinitions(): AgentToolDefinition[];
+```
+
+#### Returns
+
+[`AgentToolDefinition`](Interface.AgentToolDefinition.md)[]
diff --git a/docs/docs/api/appkit/Class.DatabricksAdapter.md b/docs/docs/api/appkit/Class.DatabricksAdapter.md
new file mode 100644
index 000000000..ba4a8a187
--- /dev/null
+++ b/docs/docs/api/appkit/Class.DatabricksAdapter.md
@@ -0,0 +1,152 @@
+# Class: DatabricksAdapter
+
+Adapter that talks directly to the Databricks Model Serving `/invocations` endpoint.
+
+No dependency on the Vercel AI SDK or LangChain. Uses raw `fetch()` to POST
+OpenAI-compatible payloads and parses the SSE stream itself. Calls
+`authenticate()` per-request so tokens are always fresh.
+
+Handles both structured `tool_calls` responses and text-based tool call
+fallback parsing for models that output tool calls as text.
+
+## Examples
+
+```ts
+import { createApp, createAgent, agents } from "@databricks/appkit";
+import { DatabricksAdapter } from "@databricks/appkit/beta";
+import { WorkspaceClient } from "@databricks/sdk-experimental";
+
+const adapter = DatabricksAdapter.fromServingEndpoint({
+ workspaceClient: new WorkspaceClient({}),
+ endpointName: "my-endpoint",
+});
+
+await createApp({
+ plugins: [
+ agents({
+ agents: {
+ assistant: createAgent({
+ instructions: "You are a helpful assistant.",
+ model: adapter,
+ }),
+ },
+ }),
+ ],
+});
+```
+
+```ts
+const adapter = new DatabricksAdapter({
+ endpointUrl: "https://host/serving-endpoints/my-endpoint/invocations",
+ authenticate: async () => ({ Authorization: `Bearer ${token}` }),
+});
+```
+
+## Implements
+
+- [`AgentAdapter`](Interface.AgentAdapter.md)
+
+## Constructors
+
+### Constructor
+
+```ts
+new DatabricksAdapter(options: DatabricksAdapterOptions): DatabricksAdapter;
+```
+
+#### Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `options` | `DatabricksAdapterOptions` |
+
+#### Returns
+
+`DatabricksAdapter`
+
+## Methods
+
+### run()
+
+```ts
+run(input: AgentInput, context: AgentRunContext): AsyncGenerator<AgentEvent, void, unknown>;
+```
+
+#### Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `input` | [`AgentInput`](Interface.AgentInput.md) |
+| `context` | [`AgentRunContext`](Interface.AgentRunContext.md) |
+
+#### Returns
+
+`AsyncGenerator`\<[`AgentEvent`](TypeAlias.AgentEvent.md), `void`, `unknown`\>
+
+#### Implementation of
+
+[`AgentAdapter`](Interface.AgentAdapter.md).[`run`](Interface.AgentAdapter.md#run)
+
+***
+
+### fromModelServing()
+
+```ts
+static fromModelServing(endpointName?: string, options?: ModelServingOptions): Promise<DatabricksAdapter>;
+```
+
+Creates a DatabricksAdapter from a Model Serving endpoint name.
+Auto-creates a WorkspaceClient internally. Reads the endpoint name
+from the argument or the `DATABRICKS_SERVING_ENDPOINT_NAME` env var.
+
+#### Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `endpointName?` | `string` |
+| `options?` | `ModelServingOptions` |
+
+#### Returns
+
+`Promise`\<`DatabricksAdapter`\>
+
+#### Example
+
+```ts
+// Reads endpoint from DATABRICKS_SERVING_ENDPOINT_NAME env var
+const adapter = await DatabricksAdapter.fromModelServing();
+
+// Explicit endpoint
+const adapter = await DatabricksAdapter.fromModelServing("my-endpoint");
+
+// With options
+const adapter = await DatabricksAdapter.fromModelServing("my-endpoint", {
+ maxSteps: 5,
+ maxTokens: 2048,
+});
+```
+
+***
+
+### fromServingEndpoint()
+
+```ts
+static fromServingEndpoint(options: ServingEndpointOptions): Promise<DatabricksAdapter>;
+```
+
+Creates a DatabricksAdapter for a Databricks Model Serving endpoint.
+
+Routes through the shared `connectors/serving/stream` helper, which
+delegates to the SDK's `apiClient.request({ raw: true })`. That gives the
+adapter centralised URL encoding + authentication with the rest of the
+serving surface — no bespoke `fetch()` + `authenticate()` plumbing.
+
+#### Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `options` | `ServingEndpointOptions` |
+
+#### Returns
+
+`Promise`\<`DatabricksAdapter`\>
diff --git a/docs/docs/api/appkit/Function.agentIdFromMarkdownPath.md b/docs/docs/api/appkit/Function.agentIdFromMarkdownPath.md
new file mode 100644
index 000000000..65677e19d
--- /dev/null
+++ b/docs/docs/api/appkit/Function.agentIdFromMarkdownPath.md
@@ -0,0 +1,19 @@
+# Function: agentIdFromMarkdownPath()
+
+```ts
+function agentIdFromMarkdownPath(filePath: string): string;
+```
+
+Derives the logical agent id from a markdown path. When the file is named
+`agent.md`, the id is the parent directory name (folder-based layout);
+otherwise the id is the file stem (e.g. legacy single-file paths).
+
+## Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `filePath` | `string` |
+
+## Returns
+
+`string`
diff --git a/docs/docs/api/appkit/Function.createAgent.md b/docs/docs/api/appkit/Function.createAgent.md
new file mode 100644
index 000000000..61064e512
--- /dev/null
+++ b/docs/docs/api/appkit/Function.createAgent.md
@@ -0,0 +1,35 @@
+# Function: createAgent()
+
+```ts
+function createAgent(def: AgentDefinition): AgentDefinition;
+```
+
+Pure factory for agent definitions. Returns the passed-in definition after
+cycle-detecting the sub-agent graph. Accepts the full `AgentDefinition` shape
+and is safe to call at module top-level.
+
+The returned value is a plain `AgentDefinition` — no adapter construction,
+no side effects. Register it with `agents({ agents: { name: def } })` or run
+it standalone via `runAgent(def, input)`.
+
+## Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `def` | [`AgentDefinition`](Interface.AgentDefinition.md) |
+
+## Returns
+
+[`AgentDefinition`](Interface.AgentDefinition.md)
+
+## Example
+
+```ts
+const support = createAgent({
+ instructions: "You help customers.",
+ model: "databricks-claude-sonnet-4-5",
+ tools: {
+ get_weather: tool({ ... }),
+ },
+});
+```
diff --git a/docs/docs/api/appkit/Function.defineTool.md b/docs/docs/api/appkit/Function.defineTool.md
new file mode 100644
index 000000000..93069623f
--- /dev/null
+++ b/docs/docs/api/appkit/Function.defineTool.md
@@ -0,0 +1,27 @@
+# Function: defineTool()
+
+```ts
+function defineTool<S>(config: ToolEntry<S>): ToolEntry<S>;
+```
+
+Defines a single tool entry for a plugin's internal registry.
+
+The generic `S` flows from `schema` through to the `handler` callback so
+`args` is fully typed from the Zod schema. Names are assigned by the
+registry key, so they are not repeated inside the entry.
+
+## Type Parameters
+
+| Type Parameter |
+| ------ |
+| `S` *extends* `ZodType`\<`unknown`, `unknown`, `$ZodTypeInternals`\<`unknown`, `unknown`\>\> |
+
+## Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `config` | [`ToolEntry`](Interface.ToolEntry.md)\<`S`\> |
+
+## Returns
+
+[`ToolEntry`](Interface.ToolEntry.md)\<`S`\>
diff --git a/docs/docs/api/appkit/Function.executeFromRegistry.md b/docs/docs/api/appkit/Function.executeFromRegistry.md
new file mode 100644
index 000000000..762aad5df
--- /dev/null
+++ b/docs/docs/api/appkit/Function.executeFromRegistry.md
@@ -0,0 +1,27 @@
+# Function: executeFromRegistry()
+
+```ts
+function executeFromRegistry(
+ registry: ToolRegistry,
+ name: string,
+ args: unknown,
+signal?: AbortSignal): Promise<unknown>;
+```
+
+Validates tool-call arguments against the entry's schema and invokes its
+handler. On validation failure, returns an LLM-friendly error string
+(matching the behavior of `tool()`) rather than throwing, so the model
+can self-correct on its next turn.
+
+## Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `registry` | [`ToolRegistry`](TypeAlias.ToolRegistry.md) |
+| `name` | `string` |
+| `args` | `unknown` |
+| `signal?` | `AbortSignal` |
+
+## Returns
+
+`Promise`\<`unknown`\>
diff --git a/docs/docs/api/appkit/Function.functionToolToDefinition.md b/docs/docs/api/appkit/Function.functionToolToDefinition.md
new file mode 100644
index 000000000..71ac617aa
--- /dev/null
+++ b/docs/docs/api/appkit/Function.functionToolToDefinition.md
@@ -0,0 +1,15 @@
+# Function: functionToolToDefinition()
+
+```ts
+function functionToolToDefinition(tool: FunctionTool): AgentToolDefinition;
+```
+
+## Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `tool` | [`FunctionTool`](Interface.FunctionTool.md) |
+
+## Returns
+
+[`AgentToolDefinition`](Interface.AgentToolDefinition.md)
diff --git a/docs/docs/api/appkit/Function.isFunctionTool.md b/docs/docs/api/appkit/Function.isFunctionTool.md
new file mode 100644
index 000000000..ebd84ee4f
--- /dev/null
+++ b/docs/docs/api/appkit/Function.isFunctionTool.md
@@ -0,0 +1,15 @@
+# Function: isFunctionTool()
+
+```ts
+function isFunctionTool(value: unknown): value is FunctionTool;
+```
+
+## Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `value` | `unknown` |
+
+## Returns
+
+`value is FunctionTool`
diff --git a/docs/docs/api/appkit/Function.isHostedTool.md b/docs/docs/api/appkit/Function.isHostedTool.md
new file mode 100644
index 000000000..73be7e16b
--- /dev/null
+++ b/docs/docs/api/appkit/Function.isHostedTool.md
@@ -0,0 +1,15 @@
+# Function: isHostedTool()
+
+```ts
+function isHostedTool(value: unknown): value is HostedTool;
+```
+
+## Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `value` | `unknown` |
+
+## Returns
+
+`value is HostedTool`
diff --git a/docs/docs/api/appkit/Function.isToolkitEntry.md b/docs/docs/api/appkit/Function.isToolkitEntry.md
new file mode 100644
index 000000000..892907a41
--- /dev/null
+++ b/docs/docs/api/appkit/Function.isToolkitEntry.md
@@ -0,0 +1,18 @@
+# Function: isToolkitEntry()
+
+```ts
+function isToolkitEntry(value: unknown): value is ToolkitEntry;
+```
+
+Type guard for `ToolkitEntry` — used by the agents plugin to differentiate
+toolkit references from inline tools in a mixed `tools` record.
+
+## Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `value` | `unknown` |
+
+## Returns
+
+`value is ToolkitEntry`
diff --git a/docs/docs/api/appkit/Function.loadAgentFromFile.md b/docs/docs/api/appkit/Function.loadAgentFromFile.md
new file mode 100644
index 000000000..55c2cd342
--- /dev/null
+++ b/docs/docs/api/appkit/Function.loadAgentFromFile.md
@@ -0,0 +1,23 @@
+# Function: loadAgentFromFile()
+
+```ts
+function loadAgentFromFile(filePath: string, ctx: LoadContext): Promise<AgentDefinition>;
+```
+
+Loads a single markdown agent file and resolves its frontmatter against
+registered plugin toolkits + ambient tool library.
+
+Rejects non-empty `agents:` frontmatter because single-file loads have
+no siblings to resolve sub-agent references against — callers must use
+[loadAgentsFromDir](Function.loadAgentsFromDir.md) when markdown agents delegate to one another.
+
+## Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `filePath` | `string` |
+| `ctx` | `LoadContext` |
+
+## Returns
+
+`Promise`\<[`AgentDefinition`](Interface.AgentDefinition.md)\>
diff --git a/docs/docs/api/appkit/Function.loadAgentsFromDir.md b/docs/docs/api/appkit/Function.loadAgentsFromDir.md
new file mode 100644
index 000000000..85d94b43c
--- /dev/null
+++ b/docs/docs/api/appkit/Function.loadAgentsFromDir.md
@@ -0,0 +1,33 @@
+# Function: loadAgentsFromDir()
+
+```ts
+function loadAgentsFromDir(dir: string, ctx: LoadContext): Promise<LoadResult>;
+```
+
+Scans a directory for one subdirectory per agent, each containing
+`agent.md` (frontmatter + body). Produces an `AgentDefinition` record keyed
+by agent id (folder name). Throws on frontmatter errors or unresolved
+references. Returns an empty map if the directory does not exist.
+
+Legacy top-level `*.md` files are rejected with an error — migrate each to
+`<id>/agent.md` under a sibling folder named for the agent id.
+
+Runs in two passes so sub-agent references in frontmatter (`agents: [...]`)
+can be resolved regardless of directory iteration order:
+
+1. Build every agent's definition from its own `agent.md`.
+2. Walk `agents:` references and wire `def.agents = { child: childDef }`
+ by looking them up in the complete map. Dangling names and
+ self-references fail loudly; mutual delegation is allowed and bounded
+ at runtime by `limits.maxSubAgentDepth`.
+
+## Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `dir` | `string` |
+| `ctx` | `LoadContext` |
+
+## Returns
+
+`Promise`\<`LoadResult`\>
diff --git a/docs/docs/api/appkit/Function.mcpServer.md b/docs/docs/api/appkit/Function.mcpServer.md
new file mode 100644
index 000000000..cafd46572
--- /dev/null
+++ b/docs/docs/api/appkit/Function.mcpServer.md
@@ -0,0 +1,26 @@
+# Function: mcpServer()
+
+```ts
+function mcpServer(name: string, url: string): CustomMcpServerTool;
+```
+
+Factory for declaring a custom MCP server tool.
+
+Replaces the verbose `{ type: "custom_mcp_server", custom_mcp_server: { app_name, app_url } }`
+wrapper with a concise positional call.
+
+Example:
+```ts
+mcpServer("my-app", "https://my-app.databricksapps.com/mcp")
+```
+
+## Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `name` | `string` |
+| `url` | `string` |
+
+## Returns
+
+`CustomMcpServerTool`
diff --git a/docs/docs/api/appkit/Function.parseTextToolCalls.md b/docs/docs/api/appkit/Function.parseTextToolCalls.md
new file mode 100644
index 000000000..ed701952c
--- /dev/null
+++ b/docs/docs/api/appkit/Function.parseTextToolCalls.md
@@ -0,0 +1,27 @@
+# Function: parseTextToolCalls()
+
+```ts
+function parseTextToolCalls(text: string): {
+ args: unknown;
+ name: string;
+}[];
+```
+
+Parses text-based tool calls from model output.
+
+Handles two formats:
+1. Llama native: `[{"name": "tool_name", "parameters": {"arg": "val"}}]`
+2. Python-style: `[tool_name(arg1='val1', arg2='val2')]`
+
+## Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `text` | `string` |
+
+## Returns
+
+\{
+ `args`: `unknown`;
+ `name`: `string`;
+\}[]
diff --git a/docs/docs/api/appkit/Function.resolveHostedTools.md b/docs/docs/api/appkit/Function.resolveHostedTools.md
new file mode 100644
index 000000000..b846410cd
--- /dev/null
+++ b/docs/docs/api/appkit/Function.resolveHostedTools.md
@@ -0,0 +1,15 @@
+# Function: resolveHostedTools()
+
+```ts
+function resolveHostedTools(tools: HostedTool[]): McpEndpointConfig[];
+```
+
+## Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `tools` | [`HostedTool`](TypeAlias.HostedTool.md)[] |
+
+## Returns
+
+`McpEndpointConfig`[]
diff --git a/docs/docs/api/appkit/Function.runAgent.md b/docs/docs/api/appkit/Function.runAgent.md
new file mode 100644
index 000000000..3a13a40bb
--- /dev/null
+++ b/docs/docs/api/appkit/Function.runAgent.md
@@ -0,0 +1,43 @@
+# Function: runAgent()
+
+```ts
+function runAgent(def: AgentDefinition, input: RunAgentInput): Promise<RunAgentResult>;
+```
+
+Standalone agent execution without `createApp`. Resolves the adapter, binds
+inline tools, and drives the adapter's `run()` loop to completion.
+
+Limitations vs. running through the agents() plugin:
+- **No OBO and no approval gate** — there is no HTTP request, so plugin
+ tools run as the service principal. The agents-plugin approval gate
+ that prompts for human confirmation on `effect: "write" | "update" |
+ "destructive"` tools is also absent. LLM-controlled tool arguments
+ flow straight through to the SP. Treat standalone runAgent as a
+ trusted-prompt environment (CI, batch eval, internal scripts) — not
+ as an exposed user-facing surface.
+- **Hosted tools (MCP) are not supported** — they require a live MCP
+ client that only exists inside the agents plugin's lifecycle.
+ `runAgent` rejects them at index-build time with a clear error.
+- **Sub-agents** (`agents: { ... }` on the def) are executed as nested
+ `runAgent` calls with no shared thread state. Plugin instances ARE
+ shared across the recursion (same cache as the parent).
+- **Plugin tools** (used inside the function form via
+  `plugins.<name>.toolkit(...)`) require passing `plugins: [...]` via
+ `RunAgentInput`. Each plugin in that array is constructed once,
+ `attachContext({})` and `await setup()` are called eagerly, and the
+ resulting instance is shared across the top-level run and all
+ sub-agent recursions. Plugins whose `setup()` requires runtime that
+ only `createApp` provides (e.g. `WorkspaceClient`, `ServiceContext`,
+ `PluginContext`) throw at standalone-init time with a clear "use
+ createApp instead" message — not mid-stream.
+
+## Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `def` | [`AgentDefinition`](Interface.AgentDefinition.md) |
+| `input` | [`RunAgentInput`](Interface.RunAgentInput.md) |
+
+## Returns
+
+`Promise`\<[`RunAgentResult`](Interface.RunAgentResult.md)\>
diff --git a/docs/docs/api/appkit/Function.tool.md b/docs/docs/api/appkit/Function.tool.md
new file mode 100644
index 000000000..d6799cfd1
--- /dev/null
+++ b/docs/docs/api/appkit/Function.tool.md
@@ -0,0 +1,29 @@
+# Function: tool()
+
+```ts
+function tool<S>(config: ToolConfig<S>): FunctionTool;
+```
+
+Factory for defining function tools with Zod schemas.
+
+- Generates JSON Schema (for the LLM) from the Zod schema via `z.toJSONSchema()`.
+- Infers the `execute` argument type from the schema.
+- Validates tool call arguments at runtime. On validation failure, returns
+ a formatted error string to the LLM instead of throwing, so the model
+ can self-correct on its next turn.
+
+## Type Parameters
+
+| Type Parameter |
+| ------ |
+| `S` *extends* `ZodType`\<`unknown`, `unknown`, `$ZodTypeInternals`\<`unknown`, `unknown`\>\> |
+
+## Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `config` | [`ToolConfig`](Interface.ToolConfig.md)\<`S`\> |
+
+## Returns
+
+[`FunctionTool`](Interface.FunctionTool.md)
diff --git a/docs/docs/api/appkit/Function.toolsFromRegistry.md b/docs/docs/api/appkit/Function.toolsFromRegistry.md
new file mode 100644
index 000000000..ea47da132
--- /dev/null
+++ b/docs/docs/api/appkit/Function.toolsFromRegistry.md
@@ -0,0 +1,21 @@
+# Function: toolsFromRegistry()
+
+```ts
+function toolsFromRegistry(registry: ToolRegistry): AgentToolDefinition[];
+```
+
+Produces the `AgentToolDefinition[]` a ToolProvider exposes to the LLM,
+deriving `parameters` JSON Schema from each entry's Zod schema.
+
+Tool names come from registry keys (supports dotted names like
+`uploads.list` for dynamic plugins).
+
+## Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `registry` | [`ToolRegistry`](TypeAlias.ToolRegistry.md) |
+
+## Returns
+
+[`AgentToolDefinition`](Interface.AgentToolDefinition.md)[]
diff --git a/docs/docs/api/appkit/Interface.AgentAdapter.md b/docs/docs/api/appkit/Interface.AgentAdapter.md
new file mode 100644
index 000000000..52083157e
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.AgentAdapter.md
@@ -0,0 +1,20 @@
+# Interface: AgentAdapter
+
+## Methods
+
+### run()
+
+```ts
+run(input: AgentInput, context: AgentRunContext): AsyncGenerator<AgentEvent, void, unknown>;
+```
+
+#### Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `input` | [`AgentInput`](Interface.AgentInput.md) |
+| `context` | [`AgentRunContext`](Interface.AgentRunContext.md) |
+
+#### Returns
+
+`AsyncGenerator`\<[`AgentEvent`](TypeAlias.AgentEvent.md), `void`, `unknown`\>
diff --git a/docs/docs/api/appkit/Interface.AgentDefinition.md b/docs/docs/api/appkit/Interface.AgentDefinition.md
new file mode 100644
index 000000000..87138d35d
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.AgentDefinition.md
@@ -0,0 +1,122 @@
+# Interface: AgentDefinition
+
+## Properties
+
+### agents?
+
+```ts
+optional agents: Record<string, AgentDefinition>;
+```
+
+Sub-agents, exposed as `agent-<name>` tools on this agent.
+
+***
+
+### baseSystemPrompt?
+
+```ts
+optional baseSystemPrompt: BaseSystemPromptOption;
+```
+
+Override the plugin's baseSystemPrompt for this agent only.
+
+***
+
+### ephemeral?
+
+```ts
+optional ephemeral: boolean;
+```
+
+When true, the thread used for a chat request against this agent is
+deleted from `ThreadStore` after the stream completes (success or
+failure). Use for stateless one-shot agents — e.g. autocomplete, where
+each request is independent and retaining history would both poison
+future calls and accumulate unbounded state in the default
+`InMemoryThreadStore`. Defaults to `false`.
+
+***
+
+### instructions
+
+```ts
+instructions: string;
+```
+
+System prompt body. For markdown-loaded agents this is the file body.
+
+***
+
+### maxSteps?
+
+```ts
+optional maxSteps: number;
+```
+
+***
+
+### maxTokens?
+
+```ts
+optional maxTokens: number;
+```
+
+***
+
+### model?
+
+```ts
+optional model:
+ | string
+ | AgentAdapter
+| Promise<AgentAdapter>;
+```
+
+Model adapter (or endpoint-name string sugar for
+`DatabricksAdapter.fromServingEndpoint({ endpointName })`). Optional —
+falls back to the plugin's `defaultModel`.
+
+***
+
+### name?
+
+```ts
+optional name: string;
+```
+
+Stable identifier for the agent. **Optional and informational** —
+when the definition is registered via `agents: { foo: def }` (code) or
+lives at `config/agents/<id>/agent.md` (markdown), the **registry key
+always wins** and `name` is ignored. The agent will be reachable as
+`foo` (or `<id>`) regardless of what this field contains.
+
+Set `name` when:
+  - Running standalone via `runAgent(def, input)`, where there is
+ no enclosing key. The runtime uses it for the agent's slot in
+ error messages and OTel spans.
+ - Building a definition that may be passed to either form and you
+ want a consistent fallback label.
+
+Setting `name` to a value that differs from the registry key is
+harmless but confusing — prefer keeping them aligned or omitting `name`
+entirely.
+
+***
+
+### tools?
+
+```ts
+optional tools:
+ | AgentTools
+ | AgentToolsFn;
+```
+
+Per-agent tool record. Key is the LLM-visible tool-call name.
+
+Accepts either a plain record (for agents that only use inline tools)
+or a function `(plugins) => Record` that receives
+the typed [Plugins](TypeAlias.Plugins.md) map and returns a tool record (for agents
+that pull tools from registered plugins).
+
+The function is invoked once at agent setup; the result is cached.
+Don't put per-request logic in there.
diff --git a/docs/docs/api/appkit/Interface.AgentInput.md b/docs/docs/api/appkit/Interface.AgentInput.md
new file mode 100644
index 000000000..6d2eff8b0
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.AgentInput.md
@@ -0,0 +1,33 @@
+# Interface: AgentInput
+
+## Properties
+
+### messages
+
+```ts
+messages: Message[];
+```
+
+***
+
+### signal?
+
+```ts
+optional signal: AbortSignal;
+```
+
+***
+
+### threadId
+
+```ts
+threadId: string;
+```
+
+***
+
+### tools
+
+```ts
+tools: AgentToolDefinition[];
+```
diff --git a/docs/docs/api/appkit/Interface.AgentRunContext.md b/docs/docs/api/appkit/Interface.AgentRunContext.md
new file mode 100644
index 000000000..4dfbea18d
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.AgentRunContext.md
@@ -0,0 +1,30 @@
+# Interface: AgentRunContext
+
+## Properties
+
+### executeTool()
+
+```ts
+executeTool: (name: string, args: unknown) => Promise<unknown>;
+```
+
+Tool implementations should sanitize failure text — errors become `tool_result.error` and can flow back into the LLM transcript.
+
+#### Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `name` | `string` |
+| `args` | `unknown` |
+
+#### Returns
+
+`Promise`\<`unknown`\>
+
+***
+
+### signal?
+
+```ts
+optional signal: AbortSignal;
+```
diff --git a/docs/docs/api/appkit/Interface.AgentToolDefinition.md b/docs/docs/api/appkit/Interface.AgentToolDefinition.md
new file mode 100644
index 000000000..51c375955
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.AgentToolDefinition.md
@@ -0,0 +1,33 @@
+# Interface: AgentToolDefinition
+
+## Properties
+
+### annotations?
+
+```ts
+optional annotations: ToolAnnotations;
+```
+
+***
+
+### description
+
+```ts
+description: string;
+```
+
+***
+
+### name
+
+```ts
+name: string;
+```
+
+***
+
+### parameters
+
+```ts
+parameters: JSONSchema7;
+```
diff --git a/docs/docs/api/appkit/Interface.AgentsPluginConfig.md b/docs/docs/api/appkit/Interface.AgentsPluginConfig.md
new file mode 100644
index 000000000..c038d41c1
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.AgentsPluginConfig.md
@@ -0,0 +1,251 @@
+# Interface: AgentsPluginConfig
+
+Base configuration interface for AppKit plugins
+
+## Extends
+
+- [`BasePluginConfig`](Interface.BasePluginConfig.md)
+
+## Indexable
+
+```ts
+[key: string]: unknown
+```
+
+## Properties
+
+### agents?
+
+```ts
+optional agents: Record<string, AgentDefinition>;
+```
+
+Code-defined agents, merged with file-loaded ones (code wins on key collision).
+
+***
+
+### approval?
+
+```ts
+optional approval: {
+ requireForDestructive?: boolean;
+ timeoutMs?: number;
+};
+```
+
+Human-in-the-loop approval gate for mutating tool calls. When enabled
+(the default), the agents plugin emits an `appkit.approval_pending` SSE
+event before executing any tool whose annotation flags it as mutating —
+`effect: "write" | "update" | "destructive"` (preferred) or the legacy
+`destructive: true` boolean — and waits for a `POST /chat/approve`
+decision from the same user who initiated the stream. A missing decision
+after `timeoutMs` auto-denies the call.
+
+#### requireForDestructive?
+
+```ts
+optional requireForDestructive: boolean;
+```
+
+Require human approval for tools that mutate state. Triggered by
+`effect: "write" | "update" | "destructive"` (preferred) or the legacy
+`destructive: true` boolean. Default: `true`.
+
+#### timeoutMs?
+
+```ts
+optional timeoutMs: number;
+```
+
+Milliseconds to wait before auto-denying. Default: 60_000.
+
+***
+
+### autoInheritTools?
+
+```ts
+optional autoInheritTools:
+ | boolean
+ | AutoInheritToolsConfig;
+```
+
+Whether to auto-inherit every ToolProvider plugin's toolkit. Accepts a boolean shorthand.
+
+***
+
+### baseSystemPrompt?
+
+```ts
+optional baseSystemPrompt: BaseSystemPromptOption;
+```
+
+Customize or disable the AppKit base system prompt.
+
+***
+
+### defaultAgent?
+
+```ts
+optional defaultAgent: string;
+```
+
+Agent used when clients don't specify one. Defaults to the first-registered agent or the file with `default: true` frontmatter.
+
+***
+
+### defaultModel?
+
+```ts
+optional defaultModel:
+ | string
+ | AgentAdapter
+| Promise<AgentAdapter>;
+```
+
+Default model for agents that don't specify their own (in code or frontmatter).
+
+***
+
+### dir?
+
+```ts
+optional dir: string | false;
+```
+
+Directory of agent packages (`<id>/agent.md` each). Default `./config/agents`. Set to `false` to disable.
+
+***
+
+### host?
+
+```ts
+optional host: string;
+```
+
+#### Inherited from
+
+[`BasePluginConfig`](Interface.BasePluginConfig.md).[`host`](Interface.BasePluginConfig.md#host)
+
+***
+
+### limits?
+
+```ts
+optional limits: {
+ maxConcurrentStreamsPerUser?: number;
+ maxSubAgentDepth?: number;
+ maxToolCalls?: number;
+ toolCallTimeoutMs?: number;
+};
+```
+
+Runtime resource limits applied during agent execution. Defaults are
+tuned to protect a single-instance deployment from a misbehaving user or
+a runaway prompt injection; tighten or relax as appropriate for the
+deployment's scale and trust model. Request-body caps (chat message
+size, invocations input size / length) are enforced statically by the
+Zod schemas and are not configurable here.
+
+#### maxConcurrentStreamsPerUser?
+
+```ts
+optional maxConcurrentStreamsPerUser: number;
+```
+
+Max concurrent chat streams a single user may have open. Subsequent
+`POST /chat` requests from that user while at-limit are rejected with
+HTTP 429. Default: `5`.
+
+#### maxSubAgentDepth?
+
+```ts
+optional maxSubAgentDepth: number;
+```
+
+Max sub-agent recursion depth. Protects against a prompt-injected
+agent that delegates to a sub-agent which in turn delegates back to
+itself (directly or transitively). Default: `3`.
+
+#### maxToolCalls?
+
+```ts
+optional maxToolCalls: number;
+```
+
+Max tool invocations per agent run (across the full tool-call graph,
+including sub-agent invocations). A run that exceeds the budget is
+aborted with a terminal error event. Default: `50`.
+
+#### toolCallTimeoutMs?
+
+```ts
+optional toolCallTimeoutMs: number;
+```
+
+Per-call timeout for tools dispatched through `PluginContext`
+(toolkit-routed tools — analytics SQL warehouse queries, Genie
+messages, Lakebase queries). Independent of `maxToolCalls`: the
+budget caps how many tools fire per run, this caps how long any
+single tool call may run. The signal handed to plugin tool
+implementations combines this timeout with the parent stream's
+abort signal via `AbortSignal.any`. Function and MCP tools have
+their own timeouts in their respective adapters and ignore this
+setting. Default: `300_000` (5 minutes) — generous enough for cold
+SQL Warehouse round-trips and long Genie conversations.
+
+***
+
+### mcp?
+
+```ts
+optional mcp: McpHostPolicyConfig;
+```
+
+MCP server host policy. By default only same-origin Databricks workspace
+URLs may be used as MCP endpoints; custom hosts must be explicitly
+allowlisted here. Workspace credentials (SP / OBO) are never forwarded
+to non-workspace hosts.
+
+***
+
+### name?
+
+```ts
+optional name: string;
+```
+
+#### Inherited from
+
+[`BasePluginConfig`](Interface.BasePluginConfig.md).[`name`](Interface.BasePluginConfig.md#name)
+
+***
+
+### telemetry?
+
+```ts
+optional telemetry: TelemetryOptions;
+```
+
+#### Inherited from
+
+[`BasePluginConfig`](Interface.BasePluginConfig.md).[`telemetry`](Interface.BasePluginConfig.md#telemetry)
+
+***
+
+### threadStore?
+
+```ts
+optional threadStore: ThreadStore;
+```
+
+Persistent thread store. Default: in-memory.
+
+***
+
+### tools?
+
+```ts
+optional tools: Record;
+```
+
+Ambient tool library. Keys may be referenced by markdown frontmatter via `tools: [key1, key2]`.
diff --git a/docs/docs/api/appkit/Interface.AutoInheritToolsConfig.md b/docs/docs/api/appkit/Interface.AutoInheritToolsConfig.md
new file mode 100644
index 000000000..569cc34fb
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.AutoInheritToolsConfig.md
@@ -0,0 +1,31 @@
+# Interface: AutoInheritToolsConfig
+
+Auto-inherit configuration. When enabled for a given agent origin, agents
+with no explicit `tools:` declaration receive every registered ToolProvider
+plugin tool whose author marked `autoInheritable: true`. Tools without that
+flag — destructive, state-mutating, or privilege-sensitive — never spread
+automatically and must be wired via `tools:` (object or function form in
+code, `plugin:NAME` entries in markdown frontmatter).
+
+Defaults are `false` for both origins (safe-by-default): developers must
+consciously opt an origin in to any auto-inherit behaviour.
+
+## Properties
+
+### code?
+
+```ts
+optional code: boolean;
+```
+
+Default for code-defined agents (via `agents: { foo: createAgent(...) }`). Default: `false`.
+
+***
+
+### file?
+
+```ts
+optional file: boolean;
+```
+
+Default for agents loaded from markdown files. Default: `false`.
diff --git a/docs/docs/api/appkit/Interface.BasePluginConfig.md b/docs/docs/api/appkit/Interface.BasePluginConfig.md
index 3483c136e..653df68ce 100644
--- a/docs/docs/api/appkit/Interface.BasePluginConfig.md
+++ b/docs/docs/api/appkit/Interface.BasePluginConfig.md
@@ -4,6 +4,7 @@ Base configuration interface for AppKit plugins
## Extended by
+- [`AgentsPluginConfig`](Interface.AgentsPluginConfig.md)
- [`IJobsConfig`](Interface.IJobsConfig.md)
## Indexable
diff --git a/docs/docs/api/appkit/Interface.FunctionTool.md b/docs/docs/api/appkit/Interface.FunctionTool.md
new file mode 100644
index 000000000..450db5385
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.FunctionTool.md
@@ -0,0 +1,83 @@
+# Interface: FunctionTool
+
+## Properties
+
+### annotations?
+
+```ts
+optional annotations: ToolAnnotations;
+```
+
+Behavioural hints that drive the agents plugin's approval gate and the
+client's approval-card styling. Prefer setting `effect` (one of
+`"read" | "write" | "update" | "destructive"`) — any mutating value
+forces HITL approval before `execute()` runs. Legacy `destructive: true`
+is still honoured. Must be preserved through [functionToolToDefinition](Function.functionToolToDefinition.md) so the plugin sees them when building agent
+tool indexes.
+
+***
+
+### description?
+
+```ts
+optional description: string | null;
+```
+
+***
+
+### execute()
+
+```ts
+execute: (args: Record<string, unknown>) => unknown;
+```
+
+Returns any shape; downstream `normalizeToolResult` serializes to a
+string before handing the value to the LLM.
+
+#### Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `args` | `Record`\<`string`, `unknown`\> |
+
+#### Returns
+
+`unknown`
+
+***
+
+### name?
+
+```ts
+optional name: string;
+```
+
+Optional. When this tool is placed in a keyed record
+(`tools: { my_tool: ... }` or the function form), the agents plugin
+overrides this with the record key at index-build time. Only set it
+explicitly when constructing a `FunctionTool` outside any
+keyed-record context.
+
+***
+
+### parameters?
+
+```ts
+optional parameters: Record | null;
+```
+
+***
+
+### strict?
+
+```ts
+optional strict: boolean | null;
+```
+
+***
+
+### type
+
+```ts
+type: "function";
+```
diff --git a/docs/docs/api/appkit/Interface.McpConnectAllResult.md b/docs/docs/api/appkit/Interface.McpConnectAllResult.md
new file mode 100644
index 000000000..e96a30832
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.McpConnectAllResult.md
@@ -0,0 +1,36 @@
+# Interface: McpConnectAllResult
+
+Per-endpoint outcome of [AppKitMcpClient.connectAll](Class.AppKitMcpClient.md#connectall). Callers (the
+agents plugin in particular) use the split to warn at startup when some
+MCP servers are unreachable without aborting boot for the rest.
+
+## Properties
+
+### connected
+
+```ts
+connected: string[];
+```
+
+***
+
+### failed
+
+```ts
+failed: {
+ error: Error;
+ name: string;
+}[];
+```
+
+#### error
+
+```ts
+error: Error;
+```
+
+#### name
+
+```ts
+name: string;
+```
diff --git a/docs/docs/api/appkit/Interface.Message.md b/docs/docs/api/appkit/Interface.Message.md
new file mode 100644
index 000000000..ed818408d
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.Message.md
@@ -0,0 +1,49 @@
+# Interface: Message
+
+## Properties
+
+### content
+
+```ts
+content: string;
+```
+
+***
+
+### createdAt
+
+```ts
+createdAt: Date;
+```
+
+***
+
+### id
+
+```ts
+id: string;
+```
+
+***
+
+### role
+
+```ts
+role: "user" | "assistant" | "system" | "tool";
+```
+
+***
+
+### toolCallId?
+
+```ts
+optional toolCallId: string;
+```
+
+***
+
+### toolCalls?
+
+```ts
+optional toolCalls: ToolCall[];
+```
diff --git a/docs/docs/api/appkit/Interface.PluginToolkitProvider.md b/docs/docs/api/appkit/Interface.PluginToolkitProvider.md
new file mode 100644
index 000000000..a16b878ec
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.PluginToolkitProvider.md
@@ -0,0 +1,25 @@
+# Interface: PluginToolkitProvider
+
+Minimum shape every entry in the [Plugins](TypeAlias.Plugins.md) map must expose. Core
+plugins (analytics, files, genie, lakebase) implement this directly via
+their `.toolkit()` method. The agents plugin and standalone `runAgent`
+synthesize this shape for any registered plugin that doesn't implement
+`.toolkit()` directly (falling back to `getAgentTools()` walking).
+
+## Methods
+
+### toolkit()
+
+```ts
+toolkit(opts?: ToolkitOptions): Record<string, ToolkitEntry>;
+```
+
+#### Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `opts?` | [`ToolkitOptions`](Interface.ToolkitOptions.md) |
+
+#### Returns
+
+`Record`\<`string`, [`ToolkitEntry`](Interface.ToolkitEntry.md)\>
diff --git a/docs/docs/api/appkit/Interface.PromptContext.md b/docs/docs/api/appkit/Interface.PromptContext.md
new file mode 100644
index 000000000..e26ea167d
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.PromptContext.md
@@ -0,0 +1,27 @@
+# Interface: PromptContext
+
+Context passed to `baseSystemPrompt` callbacks.
+
+## Properties
+
+### agentName
+
+```ts
+agentName: string;
+```
+
+***
+
+### pluginNames
+
+```ts
+pluginNames: string[];
+```
+
+***
+
+### toolNames
+
+```ts
+toolNames: string[];
+```
diff --git a/docs/docs/api/appkit/Interface.RegisteredAgent.md b/docs/docs/api/appkit/Interface.RegisteredAgent.md
new file mode 100644
index 000000000..ead127e6e
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.RegisteredAgent.md
@@ -0,0 +1,67 @@
+# Interface: RegisteredAgent
+
+## Properties
+
+### adapter
+
+```ts
+adapter: AgentAdapter;
+```
+
+***
+
+### baseSystemPrompt?
+
+```ts
+optional baseSystemPrompt: BaseSystemPromptOption;
+```
+
+***
+
+### ephemeral?
+
+```ts
+optional ephemeral: boolean;
+```
+
+Mirrors `AgentDefinition.ephemeral` — skip thread persistence.
+
+***
+
+### instructions
+
+```ts
+instructions: string;
+```
+
+***
+
+### maxSteps?
+
+```ts
+optional maxSteps: number;
+```
+
+***
+
+### maxTokens?
+
+```ts
+optional maxTokens: number;
+```
+
+***
+
+### name
+
+```ts
+name: string;
+```
+
+***
+
+### toolIndex
+
+```ts
+toolIndex: Map;
+```
diff --git a/docs/docs/api/appkit/Interface.RunAgentInput.md b/docs/docs/api/appkit/Interface.RunAgentInput.md
new file mode 100644
index 000000000..b17b4a301
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.RunAgentInput.md
@@ -0,0 +1,35 @@
+# Interface: RunAgentInput
+
+## Properties
+
+### messages
+
+```ts
+messages: string | Message[];
+```
+
+Seed messages for the run. Either a single user string or a full message list.
+
+***
+
+### plugins?
+
+```ts
+optional plugins: PluginData[];
+```
+
+Optional plugin list. Required when `def.tools` is the function form
+`(plugins) => Record<string, AgentTool>` and the function dereferences
+any plugins. `runAgent` constructs a fresh instance per plugin and
+dispatches tool calls against it as the service principal (no OBO —
+there is no HTTP request in standalone mode).
+
+***
+
+### signal?
+
+```ts
+optional signal: AbortSignal;
+```
+
+Abort signal for cancellation.
diff --git a/docs/docs/api/appkit/Interface.RunAgentResult.md b/docs/docs/api/appkit/Interface.RunAgentResult.md
new file mode 100644
index 000000000..a9ba258dd
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.RunAgentResult.md
@@ -0,0 +1,21 @@
+# Interface: RunAgentResult
+
+## Properties
+
+### events
+
+```ts
+events: AgentEvent[];
+```
+
+Every event the adapter yielded, in order. Useful for inspection/tests.
+
+***
+
+### text
+
+```ts
+text: string;
+```
+
+Aggregated text output from all `message_delta` events.
diff --git a/docs/docs/api/appkit/Interface.Thread.md b/docs/docs/api/appkit/Interface.Thread.md
new file mode 100644
index 000000000..e9f15fee0
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.Thread.md
@@ -0,0 +1,41 @@
+# Interface: Thread
+
+## Properties
+
+### createdAt
+
+```ts
+createdAt: Date;
+```
+
+***
+
+### id
+
+```ts
+id: string;
+```
+
+***
+
+### messages
+
+```ts
+messages: Message[];
+```
+
+***
+
+### updatedAt
+
+```ts
+updatedAt: Date;
+```
+
+***
+
+### userId
+
+```ts
+userId: string;
+```
diff --git a/docs/docs/api/appkit/Interface.ThreadStore.md b/docs/docs/api/appkit/Interface.ThreadStore.md
new file mode 100644
index 000000000..215b76a2c
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.ThreadStore.md
@@ -0,0 +1,98 @@
+# Interface: ThreadStore
+
+## Methods
+
+### addMessage()
+
+```ts
+addMessage(
+ threadId: string,
+ userId: string,
+message: Message): Promise<void>;
+```
+
+#### Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `threadId` | `string` |
+| `userId` | `string` |
+| `message` | [`Message`](Interface.Message.md) |
+
+#### Returns
+
+`Promise`\<`void`\>
+
+***
+
+### create()
+
+```ts
+create(userId: string): Promise<Thread>;
+```
+
+#### Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `userId` | `string` |
+
+#### Returns
+
+`Promise`\<[`Thread`](Interface.Thread.md)\>
+
+***
+
+### delete()
+
+```ts
+delete(threadId: string, userId: string): Promise<boolean>;
+```
+
+#### Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `threadId` | `string` |
+| `userId` | `string` |
+
+#### Returns
+
+`Promise`\<`boolean`\>
+
+***
+
+### get()
+
+```ts
+get(threadId: string, userId: string): Promise<Thread | null>;
+```
+
+#### Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `threadId` | `string` |
+| `userId` | `string` |
+
+#### Returns
+
+`Promise`\<[`Thread`](Interface.Thread.md) \| `null`\>
+
+***
+
+### list()
+
+```ts
+list(userId: string): Promise<Thread[]>;
+```
+
+#### Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `userId` | `string` |
+
+#### Returns
+
+`Promise`\<[`Thread`](Interface.Thread.md)[]\>
diff --git a/docs/docs/api/appkit/Interface.ToolAnnotations.md b/docs/docs/api/appkit/Interface.ToolAnnotations.md
new file mode 100644
index 000000000..39fbecff6
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.ToolAnnotations.md
@@ -0,0 +1,55 @@
+# Interface: ToolAnnotations
+
+## Properties
+
+### ~~destructive?~~
+
+```ts
+optional destructive: boolean;
+```
+
+#### Deprecated
+
+Prefer [effect](#effect) with value `"destructive"`. Retained
+so existing annotations continue to force the approval gate, and so
+MCP-style consumers that only read `destructive` still see the hint.
+
+***
+
+### effect?
+
+```ts
+optional effect: ToolEffect;
+```
+
+Preferred semantic label. When set, drives both the approval gate (fires
+for `write`/`update`/`destructive`) and the approval-card styling.
+
+***
+
+### idempotent?
+
+```ts
+optional idempotent: boolean;
+```
+
+***
+
+### ~~readOnly?~~
+
+```ts
+optional readOnly: boolean;
+```
+
+#### Deprecated
+
+Prefer [effect](#effect). Retained for backward compatibility
+with tools authored against the original flags and for MCP interop.
+
+***
+
+### requiresUserContext?
+
+```ts
+optional requiresUserContext: boolean;
+```
diff --git a/docs/docs/api/appkit/Interface.ToolConfig.md b/docs/docs/api/appkit/Interface.ToolConfig.md
new file mode 100644
index 000000000..bd35155a1
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.ToolConfig.md
@@ -0,0 +1,90 @@
+# Interface: ToolConfig\<S\>
+
+## Type Parameters
+
+| Type Parameter |
+| ------ |
+| `S` *extends* `z.ZodType` |
+
+## Properties
+
+### annotations?
+
+```ts
+optional annotations: ToolAnnotations;
+```
+
+Behavioural hints forwarded to the resolved tool definition. Prefer
+`effect` (`"read" | "write" | "update" | "destructive"`) — any mutating
+value forces the agents-plugin approval gate before `execute()` runs
+and the client's approval card will colour itself accordingly. Legacy
+`destructive: true` still gates. Before this field was added, annotations
+supplied here were silently dropped.
+
+***
+
+### description
+
+```ts
+description: string;
+```
+
+What the tool does, what it expects, and when the LLM should call it.
+The model reads this verbatim when deciding whether to invoke the tool,
+so write it for an LLM, not for a human reader of your code: spell out
+the inputs, the return shape, and any pre-conditions or side effects.
+
+Required. Earlier versions silently fell back to the tool's name when
+omitted, which surfaced cryptic identifiers like `"get_weather"` as the
+description — the model then had no signal about expected use and
+either skipped the tool or called it speculatively. Making this
+mandatory at the type level forces a real description at authoring
+time instead of debugging a confused agent later.
+
+***
+
+### execute()
+
+```ts
+execute: (args: output<S>) => unknown;
+```
+
+Returning a non-string value is fine: the agent runtime serializes
+the result via `normalizeToolResult` before handing it to the LLM
+(strings pass through; `null` becomes `"null"`; everything else gets
+`JSON.stringify`'d; `undefined` becomes `""`). Return whatever shape
+is most natural for your tool — typically an object — and let the
+runtime handle the wire format.
+
+#### Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `args` | `output`\<`S`\> |
+
+#### Returns
+
+`unknown`
+
+***
+
+### name?
+
+```ts
+optional name: string;
+```
+
+Optional. When the tool is placed in a keyed record (the standard
+`tools: { my_tool: tool({...}) }` form, or the function form
+`tools(plugins) => ({ my_tool: tool({...}) })`), the agents plugin
+overrides the tool's LLM-visible name with the record key. Set
+`name` explicitly only if you're constructing a `FunctionTool`
+outside any keyed-record context — otherwise the record key wins.
+
+***
+
+### schema
+
+```ts
+schema: S;
+```
diff --git a/docs/docs/api/appkit/Interface.ToolEntry.md b/docs/docs/api/appkit/Interface.ToolEntry.md
new file mode 100644
index 000000000..5046be2f5
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.ToolEntry.md
@@ -0,0 +1,82 @@
+# Interface: ToolEntry\<S\>
+
+Single-tool entry for a plugin's internal tool registry.
+
+Plugins collect these into a `Record<string, ToolEntry>` keyed by the tool's
+public name and dispatch via `executeFromRegistry`.
+
+## Type Parameters
+
+| Type Parameter | Default type |
+| ------ | ------ |
+| `S` *extends* `z.ZodType` | `z.ZodType` |
+
+## Properties
+
+### annotations?
+
+```ts
+optional annotations: ToolAnnotations;
+```
+
+***
+
+### autoInheritable?
+
+```ts
+optional autoInheritable: boolean;
+```
+
+Whether this tool is eligible for auto-inheritance into markdown or
+code-defined agents that enable `autoInheritTools`. Defaults to `false`
+(safe-by-default) — plugin authors must explicitly opt a tool in if they
+consider it safe enough to appear in every agent's tool record without an
+explicit `tools:` declaration. Destructive or privilege-sensitive tools
+should leave this unset so that they only reach agents that wire them
+explicitly (via `tools:` object/function form, markdown `plugin:NAME`
+entries in the unified `tools:` list, or
+`plugins.<name>.toolkit({ only: [...] })`).
+
+***
+
+### description
+
+```ts
+description: string;
+```
+
+***
+
+### execute()
+
+```ts
+execute: (args: output<S>, signal?: AbortSignal) => unknown;
+```
+
+Callback the agents plugin invokes after Zod validation succeeds.
+
+Named `execute` to match the public `tool({ execute })` form — both the
+agent-author surface and the plugin-author surface now spell their
+callback the same way. `args` is the inferred Zod output (so `S extends
+z.ZodType` flows through and `args` is fully typed). `signal` is the
+per-run AbortSignal: forward it to any awaited I/O so cancellation
+actually unwinds the call (analytics and lakebase both do this).
+
+#### Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `args` | `output`\<`S`\> |
+| `signal?` | `AbortSignal` |
+
+#### Returns
+
+`unknown`
+
+***
+
+### schema
+
+```ts
+schema: S;
+```
diff --git a/docs/docs/api/appkit/Interface.ToolProvider.md b/docs/docs/api/appkit/Interface.ToolProvider.md
new file mode 100644
index 000000000..9c8851a06
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.ToolProvider.md
@@ -0,0 +1,36 @@
+# Interface: ToolProvider
+
+## Methods
+
+### executeAgentTool()
+
+```ts
+executeAgentTool(
+ name: string,
+ args: unknown,
+signal?: AbortSignal): Promise<unknown>;
+```
+
+#### Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `name` | `string` |
+| `args` | `unknown` |
+| `signal?` | `AbortSignal` |
+
+#### Returns
+
+`Promise`\<`unknown`\>
+
+***
+
+### getAgentTools()
+
+```ts
+getAgentTools(): AgentToolDefinition[];
+```
+
+#### Returns
+
+[`AgentToolDefinition`](Interface.AgentToolDefinition.md)[]
diff --git a/docs/docs/api/appkit/Interface.ToolkitEntry.md b/docs/docs/api/appkit/Interface.ToolkitEntry.md
new file mode 100644
index 000000000..3eec2be25
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.ToolkitEntry.md
@@ -0,0 +1,59 @@
+# Interface: ToolkitEntry
+
+A tool reference produced by a plugin's `.toolkit()` call. The agents plugin
+recognizes the `__toolkitRef` brand and dispatches tool invocations through
+`PluginContext.executeTool(req, pluginName, localName, ...)`, preserving
+OBO (asUser) and telemetry spans.
+
+## Properties
+
+### \_\_toolkitRef
+
+```ts
+readonly __toolkitRef: true;
+```
+
+***
+
+### annotations?
+
+```ts
+optional annotations: ToolAnnotations;
+```
+
+***
+
+### autoInheritable?
+
+```ts
+optional autoInheritable: boolean;
+```
+
+Whether this tool is eligible for `autoInheritTools` spreading. Mirrors
+[ToolEntry.autoInheritable](Interface.ToolEntry.md#autoinheritable) from the source registry so the agents
+plugin can filter auto-inherited tools without re-walking the provider's
+internal registry.
+
+***
+
+### def
+
+```ts
+def: AgentToolDefinition;
+```
+
+***
+
+### localName
+
+```ts
+localName: string;
+```
+
+***
+
+### pluginName
+
+```ts
+pluginName: string;
+```
diff --git a/docs/docs/api/appkit/Interface.ToolkitOptions.md b/docs/docs/api/appkit/Interface.ToolkitOptions.md
new file mode 100644
index 000000000..1beb22b0f
--- /dev/null
+++ b/docs/docs/api/appkit/Interface.ToolkitOptions.md
@@ -0,0 +1,41 @@
+# Interface: ToolkitOptions
+
+## Properties
+
+### except?
+
+```ts
+optional except: string[];
+```
+
+Exclude tools whose local name matches one of these.
+
+***
+
+### only?
+
+```ts
+optional only: string[];
+```
+
+Only include tools whose local name matches one of these.
+
+***
+
+### prefix?
+
+```ts
+optional prefix: string;
+```
+
+Key prefix to prepend to each tool's local name. Defaults to `${pluginName}.`.
+
+***
+
+### rename?
+
+```ts
+optional rename: Record<string, string>;
+```
+
+Remap specific local names to different keys (applied after prefix).
diff --git a/docs/docs/api/appkit/TypeAlias.AgentEvent.md b/docs/docs/api/appkit/TypeAlias.AgentEvent.md
new file mode 100644
index 000000000..de9226a2b
--- /dev/null
+++ b/docs/docs/api/appkit/TypeAlias.AgentEvent.md
@@ -0,0 +1,270 @@
+# Type Alias: AgentEvent
+
+```ts
+type AgentEvent =
+ | {
+ content: string;
+ type: "message_delta";
+}
+ | {
+ content: string;
+ type: "message";
+}
+ | {
+ args: unknown;
+ callId: string;
+ name: string;
+ type: "tool_call";
+}
+ | {
+ callId: string;
+ error?: string;
+ result: unknown;
+ type: "tool_result";
+}
+ | {
+ content: string;
+ type: "thinking";
+}
+ | {
+ error?: string;
+ status: "running" | "waiting" | "complete" | "error";
+ type: "status";
+}
+ | {
+ data: Record;
+ type: "metadata";
+}
+ | {
+ annotations?: ToolAnnotations;
+ approvalId: string;
+ args: unknown;
+ streamId: string;
+ toolName: string;
+ type: "approval_pending";
+};
+```
+
+## Type Declaration
+
+```ts
+{
+ content: string;
+ type: "message_delta";
+}
+```
+
+### content
+
+```ts
+content: string;
+```
+
+### type
+
+```ts
+type: "message_delta";
+```
+
+```ts
+{
+ content: string;
+ type: "message";
+}
+```
+
+### content
+
+```ts
+content: string;
+```
+
+### type
+
+```ts
+type: "message";
+```
+
+```ts
+{
+ args: unknown;
+ callId: string;
+ name: string;
+ type: "tool_call";
+}
+```
+
+### args
+
+```ts
+args: unknown;
+```
+
+### callId
+
+```ts
+callId: string;
+```
+
+### name
+
+```ts
+name: string;
+```
+
+### type
+
+```ts
+type: "tool_call";
+```
+
+```ts
+{
+ callId: string;
+ error?: string;
+ result: unknown;
+ type: "tool_result";
+}
+```
+
+### callId
+
+```ts
+callId: string;
+```
+
+### error?
+
+```ts
+optional error: string;
+```
+
+### result
+
+```ts
+result: unknown;
+```
+
+### type
+
+```ts
+type: "tool_result";
+```
+
+```ts
+{
+ content: string;
+ type: "thinking";
+}
+```
+
+### content
+
+```ts
+content: string;
+```
+
+### type
+
+```ts
+type: "thinking";
+```
+
+```ts
+{
+ error?: string;
+ status: "running" | "waiting" | "complete" | "error";
+ type: "status";
+}
+```
+
+### error?
+
+```ts
+optional error: string;
+```
+
+### status
+
+```ts
+status: "running" | "waiting" | "complete" | "error";
+```
+
+### type
+
+```ts
+type: "status";
+```
+
+```ts
+{
+ data: Record;
+ type: "metadata";
+}
+```
+
+### data
+
+```ts
+data: Record;
+```
+
+### type
+
+```ts
+type: "metadata";
+```
+
+```ts
+{
+ annotations?: ToolAnnotations;
+ approvalId: string;
+ args: unknown;
+ streamId: string;
+ toolName: string;
+ type: "approval_pending";
+}
+```
+
+### annotations?
+
+```ts
+optional annotations: ToolAnnotations;
+```
+
+### approvalId
+
+```ts
+approvalId: string;
+```
+
+### args
+
+```ts
+args: unknown;
+```
+
+### streamId
+
+```ts
+streamId: string;
+```
+
+### toolName
+
+```ts
+toolName: string;
+```
+
+### type
+
+```ts
+type: "approval_pending";
+```
+
+Emitted by the agents plugin (not adapters) when a mutating tool call
+is awaiting human approval — fires for tools annotated with
+`effect: "write" | "update" | "destructive"` (preferred) or the
+legacy `destructive: true` boolean. Clients should render an approval
+prompt and POST to `/chat/approve` with the matching `approvalId` and
+a `decision` of `approve` or `deny`.
diff --git a/docs/docs/api/appkit/TypeAlias.AgentTool.md b/docs/docs/api/appkit/TypeAlias.AgentTool.md
new file mode 100644
index 000000000..e165cec66
--- /dev/null
+++ b/docs/docs/api/appkit/TypeAlias.AgentTool.md
@@ -0,0 +1,12 @@
+# Type Alias: AgentTool
+
+```ts
+type AgentTool =
+ | FunctionTool
+ | HostedTool
+ | ToolkitEntry;
+```
+
+Any tool an agent can invoke: inline function tools (`tool()`), hosted MCP
+tools (`mcpServer()` / raw hosted), or toolkit references from plugins
+(`analytics().toolkit()`).
diff --git a/docs/docs/api/appkit/TypeAlias.AgentTools.md b/docs/docs/api/appkit/TypeAlias.AgentTools.md
new file mode 100644
index 000000000..80a78bf59
--- /dev/null
+++ b/docs/docs/api/appkit/TypeAlias.AgentTools.md
@@ -0,0 +1,8 @@
+# Type Alias: AgentTools
+
+```ts
+type AgentTools = Record<string, AgentTool>;
+```
+
+Per-agent tool record. String keys map to inline tools, toolkit entries,
+hosted tools, etc.
diff --git a/docs/docs/api/appkit/TypeAlias.AgentToolsFn.md b/docs/docs/api/appkit/TypeAlias.AgentToolsFn.md
new file mode 100644
index 000000000..9b2a6d8d7
--- /dev/null
+++ b/docs/docs/api/appkit/TypeAlias.AgentToolsFn.md
@@ -0,0 +1,23 @@
+# Type Alias: AgentToolsFn()
+
+```ts
+type AgentToolsFn = (plugins: Plugins) => AgentTools;
+```
+
+Function form of `AgentDefinition.tools`. Receives the typed
+[Plugins](TypeAlias.Plugins.md) map and returns a tool record. Invoked exactly once at
+setup (or once per `runAgent` call in standalone mode); the result is
+cached as the agent's resolved tool record.
+
+Use the function form when an agent needs tools from registered plugins.
+The bare object form is fine when an agent only uses inline tools.
+
+## Parameters
+
+| Parameter | Type |
+| ------ | ------ |
+| `plugins` | [`Plugins`](TypeAlias.Plugins.md) |
+
+## Returns
+
+[`AgentTools`](TypeAlias.AgentTools.md)
diff --git a/docs/docs/api/appkit/TypeAlias.BaseSystemPromptOption.md b/docs/docs/api/appkit/TypeAlias.BaseSystemPromptOption.md
new file mode 100644
index 000000000..c59226619
--- /dev/null
+++ b/docs/docs/api/appkit/TypeAlias.BaseSystemPromptOption.md
@@ -0,0 +1,8 @@
+# Type Alias: BaseSystemPromptOption
+
+```ts
+type BaseSystemPromptOption =
+ | false
+ | string
+ | (ctx: PromptContext) => string;
+```
diff --git a/docs/docs/api/appkit/TypeAlias.HostedTool.md b/docs/docs/api/appkit/TypeAlias.HostedTool.md
new file mode 100644
index 000000000..433c0ac8a
--- /dev/null
+++ b/docs/docs/api/appkit/TypeAlias.HostedTool.md
@@ -0,0 +1,9 @@
+# Type Alias: HostedTool
+
+```ts
+type HostedTool =
+ | GenieTool
+ | VectorSearchIndexTool
+ | CustomMcpServerTool
+ | ExternalMcpServerTool;
+```
diff --git a/docs/docs/api/appkit/TypeAlias.Plugins.md b/docs/docs/api/appkit/TypeAlias.Plugins.md
new file mode 100644
index 000000000..e70ed4bcc
--- /dev/null
+++ b/docs/docs/api/appkit/TypeAlias.Plugins.md
@@ -0,0 +1,29 @@
+# Type Alias: Plugins
+
+```ts
+type Plugins = Record<string, PluginToolkitProvider>;
+```
+
+Plugin map passed to the function form of [AgentDefinition.tools](Interface.AgentDefinition.md#tools).
+Each entry exposes a `.toolkit(opts?)` method that returns a record of
+[ToolkitEntry](Interface.ToolkitEntry.md) markers ready to be spread into a tool record.
+
+AppKit does not statically know which plugins the surrounding
+`createApp` will register, so this is a plain string-keyed record.
+Refer to plugins by the name used in `createApp({ plugins: [...] })`;
+unknown names resolve to `undefined` at runtime.
+
+## Example
+
+```ts
+const support = createAgent({
+ instructions: "...",
+ tools(plugins) {
+ return {
+ get_weather: tool({ ... }),
+ ...plugins.analytics.toolkit(),
+ ...plugins.files.toolkit({ only: ["uploads.read"] }),
+ };
+ },
+});
+```
diff --git a/docs/docs/api/appkit/TypeAlias.ResolvedToolEntry.md b/docs/docs/api/appkit/TypeAlias.ResolvedToolEntry.md
new file mode 100644
index 000000000..e97b3ef97
--- /dev/null
+++ b/docs/docs/api/appkit/TypeAlias.ResolvedToolEntry.md
@@ -0,0 +1,28 @@
+# Type Alias: ResolvedToolEntry
+
+```ts
+type ResolvedToolEntry =
+ | {
+ def: AgentToolDefinition;
+ localName: string;
+ pluginName: string;
+ source: "toolkit";
+}
+ | {
+ def: AgentToolDefinition;
+ functionTool: FunctionTool;
+ source: "function";
+}
+ | {
+ def: AgentToolDefinition;
+ mcpToolName: string;
+ source: "mcp";
+}
+ | {
+ agentName: string;
+ def: AgentToolDefinition;
+ source: "subagent";
+};
+```
+
+Internal tool-index entry after a tool record has been resolved to a dispatchable form.
diff --git a/docs/docs/api/appkit/TypeAlias.ToolRegistry.md b/docs/docs/api/appkit/TypeAlias.ToolRegistry.md
new file mode 100644
index 000000000..dcee758a0
--- /dev/null
+++ b/docs/docs/api/appkit/TypeAlias.ToolRegistry.md
@@ -0,0 +1,5 @@
+# Type Alias: ToolRegistry
+
+```ts
+type ToolRegistry = Record<string, ToolEntry>;
+```
diff --git a/docs/docs/api/appkit/Variable.agents.md b/docs/docs/api/appkit/Variable.agents.md
new file mode 100644
index 000000000..b5a4ec038
--- /dev/null
+++ b/docs/docs/api/appkit/Variable.agents.md
@@ -0,0 +1,19 @@
+# Variable: agents
+
+```ts
+const agents: ToPlugin;
+```
+
+Plugin factory for the agents plugin. Reads `config/agents/*.md` by default,
+resolves toolkits/tools from registered plugins, exposes `appkit.agents.*`
+runtime API and mounts `/invocations`.
+
+## Example
+
+```ts
+import { agents, analytics, createApp, server } from "@databricks/appkit";
+
+await createApp({
+ plugins: [server(), analytics(), agents()],
+});
+```
diff --git a/docs/docs/api/appkit/index.md b/docs/docs/api/appkit/index.md
index 5a21e935f..66b826495 100644
--- a/docs/docs/api/appkit/index.md
+++ b/docs/docs/api/appkit/index.md
@@ -1,7 +1,7 @@
# @databricks/appkit
-Core library for building Databricks applications with type-safe SQL queries,
-plugin architecture, and React integration.
+Documentation merge entry for Typedoc — combines the stable `@databricks/appkit`
+surface with `@databricks/appkit/beta`. Not meant for application imports.
## Enumerations
@@ -15,9 +15,11 @@ plugin architecture, and React integration.
| Class | Description |
| ------ | ------ |
| [AppKitError](Class.AppKitError.md) | Base error class for all AppKit errors. Provides a consistent structure for error handling across the framework. |
+| [AppKitMcpClient](Class.AppKitMcpClient.md) | Lightweight MCP client for Databricks-hosted MCP servers. |
| [AuthenticationError](Class.AuthenticationError.md) | Error thrown when authentication fails. Use for missing tokens, invalid credentials, or authorization failures. |
| [ConfigurationError](Class.ConfigurationError.md) | Error thrown when configuration is missing or invalid. Use for missing environment variables, invalid settings, or setup issues. |
| [ConnectionError](Class.ConnectionError.md) | Error thrown when a connection or network operation fails. Use for database pool errors, API failures, timeouts, etc. |
+| [DatabricksAdapter](Class.DatabricksAdapter.md) | Adapter that talks directly to Databricks Model Serving `/invocations` endpoint. |
| [ExecutionError](Class.ExecutionError.md) | Error thrown when an operation execution fails. Use for statement failures, canceled operations, or unexpected states. |
| [InitializationError](Class.InitializationError.md) | Error thrown when a service or component is not properly initialized. Use when accessing services before they are ready. |
| [Plugin](Class.Plugin.md) | Base abstract class for creating AppKit plugins. |
@@ -31,12 +33,20 @@ plugin architecture, and React integration.
| Interface | Description |
| ------ | ------ |
+| [AgentAdapter](Interface.AgentAdapter.md) | - |
+| [AgentDefinition](Interface.AgentDefinition.md) | - |
+| [AgentInput](Interface.AgentInput.md) | - |
+| [AgentRunContext](Interface.AgentRunContext.md) | - |
+| [AgentsPluginConfig](Interface.AgentsPluginConfig.md) | Base configuration interface for AppKit plugins |
+| [AgentToolDefinition](Interface.AgentToolDefinition.md) | - |
+| [AutoInheritToolsConfig](Interface.AutoInheritToolsConfig.md) | Auto-inherit configuration. When enabled for a given agent origin, agents with no explicit `tools:` declaration receive every registered ToolProvider plugin tool whose author marked `autoInheritable: true`. Tools without that flag — destructive, state-mutating, or privilege-sensitive — never spread automatically and must be wired via `tools:` (object or function form in code, `plugin:NAME` entries in markdown frontmatter). |
| [BasePluginConfig](Interface.BasePluginConfig.md) | Base configuration interface for AppKit plugins |
| [CacheConfig](Interface.CacheConfig.md) | Configuration for the CacheInterceptor. Controls TTL, size limits, storage backend, and probabilistic cleanup. |
| [DatabaseCredential](Interface.DatabaseCredential.md) | Database credentials with OAuth token for Postgres connection |
| [EndpointConfig](Interface.EndpointConfig.md) | - |
| [FilePolicyUser](Interface.FilePolicyUser.md) | Minimal user identity passed to the policy function. |
| [FileResource](Interface.FileResource.md) | Describes the file or directory being acted upon. |
+| [FunctionTool](Interface.FunctionTool.md) | - |
| [GenerateDatabaseCredentialRequest](Interface.GenerateDatabaseCredentialRequest.md) | Request parameters for generating database OAuth credentials |
| [IJobsConfig](Interface.IJobsConfig.md) | Configuration for the Jobs plugin. |
| [ITelemetry](Interface.ITelemetry.md) | Plugin-facing interface for OpenTelemetry instrumentation. Provides a thin abstraction over OpenTelemetry APIs for plugins. |
@@ -44,38 +54,63 @@ plugin architecture, and React integration.
| [JobConfig](Interface.JobConfig.md) | Per-job configuration options. |
| [JobsConnectorConfig](Interface.JobsConnectorConfig.md) | - |
| [LakebasePoolConfig](Interface.LakebasePoolConfig.md) | Configuration for creating a Lakebase connection pool |
+| [McpConnectAllResult](Interface.McpConnectAllResult.md) | Per-endpoint outcome of [AppKitMcpClient.connectAll](Class.AppKitMcpClient.md#connectall). Callers (the agents plugin in particular) use the split to warn at startup when some MCP servers are unreachable without aborting boot for the rest. |
+| [Message](Interface.Message.md) | - |
| [PluginManifest](Interface.PluginManifest.md) | Plugin manifest that declares metadata and resource requirements. Attached to plugin classes as a static property. Extends the shared PluginManifest with strict resource types. |
+| [PluginToolkitProvider](Interface.PluginToolkitProvider.md) | Minimum shape every entry in the [Plugins](TypeAlias.Plugins.md) map must expose. Core plugins (analytics, files, genie, lakebase) implement this directly via their `.toolkit()` method. The agents plugin and standalone `runAgent` synthesize this shape for any registered plugin that doesn't implement `.toolkit()` directly (falling back to `getAgentTools()` walking). |
+| [PromptContext](Interface.PromptContext.md) | Context passed to `baseSystemPrompt` callbacks. |
+| [RegisteredAgent](Interface.RegisteredAgent.md) | - |
| [RequestedClaims](Interface.RequestedClaims.md) | Optional claims for fine-grained Unity Catalog table permissions When specified, the returned token will be scoped to only the requested tables |
| [RequestedResource](Interface.RequestedResource.md) | Resource to request permissions for in Unity Catalog |
| [ResourceEntry](Interface.ResourceEntry.md) | Internal representation of a resource in the registry. Extends ResourceRequirement with resolution state and plugin ownership. |
| [ResourceFieldEntry](Interface.ResourceFieldEntry.md) | Defines a single field for a resource. Each field has its own environment variable and optional description. Single-value types use one key (e.g. id); multi-value types (database, secret) use multiple (e.g. instance_name, database_name or scope, key). |
| [ResourceRequirement](Interface.ResourceRequirement.md) | Declares a resource requirement for a plugin. Can be defined statically in a manifest or dynamically via getResourceRequirements(). Narrows the generated base: type → ResourceType enum, permission → ResourcePermission union. |
+| [RunAgentInput](Interface.RunAgentInput.md) | - |
+| [RunAgentResult](Interface.RunAgentResult.md) | - |
| [ServingEndpointEntry](Interface.ServingEndpointEntry.md) | Shape of a single registry entry. |
| [ServingEndpointRegistry](Interface.ServingEndpointRegistry.md) | Registry interface for serving endpoint type generation. Empty by default — augmented by the Vite type generator's `.d.ts` output via module augmentation. When populated, provides autocomplete for alias names and typed request/response/chunk per endpoint. |
| [StreamExecutionSettings](Interface.StreamExecutionSettings.md) | Execution settings for streaming endpoints. Extends PluginExecutionSettings with SSE stream configuration. |
| [TelemetryConfig](Interface.TelemetryConfig.md) | OpenTelemetry configuration for AppKit applications |
+| [Thread](Interface.Thread.md) | - |
+| [ThreadStore](Interface.ThreadStore.md) | - |
+| [ToolAnnotations](Interface.ToolAnnotations.md) | - |
+| [ToolConfig](Interface.ToolConfig.md) | - |
+| [ToolEntry](Interface.ToolEntry.md) | Single-tool entry for a plugin's internal tool registry. |
+| [ToolkitEntry](Interface.ToolkitEntry.md) | A tool reference produced by a plugin's `.toolkit()` call. The agents plugin recognizes the `__toolkitRef` brand and dispatches tool invocations through `PluginContext.executeTool(req, pluginName, localName, ...)`, preserving OBO (asUser) and telemetry spans. |
+| [ToolkitOptions](Interface.ToolkitOptions.md) | - |
+| [ToolProvider](Interface.ToolProvider.md) | - |
| [ValidationResult](Interface.ValidationResult.md) | Result of validating all registered resources against the environment. |
## Type Aliases
| Type Alias | Description |
| ------ | ------ |
+| [AgentEvent](TypeAlias.AgentEvent.md) | - |
+| [AgentTool](TypeAlias.AgentTool.md) | Any tool an agent can invoke: inline function tools (`tool()`), hosted MCP tools (`mcpServer()` / raw hosted), or toolkit references from plugins (`analytics().toolkit()`). |
+| [AgentTools](TypeAlias.AgentTools.md) | Per-agent tool record. String keys map to inline tools, toolkit entries, hosted tools, etc. |
+| [AgentToolsFn](TypeAlias.AgentToolsFn.md) | Function form of `AgentDefinition.tools`. Receives the typed [Plugins](TypeAlias.Plugins.md) map and returns a tool record. Invoked exactly once at setup (or once per `runAgent` call in standalone mode); the result is cached as the agent's resolved tool record. |
+| [BaseSystemPromptOption](TypeAlias.BaseSystemPromptOption.md) | - |
| [ConfigSchema](TypeAlias.ConfigSchema.md) | Configuration schema definition for plugin config. Re-exported from the standard JSON Schema Draft 7 types. |
| [ExecutionResult](TypeAlias.ExecutionResult.md) | Discriminated union for plugin execution results. |
| [FileAction](TypeAlias.FileAction.md) | Every action the files plugin can perform. |
| [FilePolicy](TypeAlias.FilePolicy.md) | A policy function that decides whether `user` may perform `action` on `resource`. Return `true` to allow, `false` to deny. |
+| [HostedTool](TypeAlias.HostedTool.md) | - |
| [IAppRouter](TypeAlias.IAppRouter.md) | Express router type for plugin route registration |
| [JobHandle](TypeAlias.JobHandle.md) | Job handle returned by `appkit.jobs("etl")`. Supports OBO access via `.asUser(req)`. |
| [JobsExport](TypeAlias.JobsExport.md) | Public API shape of the jobs plugin. Callable to select a job by key. |
| [PluginData](TypeAlias.PluginData.md) | Tuple of plugin class, config, and name. Created by `toPlugin()` and passed to `createApp()`. |
+| [Plugins](TypeAlias.Plugins.md) | Plugin map passed to the function form of [AgentDefinition.tools](Interface.AgentDefinition.md#tools). Each entry exposes a `.toolkit(opts?)` method that returns a record of [ToolkitEntry](Interface.ToolkitEntry.md) markers ready to be spread into a tool record. |
+| [ResolvedToolEntry](TypeAlias.ResolvedToolEntry.md) | Internal tool-index entry after a tool record has been resolved to a dispatchable form. |
| [ResourcePermission](TypeAlias.ResourcePermission.md) | Union of all possible permission levels across all resource types. |
| [ServingFactory](TypeAlias.ServingFactory.md) | Factory function returned by `AppKit.serving`. |
+| [ToolRegistry](TypeAlias.ToolRegistry.md) | - |
| [ToPlugin](TypeAlias.ToPlugin.md) | Factory function type returned by `toPlugin()`. Accepts optional config and returns a PluginData tuple. |
## Variables
| Variable | Description |
| ------ | ------ |
+| [agents](Variable.agents.md) | Plugin factory for the agents plugin. Reads `config/agents/*.md` by default, resolves toolkits/tools from registered plugins, exposes `appkit.agents.*` runtime API and mounts `/invocations`. |
| [READ\_ACTIONS](Variable.READ_ACTIONS.md) | Actions that only read data. |
| [sql](Variable.sql.md) | SQL helper namespace |
| [WRITE\_ACTIONS](Variable.WRITE_ACTIONS.md) | Actions that mutate data. |
@@ -84,12 +119,17 @@ plugin architecture, and React integration.
| Function | Description |
| ------ | ------ |
+| [agentIdFromMarkdownPath](Function.agentIdFromMarkdownPath.md) | Derives the logical agent id from a markdown path. When the file is named `agent.md`, the id is the parent directory name (folder-based layout); otherwise the id is the file stem (e.g. legacy single-file paths). |
| [appKitServingTypesPlugin](Function.appKitServingTypesPlugin.md) | Vite plugin to generate TypeScript types for AppKit serving endpoints. Fetches OpenAPI schemas from Databricks and generates a .d.ts with ServingEndpointRegistry module augmentation. |
| [appKitTypesPlugin](Function.appKitTypesPlugin.md) | Vite plugin to generate types for AppKit queries. Calls generateFromEntryPoint under the hood. |
+| [createAgent](Function.createAgent.md) | Pure factory for agent definitions. Returns the passed-in definition after cycle-detecting the sub-agent graph. Accepts the full `AgentDefinition` shape and is safe to call at module top-level. |
| [createApp](Function.createApp.md) | Bootstraps AppKit with the provided configuration. |
| [createLakebasePool](Function.createLakebasePool.md) | Create a Lakebase pool with appkit's logger integration. Telemetry automatically uses appkit's OpenTelemetry configuration via global registry. |
+| [defineTool](Function.defineTool.md) | Defines a single tool entry for a plugin's internal registry. |
+| [executeFromRegistry](Function.executeFromRegistry.md) | Validates tool-call arguments against the entry's schema and invokes its handler. On validation failure, returns an LLM-friendly error string (matching the behavior of `tool()`) rather than throwing, so the model can self-correct on its next turn. |
| [extractServingEndpoints](Function.extractServingEndpoints.md) | Extract serving endpoint config from a server file by AST-parsing it. Looks for `serving({ endpoints: { alias: { env: "..." }, ... } })` calls and extracts the endpoint alias names and their environment variable mappings. |
| [findServerFile](Function.findServerFile.md) | Find the server entry file by checking candidate paths in order. |
+| [functionToolToDefinition](Function.functionToolToDefinition.md) | - |
| [generateDatabaseCredential](Function.generateDatabaseCredential.md) | Generate OAuth credentials for Postgres database connection using the proper Postgres API. |
| [getExecutionContext](Function.getExecutionContext.md) | Get the current execution context. |
| [getLakebaseOrmConfig](Function.getLakebaseOrmConfig.md) | Get Lakebase connection configuration for ORMs that don't accept pg.Pool directly. |
@@ -98,4 +138,15 @@ plugin architecture, and React integration.
| [getResourceRequirements](Function.getResourceRequirements.md) | Gets the resource requirements from a plugin's manifest. |
| [getUsernameWithApiLookup](Function.getUsernameWithApiLookup.md) | Resolves the PostgreSQL username for a Lakebase connection. |
| [getWorkspaceClient](Function.getWorkspaceClient.md) | Get workspace client from config or SDK default auth chain |
+| [isFunctionTool](Function.isFunctionTool.md) | - |
+| [isHostedTool](Function.isHostedTool.md) | - |
| [isSQLTypeMarker](Function.isSQLTypeMarker.md) | Type guard to check if a value is a SQL type marker |
+| [isToolkitEntry](Function.isToolkitEntry.md) | Type guard for `ToolkitEntry` — used by the agents plugin to differentiate toolkit references from inline tools in a mixed `tools` record. |
+| [loadAgentFromFile](Function.loadAgentFromFile.md) | Loads a single markdown agent file and resolves its frontmatter against registered plugin toolkits + ambient tool library. |
+| [loadAgentsFromDir](Function.loadAgentsFromDir.md) | Scans a directory for one subdirectory per agent, each containing `agent.md` (frontmatter + body). Produces an `AgentDefinition` record keyed by agent id (folder name). Throws on frontmatter errors or unresolved references. Returns an empty map if the directory does not exist. |
+| [mcpServer](Function.mcpServer.md) | Factory for declaring a custom MCP server tool. |
+| [parseTextToolCalls](Function.parseTextToolCalls.md) | Parses text-based tool calls from model output. |
+| [resolveHostedTools](Function.resolveHostedTools.md) | - |
+| [runAgent](Function.runAgent.md) | Standalone agent execution without `createApp`. Resolves the adapter, binds inline tools, and drives the adapter's `run()` loop to completion. |
+| [tool](Function.tool.md) | Factory for defining function tools with Zod schemas. |
+| [toolsFromRegistry](Function.toolsFromRegistry.md) | Produces the `AgentToolDefinition[]` a ToolProvider exposes to the LLM, deriving `parameters` JSON Schema from each entry's Zod schema. |
diff --git a/docs/docs/api/appkit/typedoc-sidebar.ts b/docs/docs/api/appkit/typedoc-sidebar.ts
index 162c3e68b..fd91d60ce 100644
--- a/docs/docs/api/appkit/typedoc-sidebar.ts
+++ b/docs/docs/api/appkit/typedoc-sidebar.ts
@@ -26,6 +26,11 @@ const typedocSidebar: SidebarsConfig = {
id: "api/appkit/Class.AppKitError",
label: "AppKitError"
},
+ {
+ type: "doc",
+ id: "api/appkit/Class.AppKitMcpClient",
+ label: "AppKitMcpClient"
+ },
{
type: "doc",
id: "api/appkit/Class.AuthenticationError",
@@ -41,6 +46,11 @@ const typedocSidebar: SidebarsConfig = {
id: "api/appkit/Class.ConnectionError",
label: "ConnectionError"
},
+ {
+ type: "doc",
+ id: "api/appkit/Class.DatabricksAdapter",
+ label: "DatabricksAdapter"
+ },
{
type: "doc",
id: "api/appkit/Class.ExecutionError",
@@ -87,6 +97,41 @@ const typedocSidebar: SidebarsConfig = {
type: "category",
label: "Interfaces",
items: [
+ {
+ type: "doc",
+ id: "api/appkit/Interface.AgentAdapter",
+ label: "AgentAdapter"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Interface.AgentDefinition",
+ label: "AgentDefinition"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Interface.AgentInput",
+ label: "AgentInput"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Interface.AgentRunContext",
+ label: "AgentRunContext"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Interface.AgentsPluginConfig",
+ label: "AgentsPluginConfig"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Interface.AgentToolDefinition",
+ label: "AgentToolDefinition"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Interface.AutoInheritToolsConfig",
+ label: "AutoInheritToolsConfig"
+ },
{
type: "doc",
id: "api/appkit/Interface.BasePluginConfig",
@@ -117,6 +162,11 @@ const typedocSidebar: SidebarsConfig = {
id: "api/appkit/Interface.FileResource",
label: "FileResource"
},
+ {
+ type: "doc",
+ id: "api/appkit/Interface.FunctionTool",
+ label: "FunctionTool"
+ },
{
type: "doc",
id: "api/appkit/Interface.GenerateDatabaseCredentialRequest",
@@ -152,11 +202,36 @@ const typedocSidebar: SidebarsConfig = {
id: "api/appkit/Interface.LakebasePoolConfig",
label: "LakebasePoolConfig"
},
+ {
+ type: "doc",
+ id: "api/appkit/Interface.McpConnectAllResult",
+ label: "McpConnectAllResult"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Interface.Message",
+ label: "Message"
+ },
{
type: "doc",
id: "api/appkit/Interface.PluginManifest",
label: "PluginManifest"
},
+ {
+ type: "doc",
+ id: "api/appkit/Interface.PluginToolkitProvider",
+ label: "PluginToolkitProvider"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Interface.PromptContext",
+ label: "PromptContext"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Interface.RegisteredAgent",
+ label: "RegisteredAgent"
+ },
{
type: "doc",
id: "api/appkit/Interface.RequestedClaims",
@@ -182,6 +257,16 @@ const typedocSidebar: SidebarsConfig = {
id: "api/appkit/Interface.ResourceRequirement",
label: "ResourceRequirement"
},
+ {
+ type: "doc",
+ id: "api/appkit/Interface.RunAgentInput",
+ label: "RunAgentInput"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Interface.RunAgentResult",
+ label: "RunAgentResult"
+ },
{
type: "doc",
id: "api/appkit/Interface.ServingEndpointEntry",
@@ -202,6 +287,46 @@ const typedocSidebar: SidebarsConfig = {
id: "api/appkit/Interface.TelemetryConfig",
label: "TelemetryConfig"
},
+ {
+ type: "doc",
+ id: "api/appkit/Interface.Thread",
+ label: "Thread"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Interface.ThreadStore",
+ label: "ThreadStore"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Interface.ToolAnnotations",
+ label: "ToolAnnotations"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Interface.ToolConfig",
+ label: "ToolConfig"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Interface.ToolEntry",
+ label: "ToolEntry"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Interface.ToolkitEntry",
+ label: "ToolkitEntry"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Interface.ToolkitOptions",
+ label: "ToolkitOptions"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Interface.ToolProvider",
+ label: "ToolProvider"
+ },
{
type: "doc",
id: "api/appkit/Interface.ValidationResult",
@@ -213,6 +338,31 @@ const typedocSidebar: SidebarsConfig = {
type: "category",
label: "Type Aliases",
items: [
+ {
+ type: "doc",
+ id: "api/appkit/TypeAlias.AgentEvent",
+ label: "AgentEvent"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/TypeAlias.AgentTool",
+ label: "AgentTool"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/TypeAlias.AgentTools",
+ label: "AgentTools"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/TypeAlias.AgentToolsFn",
+ label: "AgentToolsFn"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/TypeAlias.BaseSystemPromptOption",
+ label: "BaseSystemPromptOption"
+ },
{
type: "doc",
id: "api/appkit/TypeAlias.ConfigSchema",
@@ -233,6 +383,11 @@ const typedocSidebar: SidebarsConfig = {
id: "api/appkit/TypeAlias.FilePolicy",
label: "FilePolicy"
},
+ {
+ type: "doc",
+ id: "api/appkit/TypeAlias.HostedTool",
+ label: "HostedTool"
+ },
{
type: "doc",
id: "api/appkit/TypeAlias.IAppRouter",
@@ -253,6 +408,16 @@ const typedocSidebar: SidebarsConfig = {
id: "api/appkit/TypeAlias.PluginData",
label: "PluginData"
},
+ {
+ type: "doc",
+ id: "api/appkit/TypeAlias.Plugins",
+ label: "Plugins"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/TypeAlias.ResolvedToolEntry",
+ label: "ResolvedToolEntry"
+ },
{
type: "doc",
id: "api/appkit/TypeAlias.ResourcePermission",
@@ -263,6 +428,11 @@ const typedocSidebar: SidebarsConfig = {
id: "api/appkit/TypeAlias.ServingFactory",
label: "ServingFactory"
},
+ {
+ type: "doc",
+ id: "api/appkit/TypeAlias.ToolRegistry",
+ label: "ToolRegistry"
+ },
{
type: "doc",
id: "api/appkit/TypeAlias.ToPlugin",
@@ -274,6 +444,11 @@ const typedocSidebar: SidebarsConfig = {
type: "category",
label: "Variables",
items: [
+ {
+ type: "doc",
+ id: "api/appkit/Variable.agents",
+ label: "agents"
+ },
{
type: "doc",
id: "api/appkit/Variable.READ_ACTIONS",
@@ -295,6 +470,11 @@ const typedocSidebar: SidebarsConfig = {
type: "category",
label: "Functions",
items: [
+ {
+ type: "doc",
+ id: "api/appkit/Function.agentIdFromMarkdownPath",
+ label: "agentIdFromMarkdownPath"
+ },
{
type: "doc",
id: "api/appkit/Function.appKitServingTypesPlugin",
@@ -305,6 +485,11 @@ const typedocSidebar: SidebarsConfig = {
id: "api/appkit/Function.appKitTypesPlugin",
label: "appKitTypesPlugin"
},
+ {
+ type: "doc",
+ id: "api/appkit/Function.createAgent",
+ label: "createAgent"
+ },
{
type: "doc",
id: "api/appkit/Function.createApp",
@@ -315,6 +500,16 @@ const typedocSidebar: SidebarsConfig = {
id: "api/appkit/Function.createLakebasePool",
label: "createLakebasePool"
},
+ {
+ type: "doc",
+ id: "api/appkit/Function.defineTool",
+ label: "defineTool"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Function.executeFromRegistry",
+ label: "executeFromRegistry"
+ },
{
type: "doc",
id: "api/appkit/Function.extractServingEndpoints",
@@ -325,6 +520,11 @@ const typedocSidebar: SidebarsConfig = {
id: "api/appkit/Function.findServerFile",
label: "findServerFile"
},
+ {
+ type: "doc",
+ id: "api/appkit/Function.functionToolToDefinition",
+ label: "functionToolToDefinition"
+ },
{
type: "doc",
id: "api/appkit/Function.generateDatabaseCredential",
@@ -365,10 +565,65 @@ const typedocSidebar: SidebarsConfig = {
id: "api/appkit/Function.getWorkspaceClient",
label: "getWorkspaceClient"
},
+ {
+ type: "doc",
+ id: "api/appkit/Function.isFunctionTool",
+ label: "isFunctionTool"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Function.isHostedTool",
+ label: "isHostedTool"
+ },
{
type: "doc",
id: "api/appkit/Function.isSQLTypeMarker",
label: "isSQLTypeMarker"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Function.isToolkitEntry",
+ label: "isToolkitEntry"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Function.loadAgentFromFile",
+ label: "loadAgentFromFile"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Function.loadAgentsFromDir",
+ label: "loadAgentsFromDir"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Function.mcpServer",
+ label: "mcpServer"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Function.parseTextToolCalls",
+ label: "parseTextToolCalls"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Function.resolveHostedTools",
+ label: "resolveHostedTools"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Function.runAgent",
+ label: "runAgent"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Function.tool",
+ label: "tool"
+ },
+ {
+ type: "doc",
+ id: "api/appkit/Function.toolsFromRegistry",
+ label: "toolsFromRegistry"
}
]
}
diff --git a/docs/docs/plugins/agents.md b/docs/docs/plugins/agents.md
new file mode 100644
index 000000000..0ba2ab301
--- /dev/null
+++ b/docs/docs/plugins/agents.md
@@ -0,0 +1,423 @@
+# Agents
+
+
+:::warning Beta plugin
+This plugin is currently **beta**. APIs may change between minor releases. Import from `@databricks/appkit/beta`. See [Plugin Stability Tiers](./stability.md).
+:::
+
+
+The `agents` plugin turns a Databricks AppKit app into an AI-agent host. It loads agent definitions from markdown on disk (one folder per agent: `config/agents/<id>/agent.md`), from TypeScript (`createAgent(def)`), or both, and exposes them at `POST /invocations` alongside routes for chat, thread management, and cancellation.
+
+This page covers the full lifecycle. For the hand-written primitives (`tool()`, `mcpServer()`), see [tools](./server.md).
+
+## Requirements
+
+:::info Streaming-capable serving endpoints only
+The agents plugin drives the LLM over Server-Sent Events. Foundation Model APIs (Claude, Llama, GPT, etc.) and other chat-style endpoints support streaming and work out of the box. Custom model endpoints that return a single JSON response (e.g. typical `sklearn` or MLflow `pyfunc` deployments) do **not** stream — pointing an agent at one will fail with "Response body is null — streaming not supported" on the first turn. If you list a serving endpoint in `apps init`, pick one whose model implements the chat-completions streaming protocol; the agents plugin reads its name from `DATABRICKS_SERVING_ENDPOINT_NAME` whenever an agent doesn't pin `model:` itself.
+
+For the non-streaming path against a custom endpoint, use the `serving` plugin's `/invoke` route with `useServingInvoke` instead.
+:::
+
+## Install
+
+`agents` is a regular plugin. Add it to `plugins[]` alongside `server()` and any ToolProvider plugins whose tools you want agents to reach.
+
+```ts
+import { analytics, createApp, files, server } from "@databricks/appkit";
+import { agents } from "@databricks/appkit/beta";
+
+await createApp({
+ plugins: [server(), analytics(), files(), agents()],
+});
+```
+
+That alone gives you a live HTTP server with `POST /invocations` wired to a markdown-driven agent.
+
+## Level 1: drop a markdown agent package
+
+Each agent lives in its own directory with a fixed entry file `agent.md`. A reserved top-level folder named `skills` is ignored until per-agent skills ship (you can add other asset folders beside `agent.md` under each agent id).
+
+```
+my-app/
+ server.ts
+ config/agents/
+ assistant/
+ agent.md
+```
+
+```md
+---
+endpoint: databricks-claude-sonnet-4-5
+default: true
+---
+
+You are a helpful data assistant running on Databricks.
+
+Use the available tools to query data, browse files, and help users.
+```
+
+On startup the plugin:
+
+1. Discovers `./config/agents/assistant/agent.md` and registers agent id `assistant`.
+2. Parses the YAML frontmatter and markdown body as the agent's `instructions`.
+3. Resolves the adapter from `endpoint` (or falls back to `DATABRICKS_SERVING_ENDPOINT_NAME`).
+4. Mounts the agent at the default name (`assistant`).
+
+The agent starts with **no tools**. Tools are opt-in — declare them in frontmatter (Level 2 below) or opt into auto-inherit explicitly with `agents({ autoInheritTools: { file: true } })`. See "Auto-inherit posture" further down for what that costs and why it's off by default.
+
+Requests land at `POST /invocations` with an OpenAI Responses-compatible body. Every tool call runs through `asUser(req)` so SQL executes as the requesting user, file access respects Unity Catalog ACLs, and telemetry spans are created automatically.
+
+## Level 2: scope tools in frontmatter
+
+```md
+---
+endpoint: databricks-claude-sonnet-4-5
+tools:
+ - plugin:analytics # all analytics.* tools
+ - plugin:files: [uploads.read, uploads.list] # only these files tools
+ - plugin:genie: { except: [getConversation] } # everything but getConversation
+ - get_weather # ambient tool declared in code
+default: true
+---
+
+You are a read-only data analyst.
+```
+
+The unified `tools:` list mixes plugin references and ambient tools, mirroring the TS function form `tools(plugins) => ({ ...plugins.analytics.toolkit(), ...plugins.files.toolkit({ only: [...] }), get_weather: tool({...}) })`. Each entry is one of:
+
+- **`plugin:<name>`** — pull every tool from the named plugin.
+- **`plugin:<name>: [tool1, tool2]`** — only the listed tools (sugar for `{ only: [...] }`).
+- **`plugin:<name>: { ...ToolkitOptions }`** — full `prefix` / `only` / `except` / `rename` options.
+- **`<name>`** (no prefix) — ambient tool name resolved against the `agents({ tools: { ... } })` config.
+
+When any `tools:` is declared, auto-inherit is skipped for that agent even if `autoInheritTools` is enabled — the agent sees exactly the listed tools.
+
+## Level 3: code-defined agents
+
+```ts
+import { analytics, createApp, files, server } from "@databricks/appkit";
+import { agents, createAgent, tool } from "@databricks/appkit/beta";
+import { z } from "zod";
+
+const support = createAgent({
+ instructions: "You help customers with data and files.",
+ model: "databricks-claude-sonnet-4-5", // string sugar
+ tools(plugins) {
+ return {
+ ...plugins.analytics.toolkit(), // all analytics tools
+ ...plugins.files.toolkit({ only: ["uploads.read"] }), // filtered subset
+ get_weather: tool({
+ description: "Weather",
+ schema: z.object({ city: z.string() }),
+ execute: async ({ city }) => `Sunny in ${city}`,
+ }),
+ };
+ },
+});
+
+await createApp({
+ plugins: [server(), analytics(), files(), agents({ agents: { support } })],
+});
+```
+
+Code-defined agents start with no tools by default. The function form `tools(plugins) => Record<string, AgentTool>` is the primary way to pull in plugin tools: each plugin registered in `createApp({ plugins: [...] })` shows up on the `plugins` parameter, and you call `.toolkit(opts?)` on it to get a spread-friendly record. The runtime invokes the function once at agent setup and caches the result — every plugin is mentioned exactly once (in `createApp`), with no held variables or marker imports.
+
+Inline `tool({...})` calls live in the same record. `name` is optional — the agents plugin overrides it with the record key (`get_weather` above).
+
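+Markdown and code-defined agents share the same default: neither auto-inherits plugin tools unless you opt in through `autoInheritTools`, which can be enabled per origin (`file` for markdown agents, `code` for code-defined agents). See "Auto-inherit posture" below.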
+
+### Scoping tools in code
+
+`plugins.<name>.toolkit(opts?)` accepts the same `ToolkitOptions` as markdown frontmatter:
+
+| Option | Example | Meaning |
+|---|---|---|
+| `only` | `{ only: ["query"] }` | Allowlist of local tool names |
+| `except` | `{ except: ["legacy"] }` | Denylist of local tool names |
+| `prefix` | `{ prefix: "" }` | Drop the `${pluginName}.` prefix |
+| `rename` | `{ rename: { query: "q" } }` | Remap specific local names |
+
+For plugins that don't expose a `.toolkit()` method (e.g., third-party `ToolProvider` plugins authored with plain `toPlugin`), the runtime falls back to walking `getAgentTools()` and synthesizing namespaced keys (`${pluginName}.${localName}`). The fallback respects `only` / `except` / `rename` / `prefix` the same way.
+
+If a referenced plugin is not registered in `createApp({ plugins })`, the agents plugin throws at setup with an `Available: …` listing so you can fix the wiring before the first request.
+
+## Level 4: sub-agents
+
+```ts
+const researcher = createAgent({
+ instructions: "Research the question. Return concise bullets.",
+ model: "databricks-claude-sonnet-4-5",
+ tools: { search: tool({ /* ... */ }) },
+});
+
+const writer = createAgent({
+ instructions: "Draft prose from notes.",
+ model: "databricks-claude-sonnet-4-5",
+});
+
+const supervisor = createAgent({
+ instructions: "Coordinate researcher and writer.",
+ model: "databricks-claude-sonnet-4-5",
+ agents: { researcher, writer }, // exposed as agent-researcher, agent-writer
+});
+
+await createApp({
+ plugins: [
+ server(),
+ agents({ agents: { supervisor, researcher, writer } }),
+ ],
+});
+```
+
+Each key in `agents: {...}` on an `AgentDefinition` becomes an `agent-<key>` tool on the parent. When invoked, the agents plugin runs the child's adapter with a fresh message list (no shared thread state) and returns the aggregated text. Cycles are rejected at load time.
+
+## Level 5: standalone (no `createApp`)
+
+```ts
+import { createAgent, runAgent, tool } from "@databricks/appkit/beta";
+import { z } from "zod";
+
+const classifier = createAgent({
+ instructions: "Classify tickets: billing | bug | feature.",
+ model: "databricks-claude-sonnet-4-5",
+ tools: {
+ lookup_account: tool({ /* ... */ }),
+ },
+});
+
+for (const ticket of tickets) {
+ const result = await runAgent(classifier, {
+ messages: [{ role: "user", content: ticket.body }],
+ });
+ await persistClassification(ticket.id, result.text);
+}
+```
+
+`runAgent` drives the adapter without `createApp` or HTTP. Inline `tool()` calls work standalone as shown above. To use plugin tools in standalone mode, pass the plugin factories through `RunAgentInput.plugins` and reach into them via the `tools(plugins)` function form:
+
+```ts
+import { analytics } from "@databricks/appkit";
+import { createAgent, runAgent } from "@databricks/appkit/beta";
+
+const classifier = createAgent({
+ instructions: "Classify tickets. Use analytics.query for historical data.",
+ model: "databricks-claude-sonnet-4-5",
+ tools(plugins) {
+ return { ...plugins.analytics.toolkit() };
+ },
+});
+
+const result = await runAgent(classifier, {
+ messages: "is ticket 42 a duplicate?",
+ plugins: [analytics()],
+});
+```
+
+`runAgent` eagerly constructs each plugin in `RunAgentInput.plugins`, runs the standard `attachContext({})` + `await setup()` lifecycle, and shares the instances across the top-level run and every sub-agent dispatch. Plugins whose `setup()` requires `createApp`-only runtime (e.g. `WorkspaceClient`, `ServiceContext`) throw at standalone-init with a clear "use createApp instead" message rather than mid-stream.
+
+Hosted tools (MCP) are still `agents()`-only since they require the live MCP client. Plugin tool dispatch in standalone mode runs as the service principal (no OBO) and **bypasses the agents-plugin approval gate** — treat standalone runAgent as a trusted-prompt environment (CI, batch eval, internal scripts), not as an exposed user-facing surface.
+
+## Configuration reference
+
+```ts
+agents({
+ dir?: string | false, // "./config/agents" default; false disables
+  agents?: Record<string, AgentDefinition>,
+  defaultAgent?: string,
+  defaultModel?: AgentAdapter | Promise<AgentAdapter> | string,
+  tools?: Record<string, AgentTool>,
+ autoInheritTools?: boolean | { file?: boolean, code?: boolean },
+ threadStore?: ThreadStore, // default in-memory
+ baseSystemPrompt?: false | string | (ctx: PromptContext) => string,
+ mcp?: {
+ trustedHosts?: string[], // extra hostnames allowed for custom MCP URLs
+ allowLocalhost?: boolean, // default: NODE_ENV !== "production"
+ },
+ approval?: {
+ requireForDestructive?: boolean, // default: true
+ timeoutMs?: number, // default: 60_000
+ },
+ limits?: {
+ maxConcurrentStreamsPerUser?: number, // default: 5
+ maxToolCalls?: number, // default: 50
+ maxSubAgentDepth?: number, // default: 3
+ },
+})
+```
+
+`autoInheritTools` defaults to `{ file: false, code: false }` — no tools spread into any agent unless the developer explicitly opts in. When opted in, only tools whose plugin author marked `autoInheritable: true` are spread; destructive or state-mutating tools are always skipped from the auto-inherit path even when opt-in is enabled. Boolean shorthand (`autoInheritTools: true`) applies to both origins. See "Auto-inherit posture" below.
+
+### MCP host policy
+
+AppKit applies a zero-trust policy to every MCP URL used as a hosted tool. By default only **same-origin Databricks workspace URLs** (matching the resolved `DATABRICKS_HOST`) may be reached. Every other host must be explicitly allowlisted via `mcp.trustedHosts`, and workspace credentials (service-principal and on-behalf-of user tokens) are **never** forwarded to those hosts.
+
+```ts
+agents({
+ agents: {
+ support: createAgent({
+ instructions: "…",
+ tools: {
+ "mcp.internal": mcpServer("internal", "https://mcp.corp.internal/mcp"),
+ },
+ }),
+ },
+ mcp: {
+ trustedHosts: ["mcp.corp.internal"],
+ },
+});
+```
+
+The policy enforces four rules at MCP `connect()` time, before any byte is sent:
+
+1. Only `http` and `https` URLs are accepted.
+2. Plaintext `http://` is rejected for everything except `localhost` when `allowLocalhost` is true (default in development, off in production).
+3. The destination hostname must match the workspace host, equal `localhost` (if permitted), or appear in `trustedHosts`.
+4. The resolved DNS address must not fall in loopback, RFC1918, CGNAT (100.64.0.0/10), link-local (169.254.0.0/16 — covers cloud metadata services), ULA, or multicast ranges.
+
+`Authorization` headers carrying workspace credentials are scoped to same-origin workspace URLs. A `mcpServer(name, url)` pointing at a trusted external host must authenticate itself (for example, a custom token baked into `url`).
+
+### Auto-inherit posture
+
+AppKit treats auto-inherit as a two-key operation: the developer must opt into `autoInheritTools`, AND the plugin author must mark each tool `autoInheritable: true`. Both are required for a tool to spread into an agent's index without explicit wiring.
+
+```ts
+// Opt-in at the agents plugin level (pick one):
+agents({ autoInheritTools: true }); // both origins
+agents({ autoInheritTools: { file: true } }); // markdown agents only
+agents({ autoInheritTools: { file: true, code: true } });
+
+// Per-tool, inside a plugin:
+defineTool({
+ description: "safe read",
+ schema: z.object({ ... }),
+ annotations: { effect: "read", requiresUserContext: true },
+ autoInheritable: true, // explicit consent that this tool may auto-spread
+ execute: (args, signal) => ...,
+});
+```
+
+The AppKit core plugins ship with the following `autoInheritable` markings:
+
+| Tool | `autoInheritable` | Rationale |
+|---|---|---|
+| `analytics.query` | yes | OBO-scoped, read-only SQL enforced at runtime via the classifier |
+| `files.list` / `files.read` / `files.exists` / `files.metadata` | yes | OBO-scoped read operations |
+| `files.upload` / `files.delete` | no | Mutating — wire explicitly |
+| `genie.getConversation` | yes | Read-only history |
+| `genie.sendMessage` | no | State-mutating Genie conversation |
+| `lakebase.query` | no | Already gated by `exposeAsAgentTool`; auto-inherit stays closed as defense-in-depth |
+
+Third-party `ToolProvider` plugins that don't expose a `toolkit()` method are also skipped from the auto-inherit path — their tools must be wired via `tools:` explicitly. At setup the agents plugin logs what each agent inherited and what was skipped so the posture is visible:
+
+```
+[agents] [agent support] auto-inherited 2 tool(s): analytics.query, files.uploads.read
+[agents] [agent support] auto-inherit skipped 3 tool(s) not marked autoInheritable: files(2), genie(1). Wire them explicitly via `tools:` if needed.
+```
+
+### SQL agent tools
+
+Two built-in agent tools can execute SQL on behalf of the LLM: `analytics.query` (against the Databricks SQL warehouse) and the opt-in `lakebase.query` (against a Lakebase Postgres database). Both have distinct safety postures because they run with different privileges.
+
+**`analytics.query`** runs under the caller's OBO token (the end user's Databricks credentials). Its `readOnly: true` annotation is enforced at execution time — statements are tokenized and only `SELECT`, `WITH`, `SHOW`, `EXPLAIN`, `DESCRIBE`, and `DESC` are accepted. Writes, DDL, and stacked statements are rejected before the request reaches the warehouse:
+
+```ts
+// accepted
+analytics.query({ query: "SELECT * FROM main.sales.orders WHERE created_at > current_date() - 7" })
+
+// rejected at the plugin, never reaches the warehouse
+analytics.query({ query: "UPDATE main.sales.orders SET status = 'cancelled'" })
+analytics.query({ query: "SELECT 1; DROP TABLE main.sales.orders" })
+```
+
+**`lakebase.query`** is **not registered as an agent tool by default**. Enabling it is an explicit decision because the Lakebase pool is bound to the application's service principal: an agent with access to this tool can execute SQL as the SP regardless of which end user initiated the request. Opt in with an acknowledgement flag:
+
+```ts
+lakebase({
+ exposeAsAgentTool: {
+ iUnderstandRunsAsServicePrincipal: true,
+ readOnly: true, // default
+ },
+});
+```
+
+With `readOnly: true` (default), the same SQL classifier as `analytics.query` applies, and the accepted statement is additionally wrapped in `BEGIN READ ONLY; … ROLLBACK;` so the Postgres server rejects any write that slips past the classifier (e.g., a `SELECT` over a side-effecting function). The tool annotation is `{ effect: "read" }`.
+
+With `readOnly: false`, the tool accepts arbitrary SQL and is annotated `{ effect: "destructive" }`. The `destructive` effect triggers the human-in-the-loop approval gate (below) on every invocation.
+
+### Human-in-the-loop approval for mutating tools
+
+Any tool annotated with a mutating effect — `effect: "write" | "update" | "destructive"` (preferred) or the legacy `destructive: true` boolean — requires explicit user approval before execution. Secure by default: set `approval.requireForDestructive: false` only for fully autonomous back-office agents running in single-user contexts.
+
+Flow:
+
+1. Before running the tool, the agents plugin emits an `appkit.approval_pending` SSE event carrying the pending call's `approval_id`, `stream_id`, `tool_name`, `args`, and `annotations`.
+2. The chat client renders an approval prompt (see the reference app's approval card).
+3. The same user who initiated the stream posts the decision to `POST /api/agents/approve`:
+
+ ```http
+ POST /api/agents/approve
+ Content-Type: application/json
+ X-Forwarded-User: <user-id>
+ X-Forwarded-Access-Token: <access-token>
+
+ { "streamId": "...", "approvalId": "...", "decision": "approve" | "deny" }
+ ```
+4. If approved, the tool executes normally and the stream continues. If denied, the adapter receives the string `"Tool execution denied by user approval gate (tool: <name>)."` as the tool output and the LLM can apologise / replan. If no decision arrives within `approval.timeoutMs` (default 60 s), the gate auto-denies.
+
+The route enforces that the decider is the stream owner: an approve from a different `x-forwarded-user` returns `403`. Cancelling the stream via `POST /api/agents/cancel` denies every pending approval on that stream.
+
+### Resource limits
+
+The plugin enforces a handful of caps to protect a single-instance deployment from runaway prompts, misbehaving clients, or prompt-injected delegation cycles. Some are static (enforced by the request schema) and some are configurable via `agents({ limits: { ... } })`.
+
+**Static caps** (applied at `POST /chat` and `POST /invocations` request parsing):
+
+| Field | Cap | Why |
+|---|---|---|
+| `chat.message` | 64 000 characters | ~16k tokens; larger bodies are almost certainly abuse. |
+| `invocations.input` string | 64 000 characters | Same reasoning. |
+| `invocations.input` array | 100 items | Prevents a single request seeding hundreds of messages into the thread store. |
+| `invocations.input[].content` string | 64 000 characters | Per-seeded-message cap. |
+| `invocations.input[].content` array | 100 items | Per-seeded-message cap. |
+
+**Configurable caps** (defaults shown):
+
+```ts
+agents({
+ limits: {
+ maxConcurrentStreamsPerUser: 5, // HTTP 429 + Retry-After when exceeded
+ maxToolCalls: 50, // aborts the run if the budget is exhausted
+ maxSubAgentDepth: 3, // rejects sub-agent recursion beyond this
+ },
+});
+```
+
+The `maxToolCalls` budget is shared across the top-level adapter and every sub-agent it delegates to, so a prompt-injected fan-out cannot escape by going deeper. `maxConcurrentStreamsPerUser` is per-user, not global — one user hitting their limit does not affect others.
+
+## Runtime API
+
+After `createApp`, the plugin exposes:
+
+```ts
+appkit.agents.list(); // => ["support", "researcher", ...]
+appkit.agents.get("support"); // => RegisteredAgent | null
+appkit.agents.getDefault(); // => "support"
+appkit.agents.register(name, def); // dynamic registration
+appkit.agents.reload(); // re-scan the directory
+appkit.agents.getThreads(userId); // list user's threads
+```
+
+## Frontmatter schema
+
+| Key | Type | Notes |
+|---|---|---|
+| `endpoint` | string | Model serving endpoint name. Shortcut for `model`. |
+| `model` | string | Same as `endpoint`; either works. |
+| `tools` | array | Unified tool list. Entries are `plugin:<name>` / `plugin:<name>: [t1, t2]` / `plugin:<name>: { only, except, rename, prefix }` for plugin tools, or a bare `<key>` resolved against `agents({ tools: {...} })` for ambient tools. See "Level 2: scope tools in frontmatter" above for examples. |
+| `default` | boolean | First agent id (sorted order) with `default: true` becomes the default agent. |
+| `maxSteps` | number | Adapter max-step hint. |
+| `maxTokens` | number | Adapter max-token hint. |
+| `baseSystemPrompt` | false \| string | Per-agent override. `false` disables the AppKit base prompt. |
+| `ephemeral` | boolean | If `true`, the thread created for a chat request against this agent is deleted from `ThreadStore` after the stream finishes. Use for stateless one-shot agents (e.g. autocomplete) so history does not accumulate or contaminate future calls. Defaults to `false`. |
+
+Unknown keys are logged and ignored. Invalid YAML and missing plugin/tool references throw at boot.
diff --git a/docs/docusaurus.config.ts b/docs/docusaurus.config.ts
index 2e9461773..a4386ab8c 100644
--- a/docs/docusaurus.config.ts
+++ b/docs/docusaurus.config.ts
@@ -119,7 +119,7 @@ const config: Config = {
"docusaurus-plugin-typedoc",
{
id: "appkit",
- entryPoints: ["../packages/appkit/src/index.ts"],
+ entryPoints: ["../packages/appkit/src/typedoc.entry.ts"],
tsconfig: "../packages/appkit/tsconfig.json",
out: "docs/api/appkit",
gitRevision: "main",
diff --git a/knip.json b/knip.json
index 67e825eee..4fb87b388 100644
--- a/knip.json
+++ b/knip.json
@@ -23,6 +23,7 @@
"packages/appkit/src/core/agent/tools/index.ts",
"packages/appkit/src/core/agent/load-agents.ts",
"packages/appkit/src/connectors/mcp/index.ts",
+ "packages/appkit/src/typedoc.entry.ts",
"template/**",
"tools/**",
"docs/**",
diff --git a/packages/appkit-ui/src/react/hooks/__tests__/use-agent-chat.test.ts b/packages/appkit-ui/src/react/hooks/__tests__/use-agent-chat.test.ts
new file mode 100644
index 000000000..c066d35f8
--- /dev/null
+++ b/packages/appkit-ui/src/react/hooks/__tests__/use-agent-chat.test.ts
@@ -0,0 +1,325 @@
+import { act, renderHook, waitFor } from "@testing-library/react";
+import { afterEach, describe, expect, test, vi } from "vitest";
+
+// Options the hook handed to the most recent connectSSE call. Tests drive
+// the stream by invoking these callbacks directly.
+let capturedCallbacks: {
+  onMessage?: (msg: { data: string }) => Promise<void>;
+  onError?: (err: Error) => void;
+  signal?: AbortSignal;
+  url?: string;
+  payload?: unknown;
+  maxRetries?: number;
+} = {};
+
+// Settle handles for the promise returned by the mocked connectSSE, so a
+// test decides when (and how) the "stream" ends.
+let resolveStream: (() => void) | null = null;
+let rejectStream: ((err: Error) => void) | null = null;
+
+// Stand-in for connectSSE: records the options it was called with and
+// returns a promise that stays pending until a test settles it.
+const mockConnectSSE = vi.fn().mockImplementation((opts: any) => {
+  capturedCallbacks = {
+    onMessage: opts.onMessage,
+    onError: opts.onError,
+    signal: opts.signal,
+    url: opts.url,
+    payload: opts.payload,
+    maxRetries: opts.maxRetries,
+  };
+  return new Promise<void>((resolve, reject) => {
+    resolveStream = resolve;
+    rejectStream = reject;
+  });
+});
+
+// Route the hook's `connectSSE` import to the mock above.
+vi.mock("@/js", () => ({
+  connectSSE: (...args: unknown[]) => mockConnectSSE(...args),
+}));
+
+import { useAgentChat } from "../use-agent-chat";
+
+async function emit(data: string) {
+ // Deliver one SSE frame to the captured onMessage handler and wait for it to finish (no-op if none was captured).
+ await capturedCallbacks.onMessage?.({ data });
+}
+
+describe("useAgentChat", () => {
+ afterEach(() => {
+ capturedCallbacks = {};
+ resolveStream = null;
+ rejectStream = null;
+ vi.clearAllMocks();
+ });
+
+ test("initial state is idle", () => {
+ const { result } = renderHook(() => useAgentChat({ agent: "helper" }));
+
+ expect(result.current.content).toBe("");
+ expect(result.current.events).toEqual([]);
+ expect(result.current.threadId).toBeNull();
+ expect(result.current.isStreaming).toBe(false);
+ expect(result.current.error).toBeNull();
+ expect(typeof result.current.send).toBe("function");
+ expect(typeof result.current.reset).toBe("function");
+ });
+
+ test("send() posts to /api/agents/chat with the agent name and message", async () => {
+ const { result } = renderHook(() => useAgentChat({ agent: "helper" }));
+
+ act(() => {
+ void result.current.send("hello");
+ });
+
+ await waitFor(() => expect(mockConnectSSE).toHaveBeenCalled());
+
+ expect(capturedCallbacks.url).toBe("/api/agents/chat");
+ expect(capturedCallbacks.payload).toEqual({
+ message: "hello",
+ agent: "helper",
+ });
+ // Chat turns are not safely retryable — assert the hook passes maxRetries: 0 to connectSSE.
+ expect(capturedCallbacks.maxRetries).toBe(0);
+ });
+
+ test("custom endpoint is forwarded to connectSSE", async () => {
+ const { result } = renderHook(() =>
+ useAgentChat({ agent: "helper", endpoint: "/v2/chat" }),
+ );
+
+ act(() => {
+ void result.current.send("hi");
+ });
+
+ await waitFor(() => expect(mockConnectSSE).toHaveBeenCalled());
+ expect(capturedCallbacks.url).toBe("/v2/chat");
+ });
+
+ test("accumulates response.output_text.delta into content", async () => {
+ const { result } = renderHook(() => useAgentChat({ agent: "helper" }));
+
+ act(() => {
+ void result.current.send("hi");
+ });
+
+ await waitFor(() => expect(capturedCallbacks.onMessage).toBeDefined());
+
+ await act(async () => {
+ await emit(
+ JSON.stringify({
+ type: "response.output_text.delta",
+ delta: "Hello, ",
+ }),
+ );
+ await emit(
+ JSON.stringify({ type: "response.output_text.delta", delta: "world" }),
+ );
+ });
+
+ expect(result.current.content).toBe("Hello, world");
+ });
+
+ test("captures threadId from appkit.metadata and reuses it on next send()", async () => {
+ const { result } = renderHook(() => useAgentChat({ agent: "helper" }));
+
+ act(() => {
+ void result.current.send("first");
+ });
+ await waitFor(() => expect(capturedCallbacks.onMessage).toBeDefined());
+
+ await act(async () => {
+ await emit(
+ JSON.stringify({
+ type: "appkit.metadata",
+ data: { threadId: "t-123" },
+ }),
+ );
+ });
+
+ expect(result.current.threadId).toBe("t-123");
+
+ // Resolve the mocked connectSSE promise so the first turn finishes before the next send().
+ await act(async () => {
+ resolveStream?.();
+ await new Promise((r) => setTimeout(r, 0));
+ });
+
+ mockConnectSSE.mockClear();
+ act(() => {
+ void result.current.send("second");
+ });
+ await waitFor(() => expect(mockConnectSSE).toHaveBeenCalled());
+
+ expect(capturedCallbacks.payload).toEqual({
+ message: "second",
+ agent: "helper",
+ threadId: "t-123",
+ });
+ });
+
+ test("onEvent is invoked for every parsed event", async () => {
+ const onEvent = vi.fn();
+ const { result } = renderHook(() =>
+ useAgentChat({ agent: "helper", onEvent }),
+ );
+
+ act(() => {
+ void result.current.send("hi");
+ });
+ await waitFor(() => expect(capturedCallbacks.onMessage).toBeDefined());
+
+ await act(async () => {
+ await emit(
+ JSON.stringify({ type: "response.output_text.delta", delta: "a" }),
+ );
+ await emit(
+ JSON.stringify({
+ type: "response.output_item.added",
+ item: { type: "function_call", name: "get_weather", arguments: "{}" },
+ }),
+ );
+ });
+
+ expect(onEvent).toHaveBeenCalledTimes(2);
+ expect(onEvent).toHaveBeenNthCalledWith(
+ 1,
+ expect.objectContaining({
+ type: "response.output_text.delta",
+ delta: "a",
+ }),
+ );
+ expect(onEvent).toHaveBeenNthCalledWith(
+ 2,
+ expect.objectContaining({
+ type: "response.output_item.added",
+ item: expect.objectContaining({ name: "get_weather" }),
+ }),
+ );
+ });
+
+ test("throwing onEvent handler does not break the stream", async () => {
+ const onEvent = vi.fn(() => {
+ throw new Error("handler bug");
+ });
+ const { result } = renderHook(() =>
+ useAgentChat({ agent: "helper", onEvent }),
+ );
+
+ act(() => {
+ void result.current.send("hi");
+ });
+ await waitFor(() => expect(capturedCallbacks.onMessage).toBeDefined());
+
+ await act(async () => {
+ await emit(
+ JSON.stringify({ type: "response.output_text.delta", delta: "x" }),
+ );
+ });
+
+ // The onEvent mock threw for this event, yet its delta was still appended to content.
+ expect(result.current.content).toBe("x");
+ });
+
+ test("malformed event payloads are skipped silently", async () => {
+ const onEvent = vi.fn();
+ const { result } = renderHook(() =>
+ useAgentChat({ agent: "helper", onEvent }),
+ );
+
+ act(() => {
+ void result.current.send("hi");
+ });
+ await waitFor(() => expect(capturedCallbacks.onMessage).toBeDefined());
+
+ await act(async () => {
+ await emit("not-json");
+ await emit("[DONE]");
+ await emit("");
+ await emit(
+ JSON.stringify({ type: "response.output_text.delta", delta: "ok" }),
+ );
+ });
+
+ expect(result.current.content).toBe("ok");
+ expect(onEvent).toHaveBeenCalledTimes(1);
+ });
+
+ test("isStreaming toggles around the connectSSE lifecycle", async () => {
+ const { result } = renderHook(() => useAgentChat({ agent: "helper" }));
+
+ expect(result.current.isStreaming).toBe(false);
+
+ act(() => {
+ void result.current.send("hi");
+ });
+ await waitFor(() => expect(result.current.isStreaming).toBe(true));
+
+ await act(async () => {
+ resolveStream?.();
+ await new Promise((r) => setTimeout(r, 0));
+ });
+ await waitFor(() => expect(result.current.isStreaming).toBe(false));
+ });
+
+ test("reset() clears content, events, threadId, and aborts in-flight stream", async () => {
+ const { result } = renderHook(() => useAgentChat({ agent: "helper" }));
+
+ act(() => {
+ void result.current.send("hi");
+ });
+ await waitFor(() => expect(capturedCallbacks.onMessage).toBeDefined());
+
+ await act(async () => {
+ await emit(
+ JSON.stringify({ type: "appkit.metadata", data: { threadId: "t-1" } }),
+ );
+ await emit(
+ JSON.stringify({ type: "response.output_text.delta", delta: "x" }),
+ );
+ });
+
+ expect(result.current.threadId).toBe("t-1");
+ expect(result.current.content).toBe("x");
+
+ const signal = capturedCallbacks.signal;
+ expect(signal?.aborted).toBe(false);
+
+ act(() => {
+ result.current.reset();
+ });
+
+ expect(signal?.aborted).toBe(true);
+ expect(result.current.content).toBe("");
+ expect(result.current.events).toEqual([]);
+ expect(result.current.threadId).toBeNull();
+ expect(result.current.isStreaming).toBe(false);
+ });
+
+ test("send() while a previous stream is in flight aborts the previous one", async () => {
+ const { result } = renderHook(() => useAgentChat({ agent: "helper" }));
+
+ act(() => {
+ void result.current.send("first");
+ });
+ await waitFor(() => expect(capturedCallbacks.onMessage).toBeDefined());
+ const firstSignal = capturedCallbacks.signal;
+
+ act(() => {
+ void result.current.send("second");
+ });
+ expect(firstSignal?.aborted).toBe(true);
+ });
+
+ test("onError surfaces a string error message", async () => {
+ const { result } = renderHook(() => useAgentChat({ agent: "helper" }));
+
+ act(() => {
+ void result.current.send("hi");
+ });
+ await waitFor(() => expect(capturedCallbacks.onError).toBeDefined());
+
+ await act(async () => {
+ capturedCallbacks.onError?.(new Error("upstream 500"));
+ resolveStream?.();
+ await new Promise((r) => setTimeout(r, 0));
+ });
+
+ expect(result.current.error).toBe("upstream 500");
+ expect(result.current.isStreaming).toBe(false);
+ });
+});
diff --git a/packages/appkit-ui/src/react/hooks/index.ts b/packages/appkit-ui/src/react/hooks/index.ts
index a425b0109..b1a5a1446 100644
--- a/packages/appkit-ui/src/react/hooks/index.ts
+++ b/packages/appkit-ui/src/react/hooks/index.ts
@@ -13,6 +13,12 @@ export type {
UseAnalyticsQueryOptions,
UseAnalyticsQueryResult,
} from "./types";
+export {
+ type AgentChatEvent,
+ type UseAgentChatOptions,
+ type UseAgentChatResult,
+ useAgentChat,
+} from "./use-agent-chat";
export { useAnalyticsQuery } from "./use-analytics-query";
export {
type UseChartDataOptions,
diff --git a/packages/appkit-ui/src/react/hooks/use-agent-chat.ts b/packages/appkit-ui/src/react/hooks/use-agent-chat.ts
new file mode 100644
index 000000000..684284a60
--- /dev/null
+++ b/packages/appkit-ui/src/react/hooks/use-agent-chat.ts
@@ -0,0 +1,263 @@
+import { useCallback, useEffect, useRef, useState } from "react";
+import { connectSSE } from "@/js";
+
+/**
+ * One Responses-API-shaped event yielded by the agents plugin SSE stream.
+ *
+ * The hook handles the two paths every chat UI needs — accumulating
+ * `content` from `response.output_text.delta` and capturing `threadId`
+ * from `appkit.metadata` — and surfaces everything else (tool calls,
+ * approval gates, status events, etc.) through {@link UseAgentChatOptions.onEvent}.
+ *
+ * Fields beyond `type` are intentionally loose because the agents plugin
+ * forwards adapter-specific shapes verbatim. Treat unknown fields as
+ * opaque pass-through.
+ */
+export interface AgentChatEvent {
+  /** Event discriminator, e.g. `"response.output_text.delta"` or `"appkit.metadata"`. */
+  type: string;
+  /** Text fragment carried by `response.output_text.delta` events. */
+  delta?: string;
+  item_id?: string;
+  item?: {
+    type?: string;
+    id?: string;
+    call_id?: string;
+    name?: string;
+    arguments?: string;
+    output?: string;
+    status?: string;
+  };
+  content?: string;
+  /**
+   * Free-form payload. Values are `unknown` so consumers must narrow
+   * before use; `appkit.metadata` events carry `threadId` here.
+   */
+  data?: Record<string, unknown>;
+  error?: string;
+  sequence_number?: number;
+  output_index?: number;
+  // `appkit.approval_pending` payload
+  approval_id?: string;
+  stream_id?: string;
+  tool_name?: string;
+  args?: unknown;
+  annotations?: Record<string, unknown>;
+}
+
+export interface UseAgentChatOptions {
+ /**
+ * Agent name registered with the `agents()` plugin (e.g. `"assistant"`,
+ * `"helper"`). Send-time payload includes this so the plugin routes the
+ * turn to the right `AgentDefinition`. Changing it produces a new `send`
+ * function identity.
+ */
+ agent: string;
+ /**
+ * Override the chat endpoint. Default `"/api/agents/chat"` matches the
+ * route the agents plugin mounts under its prefix. Useful when the
+ * server mounts under a non-default base path or when proxying.
+ * Changing it produces a new `send` function identity.
+ */
+ endpoint?: string;
+ /**
+ * Called for every parsed SSE event before any state update. Use this
+ * to drive tool-call rows, approval cards, inspectors, or anything
+ * beyond the streaming text content. Errors thrown here are swallowed
+ * so a buggy handler can't kill the stream.
+ *
+ * Frames that are empty, equal to `[DONE]`, fail JSON parsing, or lack
+ * a `type` never reach this callback. The hook always invokes the
+ * callback from the latest render, so passing an inline function does
+ * not change the identity of `send`.
+ */
+ onEvent?: (event: AgentChatEvent) => void;
+}
+
+/** State and actions returned by `useAgentChat`. */
+export interface UseAgentChatResult {
+  /**
+   * Accumulated assistant text from `response.output_text.delta` events.
+   * Restarts from the empty string on every `send()`.
+   */
+  content: string;
+  /**
+   * Every parsed event of the current turn, in order. Provided for
+   * components that need to render historical tool calls or replay state
+   * after a remount — lighter than re-deriving from message history. For
+   * one-off side effects prefer {@link UseAgentChatOptions.onEvent}.
+   */
+  events: AgentChatEvent[];
+  /**
+   * Thread id captured from `appkit.metadata` events (the most recent
+   * string `threadId` wins). Subsequent `send()` calls automatically
+   * forward this so the server reuses the same thread.
+   */
+  threadId: string | null;
+  /** True while an SSE stream is open. */
+  isStreaming: boolean;
+  /** Last error message; cleared when the next `send()` starts and by `reset()`. */
+  error: string | null;
+  /**
+   * Send a user turn and stream the response. Aborts any in-flight
+   * stream. Resolves when the stream completes (success or error).
+   */
+  send: (message: string) => Promise<void>;
+  /**
+   * Discard accumulated content, events, and threadId. Aborts any
+   * in-flight stream. Use when switching agents or starting a fresh
+   * conversation.
+   */
+  reset: () => void;
+}
+
+/**
+ * React hook for chatting with an agent registered via the `agents()`
+ * plugin. Wraps {@link connectSSE} (which owns the buffer cap, abort
+ * composition, retry/backoff, and frame parsing) with the small amount
+ * of stateful glue every chat UI needs: accumulated assistant text,
+ * thread id, streaming flag, and an event callback.
+ *
+ * The hook is intentionally lower-level than a full chat component —
+ * it owns one stream at a time, not a multi-turn message history. The
+ * caller composes its own messages array (typically a `useState`) and
+ * appends to it via the `onEvent` callback for tool calls and via the
+ * `content` field for assistant text.
+ *
+ * Only one turn is live per hook instance: `send()` aborts the previous
+ * turn, and `isStreaming` is cleared only by the turn that still owns
+ * the hook, so a superseded turn settling late cannot switch the flag
+ * off while its replacement is streaming.
+ *
+ * @param options - See {@link UseAgentChatOptions}.
+ * @returns Streaming state plus the `send` / `reset` actions.
+ *
+ * @example
+ * ```tsx
+ * function Chat({ agent }: { agent: string }) {
+ *   const [messages, setMessages] = useState<
+ *     { role: "tool"; name?: string; args?: string }[]
+ *   >([]);
+ *   const { content, threadId, isStreaming, send, reset } = useAgentChat({
+ *     agent,
+ *     onEvent(ev) {
+ *       if (ev.type === "response.output_item.added" && ev.item?.type === "function_call") {
+ *         setMessages((m) => [...m, { role: "tool", name: ev.item?.name, args: ev.item?.arguments }]);
+ *       }
+ *     },
+ *   });
+ *   // `content` reflects the latest assistant turn; reset() between conversations.
+ *   // ...
+ * }
+ * ```
+ */
+export function useAgentChat({
+  agent,
+  endpoint = "/api/agents/chat",
+  onEvent,
+}: UseAgentChatOptions): UseAgentChatResult {
+  const [content, setContent] = useState("");
+  const [events, setEvents] = useState<AgentChatEvent[]>([]);
+  const [threadId, setThreadId] = useState<string | null>(null);
+  const [isStreaming, setIsStreaming] = useState(false);
+  const [error, setError] = useState<string | null>(null);
+
+  // Refs avoid the standard "stale closure" problem with `send` and
+  // `onEvent`: `send` is a stable callback that reads the latest
+  // threadId/onEvent without re-mounting connectSSE on every render.
+  const threadIdRef = useRef<string | null>(null);
+  const contentRef = useRef("");
+  const onEventRef = useRef(onEvent);
+  onEventRef.current = onEvent;
+  const abortControllerRef = useRef<AbortController | null>(null);
+
+  const reset = useCallback(() => {
+    abortControllerRef.current?.abort();
+    abortControllerRef.current = null;
+    threadIdRef.current = null;
+    contentRef.current = "";
+    setContent("");
+    setEvents([]);
+    setThreadId(null);
+    setIsStreaming(false);
+    setError(null);
+  }, []);
+
+  const send = useCallback(
+    async (message: string) => {
+      // Abort any previous stream — only one chat turn in flight per hook.
+      abortControllerRef.current?.abort();
+      const controller = new AbortController();
+      abortControllerRef.current = controller;
+
+      contentRef.current = "";
+      setContent("");
+      setEvents([]);
+      setError(null);
+      setIsStreaming(true);
+
+      const payload = {
+        message,
+        agent,
+        ...(threadIdRef.current ? { threadId: threadIdRef.current } : {}),
+      };
+
+      try {
+        await connectSSE({
+          url: endpoint,
+          payload,
+          signal: controller.signal,
+          // Chat turns aren't idempotent — re-sending the payload after a
+          // transient failure would either duplicate the user message or
+          // depend on server-side Last-Event-ID resumption (the agents
+          // plugin's StreamManager supports it, but failure-mode auditing
+          // is easier with retries off by default; callers can re-enable
+          // via the underlying connectSSE once they understand the
+          // resumption contract on their endpoint).
+          maxRetries: 0,
+          onMessage: async ({ data }) => {
+            if (controller.signal.aborted) return;
+            if (!data || data === "[DONE]") return;
+            let parsed: unknown;
+            try {
+              parsed = JSON.parse(data);
+            } catch {
+              // Skip malformed payloads — the rest of the stream is
+              // still useful and the agents plugin recovers on the
+              // next event boundary.
+              return;
+            }
+            // Valid JSON may still be `null` or a primitive; reading
+            // `.type` off `null` would throw out of this handler, so
+            // treat any non-object like a malformed payload.
+            if (typeof parsed !== "object" || parsed === null) return;
+            const event = parsed as AgentChatEvent;
+            if (!event.type) return;
+
+            // Best-effort: never let an onEvent throw break the stream.
+            try {
+              onEventRef.current?.(event);
+            } catch {
+              // swallow
+            }
+
+            setEvents((prev) => [...prev, event]);
+
+            if (event.type === "appkit.metadata") {
+              const tid = event.data?.threadId;
+              if (typeof tid === "string") {
+                threadIdRef.current = tid;
+                setThreadId(tid);
+              }
+            } else if (
+              event.type === "response.output_text.delta" &&
+              typeof event.delta === "string"
+            ) {
+              contentRef.current += event.delta;
+              setContent(contentRef.current);
+            }
+          },
+          onError: (err) => {
+            if (controller.signal.aborted) return;
+            setError(err instanceof Error ? err.message : "Chat stream failed");
+          },
+        });
+      } catch (err) {
+        if (!controller.signal.aborted) {
+          setError(err instanceof Error ? err.message : "Chat stream failed");
+        }
+      } finally {
+        // Only the turn that still owns the hook may clear the flag. A
+        // turn aborted by a newer send() settles after that send() has
+        // already set isStreaming to true; clearing it here would report
+        // "idle" while the newer stream is live. reset() clears the flag
+        // itself, so skipping here is safe in that path too.
+        if (abortControllerRef.current === controller) {
+          abortControllerRef.current = null;
+          setIsStreaming(false);
+        }
+      }
+    },
+    [agent, endpoint],
+  );
+
+  // Abort any in-flight stream when the component unmounts.
+  useEffect(() => {
+    return () => {
+      abortControllerRef.current?.abort();
+    };
+  }, []);
+
+  return {
+    content,
+    events,
+    threadId,
+    isStreaming,
+    error,
+    send,
+    reset,
+  };
+}
diff --git a/packages/appkit/src/agents/databricks.ts b/packages/appkit/src/agents/databricks.ts
index 3b902c2ef..6e2e78d60 100644
--- a/packages/appkit/src/agents/databricks.ts
+++ b/packages/appkit/src/agents/databricks.ts
@@ -159,6 +159,17 @@ interface OpenAIToolCall {
id: string;
type: "function";
function: { name: string; arguments: string };
+ /**
+ * Opaque Vertex/Gemini "thought signature" blob the request must echo
+ * back verbatim on the next turn. Vertex's OpenAI-compat proxy emits
+ * this as `thoughtSignature` (camelCase) at the top level of the
+ * tool_call delta (verified against `gemini-3.1-flash-lite-preview`),
+ * and accepts the same spelling back on outbound. Non-Gemini endpoints
+ * (Claude on Databricks, external OpenAI-compat models, Llama, etc.)
+ * leave this undefined and the serializer omits the key.
+ * See https://docs.cloud.google.com/vertex-ai/generative-ai/docs/thought-signatures
+ */
+ thoughtSignature?: string;
}
interface OpenAITool {
@@ -175,6 +186,8 @@ interface DeltaToolCall {
id?: string;
type?: string;
function?: { name?: string; arguments?: string };
+ /** See {@link OpenAIToolCall.thoughtSignature}. */
+ thoughtSignature?: string;
}
/**
@@ -499,7 +512,12 @@ export class DatabricksAdapter implements AgentAdapter {
let fullText = "";
const toolCallAccumulator = new Map<
number,
- { id: string; name: string; arguments: string }
+ {
+ id: string;
+ name: string;
+ arguments: string;
+ thoughtSignature?: string;
+ }
>();
try {
@@ -563,6 +581,7 @@ export class DatabricksAdapter implements AgentAdapter {
for (const tc of toolCallsRaw) {
if (!isStreamingDeltaToolCall(tc)) continue;
+ const sig = tc.thoughtSignature;
const existing = toolCallAccumulator.get(tc.index);
if (existing) {
if (tc.function?.arguments) {
@@ -574,6 +593,9 @@ export class DatabricksAdapter implements AgentAdapter {
);
existing.arguments += tc.function.arguments;
}
+ if (sig && !existing.thoughtSignature) {
+ existing.thoughtSignature = sig;
+ }
} else {
const initial = tc.function?.arguments ?? "";
if (initial.length > this.maxToolArgumentsChars) {
@@ -585,6 +607,7 @@ export class DatabricksAdapter implements AgentAdapter {
id: tc.id ?? `call_${tc.index}`,
name: tc.function?.name ?? "",
arguments: initial,
+ ...(sig ? { thoughtSignature: sig } : {}),
});
}
}
@@ -615,6 +638,7 @@ export class DatabricksAdapter implements AgentAdapter {
id: tc.id,
type: "function" as const,
function: { name: tc.name, arguments: tc.arguments || "{}" },
+ ...(tc.thoughtSignature ? { thoughtSignature: tc.thoughtSignature } : {}),
}));
return { text: fullText, toolCalls };
@@ -694,6 +718,9 @@ export class DatabricksAdapter implements AgentAdapter {
? tc.args
: JSON.stringify(tc.args ?? {}),
},
+ ...(tc.thoughtSignature
+ ? { thoughtSignature: tc.thoughtSignature }
+ : {}),
}));
}
diff --git a/packages/appkit/src/agents/tests/databricks.test.ts b/packages/appkit/src/agents/tests/databricks.test.ts
index fd51bc0fc..84f0c6717 100644
--- a/packages/appkit/src/agents/tests/databricks.test.ts
+++ b/packages/appkit/src/agents/tests/databricks.test.ts
@@ -252,6 +252,189 @@ describe("DatabricksAdapter", () => {
expect(mockAuthenticate).toHaveBeenCalledTimes(2);
});
+ describe("Vertex/Gemini thoughtSignature pass-through", () => {
+ // Vertex AI's OpenAI-compatible surface attaches `thoughtSignature`
+ // on every function call emitted by Gemini 2.x/3.x models. The next
+ // request must echo it back verbatim on the assistant message's
+ // tool_calls or Vertex 400s with
+ // `INVALID_ARGUMENT: function call X is missing a thought_signature`.
+
+ function toolCallDeltaWithSig(opts: {
+ index: number;
+ id?: string;
+ name?: string;
+ args: string;
+ /**
+ * Vertex's on-the-wire spelling for Gemini 2.x/3.x function-calling
+ * responses (camelCase, top-level on the tool_call). Verified
+ * against `gemini-3.1-flash-lite-preview`.
+ */
+ sig?: string;
+ }): string {
+ return sseChunk(
+ JSON.stringify({
+ choices: [
+ {
+ delta: {
+ tool_calls: [
+ {
+ index: opts.index,
+ ...(opts.id && { id: opts.id }),
+ ...(opts.name && { type: "function" }),
+ function: {
+ ...(opts.name && { name: opts.name }),
+ arguments: opts.args,
+ },
+ ...(opts.sig && { thoughtSignature: opts.sig }),
+ },
+ ],
+ },
+ },
+ ],
+ }),
+ );
+ }
+
+ async function runUntilSecondRequest(chunks: string[]) {
+ const executeTool = vi.fn().mockResolvedValue({ ok: true });
+ let callCount = 0;
+ globalThis.fetch = vi.fn().mockImplementation(() => {
+ callCount++;
+ if (callCount === 1) {
+ return Promise.resolve({
+ ok: true,
+ body: createReadableStream(chunks),
+ });
+ }
+ return Promise.resolve({
+ ok: true,
+ body: createReadableStream([textDelta("done"), sseChunk("[DONE]")]),
+ });
+ });
+
+ const adapter = createAdapter();
+ for await (const _ of adapter.run(
+ {
+ messages: createTestMessages(),
+ tools: createTestTools(),
+ threadId: "t1",
+ },
+ { executeTool },
+ )) {
+ // drain
+ }
+ const [, secondInit] = (globalThis.fetch as any).mock.calls[1];
+ return JSON.parse(secondInit.body);
+ }
+
+ test("captures camelCase thoughtSignature from delta and echoes it on outbound", async () => {
+ // Real Vertex/Gemini wire shape, confirmed against
+ // `gemini-3.1-flash-lite-preview`. The outbound request carries
+ // back the same `thoughtSignature` Vertex sent, which is what the
+ // proxy validates against on the next turn.
+ const body = await runUntilSecondRequest([
+ toolCallDeltaWithSig({
+ index: 0,
+ id: "call_1",
+ name: "analytics__query",
+ args: '{"query":"SELECT 1"}',
+ sig: "sig-camel-abc123",
+ }),
+ sseChunk("[DONE]"),
+ ]);
+ expect(body.messages[1].tool_calls[0]).toEqual({
+ id: "call_1",
+ type: "function",
+ function: {
+ name: "analytics__query",
+ arguments: '{"query":"SELECT 1"}',
+ },
+ thoughtSignature: "sig-camel-abc123",
+ });
+ });
+
+ test("does NOT emit thoughtSignature when the model didn't send one", async () => {
+ // Non-Gemini endpoints (Claude, OpenAI, Llama) don't carry the
+ // field. Adapter must not invent one — that would break stricter
+ // models' tool_call shape validators on Databricks.
+ const body = await runUntilSecondRequest([
+ toolCallDeltaWithSig({
+ index: 0,
+ id: "call_1",
+ name: "analytics__query",
+ args: '{"query":"SELECT 1"}',
+ }),
+ sseChunk("[DONE]"),
+ ]);
+ const tc = body.messages[1].tool_calls[0];
+ expect(tc).not.toHaveProperty("thoughtSignature");
+ expect(tc).not.toHaveProperty("thought_signature");
+ });
+
+ test("buildMessages echoes persisted thoughtSignature on resumed threads", async () => {
+ // On thread resumption, the ToolCall.thoughtSignature stored in
+ // ThreadStore must reach the wire so the very first request of
+ // the new turn passes Vertex's signature check before any tool
+ // call even fires.
+ globalThis.fetch = mockFetch([textDelta("ok"), sseChunk("[DONE]")]);
+
+ const adapter = createAdapter();
+ const threadMessages: Message[] = [
+ { id: "1", role: "user", content: "First", createdAt: new Date() },
+ {
+ id: "2",
+ role: "assistant",
+ content: "",
+ createdAt: new Date(),
+ toolCalls: [
+ {
+ id: "call_1",
+ name: "analytics.query",
+ args: { query: "SELECT 1" },
+ thoughtSignature: "persisted-sig-456",
+ },
+ ],
+ },
+ {
+ id: "3",
+ role: "tool",
+ content: '{"rows":[]}',
+ createdAt: new Date(),
+ toolCallId: "call_1",
+ },
+ {
+ id: "4",
+ role: "user",
+ content: "Now what?",
+ createdAt: new Date(),
+ },
+ ];
+
+ for await (const _ of adapter.run(
+ {
+ messages: threadMessages,
+ tools: createTestTools(),
+ threadId: "t1",
+ },
+ { executeTool: vi.fn() },
+ )) {
+ // drain
+ }
+
+ const [, init] = (globalThis.fetch as any).mock.calls[0];
+ const body = JSON.parse(init.body);
+ expect(body.messages[1].tool_calls[0]).toEqual({
+ id: "call_1",
+ type: "function",
+ function: {
+ name: "analytics__query",
+ arguments: JSON.stringify({ query: "SELECT 1" }),
+ },
+ thoughtSignature: "persisted-sig-456",
+ });
+ });
+ });
+
test("text-parsed tool calls use wire names on follow-up requests", async () => {
const executeTool = vi.fn().mockResolvedValue({ ok: true });
let callCount = 0;
diff --git a/packages/appkit/src/beta.ts b/packages/appkit/src/beta.ts
index 3726ca178..3f5bba80c 100644
--- a/packages/appkit/src/beta.ts
+++ b/packages/appkit/src/beta.ts
@@ -38,6 +38,7 @@ export {
type HostedTool,
isFunctionTool,
isHostedTool,
+ type McpConnectAllResult,
mcpServer,
resolveHostedTools,
type ToolConfig,
diff --git a/packages/appkit/src/connectors/mcp/client.ts b/packages/appkit/src/connectors/mcp/client.ts
index 4c8d058b7..5b80997c9 100644
--- a/packages/appkit/src/connectors/mcp/client.ts
+++ b/packages/appkit/src/connectors/mcp/client.ts
@@ -34,6 +34,90 @@ import type { McpEndpointConfig } from "./types";
const logger = createLogger("connector:mcp");
+/**
+ * Hard cap on the size of a single MCP response body, including SSE
+ * frames bundled into one HTTP response. MCP `initialize` / `tools/list`
+ * / `tools/call` responses are JSON-RPC payloads — single-digit kilobytes
+ * in normal use. A response anywhere near this size signals either a
+ * misbehaving server or an attempt to exhaust client memory; we'd rather
+ * fail loudly than allocate unbounded buffers from a remote.
+ */
+const MCP_RESPONSE_BODY_LIMIT_BYTES = 1024 * 1024;
+
+/**
+ * Empty-object fallback used when an MCP server ships a missing or
+ * malformed `inputSchema`. Matches the shape downstream adapters expect
+ * for a function tool that takes no arguments.
+ */
+const EMPTY_TOOL_PARAMETERS: AgentToolDefinition["parameters"] = {
+ type: "object",
+ properties: {},
+};
+
+/**
+ * Coerce a remote MCP server's reported `inputSchema` into the
+ * JSONSchema7 shape AppKit's adapters expect for a function tool's
+ * `parameters`. The MCP wire type is `Record<string, unknown>`, so a
+ * misbehaving (or malicious) server could ship arbitrary JSON. We accept
+ * only the standard `{ type: "object", properties: {...} }` shape and
+ * fall back to an empty-parameters schema otherwise — the tool still
+ * registers, it just can't accept arguments.
+ */
+function coerceToolParameters(
+ inputSchema: Record<string, unknown> | undefined,
+): AgentToolDefinition["parameters"] {
+ if (!inputSchema || typeof inputSchema !== "object") {
+ return EMPTY_TOOL_PARAMETERS;
+ }
+ const { type, properties } = inputSchema;
+ if (type !== "object") return EMPTY_TOOL_PARAMETERS;
+ if (
+ properties !== undefined &&
+ (typeof properties !== "object" ||
+ properties === null ||
+ Array.isArray(properties))
+ ) {
+ return EMPTY_TOOL_PARAMETERS;
+ }
+ return inputSchema as AgentToolDefinition["parameters"];
+}
+
+/**
+ * Read a fetch Response body into a string with a hard size cap. Stops
+ * reading and throws once the cumulative bytes read exceed `maxBytes`
+ * (the caller passes {@link MCP_RESPONSE_BODY_LIMIT_BYTES}); the reader
+ * lock is always released. `contextLabel` is only used in the error
+ * message. Returns the empty string when the response has no readable body.
+ */
+async function readResponseTextCapped(
+ response: Response,
+ maxBytes: number,
+ contextLabel: string,
+): Promise<string> {
+ if (!response.body) return "";
+ const reader = response.body.getReader();
+ const decoder = new TextDecoder("utf-8");
+ let total = 0;
+ let out = "";
+ try {
+ while (true) {
+ const { done, value } = await reader.read();
+ if (done) break;
+ total += value.byteLength;
+ if (total > maxBytes) {
+ throw new Error(
+ `MCP ${contextLabel}: response body exceeded ${maxBytes} bytes — refusing to allocate unbounded buffer from a remote server.`,
+ );
+ }
+ out += decoder.decode(value, { stream: true });
+ }
+ out += decoder.decode();
+ } finally {
+ reader.releaseLock();
+ }
+ return out;
+}
+
interface JsonRpcRequest {
jsonrpc: "2.0";
id: number;
@@ -59,6 +143,16 @@ interface McpToolCallResult {
isError?: boolean;
}
+/**
+ * Per-endpoint outcome of {@link AppKitMcpClient.connectAll}. Callers (the
+ * agents plugin in particular) use the split to warn at startup when some
+ * MCP servers are unreachable without aborting boot for the rest.
+ */
+export interface McpConnectAllResult {
+ connected: string[];
+ failed: Array<{ name: string; error: Error }>;
+}
+
interface McpServerConnection {
config: McpEndpointConfig;
resolvedUrl: string;
@@ -100,19 +194,39 @@ export class AppKitMcpClient {
private options: { dnsLookup?: DnsLookup; fetchImpl?: typeof fetch } = {},
) {}
- async connectAll(endpoints: McpEndpointConfig[]): Promise<void> {
+ /**
+ * Connects every endpoint in parallel and returns a structured summary so
+ * callers can distinguish "all connected" from "some failed".
+ *
+ * Returning the result instead of throwing is deliberate: one
+ * misconfigured MCP server should not take down the entire agents plugin
+ * at boot, and the agents plugin uses the summary to warn at startup with
+ * the failed-endpoint names. Errors are also logged here so a caller
+ * that ignores the return still gets per-endpoint diagnostics.
+ *
+ * @returns `connected` lists the endpoint names that initialised
+ * successfully; `failed` carries `{ name, error }` for the rest.
+ */
+ async connectAll(
+ endpoints: McpEndpointConfig[],
+ ): Promise<McpConnectAllResult> {
const results = await Promise.allSettled(
endpoints.map((ep) => this.connect(ep)),
);
+ const out: McpConnectAllResult = { connected: [], failed: [] };
for (let i = 0; i < results.length; i++) {
- if (results[i].status === "rejected") {
- logger.error(
- "Failed to connect MCP server %s: %O",
- endpoints[i].name,
- (results[i] as PromiseRejectedResult).reason,
- );
+ const r = results[i];
+ const name = endpoints[i].name;
+ if (r.status === "fulfilled") {
+ out.connected.push(name);
+ } else {
+ const error =
+ r.reason instanceof Error ? r.reason : new Error(String(r.reason));
+ logger.error("Failed to connect MCP server %s: %O", name, error);
+ out.failed.push({ name, error });
}
}
+ return out;
}
private resolveUrl(endpoint: McpEndpointConfig): string {
@@ -201,11 +315,7 @@ export class AppKitMcpClient {
defs.push({
name: `mcp.${serverName}.${toolName}`,
description: schema.description ?? toolName,
- parameters:
- (schema.inputSchema as AgentToolDefinition["parameters"]) ?? {
- type: "object",
- properties: {},
- },
+ parameters: coerceToolParameters(schema.inputSchema),
});
}
}
@@ -261,18 +371,22 @@ export class AppKitMcpClient {
);
const result = rpcResult.result as McpToolCallResult;
+ // `text` is optional on `McpToolCallResult.content[]` per the MCP
+ // spec; filtering only on `type === "text"` lets `c.text` be
+ // `undefined`, which `Array.join` would render as the literal
+ // string `"undefined"` and ship to the agent. Narrow on both
+ // fields so the joined string only contains real text.
+ const textContent = (result.content ?? []).filter(
+ (c): c is { type: "text"; text: string } =>
+ c.type === "text" && typeof c.text === "string",
+ );
+
if (result.isError) {
- const errText = (result.content ?? [])
- .filter((c) => c.type === "text")
- .map((c) => c.text)
- .join("\n");
+ const errText = textContent.map((c) => c.text).join("\n");
throw new Error(errText || "MCP tool call failed");
}
- return (result.content ?? [])
- .filter((c) => c.type === "text")
- .map((c) => c.text)
- .join("\n");
+ return textContent.map((c) => c.text).join("\n");
}
async close(): Promise {
@@ -334,11 +448,20 @@ export class AppKitMcpClient {
}
const contentType = response.headers.get("content-type") ?? "";
+ // Always read the body via the capped helper so a misconfigured or
+ // malicious server can't exhaust client memory by streaming an
+ // unbounded payload. Applies to both SSE (`response.text()` would
+ // have buffered the whole stream) and plain JSON (`response.json()`
+ // does the same internally).
+ const bodyText = await readResponseTextCapped(
+ response,
+ MCP_RESPONSE_BODY_LIMIT_BYTES,
+ method,
+ );
let json: JsonRpcResponse;
if (contentType.includes("text/event-stream")) {
- const text = await response.text();
- const lastData = text
+ const lastData = bodyText
.split("\n")
.filter((line) => line.startsWith("data: "))
.map((line) => line.slice(6))
@@ -348,7 +471,10 @@ export class AppKitMcpClient {
}
json = JSON.parse(lastData) as JsonRpcResponse;
} else {
- json = (await response.json()) as JsonRpcResponse;
+ if (bodyText.length === 0) {
+ throw new Error(`MCP response for ${method} had an empty body`);
+ }
+ json = JSON.parse(bodyText) as JsonRpcResponse;
}
if (json.error) {
@@ -380,12 +506,36 @@ export class AppKitMcpClient {
}
const fetchImpl = this.options.fetchImpl ?? fetch;
- await fetchImpl(url, {
- method: "POST",
- headers,
- body: JSON.stringify({ jsonrpc: "2.0", method }),
- signal: AbortSignal.timeout(30_000),
- });
+ // MCP notifications are fire-and-forget per spec — we don't throw on
+ // failure. But silently swallowing 4xx/5xx hides server-side
+ // rejections that would otherwise look like a successful connect()
+ // followed by mysterious tool-call failures. Surface the bad status
+ // via the logger so the dev sees it without breaking the protocol
+ // contract.
+ try {
+ const response = await fetchImpl(url, {
+ method: "POST",
+ headers,
+ body: JSON.stringify({ jsonrpc: "2.0", method }),
+ signal: AbortSignal.timeout(30_000),
+ });
+ if (!response.ok) {
+ logger.warn(
+ "MCP notification %s to %s returned %d %s — the server may have rejected the request, but per MCP spec notifications are fire-and-forget and the connection is considered established.",
+ method,
+ url,
+ response.status,
+ response.statusText,
+ );
+ }
+ } catch (err) {
+ logger.warn(
+ "MCP notification %s to %s failed before a response was received: %O",
+ method,
+ url,
+ err,
+ );
+ }
}
/**
diff --git a/packages/appkit/src/connectors/mcp/index.ts b/packages/appkit/src/connectors/mcp/index.ts
index f9f32a418..5d0a69d79 100644
--- a/packages/appkit/src/connectors/mcp/index.ts
+++ b/packages/appkit/src/connectors/mcp/index.ts
@@ -1,4 +1,4 @@
-export { AppKitMcpClient } from "./client";
+export { AppKitMcpClient, type McpConnectAllResult } from "./client";
export {
buildMcpHostPolicy,
type McpHostPolicyConfig,
diff --git a/packages/appkit/src/connectors/mcp/tests/client.test.ts b/packages/appkit/src/connectors/mcp/tests/client.test.ts
index 0cdffa291..839d14f95 100644
--- a/packages/appkit/src/connectors/mcp/tests/client.test.ts
+++ b/packages/appkit/src/connectors/mcp/tests/client.test.ts
@@ -182,6 +182,128 @@ describe("AppKitMcpClient — host allowlist", () => {
});
});
+describe("AppKitMcpClient — connectAll partial failures", () => {
+ // `connectAll` used to return `void` after logging per-endpoint errors,
+ // so callers couldn't distinguish "all servers up" from "one of three
+ // failed". The structured return surfaces both sides of that split for
+ // the agents plugin to render a single aggregate warning at boot.
+
+ function successResponders() {
+ return [
+ () =>
+ jsonResponse(
+ { jsonrpc: "2.0", id: 1, result: {} },
+ { "mcp-session-id": "sess" },
+ ),
+ () => jsonResponse({ jsonrpc: "2.0", result: null }),
+ () =>
+ jsonResponse({
+ jsonrpc: "2.0",
+ id: 3,
+ result: { tools: [{ name: "t", description: "t" }] },
+ }),
+ ];
+ }
+
+ test("reports every successful endpoint by name with no failures", async () => {
+ const { fetchImpl } = recordingFetch([
+ ...successResponders(),
+ ...successResponders(),
+ ]);
+ const client = new AppKitMcpClient(
+ WORKSPACE,
+ workspaceAuth,
+ workspacePolicy,
+ {
+ fetchImpl,
+ dnsLookup: publicDnsLookup,
+ },
+ );
+ const result = await client.connectAll([
+ { name: "alpha", url: `${WORKSPACE}/api/2.0/mcp/alpha` },
+ { name: "beta", url: `${WORKSPACE}/api/2.0/mcp/beta` },
+ ]);
+ expect(result.connected.sort()).toEqual(["alpha", "beta"]);
+ expect(result.failed).toEqual([]);
+ });
+
+ test("isolates a failing endpoint and keeps the rest connected", async () => {
+ // First endpoint succeeds; the second is rejected by host policy
+ // before any fetch fires. The third succeeds. Without the split
+ // return, the caller couldn't tell which endpoints booted.
+ const { fetchImpl } = recordingFetch([
+ ...successResponders(),
+ ...successResponders(),
+ ]);
+ const client = new AppKitMcpClient(
+ WORKSPACE,
+ workspaceAuth,
+ workspacePolicy,
+ {
+ fetchImpl,
+ dnsLookup: publicDnsLookup,
+ },
+ );
+ const result = await client.connectAll([
+ { name: "ok-1", url: `${WORKSPACE}/api/2.0/mcp/ok-1` },
+ { name: "blocked", url: "https://blocked.example.com/mcp" },
+ { name: "ok-2", url: `${WORKSPACE}/api/2.0/mcp/ok-2` },
+ ]);
+
+ expect(result.connected.sort()).toEqual(["ok-1", "ok-2"]);
+ expect(result.failed).toHaveLength(1);
+ expect(result.failed[0].name).toBe("blocked");
+ expect(result.failed[0].error).toBeInstanceOf(Error);
+ expect(result.failed[0].error.message).toMatch(/blocked/);
+ });
+
+ test("handles all-failed without throwing — caller decides how to react", async () => {
+ // Both endpoints rejected at policy time → no fetches happen.
+ const { fetchImpl, calls } = recordingFetch([]);
+ const client = new AppKitMcpClient(
+ WORKSPACE,
+ workspaceAuth,
+ workspacePolicy,
+ {
+ fetchImpl,
+ dnsLookup: publicDnsLookup,
+ },
+ );
+ const result = await client.connectAll([
+ { name: "x", url: "https://x.example.com/mcp" },
+ { name: "y", url: "https://y.example.com/mcp" },
+ ]);
+ expect(calls).toHaveLength(0);
+ expect(result.connected).toEqual([]);
+ expect(result.failed.map((f) => f.name).sort()).toEqual(["x", "y"]);
+ });
+
+ test("wraps non-Error rejection reasons so callers get a real Error", async () => {
+ // Force a non-Error throw via a custom fetch that rejects with a
+ // string. Real-world failures already throw Error, but the wrapper
+ // protects against odd transports that throw scalars.
+ const fetchImpl: typeof fetch = async () => {
+ throw "boom-as-string";
+ };
+ const client = new AppKitMcpClient(
+ WORKSPACE,
+ workspaceAuth,
+ workspacePolicy,
+ {
+ fetchImpl,
+ dnsLookup: publicDnsLookup,
+ },
+ );
+ const result = await client.connectAll([
+ { name: "weird", url: `${WORKSPACE}/api/2.0/mcp/weird` },
+ ]);
+ expect(result.connected).toEqual([]);
+ expect(result.failed).toHaveLength(1);
+ expect(result.failed[0].error).toBeInstanceOf(Error);
+ expect(result.failed[0].error.message).toContain("boom-as-string");
+ });
+});
+
describe("AppKitMcpClient — callTool auth scoping", () => {
test("drops caller-supplied OBO token when destination is not workspace-origin", async () => {
const connectResponders = [
@@ -400,3 +522,182 @@ describe("AppKitMcpClient — caller abort signal composition", () => {
expect(error.name).toBe("AbortError");
});
});
+
+describe("AppKitMcpClient — callTool result hardening", () => {
+ let authSpy: ReturnType<typeof vi.fn>; // re-created before every test
+
+ beforeEach(() => {
+ authSpy = vi.fn(workspaceAuth);
+ });
+
+ async function connectAndCall(
+ callResult: unknown,
+ ): Promise<{ result: string }> {
+ const connectResponders = [
+ () =>
+ jsonResponse(
+ { jsonrpc: "2.0", id: 1, result: {} },
+ { "mcp-session-id": "sess-1" },
+ ),
+ () => jsonResponse({ jsonrpc: "2.0", result: null }),
+ () =>
+ jsonResponse({
+ jsonrpc: "2.0",
+ id: 3,
+ result: { tools: [{ name: "tool" }] },
+ }),
+ ];
+ const callResponder = () =>
+ jsonResponse({ jsonrpc: "2.0", id: 4, result: callResult });
+
+ const { fetchImpl } = recordingFetch([...connectResponders, callResponder]);
+ const client = new AppKitMcpClient(WORKSPACE, authSpy, workspacePolicy, {
+ fetchImpl,
+ dnsLookup: publicDnsLookup,
+ });
+ await client.connect({
+ name: "srv",
+ url: `${WORKSPACE}/api/2.0/mcp/genie/abc`,
+ });
+ const result = await client.callTool("mcp.srv.tool", {}, undefined);
+ return { result };
+ }
+
+ test("filters content entries whose text is undefined (regression: 'undefined' literal in joined output)", async () => {
+ // McpToolCallResult.content[i].text is optional. Previously the
+ // filter only checked `type === "text"`, so an entry like
+ // { type: "text" } (text undefined) flowed through, and
+ // `Array.join('\n')` emitted the literal string "undefined".
+ const { result } = await connectAndCall({
+ content: [
+ { type: "text", text: "first line" },
+ { type: "text" },
+ { type: "text", text: "second line" },
+ { type: "image", data: "..." },
+ ],
+ });
+ expect(result).toBe("first line\nsecond line");
+ expect(result).not.toContain("undefined");
+ });
+
+ test("filters undefined text on the error path too", async () => {
+ await expect(
+ connectAndCall({
+ isError: true,
+ content: [{ type: "text" }, { type: "text", text: "boom" }],
+ }),
+ ).rejects.toThrow(/^boom$/);
+ });
+
+ test("error with no text content falls back to a generic message", async () => {
+ await expect(
+ connectAndCall({
+ isError: true,
+ content: [{ type: "text" }, { type: "image", data: "..." }],
+ }),
+ ).rejects.toThrow(/MCP tool call failed/);
+ });
+});
+
+describe("AppKitMcpClient — response body size cap", () => {
+ let authSpy: ReturnType<typeof vi.fn>; // re-created before every test
+
+ beforeEach(() => {
+ authSpy = vi.fn(workspaceAuth);
+ });
+
+ test("rejects an unbounded response body (1 MB cap)", async () => {
+ // Mimic a server streaming forever: each `read()` returns another
+ // 64 KB chunk. The capped reader must abort once it crosses the
+ // 1 MB limit rather than buffer indefinitely.
+ const oversizedBody = new ReadableStream({
+ start(controller) {
+ const chunk = new Uint8Array(64 * 1024).fill(0x41); // 'A'
+ let pushed = 0;
+ const maxChunks = 32; // 32 * 64KiB = 2 MiB, well above the 1 MiB cap
+ const id = setInterval(() => {
+ controller.enqueue(chunk);
+ pushed++;
+ if (pushed >= maxChunks) {
+ clearInterval(id);
+ controller.close();
+ }
+ }, 0);
+ },
+ });
+ const oversizedResponse = new Response(oversizedBody, {
+ status: 200,
+ headers: { "content-type": "application/json" },
+ });
+
+ const connectResponders = [
+ () =>
+ jsonResponse(
+ { jsonrpc: "2.0", id: 1, result: {} },
+ { "mcp-session-id": "sess-1" },
+ ),
+ () => jsonResponse({ jsonrpc: "2.0", result: null }),
+ () => oversizedResponse,
+ ];
+ const { fetchImpl } = recordingFetch(connectResponders);
+ const client = new AppKitMcpClient(WORKSPACE, authSpy, workspacePolicy, {
+ fetchImpl,
+ dnsLookup: publicDnsLookup,
+ });
+
+ await expect(
+ client.connect({
+ name: "evil",
+ url: `${WORKSPACE}/api/2.0/mcp/genie/abc`,
+ }),
+ ).rejects.toThrow(/exceeded 1048576 bytes/);
+ });
+});
+
+describe("AppKitMcpClient — sendNotification HTTP error surfacing", () => {
+ let authSpy: ReturnType<typeof vi.fn>; // re-created before every test
+
+ beforeEach(() => {
+ authSpy = vi.fn(workspaceAuth);
+ });
+
+ test("connect succeeds even when notifications/initialized returns 4xx (fire-and-forget per spec)", async () => {
+ // The MCP spec says notifications are fire-and-forget. We must not
+ // throw, and connect() must return normally; the regression we're
+ // guarding is that the failure shouldn't silently appear as a clean
+ // connect from the dev's perspective (the warning log surfaces it).
+ const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {});
+ try {
+ const { fetchImpl } = recordingFetch([
+ () =>
+ jsonResponse(
+ { jsonrpc: "2.0", id: 1, result: {} },
+ { "mcp-session-id": "sess-1" },
+ ),
+ () =>
+ new Response("bad request", {
+ status: 400,
+ statusText: "Bad Request",
+ }),
+ () =>
+ jsonResponse({
+ jsonrpc: "2.0",
+ id: 3,
+ result: { tools: [{ name: "tool" }] },
+ }),
+ ]);
+ const client = new AppKitMcpClient(WORKSPACE, authSpy, workspacePolicy, {
+ fetchImpl,
+ dnsLookup: publicDnsLookup,
+ });
+ await expect(
+ client.connect({
+ name: "srv",
+ url: `${WORKSPACE}/api/2.0/mcp/genie/abc`,
+ }),
+ ).resolves.not.toThrow();
+ } finally {
+ warnSpy.mockRestore();
+ }
+ });
+});
diff --git a/packages/appkit/src/core/agent/load-agents.ts b/packages/appkit/src/core/agent/load-agents.ts
index 3cb694151..13b2ff70d 100644
--- a/packages/appkit/src/core/agent/load-agents.ts
+++ b/packages/appkit/src/core/agent/load-agents.ts
@@ -24,7 +24,12 @@ export interface LoadContext {
defaultModel?: AgentAdapter | Promise | string;
/** Ambient tool library referenced by frontmatter `tools: [key1, key2]`. */
availableTools?: Record;
- /** Registered plugin toolkits referenced by frontmatter `toolkits: [...]`. */
+ /**
+ * Registered plugin toolkits referenced by `plugin:NAME` entries in the
+ * unified `tools:` frontmatter list. Keyed by plugin name; each value
+ * exposes the same `toolkit(opts?)` surface as the `plugins` argument to
+ * `tools(plugins) => Record<...>` in the code form.
+ */
plugins?: Map;
/**
* Code-defined agents contributed by `agents({ agents: { ... } })`. The
@@ -46,8 +51,21 @@ export interface LoadResult {
interface Frontmatter {
endpoint?: string;
model?: string;
- toolkits?: ToolkitSpec[];
- tools?: string[];
+ /**
+ * Unified tool list. Each entry is one of:
+ *
+ * - **`plugin:NAME`** (string) — pull every tool from the named plugin.
+ * - **`plugin:NAME: [tool1, tool2]`** — pull only the listed tools
+ * (shorthand for `{ only: [...] }`).
+ * - **`plugin:NAME: { ...ToolkitOptions }`** — pass full
+ * `prefix` / `only` / `except` / `rename` options.
+ * - **`TOOL_NAME`** (string, no `plugin:` prefix) — ambient tool name
+ * resolved against the `agents({ tools: { ... } })` config.
+ *
+ * Mirrors the TS function form `tools(plugins) { ... }` where plugin
+ * tools and inline tools live in the same record.
+ */
+ tools?: FrontmatterToolEntry[];
/**
* Other agent ids to expose as sub-agents. Each becomes an `agent-`
* tool at runtime. Resolution happens at directory-load time in
@@ -62,7 +80,23 @@ interface Frontmatter {
ephemeral?: boolean;
}
-type ToolkitSpec = string | { [pluginName: string]: ToolkitOptions | string[] };
+/**
+ * Each item in {@link Frontmatter.tools}. Strings are either ambient tool
+ * names (no prefix) or bare plugin references (`plugin:NAME`). Objects are
+ * single-key mappings whose key is `plugin:NAME` and whose value is either
+ * an array of local tool names (sugar for `{ only: [...] }`) or a full
+ * `ToolkitOptions` record.
+ *
+ * Named `FrontmatterToolEntry` to avoid colliding with the exported
+ * `ToolEntry` from `tools/define-tool.ts` — that is the plugin-author API
+ * surface (`defineTool({ ... }) : ToolEntry`); this is the frontmatter
+ * parse type. They are unrelated and live in different layers.
+ */
+type FrontmatterToolEntry =
+ | string
+ | { [key: string]: ToolkitOptions | string[] };
+
+const PLUGIN_PREFIX = "plugin:";
/**
* Derives the logical agent id from a markdown path. When the file is named
@@ -82,7 +116,6 @@ export function agentIdFromMarkdownPath(filePath: string): string {
const ALLOWED_KEYS = new Set([
"endpoint",
"model",
- "toolkits",
"tools",
"agents",
"maxSteps",
@@ -345,73 +378,125 @@ function resolveFrontmatterTools(
const out: Record = {};
const pluginIdx = ctx.plugins ?? new Map();
- for (const spec of fm.toolkits ?? []) {
- const [pluginName, opts] = parseToolkitSpec(spec, filePath, agentName);
- const provider = pluginIdx.get(pluginName);
- if (!provider) {
- throw new Error(
- `Agent '${agentName}' (${filePath}) references toolkit '${pluginName}', but plugin '${pluginName}' is not registered. Available: ${
+ for (const entry of fm.tools ?? []) {
+ const parsed = parseToolEntry(entry, filePath, agentName);
+ if (parsed.kind === "plugin") {
+ const provider = pluginIdx.get(parsed.pluginName);
+ if (!provider) {
+ const available =
pluginIdx.size > 0
? Array.from(pluginIdx.keys()).join(", ")
- : ""
- }`,
- );
- }
- const entries = provider.toolkit(opts) as Record;
- for (const [key, entry] of Object.entries(entries)) {
- if (!isToolkitEntry(entry)) {
+ : "";
throw new Error(
- `Plugin '${pluginName}'.toolkit() returned a value at key '${key}' that is not a ToolkitEntry`,
+ `Agent '${agentName}' (${filePath}) references 'plugin:${parsed.pluginName}', but plugin '${parsed.pluginName}' is not registered. Available: ${available}`,
);
}
- out[key] = entry as ToolkitEntry;
- }
- }
-
- for (const key of fm.tools ?? []) {
- const tool = ctx.availableTools?.[key];
- if (!tool) {
- const available = ctx.availableTools
- ? Object.keys(ctx.availableTools).join(", ")
- : "";
- throw new Error(
- `Agent '${agentName}' (${filePath}) references tool '${key}', which is not in the agents() plugin's tools field. Available: ${available}`,
- );
+ const entries = provider.toolkit(parsed.opts) as Record;
+ for (const [key, value] of Object.entries(entries)) {
+ if (!isToolkitEntry(value)) {
+ throw new Error(
+ `Plugin '${parsed.pluginName}'.toolkit() returned a value at key '${key}' that is not a ToolkitEntry`,
+ );
+ }
+ out[key] = value as ToolkitEntry;
+ }
+ } else {
+ const tool = ctx.availableTools?.[parsed.toolName];
+ if (!tool) {
+ const available = ctx.availableTools
+ ? Object.keys(ctx.availableTools).join(", ")
+ : "";
+ throw new Error(
+ `Agent '${agentName}' (${filePath}) references ambient tool '${parsed.toolName}', which is not in the agents() plugin's tools field. Available: ${available}. ` +
+ "If you meant to reference a plugin, use the 'plugin:NAME' prefix.",
+ );
+ }
+ out[parsed.toolName] = tool;
}
- out[key] = tool;
}
return out;
}
-function parseToolkitSpec(
- spec: ToolkitSpec,
+type ParsedToolEntry =
+ | { kind: "plugin"; pluginName: string; opts: ToolkitOptions | undefined }
+ | { kind: "ambient"; toolName: string };
+
+/**
+ * Classify one item in the `tools:` frontmatter list into either a plugin
+ * reference (with optional ToolkitOptions) or an ambient tool lookup.
+ *
+ * Strings starting with `plugin:` are bare plugin references. Strings
+ * without the prefix are ambient tool names. Object entries are
+ * single-key mappings keyed by `plugin:NAME`; the value is either an
+ * array (sugar for `{ only: [...] }`) or a full `ToolkitOptions` record.
+ */
+function parseToolEntry(
+ entry: FrontmatterToolEntry,
filePath: string,
agentName: string,
-): [string, ToolkitOptions | undefined] {
- if (typeof spec === "string") {
- return [spec, undefined];
+): ParsedToolEntry {
+ if (typeof entry === "string") {
+ if (entry.startsWith(PLUGIN_PREFIX)) {
+ const pluginName = entry.slice(PLUGIN_PREFIX.length);
+ if (pluginName.length === 0) {
+ throw new Error(
+ `Agent '${agentName}' (${filePath}) has an empty plugin name in 'plugin:'.`,
+ );
+ }
+ return { kind: "plugin", pluginName, opts: undefined };
+ }
+ if (entry.length === 0) {
+ throw new Error(
+ `Agent '${agentName}' (${filePath}) has an empty string in 'tools:'.`,
+ );
+ }
+ return { kind: "ambient", toolName: entry };
}
- if (typeof spec !== "object" || spec === null) {
+ if (typeof entry !== "object" || entry === null) {
throw new Error(
- `Agent '${agentName}' (${filePath}) has invalid toolkit entry: ${JSON.stringify(spec)}`,
+ `Agent '${agentName}' (${filePath}) has invalid 'tools:' entry: ${JSON.stringify(entry)}`,
);
}
- const keys = Object.keys(spec);
+ const keys = Object.keys(entry);
if (keys.length !== 1) {
throw new Error(
- `Agent '${agentName}' (${filePath}) toolkit entry must have exactly one key, got: ${keys.join(", ")}`,
+ `Agent '${agentName}' (${filePath}) 'tools:' object entry must have exactly one key, got: ${keys.join(", ")}`,
+ );
+ }
+ const key = keys[0];
+ // Bare `- plugin:` (no name after the colon) parses as a mapping with the
+ // key `"plugin"`. Catch that as a friendly error rather than dumping it
+ // through the generic "expected key 'plugin:NAME'" branch.
+ if (key === "plugin") {
+ throw new Error(
+ `Agent '${agentName}' (${filePath}) has an empty plugin name in 'plugin:'.`,
+ );
+ }
+ if (!key.startsWith(PLUGIN_PREFIX)) {
+ throw new Error(
+ `Agent '${agentName}' (${filePath}) 'tools:' object entries are reserved for plugin references; expected key 'plugin:NAME', got '${key}'. ` +
+ "Use a bare string for ambient tools (e.g. `- get_weather`).",
+ );
+ }
+ const pluginName = key.slice(PLUGIN_PREFIX.length);
+ if (pluginName.length === 0) {
+ throw new Error(
+ `Agent '${agentName}' (${filePath}) has an empty plugin name in 'plugin:'.`,
);
}
- const pluginName = keys[0];
- const value = spec[pluginName];
+ const value = entry[key];
if (Array.isArray(value)) {
- return [pluginName, { only: value }];
+ return { kind: "plugin", pluginName, opts: { only: value } };
}
if (typeof value === "object" && value !== null) {
- return [pluginName, value as ToolkitOptions];
+ return {
+ kind: "plugin",
+ pluginName,
+ opts: value as ToolkitOptions,
+ };
}
throw new Error(
- `Agent '${agentName}' (${filePath}) toolkit '${pluginName}' options must be an array of tool names or an options object`,
+ `Agent '${agentName}' (${filePath}) 'plugin:${pluginName}' options must be an array of tool names or a ToolkitOptions object.`,
);
}
diff --git a/packages/appkit/src/core/agent/tests/build-toolkit.test.ts b/packages/appkit/src/core/agent/tests/build-toolkit.test.ts
index 08f71da9b..2ab9eb6c4 100644
--- a/packages/appkit/src/core/agent/tests/build-toolkit.test.ts
+++ b/packages/appkit/src/core/agent/tests/build-toolkit.test.ts
@@ -8,12 +8,12 @@ const registry: ToolRegistry = {
query: defineTool({
description: "Run a query",
schema: z.object({ sql: z.string() }),
- handler: () => "ok",
+ execute: () => "ok",
}),
history: defineTool({
description: "Get query history",
schema: z.object({}),
- handler: () => [],
+ execute: () => [],
}),
};
@@ -79,18 +79,18 @@ describe("buildToolkitEntries", () => {
description: "safe read",
schema: z.object({}),
autoInheritable: true,
- handler: () => "ok",
+ execute: () => "ok",
}),
writeIt: defineTool({
description: "unsafe write",
schema: z.object({}),
autoInheritable: false,
- handler: () => "ok",
+ execute: () => "ok",
}),
unmarked: defineTool({
description: "default: not auto-inheritable",
schema: z.object({}),
- handler: () => "ok",
+ execute: () => "ok",
}),
};
const entries = buildToolkitEntries("p", mixed);
diff --git a/packages/appkit/src/core/agent/tests/define-tool.test.ts b/packages/appkit/src/core/agent/tests/define-tool.test.ts
index ef61e8c4b..829a29715 100644
--- a/packages/appkit/src/core/agent/tests/define-tool.test.ts
+++ b/packages/appkit/src/core/agent/tests/define-tool.test.ts
@@ -13,12 +13,12 @@ describe("defineTool()", () => {
description: "echo",
schema: z.object({ msg: z.string() }),
annotations: { readOnly: true },
- handler: ({ msg }) => msg,
+ execute: ({ msg }) => msg,
});
expect(entry.description).toBe("echo");
expect(entry.annotations).toEqual({ readOnly: true });
- expect(typeof entry.handler).toBe("function");
+ expect(typeof entry.execute).toBe("function");
});
});
@@ -27,11 +27,11 @@ describe("executeFromRegistry", () => {
echo: defineTool({
description: "echo",
schema: z.object({ msg: z.string() }),
- handler: ({ msg }) => `got ${msg}`,
+ execute: ({ msg }) => `got ${msg}`,
}),
};
- test("validates args and calls handler on success", async () => {
+ test("validates args and calls execute on success", async () => {
const result = await executeFromRegistry(registry, "echo", { msg: "hi" });
expect(result).toBe("got hi");
});
@@ -49,15 +49,15 @@ describe("executeFromRegistry", () => {
);
});
- test("forwards AbortSignal to the handler", async () => {
- const handler = vi.fn(async (_args: { x: string }, signal?: AbortSignal) =>
+ test("forwards AbortSignal to the execute callback", async () => {
+ const execute = vi.fn(async (_args: { x: string }, signal?: AbortSignal) =>
signal?.aborted ? "aborted" : "ok",
);
const reg: ToolRegistry = {
t: defineTool({
description: "t",
schema: z.object({ x: z.string() }),
- handler,
+ execute,
}),
};
@@ -65,8 +65,8 @@ describe("executeFromRegistry", () => {
controller.abort();
await executeFromRegistry(reg, "t", { x: "hi" }, controller.signal);
- expect(handler).toHaveBeenCalledTimes(1);
- expect(handler.mock.calls[0][1]).toBe(controller.signal);
+ expect(execute).toHaveBeenCalledTimes(1);
+ expect(execute.mock.calls[0][1]).toBe(controller.signal);
});
});
@@ -79,7 +79,7 @@ describe("toolsFromRegistry", () => {
query: z.string().describe("SQL query"),
}),
annotations: { readOnly: true, requiresUserContext: true },
- handler: () => "ok",
+ execute: () => "ok",
}),
};
@@ -105,12 +105,12 @@ describe("toolsFromRegistry", () => {
"uploads.list": defineTool({
description: "list uploads",
schema: z.object({}),
- handler: () => [],
+ execute: () => [],
}),
"documents.list": defineTool({
description: "list documents",
schema: z.object({}),
- handler: () => [],
+ execute: () => [],
}),
};
@@ -124,7 +124,7 @@ describe("toolsFromRegistry", () => {
plain: defineTool({
description: "plain",
schema: z.object({}),
- handler: () => "ok",
+ execute: () => "ok",
}),
};
const [def] = toolsFromRegistry(registry);
diff --git a/packages/appkit/src/core/agent/tests/function-tool.test.ts b/packages/appkit/src/core/agent/tests/function-tool.test.ts
index 8e668d69e..2f8788b3e 100644
--- a/packages/appkit/src/core/agent/tests/function-tool.test.ts
+++ b/packages/appkit/src/core/agent/tests/function-tool.test.ts
@@ -47,10 +47,14 @@ describe("isFunctionTool", () => {
expect(isFunctionTool({ type: "function", name: "x" })).toBe(false);
});
- test("returns false when name is missing", () => {
- expect(isFunctionTool({ type: "function", execute: () => "y" })).toBe(
- false,
- );
+ test("returns true when name is omitted (record key wins downstream)", () => {
+ // Regression: previously `tool({ description, schema, execute })` (no
+ // name) produced a FunctionTool whose `name: undefined` failed this
+ // guard and broke registration with "unrecognized shape". The agents
+ // plugin always overrides `name` with the record key from
+ // `tools: { my_tool: tool({...}) }`, so requiring `name` here was
+ // rejecting valid input.
+ expect(isFunctionTool({ type: "function", execute: () => "y" })).toBe(true);
});
});
diff --git a/packages/appkit/src/core/agent/tests/load-agents.test.ts b/packages/appkit/src/core/agent/tests/load-agents.test.ts
index aa87a841a..195a31796 100644
--- a/packages/appkit/src/core/agent/tests/load-agents.test.ts
+++ b/packages/appkit/src/core/agent/tests/load-agents.test.ts
@@ -63,11 +63,15 @@ describe("parseFrontmatter", () => {
expect(content).toBe("Hello body");
});
- test("parses nested arrays", () => {
+ test("parses nested arrays in unified tools list", () => {
const { data } = parseFrontmatter(
- "---\ntoolkits:\n - analytics\n - files: [uploads.list]\n---\nbody",
+ "---\ntools:\n - plugin:analytics\n - plugin:files: [uploads.list]\n - get_weather\n---\nbody",
);
- expect(data?.toolkits).toEqual(["analytics", { files: ["uploads.list"] }]);
+ expect(data?.tools).toEqual([
+ "plugin:analytics",
+ { "plugin:files": ["uploads.list"] },
+ "get_weather",
+ ]);
});
test("returns null data when no frontmatter", () => {
@@ -146,29 +150,30 @@ describe("loadAgentsFromDir", () => {
test("throws when frontmatter references an unregistered plugin", async () => {
writeAgent(
"broken",
- "---\nendpoint: e\ntoolkits: [missing]\n---\nBroken agent.",
+ "---\nendpoint: e\ntools:\n - plugin:missing\n---\nBroken agent.",
);
await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow(
- /references toolkit 'missing'/,
+ /references 'plugin:missing'/,
);
});
test("throws when frontmatter references an unknown ambient tool", async () => {
writeAgent(
"broken",
- "---\nendpoint: e\ntools: [unknown_tool]\n---\nBroken.",
+ "---\nendpoint: e\ntools:\n - unknown_tool\n---\nBroken.",
);
await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow(
- /references tool 'unknown_tool'/,
+ /references ambient tool 'unknown_tool'/,
);
+ await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow(/plugin:NAME/);
});
- test("resolves toolkits + ambient tools when provided", async () => {
+ test("resolves a mix of plugin and ambient tools in the unified list", async () => {
const registry: ToolRegistry = {
query: defineTool({
description: "q",
schema: z.object({ sql: z.string() }),
- handler: () => "ok",
+ execute: () => "ok",
}),
};
const plugins = new Map<
@@ -193,7 +198,7 @@ describe("loadAgentsFromDir", () => {
writeAgent(
"analyst",
- "---\nendpoint: e\ntoolkits:\n - analytics\ntools:\n - get_weather\n---\nBody.",
+ "---\nendpoint: e\ntools:\n - plugin:analytics\n - get_weather\n---\nBody.",
);
const res = await loadAgentsFromDir(workDir, {
plugins,
@@ -206,6 +211,91 @@ describe("loadAgentsFromDir", () => {
]);
});
+ test("plugin:NAME with an array filters via { only }", async () => {
+ const registry: ToolRegistry = {
+ list: defineTool({
+ description: "l",
+ schema: z.object({}),
+ execute: () => [],
+ }),
+ read: defineTool({
+ description: "r",
+ schema: z.object({ path: z.string() }),
+ execute: () => "x",
+ }),
+ delete: defineTool({
+ description: "d",
+ schema: z.object({ path: z.string() }),
+ execute: () => undefined,
+ }),
+ };
+ const plugins = new Map<
+ string,
+ { toolkit: (opts?: unknown) => Record }
+ >([
+ [
+ "files",
+ {
+ toolkit: (opts) =>
+ buildToolkitEntries("files", registry, opts as never),
+ },
+ ],
+ ]);
+
+ writeAgent(
+ "reader",
+ "---\nendpoint: e\ntools:\n - plugin:files: [list, read]\n---\nReader.",
+ );
+ const res = await loadAgentsFromDir(workDir, { plugins });
+ expect(Object.keys(res.defs.reader.tools ?? {}).sort()).toEqual([
+ "files.list",
+ "files.read",
+ ]);
+ });
+
+ test("plugin:NAME with a full ToolkitOptions object honours prefix/rename", async () => {
+ const registry: ToolRegistry = {
+ query: defineTool({
+ description: "q",
+ schema: z.object({ sql: z.string() }),
+ execute: () => "ok",
+ }),
+ };
+ const plugins = new Map<
+ string,
+ { toolkit: (opts?: unknown) => Record }
+ >([
+ [
+ "analytics",
+ {
+ toolkit: (opts) =>
+ buildToolkitEntries("analytics", registry, opts as never),
+ },
+ ],
+ ]);
+
+ writeAgent(
+ "renamer",
+ `---\nendpoint: e\ntools:\n - plugin:analytics: { prefix: "", rename: { query: sql_query } }\n---\nBody.`,
+ );
+ const res = await loadAgentsFromDir(workDir, { plugins });
+ expect(Object.keys(res.defs.renamer.tools ?? {})).toEqual(["sql_query"]);
+ });
+
+ test("rejects an object entry whose key isn't 'plugin:NAME'", async () => {
+ writeAgent("bad", "---\nendpoint: e\ntools:\n - files: [a]\n---\nBad.");
+ await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow(
+ /reserved for plugin references/,
+ );
+ });
+
+ test("rejects empty 'plugin:' (no plugin name after the prefix)", async () => {
+ writeAgent("bad", "---\nendpoint: e\ntools:\n - plugin:\n---\nBad.");
+ await expect(loadAgentsFromDir(workDir, {})).rejects.toThrow(
+ /empty plugin name/,
+ );
+ });
+
describe("agents: sibling sub-agent references", () => {
test("resolves sibling references into def.agents regardless of folder order", async () => {
writeAgent(
diff --git a/packages/appkit/src/core/agent/tests/tool.test.ts b/packages/appkit/src/core/agent/tests/tool.test.ts
index 3d47f3a9b..be9803782 100644
--- a/packages/appkit/src/core/agent/tests/tool.test.ts
+++ b/packages/appkit/src/core/agent/tests/tool.test.ts
@@ -28,6 +28,7 @@ describe("tool()", () => {
test("execute receives typed args on valid input", async () => {
const echo = tool({
name: "echo",
+ description: "Echoes the input message back to the caller.",
schema: z.object({ message: z.string() }),
execute: async ({ message }) => {
const _typed: string = message;
@@ -42,6 +43,7 @@ describe("tool()", () => {
test("returns formatted error string (does not throw) when args are invalid", async () => {
const weather = tool({
name: "get_weather",
+ description: "Get weather for a city.",
schema: z.object({ city: z.string() }),
execute: async ({ city }) => `Sunny in ${city}`,
});
@@ -55,6 +57,7 @@ describe("tool()", () => {
test("joins multiple validation errors with '; '", async () => {
const t = tool({
name: "multi",
+ description: "Multi-arg tool used to exercise multi-issue zod errors.",
schema: z.object({ a: z.string(), b: z.number() }),
execute: async () => "ok",
});
@@ -68,6 +71,7 @@ describe("tool()", () => {
test("optional fields validate when absent", async () => {
const t = tool({
name: "opt",
+ description: "Returns the note when provided, '(no note)' otherwise.",
schema: z.object({ note: z.string().optional() }),
execute: async ({ note }) => note ?? "(no note)",
});
@@ -76,16 +80,68 @@ describe("tool()", () => {
expect(await t.execute({ note: "hello" })).toBe("hello");
});
- test("description falls back to the tool name when omitted", () => {
+ test("description is required and passes through verbatim", () => {
+ // Earlier versions allowed `description` to be omitted and silently
+ // fell back to `config.name`. That surfaced cryptic identifiers like
+ // "get_weather" as the description; the LLM then either skipped the
+ // tool or called it speculatively. The field is now mandatory at the
+ // type level — TS catches the omission at authoring time instead of
+ // pushing the cost of a confused agent into production.
const t = tool({
name: "my_tool",
+ description: "Returns the string 'ok' verbatim.",
schema: z.object({}),
execute: async () => "ok",
});
- expect(t.description).toBe("my_tool");
+ expect(t.description).toBe("Returns the string 'ok' verbatim.");
expect(t.parameters).toBeDefined();
});
+
+ test("name is optional — agents plugin overrides it with the record key", () => {
+ // Regression: PR #306 reviewer hit a runtime crash because the
+ // template wrote `tool({ description, schema, execute })` (no name)
+ // and the FunctionTool shape guard rejected the result. The agent
+ // runtime always overrides `name` with the record key in
+ // `tools: { my_tool: tool({...}) }`, so requiring it here was
+ // mis-shaping a valid input.
+ const t = tool({
+ description: "Returns the current server time",
+ schema: z.object({}),
+ execute: () => "2026-05-11T00:00:00Z",
+ });
+
+ expect(t.type).toBe("function");
+ expect(t.name).toBeUndefined();
+ expect(t.description).toBe("Returns the current server time");
+ });
+
+ test("execute may return non-string shapes; downstream normalises", async () => {
+ // Regression: `execute` was typed `Promise<string> | string` but the
+ // template's tools naturally return objects. The runtime serialises
+ // via `normalizeToolResult`; widen the return type to `unknown` and
+ // verify the value flows through.
+ const t = tool({
+ name: "now",
+ description: "Returns the current timestamp as an ISO 8601 string.",
+ schema: z.object({}),
+ execute: () => ({ now: "2026-05-11T00:00:00Z" }),
+ });
+ const result = (await t.execute({})) as { now: string };
+ expect(result).toEqual({ now: "2026-05-11T00:00:00Z" });
+ });
+
+ test("zod-error message uses a generic label when name is omitted", async () => {
+ const t = tool({
+ description: "needs a city",
+ schema: z.object({ city: z.string() }),
+ execute: () => "ok",
+ });
+ const result = await t.execute({});
+ expect(typeof result).toBe("string");
+ expect(result).toContain("Invalid arguments for tool");
+ expect(result).toContain("city");
+ });
});
describe("formatZodError", () => {
diff --git a/packages/appkit/src/core/agent/tools/define-tool.ts b/packages/appkit/src/core/agent/tools/define-tool.ts
index b51792f4c..047eaaf94 100644
--- a/packages/appkit/src/core/agent/tools/define-tool.ts
+++ b/packages/appkit/src/core/agent/tools/define-tool.ts
@@ -20,11 +20,22 @@ export interface ToolEntry {
* consider it safe enough to appear in every agent's tool record without an
* explicit `tools:` declaration. Destructive or privilege-sensitive tools
* should leave this unset so that they only reach agents that wire them
- * explicitly (via `tools:` object/function form, markdown `toolkits:`, or
+ * explicitly (via `tools:` object/function form, markdown `plugin:NAME`
+ * entries in the unified `tools:` list, or
* `plugins..toolkit({ only: [...] })`).
*/
autoInheritable?: boolean;
- handler: (
+ /**
+ * Callback the agents plugin invokes after Zod validation succeeds.
+ *
+ * Named `execute` to match the public `tool({ execute })` form — both the
+ * agent-author surface and the plugin-author surface now spell their
+ * callback the same way. `args` is the inferred Zod output (so `T extends
+ * z.ZodType` flows through and `args` is fully typed). `signal` is the
+ * per-run AbortSignal: forward it to any awaited I/O so cancellation
+ * actually unwinds the call (analytics and lakebase both do this).
+ */
+ execute: (
args: z.infer,
signal?: AbortSignal,
) => unknown | Promise;
@@ -65,7 +76,7 @@ export async function executeFromRegistry(
if (!parsed.success) {
return formatZodError(parsed.error, name);
}
- return entry.handler(parsed.data, signal);
+ return entry.execute(parsed.data, signal);
}
/**
diff --git a/packages/appkit/src/core/agent/tools/function-tool.ts b/packages/appkit/src/core/agent/tools/function-tool.ts
index 19820f8fe..b4e00930c 100644
--- a/packages/appkit/src/core/agent/tools/function-tool.ts
+++ b/packages/appkit/src/core/agent/tools/function-tool.ts
@@ -2,7 +2,14 @@ import type { AgentToolDefinition, ToolAnnotations } from "shared";
export interface FunctionTool {
type: "function";
- name: string;
+ /**
+ * Optional. When this tool is placed in a keyed record
+ * (`tools: { my_tool: ... }` or the function form), the agents plugin
+ * overrides this with the record key at index-build time. Only set it
+ * explicitly when constructing a `FunctionTool` outside any
+ * keyed-record context.
+ */
+ name?: string;
description?: string | null;
parameters?: Record | null;
strict?: boolean | null;
@@ -16,25 +23,34 @@ export interface FunctionTool {
* tool indexes.
*/
annotations?: ToolAnnotations;
- execute: (args: Record) => Promise | string;
+ /**
+ * Returns any shape; downstream `normalizeToolResult` serializes to a
+ * string before handing the value to the LLM.
+ */
+ execute: (args: Record) => unknown | Promise;
}
export function isFunctionTool(value: unknown): value is FunctionTool {
if (typeof value !== "object" || value === null) return false;
const obj = value as Record;
- return (
- obj.type === "function" &&
- typeof obj.name === "string" &&
- typeof obj.execute === "function"
- );
+ // `name` is intentionally not required: the agents plugin overrides it
+ // with the record key (`tools: { my_tool: tool({...}) }` -> "my_tool")
+ // so requiring it on the FunctionTool shape rejects perfectly-valid
+ // `tool({ description, schema, execute })` calls that omit the name.
+ return obj.type === "function" && typeof obj.execute === "function";
}
export function functionToolToDefinition(
tool: FunctionTool,
): AgentToolDefinition {
+ // `name` is guaranteed to be overridden downstream by the record key
+ // when the tool is registered through `AgentDefinition.tools`. Falling
+ // back to an empty string here keeps the type honest without
+ // surfacing a sentinel that could leak into a non-record context.
+ const name = tool.name ?? "";
return {
- name: tool.name,
- description: tool.description ?? tool.name,
+ name,
+ description: tool.description ?? name,
parameters: (tool.parameters as AgentToolDefinition["parameters"]) ?? {
type: "object",
properties: {},
diff --git a/packages/appkit/src/core/agent/tools/index.ts b/packages/appkit/src/core/agent/tools/index.ts
index 053fdc15b..82601d2b4 100644
--- a/packages/appkit/src/core/agent/tools/index.ts
+++ b/packages/appkit/src/core/agent/tools/index.ts
@@ -1,4 +1,7 @@
-export { AppKitMcpClient } from "../../../connectors/mcp/client";
+export {
+ AppKitMcpClient,
+ type McpConnectAllResult,
+} from "../../../connectors/mcp/client";
export {
defineTool,
executeFromRegistry,
diff --git a/packages/appkit/src/core/agent/tools/tool.ts b/packages/appkit/src/core/agent/tools/tool.ts
index 53305c236..6ebdaa9bb 100644
--- a/packages/appkit/src/core/agent/tools/tool.ts
+++ b/packages/appkit/src/core/agent/tools/tool.ts
@@ -4,8 +4,29 @@ import type { FunctionTool } from "./function-tool";
import { toToolJSONSchema } from "./json-schema";
export interface ToolConfig {
- name: string;
- description?: string;
+ /**
+ * Optional. When the tool is placed in a keyed record (the standard
+ * `tools: { my_tool: tool({...}) }` form, or the function form
+ * `tools(plugins) => ({ my_tool: tool({...}) })`), the agents plugin
+ * overrides the tool's LLM-visible name with the record key. Set
+ * `name` explicitly only if you're constructing a `FunctionTool`
+ * outside any keyed-record context — otherwise the record key wins.
+ */
+ name?: string;
+ /**
+ * What the tool does, what it expects, and when the LLM should call it.
+ * The model reads this verbatim when deciding whether to invoke the tool,
+ * so write it for an LLM, not for a human reader of your code: spell out
+ * the inputs, the return shape, and any pre-conditions or side effects.
+ *
+ * Required. Earlier versions silently fell back to the tool's name when
+ * omitted, which surfaced cryptic identifiers like `"get_weather"` as the
+ * description — the model then had no signal about expected use and
+ * either skipped the tool or called it speculatively. Making this
+ * mandatory at the type level forces a real description at authoring
+ * time instead of debugging a confused agent later.
+ */
+ description: string;
schema: S;
/**
* Behavioural hints forwarded to the resolved tool definition. Prefer
@@ -16,7 +37,15 @@ export interface ToolConfig {
* added this field.
*/
annotations?: ToolAnnotations;
- execute: (args: z.infer) => Promise | string;
+ /**
+ * Returning a non-string value is fine: the agent runtime serializes
+ * the result via `normalizeToolResult` before handing it to the LLM
+ * (strings pass through; `undefined` becomes `""`; `null` becomes
+ * `"null"`; everything else gets `JSON.stringify`'d). Return whatever shape
+ * is most natural for your tool — typically an object — and let the
+ * runtime handle the wire format.
+ */
+ execute: (args: z.infer) => unknown | Promise;
}
/**
@@ -29,21 +58,25 @@ export interface ToolConfig {
* can self-correct on its next turn.
*/
export function tool(config: ToolConfig): FunctionTool {
- const parameters = toToolJSONSchema(config.schema) as unknown as Record<
- string,
- unknown
- >;
+ const parameters = toToolJSONSchema(config.schema);
+
+ // `name` is only used for the zod-validation error message and the
+ // FunctionTool's `name` field; the agents plugin overrides the latter
+ // with the record key (`tools: { my_tool: ... }` -> "my_tool") at
+ // index-build time. Fall back to a generic label so errors are still
+ // legible when `name` is omitted.
+ const labelForErrors = config.name ?? "tool";
return {
type: "function",
- name: config.name,
- description: config.description ?? config.name,
+ ...(config.name !== undefined ? { name: config.name } : {}),
+ description: config.description,
parameters,
...(config.annotations ? { annotations: config.annotations } : {}),
execute: async (args: Record) => {
const parsed = config.schema.safeParse(args);
if (!parsed.success) {
- return formatZodError(parsed.error, config.name);
+ return formatZodError(parsed.error, labelForErrors);
}
return config.execute(parsed.data as z.infer);
},
diff --git a/packages/appkit/src/core/agent/types.ts b/packages/appkit/src/core/agent/types.ts
index 4ff7d31ed..cf47845f7 100644
--- a/packages/appkit/src/core/agent/types.ts
+++ b/packages/appkit/src/core/agent/types.ts
@@ -117,7 +117,24 @@ export type AgentTools = Record;
export type AgentToolsFn = (plugins: Plugins) => AgentTools;
export interface AgentDefinition {
- /** Filled in from the enclosing key when used in `agents: { foo: def }`. */
+ /**
+ * Stable identifier for the agent. **Optional and informational** —
+ * when the definition is registered via `agents: { foo: def }` (code) or
+ * lives at `config/agents/<id>/agent.md` (markdown), the **registry key
+ * always wins** and `name` is ignored. The agent will be reachable as
+ * `foo` (or `<id>`) regardless of what this field contains.
+ *
+ * Set `name` when:
+ * - Running standalone via `runAgent({ agent: def })`, where there is
+ * no enclosing key. The runtime uses it for the agent's slot in
+ * error messages and OTel spans.
+ * - Building a definition that may be passed to either form and you
+ * want a consistent fallback label.
+ *
+ * Setting `name` to a value that differs from the registry key is
+ * harmless but confusing — prefer keeping them aligned or omitting `name`
+ * entirely.
+ */
name?: string;
/** System prompt body. For markdown-loaded agents this is the file body. */
instructions: string;
@@ -161,8 +178,8 @@ export interface AgentDefinition {
* with no explicit `tools:` declaration receive every registered ToolProvider
* plugin tool whose author marked `autoInheritable: true`. Tools without that
* flag — destructive, state-mutating, or privilege-sensitive — never spread
- * automatically and must be wired via `tools:` (object or function form) or
- * markdown `toolkits:`.
+ * automatically and must be wired via `tools:` (object or function form in
+ * code, `plugin:NAME` entries in markdown frontmatter).
*
* Defaults are `false` for both origins (safe-by-default): developers must
* consciously opt an origin in to any auto-inherit behaviour.
@@ -199,14 +216,20 @@ export interface AgentsPluginConfig extends BasePluginConfig {
*/
mcp?: McpHostPolicyConfig;
/**
- * Human-in-the-loop approval gate for destructive tool calls. When enabled
+ * Human-in-the-loop approval gate for mutating tool calls. When enabled
* (the default), the agents plugin emits an `appkit.approval_pending` SSE
- * event before executing any tool annotated `destructive: true` and waits
- * for a `POST /chat/approve` decision from the same user who initiated the
- * stream. A missing decision after `timeoutMs` auto-denies the call.
+ * event before executing any tool whose annotation flags it as mutating —
+ * `effect: "write" | "update" | "destructive"` (preferred) or the legacy
+ * `destructive: true` boolean — and waits for a `POST /chat/approve`
+ * decision from the same user who initiated the stream. A missing decision
+ * after `timeoutMs` auto-denies the call.
*/
approval?: {
- /** Require human approval for tools annotated `destructive: true`. Default: `true`. */
+ /**
+ * Require human approval for tools that mutate state. Triggered by
+ * `effect: "write" | "update" | "destructive"` (preferred) or the legacy
+ * `destructive: true` boolean. Default: `true`.
+ */
requireForDestructive?: boolean;
/** Milliseconds to wait before auto-denying. Default: 60_000. */
timeoutMs?: number;
diff --git a/packages/appkit/src/plugins/agents/agents.ts b/packages/appkit/src/plugins/agents/agents.ts
index 3c20d6165..40217e54e 100644
--- a/packages/appkit/src/plugins/agents/agents.ts
+++ b/packages/appkit/src/plugins/agents/agents.ts
@@ -172,16 +172,44 @@ export class AgentsPlugin extends Plugin implements ToolProvider {
}
}
- /** Effective approval policy with defaults applied. */
+ /**
+ * Effective approval policy with defaults applied. Memoised so the
+ * `timeoutMs` validation warning fires at most once per plugin instance —
+ * `resolvedApprovalPolicy` gets hit on every chat stream and a noisy
+ * misconfig would otherwise spam the logs.
+ *
+ * A `timeoutMs` below the 1s floor or non-finite (`0`, negative, `NaN`)
+ * is replaced with the 60s default rather than clamped, so a misconfigured
+ * value can't degrade into immediate auto-denial of every mutating tool call.
+ */
+ private cachedApprovalPolicy: {
+ requireForDestructive: boolean;
+ timeoutMs: number;
+ } | null = null;
+
private get resolvedApprovalPolicy(): {
requireForDestructive: boolean;
timeoutMs: number;
} {
+ if (this.cachedApprovalPolicy) return this.cachedApprovalPolicy;
const cfg = this.config.approval ?? {};
- return {
+ const APPROVAL_TIMEOUT_FLOOR_MS = 1_000;
+ const APPROVAL_TIMEOUT_DEFAULT_MS = 60_000;
+ let timeoutMs = cfg.timeoutMs ?? APPROVAL_TIMEOUT_DEFAULT_MS;
+ if (!Number.isFinite(timeoutMs) || timeoutMs < APPROVAL_TIMEOUT_FLOOR_MS) {
+ logger.warn(
+ "approval.timeoutMs=%s is below the %sms floor; using default %sms instead. Mutating tool calls would otherwise auto-deny before any UI could respond.",
+ cfg.timeoutMs,
+ APPROVAL_TIMEOUT_FLOOR_MS,
+ APPROVAL_TIMEOUT_DEFAULT_MS,
+ );
+ timeoutMs = APPROVAL_TIMEOUT_DEFAULT_MS;
+ }
+ this.cachedApprovalPolicy = {
requireForDestructive: cfg.requireForDestructive ?? true,
- timeoutMs: cfg.timeoutMs ?? 60_000,
+ timeoutMs,
};
+ return this.cachedApprovalPolicy;
}
/** Effective DoS limits with defaults applied. */
@@ -235,7 +263,7 @@ export class AgentsPlugin extends Plugin implements ToolProvider {
const entry = this.activeStreams.get(requestId);
if (!entry) return;
this.activeStreams.delete(requestId);
- const next = (this.userStreamCounts.get(entry.userId) ?? 1) - 1;
+ const next = (this.userStreamCounts.get(entry.userId) ?? 0) - 1;
if (next <= 0) {
this.userStreamCounts.delete(entry.userId);
} else {
@@ -259,10 +287,21 @@ export class AgentsPlugin extends Plugin implements ToolProvider {
*/
async reload(): Promise {
const next = await this.buildAgentRegistry();
- if (this.mcpClient) {
- await this.mcpClient.close();
- this.mcpClient = null;
- }
+ // Deliberately NOT closing the existing mcpClient here. Tool
+ // dispatch in `dispatchToolCall` reads `this.mcpClient` at call
+ // time; closing it mid-stream throws "MCP client is closed" from
+ // the next sendRpc and kills the in-flight conversation. The
+ // client owns only short-lived `fetch` handles (no keep-alive
+ // sockets) and the connections map persists in the live instance,
+ // so dropping `this.mcpClient` would also strand in-flight tool
+ // calls that resolved the field a moment earlier. Leave the live
+ // client in place; `buildAgentRegistry` -> `connectHostedTools`
+ // adds any new endpoints to the same instance, and stale
+ // connections from a removed config become unreachable through
+ // the new agent tool indexes (small memory cost, no correctness
+ // hazard). The shutdown path still closes — that's process
+ // teardown, where in-flight streams have already been aborted via
+ // `abortActiveOperations`.
this.agents = next.agents;
this.defaultAgentName = next.defaultAgentName;
}
@@ -367,7 +406,9 @@ export class AgentsPlugin extends Plugin implements ToolProvider {
/**
* Builds the map of plugin-name → toolkit that the markdown loader consults
- * when resolving `toolkits:` frontmatter entries.
+ * when resolving `plugin:NAME` entries in the unified `tools:` frontmatter
+ * list (and, equivalently, that the code form passes as the `plugins`
+ * argument to `tools(plugins) => Record<...>`).
*/
private pluginProviderIndex(): Map<
string,
@@ -685,7 +726,20 @@ export class AgentsPlugin extends Plugin implements ToolProvider {
}
const endpoints = resolveHostedTools(hostedTools);
- await this.mcpClient.connectAll(endpoints);
+ const result = await this.mcpClient.connectAll(endpoints);
+ if (result.failed.length > 0) {
+ // Per-endpoint errors are already logged inside `connectAll`; this
+ // aggregate warning makes the partial-success state visible at the
+ // agent-registration boundary so operators see "agent X registered
+ // without N hosted-tool endpoints" alongside the connect-time
+ // errors, instead of just an opaque list of MCP failures.
+ logger.warn(
+ "MCP: %s of %s endpoints failed to connect (%s). Agents that reference these endpoints will boot without their hosted tools.",
+ result.failed.length,
+ endpoints.length,
+ result.failed.map((f) => f.name).join(", "),
+ );
+ }
for (const def of this.mcpClient.getAllToolDefinitions()) {
index.set(def.name, {
@@ -1144,6 +1198,16 @@ export class AgentsPlugin extends Plugin implements ToolProvider {
runState.limits.toolCallTimeoutMs,
);
} else if (entry.source === "function") {
+ // Function tools declare their parameters as a JSON-object schema,
+ // so adapters always serialize `args` as an object. A non-object
+ // value here means the upstream model emitted malformed tool-call
+ // JSON; surface a clear error rather than silently passing through
+ // a wrong-shape value the tool will then choke on.
+ if (typeof args !== "object" || args === null || Array.isArray(args)) {
+ throw new Error(
+ `Function tool '${name}' received non-object arguments (got ${args === null ? "null" : Array.isArray(args) ? "array" : typeof args}); expected a JSON object.`,
+ );
+ }
result = await entry.functionTool.execute(
args as Record,
);
diff --git a/packages/appkit/src/plugins/agents/manifest.json b/packages/appkit/src/plugins/agents/manifest.json
index 922a10fee..f2e5420bc 100644
--- a/packages/appkit/src/plugins/agents/manifest.json
+++ b/packages/appkit/src/plugins/agents/manifest.json
@@ -3,21 +3,20 @@
"name": "agents",
"displayName": "Agents Plugin",
"stability": "beta",
- "hidden": true,
"description": "AI agents driven by markdown configs or code, with auto-tool-discovery from registered plugins",
"resources": {
"required": [],
"optional": [
{
"type": "serving_endpoint",
- "alias": "Model Serving (agents)",
+ "alias": "Default LLM for agents",
"resourceKey": "agents-serving-endpoint",
- "description": "Databricks Model Serving endpoint for agents using workspace-hosted models (`DatabricksAdapter.fromModelServing`). Wire the same endpoint name AppKit reads from `DATABRICKS_SERVING_ENDPOINT_NAME` when no per-agent model is configured. The same env var the `serving` plugin reads — one value covers both. Omit when agents use only external adapters.",
+ "description": "Default streaming-capable LLM endpoint for agents that don't pin their own model",
"permission": "CAN_QUERY",
"fields": {
"name": {
"env": "DATABRICKS_SERVING_ENDPOINT_NAME",
- "description": "Endpoint name passed to Model Serving when agents default to `DatabricksAdapter.fromModelServing()`. Shared with the `serving` plugin."
+ "description": "Default LLM serving endpoint name"
}
}
}
diff --git a/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts b/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts
index da8dd6bd6..c654e477f 100644
--- a/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts
+++ b/packages/appkit/src/plugins/agents/tests/agents-plugin.test.ts
@@ -176,13 +176,39 @@ describe("AgentsPlugin", () => {
expect(api.get("support")?.instructions).toBe("From code");
});
+ test("reload() does not close the existing mcpClient (in-flight streams keep working)", async () => {
+ // Regression: prior `reload()` called `await this.mcpClient.close()`
+ // and dropped the reference. Tool dispatch reads `this.mcpClient`
+ // at call time (agents.ts dispatchToolCall path), so a stream that
+ // started before reload and continues afterwards would hit "MCP
+ // client is closed" mid-conversation. The fix removes the
+ // synchronous close — the existing client survives reload and
+ // dispatches keep working.
+ const plugin = instantiate({ dir: false });
+ const closeSpy = vi.fn(async () => {});
+ const fakeClient = {
+ close: closeSpy,
+ callTool: vi.fn(),
+ connectAll: vi.fn(async () => ({ connected: [], failed: [] })),
+ getAllToolDefinitions: () => [],
+ };
+ // biome-ignore lint/suspicious/noExplicitAny: seeding private mcpClient
+ (plugin as any).mcpClient = fakeClient;
+ await plugin.setup();
+ await plugin.reload();
+
+ expect(closeSpy).not.toHaveBeenCalled();
+ // biome-ignore lint/suspicious/noExplicitAny: read private mcpClient
+ expect((plugin as any).mcpClient).toBe(fakeClient);
+ });
+
test("auto-inherit default is safe (both file and code get nothing without an explicit opt-in)", async () => {
const registry: ToolRegistry = {
query: defineTool({
description: "q",
schema: z.object({ sql: z.string() }),
autoInheritable: true, // even with autoInheritable, no spread without opt-in
- handler: () => "ok",
+ execute: () => "ok",
}),
};
const provider = makeToolProvider("analytics", registry);
@@ -221,13 +247,13 @@ describe("AgentsPlugin", () => {
description: "read-only query",
schema: z.object({ sql: z.string() }),
autoInheritable: true,
- handler: () => "ok",
+ execute: () => "ok",
}),
destructive: defineTool({
description: "mutation",
schema: z.object({}),
// autoInheritable left unset → skipped even when opted in
- handler: () => "ok",
+ execute: () => "ok",
}),
};
const provider = makeToolProvider("analytics", registry);
@@ -259,12 +285,12 @@ describe("AgentsPlugin", () => {
description: "safe",
schema: z.object({}),
autoInheritable: true,
- handler: () => "ok",
+ execute: () => "ok",
}),
unsafe: defineTool({
description: "unsafe",
schema: z.object({}),
- handler: () => "ok",
+ execute: () => "ok",
}),
};
const provider = makeToolProvider("p", registry);
@@ -299,14 +325,14 @@ describe("AgentsPlugin", () => {
query: defineTool({
description: "q",
schema: z.object({ sql: z.string() }),
- handler: () => "ok",
+ execute: () => "ok",
}),
};
const registry2: ToolRegistry = {
list: defineTool({
description: "l",
schema: z.object({}),
- handler: () => [],
+ execute: () => [],
}),
};
const ctx = fakeContext([
@@ -317,7 +343,7 @@ describe("AgentsPlugin", () => {
writeMarkdownAgent(
tmpDir,
"analyst",
- "---\ntoolkits: [analytics]\n---\nAnalyst.",
+ "---\ntools:\n - plugin:analytics\n---\nAnalyst.",
);
const plugin = instantiate(
@@ -378,7 +404,7 @@ describe("AgentsPlugin", () => {
query: defineTool({
description: "q",
schema: z.object({ sql: z.string() }),
- handler: () => "ok",
+ execute: () => "ok",
}),
};
const ctx = fakeContext([
@@ -417,7 +443,7 @@ describe("AgentsPlugin", () => {
query: defineTool({
description: "q",
schema: z.object({ sql: z.string() }),
- handler: () => "ok",
+ execute: () => "ok",
}),
};
const ctx = fakeContext([
@@ -489,7 +515,7 @@ describe("AgentsPlugin", () => {
query: defineTool({
description: "q",
schema: z.object({ sql: z.string() }),
- handler: () => "ok",
+ execute: () => "ok",
}),
};
const filesReg: ToolRegistry = {
@@ -497,7 +523,7 @@ describe("AgentsPlugin", () => {
description: "l",
schema: z.object({}),
autoInheritable: true,
- handler: () => [],
+ execute: () => [],
}),
};
const ctx = fakeContext([
@@ -545,7 +571,7 @@ describe("AgentsPlugin", () => {
description: "l",
schema: z.object({}),
autoInheritable: true,
- handler: () => [],
+ execute: () => [],
}),
};
const ctx = fakeContext([
diff --git a/packages/appkit/src/plugins/agents/tests/approval-config.test.ts b/packages/appkit/src/plugins/agents/tests/approval-config.test.ts
new file mode 100644
index 000000000..0827da35e
--- /dev/null
+++ b/packages/appkit/src/plugins/agents/tests/approval-config.test.ts
@@ -0,0 +1,81 @@
+import { beforeEach, describe, expect, test, vi } from "vitest";
+import { CacheManager } from "../../../cache";
+import { AgentsPlugin } from "../agents";
+
+/**
+ * `approval.timeoutMs` validation. A misconfigured value (`0`, negative,
+ * `NaN`, or `Infinity`) used to silently make every mutating tool call
+ * auto-deny before any UI could possibly respond, because the gate's wait
+ * resolved immediately. The plugin now clamps non-finite values and
+ * anything below the 1s floor back to the 60s default and logs a warning.
+ *
+ * White-box read of the private getter — `resolvedApprovalPolicy` is the
+ * single read-site for `timeoutMs` inside the plugin, and exposing a
+ * dedicated public accessor just for tests would leak internals.
+ */
+
+function policyOf(plugin: AgentsPlugin) {
+ return (plugin as unknown as { resolvedApprovalPolicy: unknown })
+ .resolvedApprovalPolicy as {
+ requireForDestructive: boolean;
+ timeoutMs: number;
+ };
+}
+
+beforeEach(() => {
+ CacheManager.getInstanceSync = vi.fn(() => ({
+ get: vi.fn(),
+ set: vi.fn(),
+ delete: vi.fn(),
+ getOrExecute: vi.fn(async (_k: unknown[], fn: () => Promise) =>
+ fn(),
+ ),
+ generateKey: vi.fn(() => "test-key"),
+ })) as unknown as typeof CacheManager.getInstanceSync;
+});
+
+describe("AgentsPlugin.resolvedApprovalPolicy.timeoutMs", () => {
+ test("uses the default (60_000) when approval is omitted", () => {
+ const plugin = new AgentsPlugin({});
+ expect(policyOf(plugin).timeoutMs).toBe(60_000);
+ });
+
+ test("passes valid positive values through unchanged", () => {
+ const plugin = new AgentsPlugin({ approval: { timeoutMs: 5_000 } });
+ expect(policyOf(plugin).timeoutMs).toBe(5_000);
+ });
+
+ test("clamps zero to the default with a warning", () => {
+ const plugin = new AgentsPlugin({ approval: { timeoutMs: 0 } });
+ expect(policyOf(plugin).timeoutMs).toBe(60_000);
+ });
+
+ test("clamps negative values to the default", () => {
+ const plugin = new AgentsPlugin({ approval: { timeoutMs: -1 } });
+ expect(policyOf(plugin).timeoutMs).toBe(60_000);
+ });
+
+ test("clamps NaN to the default", () => {
+ const plugin = new AgentsPlugin({ approval: { timeoutMs: Number.NaN } });
+ expect(policyOf(plugin).timeoutMs).toBe(60_000);
+ });
+
+ test("clamps Infinity to the default (not finite)", () => {
+ const plugin = new AgentsPlugin({
+ approval: { timeoutMs: Number.POSITIVE_INFINITY },
+ });
+ expect(policyOf(plugin).timeoutMs).toBe(60_000);
+ });
+
+ test("clamps sub-1000ms values to the default (1s floor)", () => {
+ const plugin = new AgentsPlugin({ approval: { timeoutMs: 250 } });
+ expect(policyOf(plugin).timeoutMs).toBe(60_000);
+ });
+
+ test("memoises the validation result — repeated reads return the same object", () => {
+ const plugin = new AgentsPlugin({ approval: { timeoutMs: 5_000 } });
+ const a = policyOf(plugin);
+ const b = policyOf(plugin);
+ expect(a).toBe(b);
+ });
+});
diff --git a/packages/appkit/src/plugins/agents/tests/dispatch-tool-call.test.ts b/packages/appkit/src/plugins/agents/tests/dispatch-tool-call.test.ts
index dd9514e4a..047836648 100644
--- a/packages/appkit/src/plugins/agents/tests/dispatch-tool-call.test.ts
+++ b/packages/appkit/src/plugins/agents/tests/dispatch-tool-call.test.ts
@@ -355,6 +355,34 @@ describe("runSubAgent — sub-agent event forwarding", () => {
* happens. `metadata` events are NOT forwarded because the sub-agent has
* its own threadId and overwriting the parent's would break multi-turn.
*/
+ test("rejects when depth exceeds limits.maxSubAgentDepth before invoking the child", async () => {
+ // Backstop for the runtime cycle case: even without an explicit
+ // cycle, two agents delegating to each other will eventually exceed
+ // the depth limit and we want a clear error, not an unbounded stack.
+ const plugin = new AgentsPlugin({
+ dir: false,
+ agents: {},
+ limits: { maxSubAgentDepth: 2 },
+ });
+ const { runState } = makeRunState(plugin);
+ runState.limits.maxSubAgentDepth = 2;
+
+ const childRun = vi.fn();
+ const child = {
+ name: "child",
+ instructions: "test",
+ adapter: { run: childRun },
+ toolIndex: new Map(),
+ // biome-ignore lint/suspicious/noExplicitAny: minimal stub
+ } as any;
+
+ await expect(
+ // biome-ignore lint/suspicious/noExplicitAny: call private
+ (plugin as any).runSubAgent(runState, child, { input: "go" }, 3),
+ ).rejects.toThrow(/Sub-agent depth exceeded \(limit 2\)/);
+ expect(childRun).not.toHaveBeenCalled();
+ });
+
test("forwards every sub-agent event into the parent stream except metadata", async () => {
const plugin = new AgentsPlugin({ dir: false, agents: {} });
const { runState, pushed } = makeRunState(plugin);
diff --git a/packages/appkit/src/plugins/agents/tool-approval-gate.ts b/packages/appkit/src/plugins/agents/tool-approval-gate.ts
index 669f30a96..4aeb92925 100644
--- a/packages/appkit/src/plugins/agents/tool-approval-gate.ts
+++ b/packages/appkit/src/plugins/agents/tool-approval-gate.ts
@@ -1,10 +1,11 @@
/**
- * Server-side state for the human-in-the-loop approval gate on
- * `destructive: true` agent tool calls.
+ * Server-side state for the human-in-the-loop approval gate on mutating
+ * agent tool calls — tools annotated with `effect: "write" | "update" |
+ * "destructive"` (preferred) or the legacy `destructive: true` boolean.
*
* Lifecycle:
*
- * 1. `wait(...)` is called from inside `executeTool` when a destructive tool
+ * 1. `wait(...)` is called from inside `executeTool` when a mutating tool
* is about to execute. A `Pending` record is registered and a timer is
* scheduled for auto-deny. The returned promise is what blocks the
* adapter until the decision arrives.
diff --git a/packages/appkit/src/plugins/analytics/analytics.ts b/packages/appkit/src/plugins/analytics/analytics.ts
index fdcb16b43..b4994d42c 100644
--- a/packages/appkit/src/plugins/analytics/analytics.ts
+++ b/packages/appkit/src/plugins/analytics/analytics.ts
@@ -287,11 +287,11 @@ export class AnalyticsPlugin extends Plugin implements ToolProvider {
),
}),
annotations: {
- readOnly: true,
+ effect: "read",
requiresUserContext: true,
},
autoInheritable: true,
- handler: (args, signal) => {
+ execute: (args, signal) => {
assertReadOnlySql(args.query);
return this.query(args.query, undefined, undefined, signal);
},
diff --git a/packages/appkit/src/plugins/analytics/tests/analytics.readonly.test.ts b/packages/appkit/src/plugins/analytics/tests/analytics.readonly.test.ts
index 749137ef3..b8cf21d77 100644
--- a/packages/appkit/src/plugins/analytics/tests/analytics.readonly.test.ts
+++ b/packages/appkit/src/plugins/analytics/tests/analytics.readonly.test.ts
@@ -29,14 +29,14 @@ function makePlugin(): AnalyticsPlugin {
return new AnalyticsPlugin({});
}
-describe("AnalyticsPlugin.query agent tool — readOnly annotation", () => {
- test("is advertised with readOnly:true and requiresUserContext:true", () => {
+describe("AnalyticsPlugin.query agent tool — annotations", () => {
+ test('is advertised with effect:"read" and requiresUserContext:true', () => {
const plugin = makePlugin();
const defs = plugin.getAgentTools();
const query = defs.find((d) => d.name === "query");
expect(query).toBeDefined();
expect(query?.annotations).toEqual({
- readOnly: true,
+ effect: "read",
requiresUserContext: true,
});
});
diff --git a/packages/appkit/src/plugins/beta-exports.generated.ts b/packages/appkit/src/plugins/beta-exports.generated.ts
index 7fff0af71..82f6c4a78 100644
--- a/packages/appkit/src/plugins/beta-exports.generated.ts
+++ b/packages/appkit/src/plugins/beta-exports.generated.ts
@@ -5,4 +5,4 @@
// subpath ships each plugin. Editing this file by hand will drift it from the
// manifests and the synced appkit.plugins.json.
-export {};
+export { agents } from "./agents";
diff --git a/packages/appkit/src/plugins/files/plugin.ts b/packages/appkit/src/plugins/files/plugin.ts
index 0542f6618..000237a80 100644
--- a/packages/appkit/src/plugins/files/plugin.ts
+++ b/packages/appkit/src/plugins/files/plugin.ts
@@ -1064,36 +1064,48 @@ export class FilesPlugin extends Plugin implements ToolProvider {
.optional()
.describe("Directory path to list (optional, defaults to root)"),
}),
- annotations: { readOnly: true, requiresUserContext: true },
+ annotations: { effect: "read", requiresUserContext: true },
autoInheritable: true,
- handler: (args) => api().list(args.path),
+ execute: (args, signal) => {
+ signal?.throwIfAborted();
+ return api().list(args.path);
+ },
}),
[`${volumeKey}.read`]: defineTool({
description: `Read a text file from the "${volumeKey}" volume`,
schema: z.object({
path: z.string().describe("File path to read"),
}),
- annotations: { readOnly: true, requiresUserContext: true },
+ annotations: { effect: "read", requiresUserContext: true },
autoInheritable: true,
- handler: (args) => api().read(args.path),
+ execute: (args, signal) => {
+ signal?.throwIfAborted();
+ return api().read(args.path);
+ },
}),
[`${volumeKey}.exists`]: defineTool({
description: `Check if a file or directory exists in the "${volumeKey}" volume`,
schema: z.object({
path: z.string().describe("Path to check"),
}),
- annotations: { readOnly: true, requiresUserContext: true },
+ annotations: { effect: "read", requiresUserContext: true },
autoInheritable: true,
- handler: (args) => api().exists(args.path),
+ execute: (args, signal) => {
+ signal?.throwIfAborted();
+ return api().exists(args.path);
+ },
}),
[`${volumeKey}.metadata`]: defineTool({
description: `Get metadata (size, type, last modified) for a file in the "${volumeKey}" volume`,
schema: z.object({
path: z.string().describe("File path"),
}),
- annotations: { readOnly: true, requiresUserContext: true },
+ annotations: { effect: "read", requiresUserContext: true },
autoInheritable: true,
- handler: (args) => api().metadata(args.path),
+ execute: (args, signal) => {
+ signal?.throwIfAborted();
+ return api().metadata(args.path);
+ },
}),
[`${volumeKey}.upload`]: defineTool({
description: `Upload a text file to the "${volumeKey}" volume`,
@@ -1105,19 +1117,24 @@ export class FilesPlugin extends Plugin implements ToolProvider {
.optional()
.describe("Whether to overwrite existing file"),
}),
- annotations: { destructive: true, requiresUserContext: true },
- handler: (args) =>
- api().upload(args.path, args.contents, {
+ annotations: { effect: "destructive", requiresUserContext: true },
+ execute: (args, signal) => {
+ signal?.throwIfAborted();
+ return api().upload(args.path, args.contents, {
overwrite: args.overwrite,
- }),
+ });
+ },
}),
[`${volumeKey}.delete`]: defineTool({
description: `Delete a file from the "${volumeKey}" volume`,
schema: z.object({
path: z.string().describe("File path to delete"),
}),
- annotations: { destructive: true, requiresUserContext: true },
- handler: (args) => api().delete(args.path),
+ annotations: { effect: "destructive", requiresUserContext: true },
+ execute: (args, signal) => {
+ signal?.throwIfAborted();
+ return api().delete(args.path);
+ },
}),
};
}
diff --git a/packages/appkit/src/plugins/genie/genie.ts b/packages/appkit/src/plugins/genie/genie.ts
index 8248b2c4a..3d4706087 100644
--- a/packages/appkit/src/plugins/genie/genie.ts
+++ b/packages/appkit/src/plugins/genie/genie.ts
@@ -73,13 +73,14 @@ export class GeniePlugin extends Plugin implements ToolProvider {
"Optional conversation ID to continue an existing conversation",
),
}),
- annotations: { requiresUserContext: true },
- handler: async (args) => {
+ annotations: { effect: "read", requiresUserContext: true },
+ execute: async (args, signal) => {
const events: GenieStreamEvent[] = [];
for await (const event of this.sendMessage(
alias,
args.content,
args.conversationId,
+ { signal },
)) {
events.push(event);
}
@@ -93,9 +94,10 @@ export class GeniePlugin extends Plugin implements ToolProvider {
.string()
.describe("The conversation ID to retrieve"),
}),
- annotations: { readOnly: true, requiresUserContext: true },
+ annotations: { effect: "read", requiresUserContext: true },
autoInheritable: true,
- handler: (args) => this.getConversation(alias, args.conversationId),
+ execute: (args, signal) =>
+ this.getConversation(alias, args.conversationId, signal),
}),
};
}
@@ -304,7 +306,13 @@ export class GeniePlugin extends Plugin implements ToolProvider {
async getConversation(
alias: string,
conversationId: string,
+ signal?: AbortSignal,
): Promise {
+ // Honour an already-cancelled stream before paying any I/O cost. The
+ // underlying connector's pagination loop is signal-agnostic today, so
+ // this catches the common case (tool dispatched after the user
+ // cancelled) without a deeper connector change.
+ signal?.throwIfAborted();
const spaceId = this.resolveSpaceId(alias);
if (!spaceId) {
@@ -328,8 +336,9 @@ export class GeniePlugin extends Plugin implements ToolProvider {
alias: string,
content: string,
conversationId?: string,
- options?: { timeout?: number },
+ options?: { timeout?: number; signal?: AbortSignal },
): AsyncGenerator {
+ options?.signal?.throwIfAborted();
const spaceId = this.resolveSpaceId(alias);
if (!spaceId) {
throw new Error(`Unknown space alias: ${alias}`);
@@ -341,7 +350,7 @@ export class GeniePlugin extends Plugin implements ToolProvider {
spaceId,
content,
conversationId,
- { timeout },
+ { timeout, signal: options?.signal },
);
}
diff --git a/packages/appkit/src/plugins/lakebase/lakebase.ts b/packages/appkit/src/plugins/lakebase/lakebase.ts
index 36c64c5ae..49930355f 100644
--- a/packages/appkit/src/plugins/lakebase/lakebase.ts
+++ b/packages/appkit/src/plugins/lakebase/lakebase.ts
@@ -179,11 +179,18 @@ export class LakebasePlugin extends Plugin implements ToolProvider {
.describe("Parameter values corresponding to placeholders"),
}),
annotations: {
- readOnly,
- destructive: !readOnly,
+ effect: readOnly ? "read" : "destructive",
idempotent: false,
},
- handler: async (args) => {
+ execute: async (args, signal) => {
+ // Matches the files plugin pattern: the pg connection API
+ // doesn't accept AbortSignal in its current shape, so deeper
+ // mid-call cancellation needs a separate plumbing pass on the
+ // connector. This entry check still catches the common case —
+ // a tool dispatched after the user already cancelled the
+ // stream — and unwinds cleanly instead of running to
+ // completion against the Postgres database.
+ signal?.throwIfAborted();
if (readOnly) {
assertReadOnlySql(args.text);
return this.runReadOnlyStatement(args.text, args.values);
diff --git a/packages/appkit/src/plugins/lakebase/tests/lakebase-agent-tool.test.ts b/packages/appkit/src/plugins/lakebase/tests/lakebase-agent-tool.test.ts
index 855ce1012..756423178 100644
--- a/packages/appkit/src/plugins/lakebase/tests/lakebase-agent-tool.test.ts
+++ b/packages/appkit/src/plugins/lakebase/tests/lakebase-agent-tool.test.ts
@@ -81,8 +81,7 @@ describe("LakebasePlugin — agent tool opt-in", () => {
expect(defs).toHaveLength(1);
expect(defs[0].name).toBe("query");
expect(defs[0].annotations).toEqual({
- readOnly: true,
- destructive: false,
+ effect: "read",
idempotent: false,
});
});
@@ -93,8 +92,7 @@ describe("LakebasePlugin — agent tool opt-in", () => {
});
const defs = plugin.getAgentTools();
expect(defs[0].annotations).toEqual({
- readOnly: false,
- destructive: true,
+ effect: "destructive",
idempotent: false,
});
});
diff --git a/packages/appkit/src/plugins/lakebase/types.ts b/packages/appkit/src/plugins/lakebase/types.ts
index 2c469d1c8..f92d85b75 100644
--- a/packages/appkit/src/plugins/lakebase/types.ts
+++ b/packages/appkit/src/plugins/lakebase/types.ts
@@ -18,16 +18,17 @@ import type { LakebasePoolConfig } from "../../connectors/lakebase";
* PostgreSQL server rejects writes that slip past the classifier (e.g., a
* `SELECT` over a function with side effects).
*
- * When `readOnly: false`, the tool is annotated `destructive: true` and the
- * agents plugin will require human approval for every invocation (see
+ * When `readOnly: false`, the tool is annotated `effect: "destructive"` and
+ * the agents plugin will require human approval for every invocation (see
* `AgentsPluginConfig.approval`).
*/
export interface LakebaseExposeAsAgentTool {
/**
* Enforce read-only execution. Defaults to `true`. Set to `false` to allow
* destructive statements — highly discouraged outside of tightly controlled
- * single-user deployments. Combined with the `destructive: true` annotation,
- * the agents plugin will require explicit human approval for each call.
+ * single-user deployments. Combined with the `effect: "destructive"`
+ * annotation, the agents plugin will require explicit human approval for
+ * each call.
*/
readOnly?: boolean;
}
diff --git a/packages/appkit/src/plugins/server/index.ts b/packages/appkit/src/plugins/server/index.ts
index bd5e66745..e66abf5ad 100644
--- a/packages/appkit/src/plugins/server/index.ts
+++ b/packages/appkit/src/plugins/server/index.ts
@@ -113,6 +113,13 @@ export class ServerPlugin extends Plugin {
this.serverApplication.use(requestMetricsMiddleware);
this.serverApplication.use(
express.json({
+ // Express's stock 100kb default is too tight for modern apps —
+ // agent chat payloads and any base64-encoded upload (e.g. the
+ // dev playground's smart-dashboard "save view" screenshot at
+ // ~105KB) blow past it instantly. Raise to 1mb by default and
+ // let consumers tune via `server({ bodyLimit })` if they need
+ // more headroom.
+ limit: this.config.bodyLimit ?? "1mb",
type: (req) => {
// Skip JSON parsing for routes that declared skipBodyParsing
// (e.g. file uploads where the raw body must flow through).
diff --git a/packages/appkit/src/plugins/server/types.ts b/packages/appkit/src/plugins/server/types.ts
index a97ff527b..e13b7a78d 100644
--- a/packages/appkit/src/plugins/server/types.ts
+++ b/packages/appkit/src/plugins/server/types.ts
@@ -4,4 +4,15 @@ export interface ServerConfig extends BasePluginConfig {
port?: number;
staticPath?: string;
host?: string;
+ /**
+ * Max request body size accepted by the built-in `express.json()`
+ * middleware. Accepts any string the `bytes` library understands
+ * (`"1mb"`, `"10mb"`, `"512kb"`, …). Defaults to `"1mb"` — high enough
+ * for agent chat payloads and modest base64 uploads (the dev
+ * playground's smart-dashboard "save view" screenshot is the
+ * motivating case), low enough that an attacker can't trivially
+ * exhaust memory by spamming oversized JSON. Raise it explicitly if
+ * your app routinely posts larger JSON bodies.
+ */
+ bodyLimit?: string;
}
diff --git a/packages/appkit/src/typedoc.entry.ts b/packages/appkit/src/typedoc.entry.ts
new file mode 100644
index 000000000..295ed192f
--- /dev/null
+++ b/packages/appkit/src/typedoc.entry.ts
@@ -0,0 +1,9 @@
+/**
+ * Documentation merge entry for Typedoc — combines the stable `@databricks/appkit`
+ * surface with `@databricks/appkit/beta`. Not meant for application imports.
+ *
+ * @packageDocumentation
+ */
+
+export * from "./beta";
+export * from "./index";
diff --git a/packages/shared/src/agent.ts b/packages/shared/src/agent.ts
index 74fc500aa..6486b1b29 100644
--- a/packages/shared/src/agent.ts
+++ b/packages/shared/src/agent.ts
@@ -76,6 +76,16 @@ export interface ToolCall {
id: string;
name: string;
args: unknown;
+ /**
+ * Vendor-opaque "thought signature" blob attached by Vertex AI / Gemini
+ * 2.x models to every function call they emit. Resumed threads must
+ * echo this back verbatim on the next request or Vertex rejects with
+ * `INVALID_ARGUMENT: function call X is missing a thought_signature`.
+ * Stored here so adapters can preserve it across persistence
+ * boundaries. Non-Gemini endpoints leave this undefined.
+ * See https://docs.cloud.google.com/vertex-ai/generative-ai/docs/thought-signatures
+ */
+ thoughtSignature?: string;
}
export interface Thread {
@@ -121,10 +131,12 @@ export type AgentEvent =
| { type: "metadata"; data: Record }
| {
/**
- * Emitted by the agents plugin (not adapters) when a tool call annotated
- * `destructive: true` is awaiting human approval. Clients should render
- * an approval prompt and POST to `/chat/approve` with the matching
- * `approvalId` and a `decision` of `approve` or `deny`.
+ * Emitted by the agents plugin (not adapters) when a mutating tool call
+ * is awaiting human approval — fires for tools annotated with
+ * `effect: "write" | "update" | "destructive"` (preferred) or the
+ * legacy `destructive: true` boolean. Clients should render an approval
+ * prompt and POST to `/chat/approve` with the matching `approvalId` and
+ * a `decision` of `approve` or `deny`.
*/
type: "approval_pending";
approvalId: string;
@@ -225,8 +237,10 @@ export interface AppKitMetadataEvent {
}
/**
- * Emitted when a destructive tool call is awaiting human approval. The client
- * should render an approval UI and POST the decision to `/chat/approve` with
+ * Emitted when a mutating tool call is awaiting human approval. Fires for
+ * tools annotated with `effect: "write" | "update" | "destructive"`
+ * (preferred) or the legacy `destructive: true` boolean. The client should
+ * render an approval UI and POST the decision to `/chat/approve` with
* `{ streamId, approvalId, decision: "approve" | "deny" }`. If no decision
* arrives before the server-side timeout, the call is auto-denied and the
* agent receives a denial string as the tool output.
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 4db8fbe3c..c576bd74a 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -2166,15 +2166,9 @@ packages:
resolution: {integrity: sha512-lBSBiRruFurFKXr5Hbsl2thmGweAPmddhF3jb99U4EMDA5L+e5Y1rAkOS07Nvrup7HUMBDrCV45meaxZnt28nQ==}
engines: {node: '>=20.0'}
- '@emnapi/core@1.7.1':
- resolution: {integrity: sha512-o1uhUASyo921r2XtHYOHy7gdkGLge8ghBEQHMWmyJFoXlpU58kIrhhN3w26lpQb6dspetweapMn2CSNwQ8I4wg==}
-
'@emnapi/core@1.8.1':
resolution: {integrity: sha512-AvT9QFpxK0Zd8J0jopedNm+w/2fIzvtPKPjqyw9jwvBaReTTqPBk9Hixaz7KbjimP+QNz605/XnjFcDAL2pqBg==}
- '@emnapi/runtime@1.7.1':
- resolution: {integrity: sha512-PVtJr5CmLwYAU9PZDMITZoR5iAOShYREoR45EyyLrbntV50mdePTgUn4AmOw90Ifcj+x2kRjdzr1HP3RrNiHGA==}
-
'@emnapi/runtime@1.8.1':
resolution: {integrity: sha512-mehfKSMWjjNol8659Z8KxEMrdSJDDot5SXMq00dM8BN4o+CLNXQ0xH2V7EchNHV4RmbZLmmPdEaXZc5H2FXmDg==}
@@ -2676,9 +2670,6 @@ packages:
'@mermaid-js/parser@0.6.3':
resolution: {integrity: sha512-lnjOhe7zyHjc+If7yT4zoedx2vo4sHaTmtkl1+or8BRTnCtDmcTpAjpzDSfCZrshM5bCoz0GyidzadJAH1xobA==}
- '@napi-rs/wasm-runtime@1.0.7':
- resolution: {integrity: sha512-SeDnOO0Tk7Okiq6DbXmmBODgOAb9dp9gjlphokTUxmt8U3liIP1ZsozBahH69j/RJv+Rfs6IwUKHTgQYJ/HBAw==}
-
'@napi-rs/wasm-runtime@1.1.1':
resolution: {integrity: sha512-p64ah1M1ld8xjWv3qbvFwHiFVWrq1yFvV4f7w+mzaqiR4IlSgkqhcRdHwsGgomwzBH51sRY4NEowLxnaBjcW/A==}
@@ -5501,6 +5492,13 @@ packages:
autocomplete.js@0.37.1:
resolution: {integrity: sha512-PgSe9fHYhZEsm/9jggbjtVsGXJkPLvd+9mC7gZJ662vVL5CRWEtm/mIrrzCx0MrNxHVwxD5d00UOn6NsmL2LUQ==}
+ autoprefixer@10.4.21:
+ resolution: {integrity: sha512-O+A6LWV5LDHSJD3LjHYoNi4VLsj/Whi7k6zG12xTYaU4cQ8oxQGckXNX8cRHK5yOZ/ppVHe0ZBXGzSV9jXdVbQ==}
+ engines: {node: ^10 || ^12 || >=14}
+ hasBin: true
+ peerDependencies:
+ postcss: ^8.1.0
+
autoprefixer@10.4.23:
resolution: {integrity: sha512-YYTXSFulfwytnjAPlw8QHncHJmlvFKtczb8InXaAx9Q0LbfDnfEYDE55omerIJKihhmU61Ft+cAOSzQVaBUmeA==}
engines: {node: ^10 || ^12 || >=14}
@@ -7266,6 +7264,9 @@ packages:
resolution: {integrity: sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow==}
engines: {node: '>= 0.6'}
+ fraction.js@4.3.7:
+ resolution: {integrity: sha512-ZsDfxO51wGAXREY55a7la9LScWpwv9RxIrYABrlvOFBlH/ShPnrtsXeuUIfXKKOVicNxQ+o8JTbJvjS4M89yew==}
+
fraction.js@5.3.4:
resolution: {integrity: sha512-1X1NTtiJphryn/uLQz3whtY6jK3fTqoE3ohKs0tT+Ujr1W59oopxmoEh7Lu5p6vBaPbgoM0bzveAW4Qi5RyWDQ==}
@@ -9054,6 +9055,10 @@ packages:
resolution: {integrity: sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA==}
engines: {node: '>=0.10.0'}
+ normalize-range@0.1.2:
+ resolution: {integrity: sha512-bdok/XvKII3nUpklnV6P2hxtMNrCboOjAcyBuQnWEhO665FwrSNRxU+AqpsyvO6LgGYPspN+lu5CLtw4jPRKNA==}
+ engines: {node: '>=0.10.0'}
+
normalize-url@8.1.0:
resolution: {integrity: sha512-X06Mfd/5aKsRHc0O0J5CUedwnPmnDtLF2+nq+KN9KSDlJHkPuh0JUviWjEWMe0SW/9TDdSLVPuk7L5gGTIA1/w==}
engines: {node: '>=14.16'}
@@ -14444,23 +14449,12 @@ snapshots:
- uglify-js
- webpack-cli
- '@emnapi/core@1.7.1':
- dependencies:
- '@emnapi/wasi-threads': 1.1.0
- tslib: 2.8.1
- optional: true
-
'@emnapi/core@1.8.1':
dependencies:
'@emnapi/wasi-threads': 1.1.0
tslib: 2.8.1
optional: true
- '@emnapi/runtime@1.7.1':
- dependencies:
- tslib: 2.8.1
- optional: true
-
'@emnapi/runtime@1.8.1':
dependencies:
tslib: 2.8.1
@@ -14930,13 +14924,6 @@ snapshots:
dependencies:
langium: 3.3.1
- '@napi-rs/wasm-runtime@1.0.7':
- dependencies:
- '@emnapi/core': 1.7.1
- '@emnapi/runtime': 1.7.1
- '@tybys/wasm-util': 0.10.1
- optional: true
-
'@napi-rs/wasm-runtime@1.1.1':
dependencies:
'@emnapi/core': 1.8.1
@@ -16667,7 +16654,7 @@ snapshots:
'@rolldown/binding-wasm32-wasi@1.0.0-beta.41':
dependencies:
- '@napi-rs/wasm-runtime': 1.0.7
+ '@napi-rs/wasm-runtime': 1.1.1
optional: true
'@rolldown/binding-wasm32-wasi@1.0.0-rc.3':
@@ -17978,6 +17965,16 @@ snapshots:
dependencies:
immediate: 3.3.0
+ autoprefixer@10.4.21(postcss@8.5.6):
+ dependencies:
+ browserslist: 4.28.1
+ caniuse-lite: 1.0.30001760
+ fraction.js: 4.3.7
+ normalize-range: 0.1.2
+ picocolors: 1.1.1
+ postcss: 8.5.6
+ postcss-value-parser: 4.2.0
+
autoprefixer@10.4.23(postcss@8.5.6):
dependencies:
browserslist: 4.28.1
@@ -18811,7 +18808,7 @@ snapshots:
cssnano-preset-advanced@6.1.2(postcss@8.5.6):
dependencies:
- autoprefixer: 10.4.23(postcss@8.5.6)
+ autoprefixer: 10.4.21(postcss@8.5.6)
browserslist: 4.28.1
cssnano-preset-default: 6.1.2(postcss@8.5.6)
postcss: 8.5.6
@@ -19841,6 +19838,8 @@ snapshots:
forwarded@0.2.0: {}
+ fraction.js@4.3.7: {}
+
fraction.js@5.3.4: {}
fresh@0.5.2: {}
@@ -22102,6 +22101,8 @@ snapshots:
normalize-path@3.0.0: {}
+ normalize-range@0.1.2: {}
+
normalize-url@8.1.0: {}
normalize-url@8.1.1: {}
diff --git a/template/appkit.plugins.json b/template/appkit.plugins.json
index d3c8702f9..53f8ed81e 100644
--- a/template/appkit.plugins.json
+++ b/template/appkit.plugins.json
@@ -2,6 +2,31 @@
"$schema": "https://databricks.github.io/appkit/schemas/template-plugins.schema.json",
"version": "1.1",
"plugins": {
+ "agents": {
+ "name": "agents",
+ "displayName": "Agents Plugin",
+ "description": "AI agents driven by markdown configs or code, with auto-tool-discovery from registered plugins",
+ "package": "@databricks/appkit",
+ "resources": {
+ "required": [],
+ "optional": [
+ {
+ "type": "serving_endpoint",
+ "alias": "Default LLM for agents",
+ "resourceKey": "agents-serving-endpoint",
+ "description": "Default streaming-capable LLM endpoint for agents that don't pin their own model",
+ "permission": "CAN_QUERY",
+ "fields": {
+ "name": {
+ "env": "DATABRICKS_SERVING_ENDPOINT_NAME",
+ "description": "Default LLM serving endpoint name"
+ }
+ }
+ }
+ ]
+ },
+ "stability": "beta"
+ },
"analytics": {
"name": "analytics",
"displayName": "Analytics Plugin",
diff --git a/template/client/src/App.tsx b/template/client/src/App.tsx
index 5510bb805..4b86eb557 100644
--- a/template/client/src/App.tsx
+++ b/template/client/src/App.tsx
@@ -5,6 +5,9 @@ import {
CardHeader,
CardTitle,
} from '@databricks/appkit-ui/react';
+{{- if .plugins.agents}}
+import { AgentChat } from './pages/agents/AgentChat';
+{{- end}}
{{- if .plugins.analytics}}
import { AnalyticsPage } from './pages/analytics/AnalyticsPage';
{{- end}}
@@ -43,6 +46,11 @@ function Layout() {
Home
+{{- if .plugins.agents}}
+
+ Agents
+
+{{- end}}
{{- if .plugins.analytics}}
Analytics
@@ -93,6 +101,9 @@ const router = createBrowserRouter([
element: ,
children: [
{ path: '/', element: },
+{{- if .plugins.agents}}
+ { path: '/agents', element: },
+{{- end}}
{{- if .plugins.analytics}}
{ path: '/analytics', element: },
{{- end}}
diff --git a/template/client/src/pages/agents/AgentChat.tsx b/template/client/src/pages/agents/AgentChat.tsx
new file mode 100644
index 000000000..a64855335
--- /dev/null
+++ b/template/client/src/pages/agents/AgentChat.tsx
@@ -0,0 +1,213 @@
+{{if .plugins.agents -}}
+import { useEffect, useRef, useState } from 'react';
+import {
+ type AgentChatEvent,
+ Button,
+ Card,
+ CardContent,
+ Input,
+ useAgentChat,
+ usePluginClientConfig,
+} from '@databricks/appkit-ui/react';
+
+interface Message {
+ id: string;
+ role: 'user' | 'assistant' | 'tool';
+ content: string;
+ toolName?: string;
+}
+
+/**
+ * Shape of the agents plugin's `clientConfig()` payload — exposed by
+ * the agents plugin at server startup and inlined into the boot HTML
+ * via `