diff --git a/docs/docs.json b/docs/docs.json index 85a6bb1fd..427ba541b 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -104,6 +104,7 @@ "pages": [ "v3/guides/integrations/claude-code", "v3/guides/integrations/opencode", + "v3/guides/integrations/vercel-ai-sdk", "v3/guides/integrations/crewai", "v3/guides/integrations/langgraph", "v3/guides/integrations/mcp", diff --git a/docs/v3/guides/integrations/vercel-ai-sdk.mdx b/docs/v3/guides/integrations/vercel-ai-sdk.mdx new file mode 100644 index 000000000..1903094ea --- /dev/null +++ b/docs/v3/guides/integrations/vercel-ai-sdk.mdx @@ -0,0 +1,399 @@ +--- +title: "Vercel AI SDK" +icon: "triangle" +iconType: "solid" +description: "Add persistent user memory and reasoning to any Vercel AI SDK app with Honcho" +sidebarTitle: "Vercel AI SDK" +--- + +Integrate Honcho with the Vercel AI SDK to build AI apps that remember users across sessions. The [Vercel AI SDK](https://sdk.vercel.ai) is an open-source TypeScript toolkit for building AI-powered apps with a unified API across providers. This guide shows you how to wrap any `generateText` or `streamText` call with Honcho's memory middleware and reasoning tools. + + +The full package source and examples are available on [GitHub](https://github.com/plastic-labs/vercel-ai-sdk-package). + + +## What We're Building + +We'll wire Honcho into a Vercel AI SDK app so the model receives context from past conversations and can query what it knows about the user mid-generation. Here's how the pieces fit together: + +- **Vercel AI SDK** handles model calls and streaming +- **Honcho** stores messages and retrieves user context before each generation +- **Your model provider** can be Anthropic, OpenAI, Google, etc. + +The key benefit: you don't manually manage conversation history across sessions. Honcho handles persistence and context injection — the model always has a rich picture of who it's talking to. (New to Honcho's primitives? See [peers and sessions](/v3/documentation/core-concepts/architecture).) + +## Setup + +Install the package: + + +```bash npm +npm install @honcho-ai/vercel-ai-sdk +``` + +```bash pnpm +pnpm add @honcho-ai/vercel-ai-sdk +``` + +```bash yarn +yarn add @honcho-ai/vercel-ai-sdk +``` + +```bash bun +bun add @honcho-ai/vercel-ai-sdk +``` + + +Get your API key at [app.honcho.dev](https://app.honcho.dev). + +```bash +HONCHO_API_KEY=your-api-key +HONCHO_WORKSPACE_ID=your-workspace-id +``` + +## Use the Skill + +The package ships a Skill that can walk an agent through wiring Honcho into your Vercel AI SDK app automatically — it greps for your `generateText` / `streamText` call sites, asks where `userId` / `sessionId` come from, and applies the integration in place. + +```bash +npx skills add plastic-labs/vercel-ai-sdk +``` + +Then invoke `/honcho-vercel-ai-sdk`. + + +If you've already installed `@honcho-ai/vercel-ai-sdk` via npm, you can symlink the skill directly. Example shown is for Claude Code: + +```bash +mkdir -p ~/.claude/skills/honcho-vercel-ai-sdk +ln -sf "$(pwd)/node_modules/@honcho-ai/vercel-ai-sdk/skills/honcho-vercel-ai-sdk/SKILL.md" \ + ~/.claude/skills/honcho-vercel-ai-sdk/SKILL.md +``` + +Restart the session, then invoke `/honcho-vercel-ai-sdk`. + + +## Create a Provider Instance + +`createHoncho()` is the entry point. It reads your API key and workspace from environment variables and returns a provider object with `middleware()`, `tools()`, and `send()`. 
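Because the provider reads `HONCHO_API_KEY` and `HONCHO_WORKSPACE_ID` from the environment, a quick guard before constructing it can fail fast with a clearer message than a downstream auth error. A minimal sketch, assuming a Node runtime; the loop and error text are illustrative, not part of the package:

```typescript
// Fail fast if the Honcho credentials are missing from the environment.
for (const name of ['HONCHO_API_KEY', 'HONCHO_WORKSPACE_ID'] as const) {
  if (!process.env[name]) {
    throw new Error(`Missing required environment variable: ${name}`);
  }
}
```

With the environment in place, the basic setup is: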
+ +```typescript +import { createHoncho } from '@honcho-ai/vercel-ai-sdk'; + +const honcho = createHoncho(); +``` + +You can set a stable `defaultAssistantId` on the provider to identify the AI peer across all calls: + +```typescript +const honcho = createHoncho({ + defaultAssistantId: 'my-assistant', +}); +``` + +## Add Middleware + +`honcho.middleware()` is compatible with `wrapLanguageModel`. Two things happen on each call: + +1. **Before generation** — Honcho fetches the user's representation, peer card, session summary, and recent messages and injects them into the system prompt +2. **After generation** — the user message and assistant response are stored back in Honcho with correct peer attribution + +```typescript +import { createHoncho } from '@honcho-ai/vercel-ai-sdk'; +import { wrapLanguageModel, generateText } from 'ai'; +import { anthropic } from '@ai-sdk/anthropic'; + +const honcho = createHoncho(); + +const model = wrapLanguageModel({ + model: anthropic('claude-sonnet-4-6'), + middleware: honcho.middleware({ + userId: 'user-abc', + sessionId: 'session-123', + }), +}); + +const { text } = await generateText({ + model, + prompt: 'What should I focus on today?', +}); +``` + +Pass `userId` and `sessionId` per request — no session handles to construct. Both default to lazily generated IDs if omitted, which is fine for local scripts but not for multi-user server traffic. + +## Add Tools + +`honcho.tools()` gives the model six tools it can call mid-generation to query or update what it knows about the user: + +| Tool | What it does | +| --- | --- | +| `honcho_chat` | Dialectic reasoning — ask natural-language questions about the user; answers synthesized from full interaction history | +| `honcho_context` | Short summary of recent context within the session | +| `honcho_search` | Semantic search over stored conversation messages | +| `honcho_search_conclusions` | Query derived conclusions: personality traits, preferences, behavioral patterns | +| `honcho_get_representation` | Full synthesized profile of the user | +| `honcho_save_conclusion` | Persist an observation about the user for future sessions | + +Pass the same `userId` and `sessionId` to `honcho.tools()` so tool calls bind to the same peers as the middleware: + +```typescript +import { generateText, stepCountIs } from 'ai'; + +const { text } = await generateText({ + model, + tools: honcho.tools({ + userId: 'user-abc', + sessionId: 'session-123', + }), + stopWhen: stepCountIs(3), + prompt: 'Based on our conversations, what do I care about most?', +}); +``` + +## Complete Example + +Here's a full working example combining middleware and tools. + +Want a runnable end-to-end version? See the [Full Script](#full-script). 
+ +```typescript +import { createHoncho } from '@honcho-ai/vercel-ai-sdk'; +import { wrapLanguageModel, generateText, stepCountIs } from 'ai'; +import { anthropic } from '@ai-sdk/anthropic'; + +const honcho = createHoncho({ + defaultAssistantId: 'assistant', +}); + +const userId = 'user-abc'; +const sessionId = 'session-123'; + +const model = wrapLanguageModel({ + model: anthropic('claude-sonnet-4-6'), + middleware: honcho.middleware({ userId, sessionId }), +}); + +const { text } = await generateText({ + model, + tools: honcho.tools({ userId, sessionId }), + stopWhen: stepCountIs(3), + prompt: 'What should we work on today?', +}); + +console.log(text); +``` + +## Streaming + +`streamText` works the same way — middleware handles persistence after the stream completes: + +```typescript +import { createHoncho } from '@honcho-ai/vercel-ai-sdk'; +import { wrapLanguageModel, streamText } from 'ai'; +import { openai } from '@ai-sdk/openai'; + +const honcho = createHoncho(); + +const userId = 'user-abc'; +const sessionId = 'session-456'; + +const model = wrapLanguageModel({ + model: openai('gpt-4o'), + middleware: honcho.middleware({ userId, sessionId }), +}); + +const result = streamText({ + model, + tools: honcho.tools({ userId, sessionId }), + prompt: 'What should we work on today?', +}); + +for await (const chunk of result.textStream) { + process.stdout.write(chunk); +} +``` + +## Using with `messages` + +If your app already manages conversation history and passes a `messages` array directly, set `injectHistory: false` to prevent Honcho from prepending duplicate history: + +```typescript +honcho.middleware({ + userId, + sessionId, + injectHistory: false, // don't prepend history — we're passing messages directly +}) +``` + +Honcho still injects the user's representation and peer card into the system prompt, and still persists messages after generation. With `injectHistory: false` you must pass a `messages` array — without either `messages` or `prompt`, the Vercel AI SDK throws `Invalid prompt: prompt or messages must be defined`. + +## Verifying the Integration + +### 1. Isolate Honcho's Contribution + +Let's confirm the memory is actually coming from Honcho and not your app's existing conversation history. + +Two ways to check: 1) through a developer method 2) through the UI. + +**Token delta (developer check).** On a session with a few prior turns, run the same prompt twice — once with `injectHistory: false` and once without. + +Compare `result.usage.inputTokens`: + +```typescript +const baseline = await generateText({ + model: wrapLanguageModel({ + model: anthropic('claude-sonnet-4-6'), + middleware: honcho.middleware({ userId, sessionId, injectHistory: false }), + }), + prompt: 'What do you know about my preferences?', +}); + +const injected = await generateText({ + model: wrapLanguageModel({ + model: anthropic('claude-sonnet-4-6'), + middleware: honcho.middleware({ userId, sessionId }), + }), + prompt: 'What do you know about my preferences?', +}); + +console.log(injected.usage.inputTokens - baseline.usage.inputTokens); +``` + +A positive delta is Honcho's representation, peer card, and session summary being injected into the system prompt. Expect ~0 on a fresh peer — the deriver runs asynchronously after messages persist, so injected context only populates after a few prior turns. + +**Dashboard (UI check).** Open [app.honcho.dev/explore](https://app.honcho.dev/explore), select your workspace, and confirm your peer and session appear under the Peers and Sessions tables. 
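If the delta comes back at ~0 because the peer is brand new, seed a couple of turns through the same wrapped model first and give the deriver a moment before rerunning the comparison. A minimal sketch reusing the identifiers and imports from the snippets above; the prompts and the five-second pause are illustrative, not a documented interval:

```typescript
// Persist a few user messages so the deriver has material to synthesize.
const seedModel = wrapLanguageModel({
  model: anthropic('claude-sonnet-4-6'),
  middleware: honcho.middleware({ userId, sessionId }),
});

for (const prompt of [
  'I prefer concise answers and I mostly work in TypeScript.',
  'This week I am migrating our billing service to Postgres.',
]) {
  await generateText({ model: seedModel, prompt });
}

// Arbitrary grace period for the asynchronous deriver before re-checking the token delta.
await new Promise((resolve) => setTimeout(resolve, 5_000));
```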
+ +With Honcho's contribution isolated, the rest of this section shows what the integration feels like in practice. + +### 2. First turn + +Send any message. The model responds normally — nothing is stored yet. Context injection returns empty on the first turn. + +### 3. Build memory across turns + +Have a multi-turn conversation and share something about yourself: + +```text +I prefer concise answers and I mostly work in TypeScript. +``` + +After a few turns, ask: + +```text +What do you know about my preferences? +``` + +If the model references TypeScript and concise answers without being told again in this session, memory is working. + +### 4. Cross-session recall + +Start a new session (new `sessionId`) with the same `userId`. Ask: + +```text +Call your honcho_search tool with the query 'TypeScript' and quote the exact verbatim message that contained TypeScript. Do not paraphrase. +``` + +If the search returns a message from the prior session word-for-word, peer-scoped retrieval is crossing session boundaries. `honcho_search` queries the user's messages across all their sessions and doesn't depend on the deriver, so it works regardless of how short the prior session was. + +To confirm the tool actually fired, inspect `result.steps[i].toolCalls`: + +```typescript +const toolFires = result.steps?.flatMap((step, i) => + (step.toolCalls ?? []).map((tc) => ({ step: i, tool: tc.toolName, input: tc.input })) +) ?? []; +console.log(toolFires); +// [{ step: 0, tool: "honcho_search", input: { query: "TypeScript", limit: 10 } }] +``` + +When the model takes more than one turn (call a tool, see the result, then answer), the top-level `result.toolCalls` is empty — check inside each `step`. + +## Full Script + + +```typescript +/** + * Multi-turn chat with Honcho memory + Vercel AI SDK. + * + * Prerequisites: + * 1. Install dependencies: + * npm install @honcho-ai/vercel-ai-sdk ai @ai-sdk/anthropic dotenv + * 2. Set environment variables in `.env`: + * HONCHO_API_KEY=your-honcho-api-key + * HONCHO_WORKSPACE_ID=your-workspace-id + * ANTHROPIC_API_KEY=your-anthropic-api-key + * 3. Run with: npx tsx honcho_vercel_chat.ts + * + * Pass a stable userId from your auth system and a sessionId for the conversation + * thread; Honcho handles persistence and context injection on every turn. + */ + +import 'dotenv/config'; +import { createHoncho } from '@honcho-ai/vercel-ai-sdk'; +import { wrapLanguageModel, generateText, stepCountIs } from 'ai'; +import { anthropic } from '@ai-sdk/anthropic'; +import * as readline from 'node:readline/promises'; +import { stdin as input, stdout as output } from 'node:process'; + +const honcho = createHoncho({ + defaultAssistantId: 'assistant', +}); + +const userId = process.env.USER_ID ?? 'demo-user'; +const sessionId = process.env.SESSION_ID ?? 
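  // No SESSION_ID set: fall back to a throwaway per-run session; reuse a stable ID to continue a prior conversation.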
`session-${Date.now()}`;

const model = wrapLanguageModel({
  model: anthropic('claude-sonnet-4-6'),
  middleware: honcho.middleware({ userId, sessionId }),
});

async function chat(prompt: string): Promise<string> {
  const { text } = await generateText({
    model,
    tools: honcho.tools({ userId, sessionId }),
    stopWhen: stepCountIs(3),
    prompt,
  });
  return text;
}

async function main() {
  const rl = readline.createInterface({ input, output });
  console.log(`Honcho session: ${sessionId} (user: ${userId})`);
  console.log('Type a message, or "exit" to quit.\n');

  while (true) {
    const userMessage = (await rl.question('you > ')).trim();
    if (!userMessage || userMessage === 'exit') break;
    const reply = await chat(userMessage);
    console.log(`bot > ${reply}\n`);
  }

  rl.close();
}

main().catch((err) => {
  console.error(err);
  process.exit(1);
});
```

## Next Steps

- [Package repository](https://github.com/plastic-labs/vercel-ai-sdk-package): source, tests, and full API reference for `@honcho-ai/vercel-ai-sdk`.
- [Honcho architecture](/v3/documentation/core-concepts/architecture): learn about peers, sessions, and dialectic reasoning.
- Self-hosting: run Honcho locally with your Vercel AI SDK app.
- [Vercel AI SDK documentation](https://sdk.vercel.ai): `wrapLanguageModel`, middleware, and tool use reference.

diff --git a/src/crud/message.py b/src/crud/message.py
index 3334c6e87..76f9cab7c 100644
--- a/src/crud/message.py
+++ b/src/crud/message.py
@@ -140,13 +140,12 @@ async def _build_merged_snippets(
     for msg in matched_messages:
         session_matches.setdefault(msg.session_name, []).append(msg)

-    snippets: list[tuple[list[models.Message], list[models.Message]]] = []
-
+    # Build merged ranges per session, then issue a single batched query
+    session_ranges: dict[str, list[tuple[int, int, list[models.Message]]]] = {}
     for sess_name, matches in session_matches.items():
         matches.sort(key=lambda m: m.seq_in_session)

         merged_ranges: list[tuple[int, int, list[models.Message]]] = []
-
         for match in matches:
             start = match.seq_in_session - context_window
             end = match.seq_in_session + context_window
@@ -161,25 +160,42 @@
             else:
                 merged_ranges.append((start, end, [match]))

-        # Batch all ranges into a single query using OR conditions.
-        # NOTE: If callers ever pass a very high limit (many disjoint ranges),
-        #       consider chunking to avoid oversized SQL / planner issues.
- range_conditions = [ - models.Message.seq_in_session.between(start_seq, end_seq) - for start_seq, end_seq, _ in merged_ranges - ] - context_stmt = ( - select(models.Message) - .where(models.Message.workspace_name == workspace_name) - .where(models.Message.session_name == sess_name) - .where(or_(*range_conditions)) - .order_by(models.Message.seq_in_session.asc()) + session_ranges[sess_name] = merged_ranges + + # One OR-of-ANDs predicate covers every (session, range) pair + session_predicates = [ + and_( + models.Message.session_name == sess_name, + or_( + *( + models.Message.seq_in_session.between(start_seq, end_seq) + for start_seq, end_seq, _ in merged_ranges + ) + ), + ) + for sess_name, merged_ranges in session_ranges.items() + ] + + context_stmt = ( + select(models.Message) + .where(models.Message.workspace_name == workspace_name) + .where(or_(*session_predicates)) + .order_by( + models.Message.session_name.asc(), + models.Message.seq_in_session.asc(), ) + ) - context_result = await db.execute(context_stmt) - all_context_messages = list(context_result.scalars().all()) + context_result = await db.execute(context_stmt) + by_session: dict[str, list[models.Message]] = {} + for msg in context_result.scalars().all(): + by_session.setdefault(msg.session_name, []).append(msg) - # Partition results back into their respective ranges + snippets: list[ + tuple[list[models.Message], list[models.Message]] + ] = [] # list of tuples, each containing query matches and context messages + for sess_name, merged_ranges in session_ranges.items(): + all_context_messages = by_session.get(sess_name, []) for start_seq, end_seq, range_matches in merged_ranges: context_messages = [ msg diff --git a/tests/crud/test_representation_manager.py b/tests/crud/test_representation_manager.py index dfd959d76..ff49ac079 100644 --- a/tests/crud/test_representation_manager.py +++ b/tests/crud/test_representation_manager.py @@ -24,6 +24,7 @@ async def _fake_tracked_db(_name: str): def _saved_observations(mock_save: AsyncMock): call = mock_save.await_args + assert call is not None, "mock was not awaited" if "all_observations" in call.kwargs: return call.kwargs["all_observations"] if len(call.args) > 1: @@ -162,7 +163,9 @@ async def test_query_documents_most_derived_excludes_soft_deleted( class TestRepresentationManagerSave: @pytest.mark.asyncio - async def test_save_representation_filters_blank_observations_before_embedding(self): + async def test_save_representation_filters_blank_observations_before_embedding( + self, + ): manager = RepresentationManager( "workspace", observer="observer", @@ -202,7 +205,7 @@ async def test_save_representation_filters_blank_observations_before_embedding(s message_ids=[1], session_name="session", message_created_at=datetime.now(timezone.utc), - message_level_configuration=SimpleNamespace( + message_level_configuration=SimpleNamespace( # pyright: ignore[reportArgumentType] dream=SimpleNamespace(enabled=False) ), ) @@ -258,7 +261,7 @@ async def test_save_representation_filters_blank_deductive_observations(self): message_ids=[1], session_name="session", message_created_at=datetime.now(timezone.utc), - message_level_configuration=SimpleNamespace( + message_level_configuration=SimpleNamespace( # pyright: ignore[reportArgumentType] dream=SimpleNamespace(enabled=False) ), ) @@ -311,7 +314,7 @@ async def test_save_representation_skips_all_blank_observations(self): message_ids=[1], session_name="session", message_created_at=datetime.now(timezone.utc), - message_level_configuration=SimpleNamespace( + 
message_level_configuration=SimpleNamespace( # pyright: ignore[reportArgumentType] dream=SimpleNamespace(enabled=False) ), ) diff --git a/tests/integration/test_message_embeddings.py b/tests/integration/test_message_embeddings.py index 091e8cf3e..c5e3f1004 100644 --- a/tests/integration/test_message_embeddings.py +++ b/tests/integration/test_message_embeddings.py @@ -17,11 +17,50 @@ from src.config import settings from src.crud import create_messages from src.crud import message as message_crud -from src.models import Peer, Workspace +from src.models import Message, Peer, Workspace from src.schemas import MessageCreate from src.utils.search import search +class _FakeScalarResult: + def __init__(self, rows: list[models.Message]): + self._rows: list[Message] = rows + + def all(self) -> list[models.Message]: + return self._rows + + +class _FakeResult: + def __init__(self, rows: list[models.Message]): + self._rows: list[Message] = rows + + def scalars(self) -> _FakeScalarResult: + return _FakeScalarResult(self._rows) + + +class _CountingDb: + def __init__(self, rows: list[models.Message]): + self._rows: list[Message] = rows + self.execute_count: int = 0 + + async def execute(self, _stmt: Any) -> _FakeResult: + self.execute_count += 1 + return _FakeResult(self._rows) + + +def _message(session_name: str, seq_in_session: int) -> models.Message: + return models.Message( + workspace_name="workspace", + session_name=session_name, + peer_name="peer", + content=f"{session_name}:{seq_in_session}", + public_id=generate_nanoid(), + seq_in_session=seq_in_session, + token_count=1, + created_at=datetime.now(timezone.utc), + ) + + @pytest.mark.asyncio async def test_message_embedding_created_when_setting_enabled( db_session: AsyncSession, @@ -260,6 +299,46 @@ async def test_semantic_search_when_embeddings_enabled( assert created_message.public_id in found_message_ids +@pytest.mark.asyncio +async def test_build_merged_snippets_batches_context_query_across_sessions(): + """Context expansion should not issue one DB query per matched session.""" + matched_messages = [ + _message("session_a", 10), + _message("session_b", 20), + _message("session_c", 30), + ] + context_messages = [ + _message("session_a", 9), + _message("session_a", 10), + _message("session_a", 11), + _message("session_a", 99), + _message("session_b", 19), + _message("session_b", 20), + _message("session_b", 21), + _message("session_c", 29), + _message("session_c", 30), + _message("session_c", 31), + ] + db = _CountingDb(context_messages) + + snippets = await message_crud._build_merged_snippets( # pyright: ignore[reportPrivateUsage] + db, # pyright: ignore[reportArgumentType] + workspace_name="workspace", + matched_messages=matched_messages, + context_window=1, + ) + + assert db.execute_count == 1 + assert [len(matches) for matches, _ in snippets] == [1, 1, 1] + assert [ + [msg.content for msg in context_messages] for _, context_messages in snippets + ] == [ + ["session_a:9", "session_a:10", "session_a:11"], + ["session_b:19", "session_b:20", "session_b:21"], + ["session_c:29", "session_c:30", "session_c:31"], + ] + + @pytest.mark.asyncio async def test_search_messages_external_lookup_happens_before_tracked_db( monkeypatch: pytest.MonkeyPatch, diff --git a/tests/routes/test_messages.py b/tests/routes/test_messages.py index 07040e208..a121cfe0c 100644 --- a/tests/routes/test_messages.py +++ b/tests/routes/test_messages.py @@ -1001,8 +1001,10 @@ async def test_create_message_without_timestamp_uses_default( db_session.add(test_session) await 
db_session.commit() - # Record time before request - before_request = datetime.datetime.now(datetime.timezone.utc) + # Pad the window to absorb client/Postgres clock skew under Docker. + before_request = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta( + seconds=1 + ) response = client.post( f"/v3/workspaces/{test_workspace.name}/sessions/{test_session.name}/messages", @@ -1017,8 +1019,9 @@ async def test_create_message_without_timestamp_uses_default( }, ) - # Record time after request - after_request = datetime.datetime.now(datetime.timezone.utc) + after_request = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta( + seconds=1 + ) assert response.status_code == 201 data = response.json() @@ -1053,8 +1056,9 @@ async def test_create_batch_messages_with_mixed_timestamps( timestamp1 = datetime.datetime(2023, 1, 1, 12, 0, 0, tzinfo=datetime.timezone.utc) timestamp2 = datetime.datetime(2023, 1, 2, 12, 0, 0, tzinfo=datetime.timezone.utc) - # Record time before request for default timestamp - before_request = datetime.datetime.now(datetime.timezone.utc) + before_request = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta( + seconds=1 + ) response = client.post( f"/v3/workspaces/{test_workspace.name}/sessions/{test_session.name}/messages", @@ -1081,7 +1085,9 @@ async def test_create_batch_messages_with_mixed_timestamps( }, ) - after_request = datetime.datetime.now(datetime.timezone.utc) + after_request = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta( + seconds=1 + ) assert response.status_code == 201 data = response.json() @@ -1124,8 +1130,9 @@ async def test_create_message_with_null_timestamp( db_session.add(test_session) await db_session.commit() - # Record time before request - before_request = datetime.datetime.now(datetime.timezone.utc) + before_request = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta( + seconds=1 + ) response = client.post( f"/v3/workspaces/{test_workspace.name}/sessions/{test_session.name}/messages", @@ -1141,7 +1148,9 @@ async def test_create_message_with_null_timestamp( }, ) - after_request = datetime.datetime.now(datetime.timezone.utc) + after_request = datetime.datetime.now(datetime.timezone.utc) + datetime.timedelta( + seconds=1 + ) assert response.status_code == 201 data = response.json() diff --git a/tests/unified/run.py b/tests/unified/run.py index b9e4a5767..c0848471a 100644 --- a/tests/unified/run.py +++ b/tests/unified/run.py @@ -12,13 +12,13 @@ async def main(): parser = argparse.ArgumentParser(description="Run Unified Honcho Tests") - parser.add_argument( + target_group = parser.add_mutually_exclusive_group() + target_group.add_argument( "--test-dir", type=str, - default="tests/unified/test_cases", help="Directory containing JSON test files", ) - parser.add_argument( + target_group.add_argument( "--test-file", type=str, help="Path to a single JSON test file to run", @@ -32,10 +32,8 @@ async def main(): args = parser.parse_args() - # Validate mutually exclusive args - if args.test_file and args.test_dir != "tests/unified/test_cases": - print("Error: Cannot specify both --test-file and --test-dir") - sys.exit(1) + if args.test_file is None and args.test_dir is None: + args.test_dir = "tests/unified/test_cases" if args.test_file: test_path = Path(args.test_file)