From 14282c0e257f4261b77b2a8f5f19c18d840a3314 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 20 Apr 2026 10:37:09 +0200 Subject: [PATCH 1/6] Expand AI agent detection in user-agent Add detection for Goose, Amp, Augment, VS Code Copilot, Kiro, and Windsurf. Also honor the agents.md standard AGENT env var with an "unknown" fallback when set to a value we don't recognize. Switches the detection data model from (envVar, product) pairs to agent records with a list of matchers. Each agent fires if any of its matchers fires (presence-only or exact value). Ambiguity is judged by unique product, not raw matcher hits, so the same agent setting both a bespoke var and AGENT= is not ambiguous. Co-authored-by: Isaac Signed-off-by: simon --- .../com/databricks/sdk/core/UserAgent.java | 136 +++++++++++++--- .../databricks/sdk/core/UserAgentTest.java | 146 +++++++++++++++++- 2 files changed, 260 insertions(+), 22 deletions(-) diff --git a/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java b/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java index 22bd6b22f..e7d35bbd1 100644 --- a/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java +++ b/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java @@ -237,51 +237,147 @@ private static String cicdProvider() { return cicdProvider; } - // Maps an environment variable to an agent product name. - private static class AgentDef { + // Matches an environment variable. If value is empty, matching is + // presence-only (any value, including empty string, counts as a match). If + // value is non-empty, the env var must be set to exactly that value. 
+ private static class EnvMatcher { private final String envVar; - private final String product; + private final String value; + + EnvMatcher(String envVar) { + this(envVar, ""); + } - AgentDef(String envVar, String product) { + EnvMatcher(String envVar, String value) { this.envVar = envVar; + this.value = value; + } + + boolean fires(Environment env) { + String v = env.get(envVar); + if (v == null) { + return false; + } + if (value.isEmpty()) { + return true; + } + return v.equals(value); + } + } + + // Describes a single AI coding agent and the environment matchers that + // identify it. The agent is detected if ANY matcher in matchAny fires. + private static class KnownAgent { + private final String product; + private final List matchAny; + + KnownAgent(String product, List matchAny) { this.product = product; + this.matchAny = matchAny; + } + + boolean fires(Environment env) { + for (EnvMatcher m : matchAny) { + if (m.fires(env)) { + return true; + } + } + return false; } } + // The agents.md standard env var. When set to a value we don't specifically + // recognize, detection falls back to "unknown". + private static final String AGENT_ENV_VAR = "AGENT"; + // Canonical list of known AI coding agents. // Keep this list in sync with databricks-sdk-go and databricks-sdk-py. - private static List listKnownAgents() { + // Agents are listed alphabetically by product name. 
+ private static List listKnownAgents() { return Arrays.asList( - new AgentDef("ANTIGRAVITY_AGENT", "antigravity"), // Closed source (Google) - new AgentDef("CLAUDECODE", "claude-code"), // https://github.com/anthropics/claude-code - new AgentDef("CLINE_ACTIVE", "cline"), // https://github.com/cline/cline (v3.24.0+) - new AgentDef("CODEX_CI", "codex"), // https://github.com/openai/codex - new AgentDef("COPILOT_CLI", "copilot-cli"), // https://github.com/features/copilot - new AgentDef("CURSOR_AGENT", "cursor"), // Closed source - new AgentDef("GEMINI_CLI", "gemini-cli"), // https://google-gemini.github.io/gemini-cli - new AgentDef("OPENCODE", "opencode"), // https://github.com/opencode-ai/opencode - new AgentDef("OPENCLAW_SHELL", "openclaw")); // https://github.com/anthropics/openclaw + new KnownAgent( + "amp", + Arrays.asList( + new EnvMatcher("AMP_CURRENT_THREAD_ID"), new EnvMatcher(AGENT_ENV_VAR, "amp"))), + new KnownAgent( + "antigravity", + Collections.singletonList( + new EnvMatcher("ANTIGRAVITY_AGENT"))), // Closed source (Google) + new KnownAgent("augment", Collections.singletonList(new EnvMatcher("AUGMENT_AGENT"))), + new KnownAgent( + "claude-code", + Collections.singletonList( + new EnvMatcher("CLAUDECODE"))), // https://github.com/anthropics/claude-code + new KnownAgent( + "cline", + Collections.singletonList( + new EnvMatcher("CLINE_ACTIVE"))), // https://github.com/cline/cline (v3.24.0+) + new KnownAgent( + "codex", + Collections.singletonList( + new EnvMatcher("CODEX_CI"))), // https://github.com/openai/codex + new KnownAgent( + "copilot-cli", + Collections.singletonList( + new EnvMatcher("COPILOT_CLI"))), // https://github.com/features/copilot + new KnownAgent( + "copilot-vscode", + Collections.singletonList(new EnvMatcher("COPILOT_MODEL"))), // VS Code Copilot + new KnownAgent( + "cursor", Collections.singletonList(new EnvMatcher("CURSOR_AGENT"))), // Closed source + new KnownAgent( + "gemini-cli", + Collections.singletonList( + new 
EnvMatcher("GEMINI_CLI"))), // https://google-gemini.github.io/gemini-cli + new KnownAgent( + "goose", + Arrays.asList( + new EnvMatcher("GOOSE_TERMINAL"), new EnvMatcher(AGENT_ENV_VAR, "goose"))), + new KnownAgent("kiro", Collections.singletonList(new EnvMatcher("KIRO"))), + new KnownAgent( + "opencode", + Collections.singletonList( + new EnvMatcher("OPENCODE"))), // https://github.com/opencode-ai/opencode + new KnownAgent( + "openclaw", + Collections.singletonList( + new EnvMatcher("OPENCLAW_SHELL"))), // https://github.com/anthropics/openclaw + new KnownAgent("windsurf", Collections.singletonList(new EnvMatcher("WINDSURF_AGENT")))); } // Looks up the active agent provider based on environment variables. - // Returns the agent name if exactly one is set (non-empty). - // Returns empty string if zero or multiple agents detected. + // + // For each agent, it fires if ANY of its matchers fires. The function counts + // how many distinct agents matched: + // - Exactly one agent matched: return its product name. + // - More than one agent matched: return "" (ambiguity). + // - Zero agents matched: if the agents.md standard AGENT env var is set to + // any non-empty value, return "unknown". Otherwise return "". + // + // Unlike CI/CD detection (which returns the first match), agent detection + // uses an ambiguity guard because agent env vars can be stacked (e.g., + // running Cline inside Cursor). 
private static String lookupAgentProvider(Environment env) { String detected = ""; int count = 0; - for (AgentDef agent : listKnownAgents()) { - String value = env.get(agent.envVar); - if (value != null && !value.isEmpty()) { + for (KnownAgent agent : listKnownAgents()) { + if (agent.fires(env)) { detected = agent.product; count++; if (count > 1) { - return ""; + break; } } } if (count == 1) { return detected; } + if (count == 0) { + String agentValue = env.get(AGENT_ENV_VAR); + if (agentValue != null && !agentValue.isEmpty()) { + return "unknown"; + } + } return ""; } diff --git a/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java b/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java index 54405982c..750560876 100644 --- a/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java +++ b/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java @@ -218,6 +218,146 @@ public void testAgentProviderOpenclaw() { Assertions.assertTrue(UserAgent.asString().contains("agent/openclaw")); } + @Test + public void testAgentProviderAmp() { + setupAgentEnv( + new HashMap() { + { + put("AMP_CURRENT_THREAD_ID", "thread-123"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/amp")); + } + + @Test + public void testAgentProviderAugment() { + setupAgentEnv( + new HashMap() { + { + put("AUGMENT_AGENT", "1"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/augment")); + } + + @Test + public void testAgentProviderCopilotVscode() { + setupAgentEnv( + new HashMap() { + { + put("COPILOT_MODEL", "gpt-4"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/copilot-vscode")); + } + + @Test + public void testAgentProviderGoose() { + setupAgentEnv( + new HashMap() { + { + put("GOOSE_TERMINAL", "1"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/goose")); + } + + @Test + public void testAgentProviderKiro() { + 
setupAgentEnv( + new HashMap() { + { + put("KIRO", "1"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/kiro")); + } + + @Test + public void testAgentProviderWindsurf() { + setupAgentEnv( + new HashMap() { + { + put("WINDSURF_AGENT", "1"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/windsurf")); + } + + @Test + public void testAgentProviderAgentEnvGoose() { + setupAgentEnv( + new HashMap() { + { + put("AGENT", "goose"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/goose")); + } + + @Test + public void testAgentProviderGooseBothMatchers() { + // GOOSE_TERMINAL and AGENT=goose both fire the goose matcher. Since they + // both identify the same agent, this is NOT ambiguous. + setupAgentEnv( + new HashMap() { + { + put("GOOSE_TERMINAL", "1"); + put("AGENT", "goose"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/goose")); + } + + @Test + public void testAgentProviderAmpBothMatchers() { + // AMP_CURRENT_THREAD_ID and AGENT=amp both identify amp, not ambiguous. + setupAgentEnv( + new HashMap() { + { + put("AMP_CURRENT_THREAD_ID", "thread-123"); + put("AGENT", "amp"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/amp")); + } + + @Test + public void testAgentProviderAgentEnvUnknown() { + setupAgentEnv( + new HashMap() { + { + put("AGENT", "someweirdthing"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/unknown")); + } + + @Test + public void testAgentProviderAgentEnvEmpty() { + // AGENT="" should not trigger the unknown fallback. + setupAgentEnv( + new HashMap() { + { + put("AGENT", ""); + } + }); + Assertions.assertFalse(UserAgent.asString().contains("agent/")); + } + + @Test + public void testAgentProviderAgentEnvAmbiguity() { + // AGENT=goose fires goose, CLAUDECODE=1 fires claude-code. Ambiguity. 
+ setupAgentEnv( + new HashMap() { + { + put("AGENT", "goose"); + put("CLAUDECODE", "1"); + } + }); + Assertions.assertFalse(UserAgent.asString().contains("agent/")); + } + @Test public void testAgentProviderNoAgent() { setupAgentEnv(new HashMap<>()); @@ -237,14 +377,16 @@ public void testAgentProviderMultipleAgents() { } @Test - public void testAgentProviderEmptyValue() { + public void testAgentProviderEmptyValueStillSet() { + // Empty string still counts as "set" for presence-only matchers, + // matching databricks-sdk-go semantics. setupAgentEnv( new HashMap() { { put("CLAUDECODE", ""); } }); - Assertions.assertFalse(UserAgent.asString().contains("agent/")); + Assertions.assertTrue(UserAgent.asString().contains("agent/claude-code")); } @Test From e9c4eaa3229d4c5bc2d872ed3dd9ccac8afea79c Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 20 Apr 2026 11:07:39 +0200 Subject: [PATCH 2/6] Address review feedback on agent detection - Add NEXT_CHANGELOG.md entry covering the expanded agent list, the AGENT standard, and the empty-string semantics change. - When the main matcher loop finds no match and AGENT is set to a known product name, return that product name instead of "unknown" (implicit known-product fallback). Known matchers still win over the fallback, so AGENT=cursor + CLAUDECODE=1 still yields claude-code. - Restore alphabetical ordering: openclaw before opencode. - Add provenance comments on new agent entries (goose, amp, augment, copilot-vscode, kiro, windsurf). - New tests: testAgentProviderAgentEnvAmp, testAgentProviderAgentEnvCursor, testAgentProviderKnownMatcherWinsOverAgentFallback. 
Co-authored-by: Isaac Signed-off-by: simon --- NEXT_CHANGELOG.md | 2 +- .../com/databricks/sdk/core/UserAgent.java | 42 +++++++++++++------ .../databricks/sdk/core/UserAgentTest.java | 39 +++++++++++++++++ 3 files changed, 70 insertions(+), 13 deletions(-) diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index e98519f52..f335db553 100755 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -5,7 +5,7 @@ ### New Features and Improvements * Add support for authentication through Azure Managed Service Identity (MSI) via the new `azure-msi` credential provider. * Support `default_profile` in `[__settings__]` section of `.databrickscfg` for consistent default profile resolution across CLI and SDKs. -* Added automatic detection of AI coding agents (Antigravity, Claude Code, Cline, Codex, Copilot CLI, Cursor, Gemini CLI, OpenCode) in the user-agent string. The SDK now appends `agent/<name>` to HTTP request headers when running inside a known AI agent environment. +* Added automatic detection of AI coding agents (Amp, Antigravity, Augment, Claude Code, Cline, Codex, Copilot CLI, Copilot VS Code, Cursor, Gemini CLI, Goose, Kiro, OpenClaw, OpenCode, Windsurf) in the user-agent string. The SDK now appends `agent/<name>` to HTTP request headers when running inside a known AI agent environment. Also honors the `AGENT=<name>` standard: when `AGENT` is set to a known product name the SDK reports that product, and when set to an unrecognized non-empty value the SDK reports `agent/unknown`. Environment variables set to the empty string (e.g. `CLAUDECODE=""`) now count as "set" for presence-only matchers, matching `databricks-sdk-go` semantics; previously they were treated as unset. ### Bug Fixes * Fixed non-JSON error responses (e.g. plain-text "Invalid Token" with HTTP 403) producing `Unknown` instead of the correct typed exception (`PermissionDenied`, `Unauthenticated`, etc.). The error message no longer contains Jackson deserialization internals.
diff --git a/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java b/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java index e7d35bbd1..fb2b9cdfd 100644 --- a/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java +++ b/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java @@ -298,12 +298,16 @@ private static List listKnownAgents() { new KnownAgent( "amp", Arrays.asList( - new EnvMatcher("AMP_CURRENT_THREAD_ID"), new EnvMatcher(AGENT_ENV_VAR, "amp"))), + new EnvMatcher("AMP_CURRENT_THREAD_ID"), + new EnvMatcher(AGENT_ENV_VAR, "amp"))), // https://ampcode.com/ new KnownAgent( "antigravity", Collections.singletonList( new EnvMatcher("ANTIGRAVITY_AGENT"))), // Closed source (Google) - new KnownAgent("augment", Collections.singletonList(new EnvMatcher("AUGMENT_AGENT"))), + new KnownAgent( + "augment", + Collections.singletonList( + new EnvMatcher("AUGMENT_AGENT"))), // https://www.augmentcode.com/ new KnownAgent( "claude-code", Collections.singletonList( @@ -320,9 +324,9 @@ private static List listKnownAgents() { "copilot-cli", Collections.singletonList( new EnvMatcher("COPILOT_CLI"))), // https://github.com/features/copilot + // VS Code Copilot terminal; best-effort heuristic, not officially identified. 
new KnownAgent( - "copilot-vscode", - Collections.singletonList(new EnvMatcher("COPILOT_MODEL"))), // VS Code Copilot + "copilot-vscode", Collections.singletonList(new EnvMatcher("COPILOT_MODEL"))), new KnownAgent( "cursor", Collections.singletonList(new EnvMatcher("CURSOR_AGENT"))), // Closed source new KnownAgent( @@ -332,17 +336,23 @@ private static List listKnownAgents() { new KnownAgent( "goose", Arrays.asList( - new EnvMatcher("GOOSE_TERMINAL"), new EnvMatcher(AGENT_ENV_VAR, "goose"))), - new KnownAgent("kiro", Collections.singletonList(new EnvMatcher("KIRO"))), + new EnvMatcher("GOOSE_TERMINAL"), + new EnvMatcher(AGENT_ENV_VAR, "goose"))), // https://block.github.io/goose/ + new KnownAgent( + "kiro", + Collections.singletonList(new EnvMatcher("KIRO"))), // https://kiro.dev/ (Amazon) + new KnownAgent( + "openclaw", + Collections.singletonList( + new EnvMatcher("OPENCLAW_SHELL"))), // https://github.com/anthropics/openclaw new KnownAgent( "opencode", Collections.singletonList( new EnvMatcher("OPENCODE"))), // https://github.com/opencode-ai/opencode new KnownAgent( - "openclaw", + "windsurf", Collections.singletonList( - new EnvMatcher("OPENCLAW_SHELL"))), // https://github.com/anthropics/openclaw - new KnownAgent("windsurf", Collections.singletonList(new EnvMatcher("WINDSURF_AGENT")))); + new EnvMatcher("WINDSURF_AGENT")))); // https://codeium.com/windsurf (Codeium) } // Looks up the active agent provider based on environment variables. @@ -352,15 +362,18 @@ private static List listKnownAgents() { // - Exactly one agent matched: return its product name. // - More than one agent matched: return "" (ambiguity). // - Zero agents matched: if the agents.md standard AGENT env var is set to - // any non-empty value, return "unknown". Otherwise return "". + // a known product name, return that product name. If it is set to any + // other non-empty value, return "unknown". Otherwise return "". 
// // Unlike CI/CD detection (which returns the first match), agent detection // uses an ambiguity guard because agent env vars can be stacked (e.g., - // running Cline inside Cursor). + // running Cline inside Cursor). Known matchers always win over the AGENT + // fallback, so e.g. AGENT=cursor + CLAUDECODE=1 yields "claude-code". private static String lookupAgentProvider(Environment env) { + List knownAgents = listKnownAgents(); String detected = ""; int count = 0; - for (KnownAgent agent : listKnownAgents()) { + for (KnownAgent agent : knownAgents) { if (agent.fires(env)) { detected = agent.product; count++; @@ -375,6 +388,11 @@ private static String lookupAgentProvider(Environment env) { if (count == 0) { String agentValue = env.get(AGENT_ENV_VAR); if (agentValue != null && !agentValue.isEmpty()) { + for (KnownAgent agent : knownAgents) { + if (agent.product.equals(agentValue)) { + return agentValue; + } + } return "unknown"; } } diff --git a/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java b/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java index 750560876..65f0c9cca 100644 --- a/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java +++ b/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java @@ -295,6 +295,45 @@ public void testAgentProviderAgentEnvGoose() { Assertions.assertTrue(UserAgent.asString().contains("agent/goose")); } + @Test + public void testAgentProviderAgentEnvAmp() { + setupAgentEnv( + new HashMap() { + { + put("AGENT", "amp"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/amp")); + } + + @Test + public void testAgentProviderAgentEnvCursor() { + // AGENT=cursor with no cursor-specific env var. Falls through to the + // AGENT fallback and matches "cursor" as a known product name. 
+ setupAgentEnv( + new HashMap() { + { + put("AGENT", "cursor"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/cursor")); + } + + @Test + public void testAgentProviderKnownMatcherWinsOverAgentFallback() { + // Known matchers always win over the AGENT fallback. AGENT=somethingweird + // alone would yield "unknown", but CLAUDECODE=1 takes precedence. + setupAgentEnv( + new HashMap() { + { + put("AGENT", "somethingweird"); + put("CLAUDECODE", "1"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/claude-code")); + Assertions.assertFalse(UserAgent.asString().contains("agent/unknown")); + } + @Test public void testAgentProviderGooseBothMatchers() { // GOOSE_TERMINAL and AGENT=goose both fire the goose matcher. Since they From 04fa424f01f08fb013b1b53a3be9b0642cf1e627 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 20 Apr 2026 11:33:50 +0200 Subject: [PATCH 3/6] Make explicit env vars win over AGENT fallback in agent detection Previously, agents like amp and goose had dual matchers: their explicit env var (AMP_CURRENT_THREAD_ID, GOOSE_TERMINAL) plus AGENT=<name>. This caused asymmetric ambiguity: AGENT=goose + CLAUDECODE=1 resolved to "" (both matchers fired on different products), while AGENT=cursor + CLAUDECODE=1 resolved to "claude-code" (only claude-code matched, cursor was handled by the AGENT fallback which does not trigger once an explicit matcher has fired). The rule is now uniform: explicit env var matchers always take precedence over the generic AGENT=<name> signal. AGENT is treated purely as a fallback for agents without an explicit matcher, or for products we do not yet specifically recognize. Changes: - Remove per-agent AGENT=<name> matchers from amp and goose entries. Those products still set AGENT=<name>; the central fallback in lookupAgentProvider handles them. - Update the lookupAgentProvider doc comment to reflect the new rule. - Flip the existing AGENT=goose + CLAUDECODE=1 test to expect "claude-code" and rename accordingly.
- Add test for GOOSE_TERMINAL=1 + AGENT=cursor -> "goose". - Add test for COPILOT_CLI=1 + COPILOT_MODEL=gpt-4 -> "" (documents the known, intentional ambiguity for Copilot CLI BYOK users). - Update NEXT_CHANGELOG entry to mention precedence rule. Signed-off-by: simon --- NEXT_CHANGELOG.md | 2 +- .../com/databricks/sdk/core/UserAgent.java | 22 +++++++---- .../databricks/sdk/core/UserAgentTest.java | 38 ++++++++++++++++++- 3 files changed, 51 insertions(+), 11 deletions(-) diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index f335db553..e860e3731 100755 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -5,7 +5,7 @@ ### New Features and Improvements * Add support for authentication through Azure Managed Service Identity (MSI) via the new `azure-msi` credential provider. * Support `default_profile` in `[__settings__]` section of `.databrickscfg` for consistent default profile resolution across CLI and SDKs. -* Added automatic detection of AI coding agents (Amp, Antigravity, Augment, Claude Code, Cline, Codex, Copilot CLI, Copilot VS Code, Cursor, Gemini CLI, Goose, Kiro, OpenClaw, OpenCode, Windsurf) in the user-agent string. The SDK now appends `agent/<name>` to HTTP request headers when running inside a known AI agent environment. Also honors the `AGENT=<name>` standard: when `AGENT` is set to a known product name the SDK reports that product, and when set to an unrecognized non-empty value the SDK reports `agent/unknown`. Environment variables set to the empty string (e.g. `CLAUDECODE=""`) now count as "set" for presence-only matchers, matching `databricks-sdk-go` semantics; previously they were treated as unset. +* Added automatic detection of AI coding agents (Amp, Antigravity, Augment, Claude Code, Cline, Codex, Copilot CLI, Copilot VS Code, Cursor, Gemini CLI, Goose, Kiro, OpenClaw, OpenCode, Windsurf) in the user-agent string. The SDK now appends `agent/<name>` to HTTP request headers when running inside a known AI agent environment.
Also honors the `AGENT=<name>` standard: when `AGENT` is set to a known product name the SDK reports that product, and when set to an unrecognized non-empty value the SDK reports `agent/unknown`. Environment variables set to the empty string (e.g. `CLAUDECODE=""`) now count as "set" for presence-only matchers, matching `databricks-sdk-go` semantics; previously they were treated as unset. Explicit agent env vars (e.g. `CLAUDECODE`, `GOOSE_TERMINAL`) always take precedence over the generic `AGENT=<name>` signal. ### Bug Fixes * Fixed non-JSON error responses (e.g. plain-text "Invalid Token" with HTTP 403) producing `Unknown` instead of the correct typed exception (`PermissionDenied`, `Unauthenticated`, etc.). The error message no longer contains Jackson deserialization internals. diff --git a/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java b/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java index fb2b9cdfd..673681606 100644 --- a/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java +++ b/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java @@ -295,11 +295,11 @@ boolean fires(Environment env) { // Agents are listed alphabetically by product name. private static List listKnownAgents() { return Arrays.asList( + // Amp also sets AGENT=amp; handled by the central AGENT fallback. new KnownAgent( "amp", - Arrays.asList( - new EnvMatcher("AMP_CURRENT_THREAD_ID"), - new EnvMatcher(AGENT_ENV_VAR, "amp"))), // https://ampcode.com/ + Collections.singletonList( + new EnvMatcher("AMP_CURRENT_THREAD_ID"))), // https://ampcode.com/ new KnownAgent( "antigravity", Collections.singletonList( @@ -333,11 +333,11 @@ private static List listKnownAgents() { "gemini-cli", Collections.singletonList( new EnvMatcher("GEMINI_CLI"))), // https://google-gemini.github.io/gemini-cli + // Goose also sets AGENT=goose; handled by the central AGENT fallback.
new KnownAgent( "goose", - Arrays.asList( - new EnvMatcher("GOOSE_TERMINAL"), - new EnvMatcher(AGENT_ENV_VAR, "goose"))), // https://block.github.io/goose/ + Collections.singletonList( + new EnvMatcher("GOOSE_TERMINAL"))), // https://block.github.io/goose/ new KnownAgent( "kiro", Collections.singletonList(new EnvMatcher("KIRO"))), // https://kiro.dev/ (Amazon) @@ -357,6 +357,11 @@ private static List listKnownAgents() { // Looks up the active agent provider based on environment variables. // + // Explicit env var matchers (e.g. CLAUDECODE, GOOSE_TERMINAL) always take + // precedence over the generic AGENT=<name> signal. The AGENT env var is + // treated purely as a fallback for agents that have no explicit matcher, or + // for agents we do not yet specifically recognize. + // // For each agent, it fires if ANY of its matchers fires. The function counts // how many distinct agents matched: // - Exactly one agent matched: return its product name. @@ -367,8 +372,9 @@ private static List listKnownAgents() { // // Unlike CI/CD detection (which returns the first match), agent detection // uses an ambiguity guard because agent env vars can be stacked (e.g., - // running Cline inside Cursor). Known matchers always win over the AGENT - // fallback, so e.g. AGENT=cursor + CLAUDECODE=1 yields "claude-code". + // running Cline inside Cursor). Because explicit matchers win over AGENT, + // e.g. AGENT=cursor + CLAUDECODE=1 yields "claude-code", and + // AGENT=goose + CLAUDECODE=1 also yields "claude-code".
private static String lookupAgentProvider(Environment env) { List knownAgents = listKnownAgents(); String detected = ""; diff --git a/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java b/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java index 65f0c9cca..347d6ac04 100644 --- a/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java +++ b/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java @@ -385,8 +385,9 @@ public void testAgentProviderAgentEnvEmpty() { } @Test - public void testAgentProviderAgentEnvAmbiguity() { - // AGENT=goose fires goose, CLAUDECODE=1 fires claude-code. Ambiguity. + public void testAgentProviderExplicitEnvWinsOverAgentEnv() { + // CLAUDECODE=1 is an explicit matcher and wins over AGENT=goose (which + // is only consulted as a fallback when no explicit matcher fires). setupAgentEnv( new HashMap() { { @@ -394,6 +395,39 @@ public void testAgentProviderAgentEnvAmbiguity() { put("CLAUDECODE", "1"); } }); + Assertions.assertTrue(UserAgent.asString().contains("agent/claude-code")); + } + + @Test + public void testAgentProviderExplicitEnvWinsOverKnownAgentEnv() { + // GOOSE_TERMINAL=1 is an explicit matcher; AGENT=cursor (even though + // "cursor" is a known product name) is ignored because an explicit + // matcher already fired. + setupAgentEnv( + new HashMap() { + { + put("GOOSE_TERMINAL", "1"); + put("AGENT", "cursor"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/goose")); + Assertions.assertFalse(UserAgent.asString().contains("agent/cursor")); + } + + @Test + public void testAgentProviderCopilotCliAndCopilotVscodeAmbiguous() { + // Copilot CLI can be invoked with BYOK models, which may also set + // COPILOT_MODEL. In that case both copilot-cli and copilot-vscode + // matchers fire on different products, so detection is ambiguous. + // This is intentional: ambiguity is preferred over silently picking + // one product. 
+ setupAgentEnv( + new HashMap() { + { + put("COPILOT_CLI", "1"); + put("COPILOT_MODEL", "gpt-4"); + } + }); Assertions.assertFalse(UserAgent.asString().contains("agent/")); } From 3e12cecc17ab5148cd02bbf3bacaf7329d4b0d48 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 20 Apr 2026 11:49:06 +0200 Subject: [PATCH 4/6] Simplify agent detection data model Signed-off-by: simon --- .../com/databricks/sdk/core/UserAgent.java | 168 +++++------------- 1 file changed, 49 insertions(+), 119 deletions(-) diff --git a/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java b/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java index 673681606..903a46b67 100644 --- a/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java +++ b/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java @@ -237,52 +237,15 @@ private static String cicdProvider() { return cicdProvider; } - // Matches an environment variable. If value is empty, matching is - // presence-only (any value, including empty string, counts as a match). If - // value is non-empty, the env var must be set to exactly that value. - private static class EnvMatcher { - private final String envVar; - private final String value; - - EnvMatcher(String envVar) { - this(envVar, ""); - } - - EnvMatcher(String envVar, String value) { - this.envVar = envVar; - this.value = value; - } - - boolean fires(Environment env) { - String v = env.get(envVar); - if (v == null) { - return false; - } - if (value.isEmpty()) { - return true; - } - return v.equals(value); - } - } - - // Describes a single AI coding agent and the environment matchers that - // identify it. The agent is detected if ANY matcher in matchAny fires. + // Describes a single AI coding agent: the env var that identifies it and the + // product name reported in the user agent. 
private static class KnownAgent { + private final String envVar; private final String product; - private final List matchAny; - KnownAgent(String product, List matchAny) { + KnownAgent(String envVar, String product) { + this.envVar = envVar; this.product = product; - this.matchAny = matchAny; - } - - boolean fires(Environment env) { - for (EnvMatcher m : matchAny) { - if (m.fires(env)) { - return true; - } - } - return false; } } @@ -295,64 +258,26 @@ boolean fires(Environment env) { // Agents are listed alphabetically by product name. private static List listKnownAgents() { return Arrays.asList( - // Amp also sets AGENT=amp; handled by the central AGENT fallback. - new KnownAgent( - "amp", - Collections.singletonList( - new EnvMatcher("AMP_CURRENT_THREAD_ID"))), // https://ampcode.com/ - new KnownAgent( - "antigravity", - Collections.singletonList( - new EnvMatcher("ANTIGRAVITY_AGENT"))), // Closed source (Google) new KnownAgent( - "augment", - Collections.singletonList( - new EnvMatcher("AUGMENT_AGENT"))), // https://www.augmentcode.com/ - new KnownAgent( - "claude-code", - Collections.singletonList( - new EnvMatcher("CLAUDECODE"))), // https://github.com/anthropics/claude-code - new KnownAgent( - "cline", - Collections.singletonList( - new EnvMatcher("CLINE_ACTIVE"))), // https://github.com/cline/cline (v3.24.0+) - new KnownAgent( - "codex", - Collections.singletonList( - new EnvMatcher("CODEX_CI"))), // https://github.com/openai/codex - new KnownAgent( - "copilot-cli", - Collections.singletonList( - new EnvMatcher("COPILOT_CLI"))), // https://github.com/features/copilot + "AMP_CURRENT_THREAD_ID", + "amp"), // https://ampcode.com/ (also sets AGENT=amp, handled centrally) + new KnownAgent("ANTIGRAVITY_AGENT", "antigravity"), // Closed source (Google) + new KnownAgent("AUGMENT_AGENT", "augment"), // https://www.augmentcode.com/ + new KnownAgent("CLAUDECODE", "claude-code"), // https://github.com/anthropics/claude-code + new KnownAgent("CLINE_ACTIVE", "cline"), // 
https://github.com/cline/cline (v3.24.0+) + new KnownAgent("CODEX_CI", "codex"), // https://github.com/openai/codex + new KnownAgent("COPILOT_CLI", "copilot-cli"), // https://github.com/features/copilot // VS Code Copilot terminal; best-effort heuristic, not officially identified. + new KnownAgent("COPILOT_MODEL", "copilot-vscode"), + new KnownAgent("CURSOR_AGENT", "cursor"), // Closed source + new KnownAgent("GEMINI_CLI", "gemini-cli"), // https://google-gemini.github.io/gemini-cli new KnownAgent( - "copilot-vscode", Collections.singletonList(new EnvMatcher("COPILOT_MODEL"))), - new KnownAgent( - "cursor", Collections.singletonList(new EnvMatcher("CURSOR_AGENT"))), // Closed source - new KnownAgent( - "gemini-cli", - Collections.singletonList( - new EnvMatcher("GEMINI_CLI"))), // https://google-gemini.github.io/gemini-cli - // Goose also sets AGENT=goose; handled by the central AGENT fallback. - new KnownAgent( - "goose", - Collections.singletonList( - new EnvMatcher("GOOSE_TERMINAL"))), // https://block.github.io/goose/ - new KnownAgent( - "kiro", - Collections.singletonList(new EnvMatcher("KIRO"))), // https://kiro.dev/ (Amazon) - new KnownAgent( - "openclaw", - Collections.singletonList( - new EnvMatcher("OPENCLAW_SHELL"))), // https://github.com/anthropics/openclaw - new KnownAgent( - "opencode", - Collections.singletonList( - new EnvMatcher("OPENCODE"))), // https://github.com/opencode-ai/opencode - new KnownAgent( - "windsurf", - Collections.singletonList( - new EnvMatcher("WINDSURF_AGENT")))); // https://codeium.com/windsurf (Codeium) + "GOOSE_TERMINAL", + "goose"), // https://block.github.io/goose/ (also sets AGENT=goose, handled centrally) + new KnownAgent("KIRO", "kiro"), // https://kiro.dev/ (Amazon) + new KnownAgent("OPENCLAW_SHELL", "openclaw"), // https://github.com/anthropics/openclaw + new KnownAgent("OPENCODE", "opencode"), // https://github.com/opencode-ai/opencode + new KnownAgent("WINDSURF_AGENT", "windsurf")); // https://codeium.com/windsurf 
(Codeium) } // Looks up the active agent provider based on environment variables. @@ -376,33 +301,38 @@ private static List listKnownAgents() { // e.g. AGENT=cursor + CLAUDECODE=1 yields "claude-code", and // AGENT=goose + CLAUDECODE=1 also yields "claude-code". private static String lookupAgentProvider(Environment env) { - List knownAgents = listKnownAgents(); - String detected = ""; - int count = 0; - for (KnownAgent agent : knownAgents) { - if (agent.fires(env)) { - detected = agent.product; - count++; - if (count > 1) { - break; - } + List agents = listKnownAgents(); + + List matches = new ArrayList<>(); + for (KnownAgent a : agents) { + if (env.get(a.envVar) != null) { + matches.add(a.product); } } - if (count == 1) { - return detected; + + if (matches.size() == 1) { + return matches.get(0); } - if (count == 0) { - String agentValue = env.get(AGENT_ENV_VAR); - if (agentValue != null && !agentValue.isEmpty()) { - for (KnownAgent agent : knownAgents) { - if (agent.product.equals(agentValue)) { - return agentValue; - } - } - return "unknown"; + if (matches.size() > 1) { + return ""; // ambiguity + } + return agentEnvFallback(env, agents); + } + + // agentEnvFallback honors the agents.md AGENT= standard. + // Returns the value if it matches a known product name, "unknown" if AGENT + // is set to any other non-empty value, and "" if AGENT is unset or empty. + private static String agentEnvFallback(Environment env, List agents) { + String v = env.get(AGENT_ENV_VAR); + if (v == null || v.isEmpty()) { + return ""; + } + for (KnownAgent a : agents) { + if (a.product.equals(v)) { + return v; } } - return ""; + return "unknown"; } // Thread-safe lazy initialization of agent provider detection From f6e161fa9d2449bfa5cd8712e7d5347d129f6309 Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 20 Apr 2026 14:46:01 +0200 Subject: [PATCH 5/6] Report agent/multiple when agents are stacked Nested agents (e.g. 
a Cursor CLI subagent spawned by Claude Code) set multiple agent env vars on the same process. The previous ambiguity guard silently dropped the signal in that case. Report "multiple" instead so the stacked case is visible in telemetry. Also collapse the known BYOK false positive where Copilot CLI users have COPILOT_MODEL set alongside COPILOT_CLI: that pair now reports "copilot-cli" rather than "multiple". Co-authored-by: Isaac Signed-off-by: simon --- NEXT_CHANGELOG.md | 2 +- .../com/databricks/sdk/core/UserAgent.java | 25 +++++++---- .../databricks/sdk/core/UserAgentTest.java | 44 +++++++++++++++---- 3 files changed, 53 insertions(+), 18 deletions(-) diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index e860e3731..060cf4b23 100755 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -5,7 +5,7 @@ ### New Features and Improvements * Add support for authentication through Azure Managed Service Identity (MSI) via the new `azure-msi` credential provider. * Support `default_profile` in `[__settings__]` section of `.databrickscfg` for consistent default profile resolution across CLI and SDKs. -* Added automatic detection of AI coding agents (Amp, Antigravity, Augment, Claude Code, Cline, Codex, Copilot CLI, Copilot VS Code, Cursor, Gemini CLI, Goose, Kiro, OpenClaw, OpenCode, Windsurf) in the user-agent string. The SDK now appends `agent/<name>` to HTTP request headers when running inside a known AI agent environment. Also honors the `AGENT=<name>` standard: when `AGENT` is set to a known product name the SDK reports that product, and when set to an unrecognized non-empty value the SDK reports `agent/unknown`. Environment variables set to the empty string (e.g. `CLAUDECODE=""`) now count as "set" for presence-only matchers, matching `databricks-sdk-go` semantics; previously they were treated as unset. Explicit agent env vars (e.g. `CLAUDECODE`, `GOOSE_TERMINAL`) always take precedence over the generic `AGENT=<name>` signal.
+* Added automatic detection of AI coding agents (Amp, Antigravity, Augment, Claude Code, Cline, Codex, Copilot CLI, Copilot VS Code, Cursor, Gemini CLI, Goose, Kiro, OpenClaw, OpenCode, Windsurf) in the user-agent string. The SDK now appends `agent/<name>` to HTTP request headers when running inside a known AI agent environment. Also honors the `AGENT=<name>` standard: when `AGENT` is set to a known product name the SDK reports that product, and when set to an unrecognized non-empty value the SDK reports `agent/unknown`. Environment variables set to the empty string (e.g. `CLAUDECODE=""`) now count as "set" for presence-only matchers, matching `databricks-sdk-go` semantics; previously they were treated as unset. Explicit agent env vars (e.g. `CLAUDECODE`, `GOOSE_TERMINAL`) always take precedence over the generic `AGENT=<name>` signal. When multiple agent env vars are present (e.g. a Cursor CLI subagent invoked from Claude Code), the user-agent reports `agent/multiple`. ### Bug Fixes * Fixed non-JSON error responses (e.g. plain-text "Invalid Token" with HTTP 403) producing `Unknown` instead of the correct typed exception (`PermissionDenied`, `Unauthenticated`, etc.). The error message no longer contains Jackson deserialization internals. diff --git a/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java b/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java index 903a46b67..7dd2eef9b 100644 --- a/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java +++ b/databricks-sdk-java/src/main/java/com/databricks/sdk/core/UserAgent.java @@ -287,19 +287,19 @@ private static List listKnownAgents() { // treated purely as a fallback for agents that have no explicit matcher, or // for agents we do not yet specifically recognize. // - // For each agent, it fires if ANY of its matchers fires.
The function counts - // how many distinct agents matched: + // The function counts how many distinct agents matched via explicit env vars: // - Exactly one agent matched: return its product name. - // - More than one agent matched: return "" (ambiguity). + // - More than one agent matched: return "multiple". Agent env vars can be + // stacked when one agent invokes another as a subagent (e.g. Claude Code + // spawning a Cursor CLI subprocess), so the child process inherits env + // vars from multiple layers. // - Zero agents matched: if the agents.md standard AGENT env var is set to // a known product name, return that product name. If it is set to any // other non-empty value, return "unknown". Otherwise return "". // - // Unlike CI/CD detection (which returns the first match), agent detection - // uses an ambiguity guard because agent env vars can be stacked (e.g., - // running Cline inside Cursor). Because explicit matchers win over AGENT, - // e.g. AGENT=cursor + CLAUDECODE=1 yields "claude-code", and - // AGENT=goose + CLAUDECODE=1 also yields "claude-code". + // Because explicit matchers win over AGENT, e.g. AGENT=cursor + CLAUDECODE=1 + // yields "claude-code", and AGENT=goose + CLAUDECODE=1 also yields + // "claude-code". private static String lookupAgentProvider(Environment env) { List agents = listKnownAgents(); @@ -310,11 +310,18 @@ private static String lookupAgentProvider(Environment env) { } } + // Known BYOK false positive: Copilot CLI users often set COPILOT_MODEL + // alongside COPILOT_CLI. Treat that pair as a single copilot-cli signal + // rather than a stacked multi-agent setup. 
+ if (matches.contains("copilot-cli") && matches.contains("copilot-vscode")) { + matches.removeIf(m -> m.equals("copilot-vscode")); + } + if (matches.size() == 1) { return matches.get(0); } if (matches.size() > 1) { - return ""; // ambiguity + return "multiple"; } return agentEnvFallback(env, agents); } diff --git a/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java b/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java index 347d6ac04..232409c84 100644 --- a/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java +++ b/databricks-sdk-java/src/test/java/com/databricks/sdk/core/UserAgentTest.java @@ -415,12 +415,10 @@ public void testAgentProviderExplicitEnvWinsOverKnownAgentEnv() { } @Test - public void testAgentProviderCopilotCliAndCopilotVscodeAmbiguous() { - // Copilot CLI can be invoked with BYOK models, which may also set - // COPILOT_MODEL. In that case both copilot-cli and copilot-vscode - // matchers fire on different products, so detection is ambiguous. - // This is intentional: ambiguity is preferred over silently picking - // one product. + public void testAgentProviderCopilotCliAndCopilotVscodeCollapseToCopilotCli() { + // Copilot CLI users (BYOK mode) often set COPILOT_MODEL alongside + // COPILOT_CLI. Treat the pair as a single copilot-cli signal rather + // than a stacked multi-agent setup. setupAgentEnv( new HashMap() { { @@ -428,7 +426,22 @@ public void testAgentProviderCopilotCliAndCopilotVscodeAmbiguous() { put("COPILOT_MODEL", "gpt-4"); } }); - Assertions.assertFalse(UserAgent.asString().contains("agent/")); + Assertions.assertTrue(UserAgent.asString().contains("agent/copilot-cli")); + } + + @Test + public void testAgentProviderCopilotByokCollapseStillMultiple() { + // The Copilot BYOK collapse only drops the copilot-vscode match. If + // another agent is also present, the result is still "multiple". 
+ setupAgentEnv( + new HashMap() { + { + put("COPILOT_CLI", "1"); + put("COPILOT_MODEL", "gpt-4"); + put("CLAUDECODE", "1"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/multiple")); } @Test @@ -439,6 +452,8 @@ public void testAgentProviderNoAgent() { @Test public void testAgentProviderMultipleAgents() { + // Nested agents (e.g. Claude Code spawning a Cursor CLI subagent) set + // multiple explicit matchers on the same process. setupAgentEnv( new HashMap() { { @@ -446,7 +461,20 @@ public void testAgentProviderMultipleAgents() { put("CURSOR_AGENT", "1"); } }); - Assertions.assertFalse(UserAgent.asString().contains("agent/")); + Assertions.assertTrue(UserAgent.asString().contains("agent/multiple")); + } + + @Test + public void testAgentProviderThreeStackedAgents() { + setupAgentEnv( + new HashMap() { + { + put("CLAUDECODE", "1"); + put("CURSOR_AGENT", "1"); + put("AUGMENT_AGENT", "1"); + } + }); + Assertions.assertTrue(UserAgent.asString().contains("agent/multiple")); } @Test From db6717c72a39d3521ba1af42bd2f63a175421edd Mon Sep 17 00:00:00 2001 From: simon Date: Mon, 20 Apr 2026 16:35:31 +0200 Subject: [PATCH 6/6] Regenerate lockfiles after v0.104.0 merge Merging main bumped the project version to 0.104.0 but left the committed lockfile.json files pinned to 0.103.0, failing the maven-lockfile validation step. Regenerate both lockfiles under JDK 11 (matching the CI configuration) and run fix-lockfile to rewrite JFrog proxy URLs back to Maven Central. 
Co-authored-by: Isaac Signed-off-by: simon --- databricks-sdk-java/lockfile.json | 2 +- lockfile.json | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/databricks-sdk-java/lockfile.json b/databricks-sdk-java/lockfile.json index 68d464015..877c72f01 100644 --- a/databricks-sdk-java/lockfile.json +++ b/databricks-sdk-java/lockfile.json @@ -1,7 +1,7 @@ { "artifactId": "databricks-sdk-java", "groupId": "com.databricks", - "version": "0.103.0", + "version": "0.104.0", "lockFileVersion": 1, "dependencies": [ { diff --git a/lockfile.json b/lockfile.json index 31056496f..a795f157e 100644 --- a/lockfile.json +++ b/lockfile.json @@ -1,7 +1,7 @@ { "artifactId": "databricks-sdk-parent", "groupId": "com.databricks", - "version": "0.103.0", + "version": "0.104.0", "lockFileVersion": 1, "dependencies": [], "mavenPlugins": [], @@ -9,7 +9,7 @@ "environment": { "osName": "Mac OS X", "mavenVersion": "3.9.14", - "javaVersion": "25.0.2" + "javaVersion": "11.0.12" }, "config": { "includeMavenPlugins": false,