From 05c721e6db1baf5e1390cd6f8ed7893a61dc179c Mon Sep 17 00:00:00 2001 From: Alexander Watson Date: Thu, 30 Apr 2026 14:05:27 -0700 Subject: [PATCH 01/23] docs(rfc): add agent-driven policy management --- rfc/0001-agent-driven-policy-management.md | 721 +++++++++++++++++++++ 1 file changed, 721 insertions(+) create mode 100644 rfc/0001-agent-driven-policy-management.md diff --git a/rfc/0001-agent-driven-policy-management.md b/rfc/0001-agent-driven-policy-management.md new file mode 100644 index 000000000..2d80b8399 --- /dev/null +++ b/rfc/0001-agent-driven-policy-management.md @@ -0,0 +1,721 @@ +--- +authors: + - "@alwatson" +state: draft +links: + - https://github.com/NVIDIA/OpenShell/issues/1062 + - https://github.com/NVIDIA/OpenShell/blob/main/architecture/policy-advisor.md +--- + +# RFC 0001 - Agent-Driven Policy Management + + + +## Summary + +Evolve OpenShell's existing Policy Advisor into an agent-driven policy management system that lets agents inspect current sandbox policy, draft narrow policy changes, submit them for review, and apply approved updates without restarting the sandbox. The safety model stays the same: sandbox-side analysis, gateway-side validation and persistence, and explicit approval boundaries. The main change is the authoring and review experience: every sandbox should expose local policy guidance and APIs, and every developer surface should expose a responsive inbox for reviewing proposals. + +## Motivation + +OpenShell already has the core of a dynamic policy editing experience: + +- The sandbox proxy emits deny events. +- The sandbox-side `DenialAggregator` and mechanistic mapper convert those into draft `PolicyChunk` proposals. +- The gateway persists proposals and merges approved rules into the active policy. +- The TUI and CLI already provide review and approval flows. +- Running sandboxes already hot-reload dynamic policy updates. 
+ +That is a strong foundation, but the current experience is still fundamentally operator-driven and network-centric. It is excellent for "observe a deny, approve a generated endpoint rule" but incomplete for the broader product promise: an agent should be able to understand what is blocked, discover what policy language is available, generate the narrowest valid policy change, and submit it to the developer with enough rationale and verification signal that approval is fast and trustworthy. + +This matters because: + +- developers should not need to learn policy syntax before becoming productive +- agents have the most task context and can often draft narrower changes than humans +- approvals should feel like reviewing a validated outcome, not guessing about a YAML diff +- the inbox experience must be fast and clear across TUI, CLI, and SDK surfaces +- organizations need a path from human approval to trusted bounded automation without losing auditability or least privilege + +This RFC proposes the next layer: make policy adaptation an intentional, agent-native workflow instead of a reactive operator convenience. + +## MVP implementation note + +The first implementation is tracked in [#1062](https://github.com/NVIDIA/OpenShell/issues/1062). It intentionally starts with the smallest agent-driven loop that can validate the product experience: + +- structured L7 REST deny responses for agent-readable failures +- a sandbox-local `openshell-policy` CLI backed by existing files, logs, and per-sandbox mTLS gateway calls +- static sandbox-local agent guidance in `/etc/openshell/SKILL.md` +- agent-authored proposal provenance, validation status, and rejection guidance in the existing draft policy flow +- TUI/CLI review for a single sandbox, with polling as the MVP refresh path + +The MVP deliberately defers the supervisor Unix-socket API, server-streaming multi-sandbox inbox, Slack/web adapters, org ceilings, trusted auto-apply, and in-process prover optimization. 
Those remain aligned with the RFC direction, but they are not required to prove the initial loop. + +## Non-goals + +- Allowing an in-sandbox agent to self-approve or unilaterally apply its own policy changes. +- Moving proposal generation into the gateway. Sandbox-side analysis remains the architectural default. +- Solving every policy domain in the first release. Network policy is the initial scope because it is the only hot-reloadable policy domain in the current architecture; filesystem and process policy can follow later through a different lifecycle model. +- Replacing the existing mechanistic mapper. It remains the deterministic baseline and safety net. +- Making Rego authoring a direct end-user requirement. The system should expose policy semantics to agents and advanced users, not require hand-authored policy for common workflows. + +## Proposal + +### Enforcement model + +This RFC is not proposing a generic "policy update" system without specifying what gets enforced. The intended model is layered: + +- **L4 remains the universal baseline** + Every outbound connection is gated by host, port, and binary identity. +- **L7 is the preferred least-privilege model for supported application protocols** + Today this primarily means `protocol: rest` with per-method and per-path rules for HTTP APIs. +- **Protocol-aware or tool-aware policy layers may sit above L7 where useful** + MCP is a strong candidate for a future higher-level enforcement surface, but it should be modeled explicitly rather than implied. 
+ +For the initial implementation of this RFC, dynamic policy management should be grounded in the enforcement model OpenShell already has in the codebase today: + +- L4 network policy for all outbound traffic +- L7 REST enforcement for HTTP APIs where `protocol: rest` is configured +- policy prover checks that can distinguish L4-only access from L7-enforced access + +This matters because "allow GitHub" is not a single thing: + +- `github.com:443` used by `git` may require L4-only allowance depending on the workflow and protocol behavior +- `api.github.com:443` used by `gh`, `curl`, or an SDK is often a great fit for L7 REST controls +- the best least-privilege design often splits those paths rather than treating GitHub as one broad capability + +This RFC is therefore network-first by design, not because other policy domains are unimportant, but because the current OpenShell architecture only supports live mutation of `network_policies`. Filesystem, Landlock, and process settings are applied at sandbox startup and are currently immutable for the lifetime of a sandbox. + +### Product direction + +Every OpenShell sandbox should be able to host an agent-capable policy workflow with four core affordances: + +1. A local capability description that teaches an agent how to inspect current policy state, understand the available policy language, and submit a proposal for review. +2. A sandbox-local or supervisor-adjacent API for reading effective policy, recent denials, and proposal state. +3. A gateway-managed developer inbox for reviewing, editing, approving, rejecting, and auditing proposals in real time. +4. A validation pipeline that checks proposed policy changes before they are applied. + +The product bar is not just correctness. 
The interaction model itself must be good: + +- proposals should appear quickly after a deny or agent request +- review surfaces should be understandable without policy expertise +- the same proposal should look coherent in the TUI, CLI, and SDKs +- approval should take one action when the system has high confidence +- high-volume exploratory agent workflows should not drown the user in repetitive prompts + +### UX requirements and latency targets + +For the developer inbox experience: + +- OpenShell **must provide a push/subscription path** for proposal and decision updates to the TUI, CLI, and SDKs. +- Polling may exist as a fallback, but polling-only delivery is not sufficient for the intended UX. + +Target UX metrics: + +- **Proposal appearance latency** + From the time the gateway accepts a proposal or actionable deny-derived recommendation to the time it appears in a connected inbox client: + - target `p50 <= 2s` + - target `p95 <= 5s` +- **Decision propagation latency** + From approval, rejection, edit, or auto-apply at the gateway to the time all connected inbox clients reflect the new state: + - target `p50 <= 1s` + - target `p95 <= 3s` +- **Activation feedback latency** + From gateway receipt of sandbox policy status (`loaded` or `failed`) to visible client state update: + - target `p50 <= 1s` + - target `p95 <= 3s` + +If sandbox policy activation takes longer than these targets, the inbox should still update immediately with an intermediate state such as `pending_activation` rather than leaving the user uncertain. + +The desired user experience: + +1. An agent encounters a deny with a structured explanation from the sandbox supervisor. +2. The agent uses a local policy-management skill to inspect the current effective policy, denial context, and relevant policy primitives. +3. The agent produces a minimal proposed change and submits it through a stable proposal API. +4. 
The developer sees the proposal in the TUI, CLI, or SDK, reviews its rationale and validation results, and approves or rejects it. +5. OpenShell applies the approved change as a hot-reloaded policy update and preserves a durable audit trail. + +### What exists today + +The RFC explicitly builds on the current codebase rather than replacing it. + +Current implementation points: + +- `crates/openshell-sandbox/src/denial_aggregator.rs` + Sandbox-side aggregation of deny events. +- `crates/openshell-sandbox/src/mechanistic_mapper.rs` + Deterministic generation of draft `PolicyChunk` recommendations, including partial L7 support. +- `crates/openshell-server/src/grpc/policy.rs` + Persistence, approval, merge, rejection, edit, undo, and policy revision handling. +- `crates/openshell-tui/src/ui/sandbox_draft.rs` + TUI review and approval surface for network rules. +- `crates/openshell-cli/src/run.rs` + `openshell rule get|approve|reject|approve-all|history`. +- `architecture/policy-advisor.md` + Current sandbox-side recommendation design. + +Important capabilities that already exist and should be preserved: + +- sandbox-side proposal generation +- hot-reloadable policy updates +- proposal editing and undo RPCs in `proto/openshell.proto` +- durable draft chunk storage and approval history +- a deterministic, mechanistic proposal path that does not require an LLM +- a real distinction between L4-only and L7 REST enforcement + +### What is missing + +The current implementation lacks several parts required for the intended developer experience: + +- A standard in-sandbox skill or instruction bundle for local agents. +- A first-class proposal API that agents can use intentionally, not only through deny-triggered analysis. +- Rich proposal context beyond host/port/binary, especially for developer intent, repository/task context, and write operations. +- Validation outputs that explain what a proposal would permit before approval. 
+- A generalized "developer inbox" model that can power the TUI, CLI, SDK, and future Slack/web surfaces from the same backend abstraction. +- A clear separation between: + - observed deny events, + - agent-authored policy changes, + - validated approval-ready proposals, + - applied policy revisions. +- A trust model for non-human approvers, where a trusted external agent may apply policy changes automatically when those changes remain within an organization-defined maximum policy envelope. +- Explicit proposal semantics for whether a recommendation is: + - L4-only + - L7 REST + - a conversion from L4 to L7 + - a future protocol-aware policy type such as MCP-aware controls + +### Architecture + +```mermaid +flowchart LR + AGENT["Agent in sandbox"] --> SKILL["Local policy skill / instructions"] + SKILL --> API["Supervisor policy API"] + API --> STATE["Effective policy + deny history + schema help"] + AGENT --> PROPOSE["Submit proposal"] + PROPOSE --> GW["Gateway proposal service"] + DENY["Proxy deny + L7 deny"] --> AGG["Sandbox aggregator + mechanistic mapper"] + AGG --> GW + GW --> VALIDATE["Validation + simulation + prover"] + VALIDATE --> INBOX["Developer inbox"] + INBOX -->|approve| MERGE["Policy merge + revision"] + INBOX -->|reject/edit| GW + MERGE --> POLL["Sandbox policy poll / push"] + POLL --> API +``` + +The important architectural principle is that the current Policy Advisor pipeline becomes one producer of proposals, not the only producer. Agent-authored proposals and mechanistic proposals should land in the same gateway inbox and go through the same validation and approval machinery. + +The second architectural principle is that approval is policy-driven. Human approval is the default mode, but the same machinery must also support a trusted external control plane deciding that a proposal is safe to auto-apply because it fits under higher-level organizational constraints. 
+ +The end-to-end interaction should look like this: + +```mermaid +sequenceDiagram + participant A as Agent in Sandbox + participant S as Local Skill / CLI + participant U as Supervisor Local API (Unix socket) + participant P as Local Prover Aid + participant G as Gateway Proposal Service + participant X as External Validator / Trusted Approver + participant I as Developer Inbox + + A->>S: read policy skill / instructions + A->>U: get-effective-policy, get-recent-denials + U-->>A: effective policy + deny context + A->>P: evaluate candidate policy change + P-->>A: advisory narrowing feedback + A->>U: submit proposal + U->>G: forward proposal + provenance + G->>X: validate against policy + org ceiling + X-->>G: human_review_required or eligible_for_automatic_apply + G-->>I: publish proposal / status update + alt human review + I->>G: approve / reject / reject_with_guidance + G-->>U: proposal result + U-->>A: optional structured feedback + else trusted external auto-apply + X->>G: apply approved change + G-->>I: auto-applied status + audit trail + end +``` + +### Core design concepts + +#### 1. Policy workspace inside every sandbox + +Every default sandbox should expose a stable local "policy workspace" that agents can rely on. This is not necessarily a mutable file checked into the user's repo; it is a capability surface available from inside the sandbox. + +The workspace should provide: + +- Effective sandbox policy, already resolved to the currently active version. +- Policy schema and examples relevant to the sandbox's supported policy features. +- A description of approval workflow semantics. +- Recent denials and related proposal history. +- Guidance for generating the narrowest change possible. + +The first implementation can be a generated `SKILL.md` plus a local CLI or Unix-socket API exposed by the sandbox supervisor. The long-term contract is the API; the skill is the ergonomic on-ramp. 
+ +The sandbox-facing surface must also have an explicit information boundary: + +Visible to the in-sandbox agent: + +- the current effective sandbox policy +- deny reasons and local proposal history for that sandbox +- supported policy primitives and examples +- coarse proposal outcomes such as `needs_human_review`, `eligible_for_external_review`, or `rejected_for_safety` + +Not exposed directly to the in-sandbox agent: + +- the full org ceiling policy document +- trusted external approver policies or scoring heuristics +- hidden approval thresholds +- policy state for other sandboxes, users, or org workflows + +Ceiling evaluation, trust decisions, and automatic-apply eligibility should be computed outside the sandbox. + +OpenShell should also make prover-backed analysis available to agents as a proposal-authoring aid. This lets an agent test whether a candidate policy is broader than necessary and iterate toward a narrower proposal before submission. Inside the sandbox, prover output is advisory and used for drafting only. Approval and automatic-apply decisions must still rely on external validation in the gateway or trusted approver path. + +#### 2. Intentional proposal submission API + +Add a supervisor-facing proposal API so an agent can submit policy updates even before or after a deny aggregation cycle. + +Example agent actions: + +- "Show me the current policy affecting `git`." +- "Explain why this GitHub push was denied." +- "Draft the minimal rule to allow writes to `github.com` and `api.github.com` for `git` only." +- "Submit this proposal for human review." + +This proposal path should support two modes: + +- `draft_from_observation` + Builds on real deny history. +- `draft_from_agent_intent` + Allows an agent to proactively request a change based on planned work. + +Both should land in the same inbox with provenance captured. 
+ +When multiple producers submit effectively the same proposal, the gateway should apply a deterministic merge policy: + +- mechanistic proposals establish the baseline proposal record +- richer agent-authored proposals for the same sandbox + endpoint + binary may upgrade the existing record's rationale, context, and proposed L7 refinement +- fallback observation updates may continue to bump hit counts and timestamps without discarding richer metadata + +The important product requirement is that a richer agent proposal must not be silently lost behind an earlier mechanistic proposal. + +#### 3. Proposal model evolution + +Extend the existing `PolicyChunk`/draft-chunk model into a more expressive proposal object while preserving backward compatibility for current rule review commands. + +Additional fields should include: + +- Proposal source: mechanistic, agent-authored, or hybrid. +- Requested capability summary in plain language. +- Validation status and findings summary. +- Diff against current effective policy. +- Enforcement layer for first-release proposal types: + - `l4` + - `l7_rest` +- Intended scope: + - endpoint-only + - L7 method/path + - binary restriction + - time-bounded or session-bounded, if supported later +- Optional task context: + - repo URL + - issue/RFC reference + - command or tool that triggered the need + +The inbox should make it obvious whether a proposal is an L4 tunnel, an L7 REST rule, or a conversion from broad access to narrower L7 controls. + +Future protocol-aware proposal kinds such as MCP-aware controls should extend the model later rather than forcing the first-release schema to generalize prematurely. + +#### 4. Validation before approval + +Approval should present validated consequences, not just a proposed rule. + +Validation stages: + +1. Schema and static safety validation. +2. 
Deterministic simulation: + - what new hosts, ports, methods, or binaries would become reachable + - whether the change overlaps or broadens an existing rule + - whether the proposal is L4-only or protected by L7 enforcement +3. Policy-specific safety checks: + - always-blocked destinations + - suspicious private IP overrides + - wildcard or full-access expansions + - binaries or protocols that bypass L7 inspection +4. Formal verification when supported: + - use the existing prover infrastructure to check that the proposal satisfies a declared intent and does not exceed it + +The validator should emit an approval summary such as: + +- "Allows `git` to `github.com:443` and `api.github.com:443`." +- "Does not grant access to other GitHub hosts." +- "Adds write-capable REST paths for repo push semantics." +- "Touches only dynamic network policy." +- "This change is L4-only and does not provide method/path restriction." +- "This change upgrades the endpoint to L7 REST enforcement." + +Validation should also support two decision modes: + +- `human_review_required` + The proposal is shown in the developer inbox for explicit approval. +- `eligible_for_automatic_apply` + The proposal remains within a trusted approval envelope and may be applied automatically by policy. + +For first release, the recommended automatic-apply scope is intentionally narrow: + +- trusted external approver only +- network policy only +- L7 REST preferred where supported +- ephemeral lease durability by default +- only when prover, validation, and org ceiling checks succeed without ambiguity + +#### 4a. Structured deny feedback + +Denied operations should not only appear in logs and inboxes. 
OpenShell should also provide a structured deny feedback path that helps the in-sandbox agent recover intelligently by returning: + +- a machine-readable explanation of what was denied +- the relevant enforcement layer (`l4` or `l7_rest`) +- the reason the current policy did not allow it +- a pointer to the local policy workspace/API for inspection and proposal drafting + +The delivery mechanism may vary, but the RFC requires this to be a first-class capability rather than only an operator-facing side effect. + +#### 5. Unified developer inbox + +The existing draft-chunk review surface should become a generalized developer inbox with: + +- Real-time updates from the gateway. +- Filterable by sandbox, status, source, severity, and validation state. +- Renderable in: + - TUI + - CLI + - SDK/API + - future Slack/web integrations +- Support for: + - approve + - reject + - reject with guidance + - edit + - bulk approve with safeguards + - undo + - audit/history inspection + +The current TUI "Network Rules" panel is the correct seed, but the mental model should shift from "network rules list" to "policy proposal inbox." + +To support the UX targets above, the inbox architecture should include a subscription mechanism from the gateway to clients, such as streaming gRPC, SSE, or an equivalent event feed. The exact transport can be implementation-specific, but the user-visible behavior should be push-first. + +Rejection should be part of a revise-and-resubmit loop rather than a dead end. Operators should be able to reject a proposal with explanation so the agent can draft a narrower or corrected follow-up without requiring the operator to hand-author the policy change themselves. + +#### 6. L7-first agent experience + +A major product requirement is enabling strong default sandboxes with granular approval flows, especially for APIs like GitHub: + +- The default sandbox permits read-only GitHub API access via L7 policy. +- An agent attempts a write operation. 
+- The sandbox returns a structured deny that tells the agent: + - what was blocked, + - what part of policy caused the denial, + - how to inspect current policy, + - how to submit a narrow proposal. +- The agent proposes the smallest change needed for the target repo/workflow. +- The developer reviews a proposal phrased in task terms, not raw YAML only. + +OpenShell should explicitly steer the system toward the narrowest viable enforcement level: + +- prefer L7 REST rules for HTTP APIs such as GitHub, LinkedIn, X, Slack, Jira, and similar services +- fall back to L4 only when the protocol or client behavior prevents meaningful L7 enforcement +- tell the developer when a proposal is broad because the workload itself is broad, not because the system failed to model it precisely + +### REST, L4, and MCP + +REST APIs are the clearest near-term least-privilege win because OpenShell already supports `protocol: rest`, access presets, explicit method/path rules, TLS termination, and prover logic that can distinguish L4-only access from L7 write exposure. L7 REST should therefore be the default recommendation path for HTTP APIs, while L4-only proposals remain available for non-HTTP or opaque clients and should be clearly marked as broader access. MCP remains strategically important, but it should not drive the first-release schema: remote MCP still rests on transport controls such as HTTP/SSE/WebSocket, while local stdio MCP does not map neatly to network enforcement. The near-term plan is simple: **Phase 1-4 focus on L4 + L7 REST policy management; MCP-aware controls land as a later dedicated track.** + +#### 7. 
Trusted external approvers and policy ceilings + +Human approval should remain the default, but the system should also support a second mode where a trusted agent outside the sandbox can approve and apply changes automatically on behalf of the user when: + +- the organization defines an immutable high-level policy ceiling +- the sandbox policy starts below that ceiling +- the agent proposes a narrower incremental change needed to complete a task +- the prover and policy validator can show that the change stays within the allowed envelope + +In this model: + +- the org-level ceiling acts as a non-bypassable maximum +- sandbox policy revisions can expand only within that ceiling +- a trusted external agent or control-plane service may auto-apply compliant changes +- every request, validation result, and applied revision is logged for audit + +This gives OpenShell a path to adaptive least privilege without forcing a human to approve every safe change in real time. + +### Trust and approval model + +OpenShell should support at least three approval modes: + +1. `human_in_the_loop` + Every proposal requires explicit user approval. +2. `trusted_agent_within_ceiling` + A trusted external agent may apply changes automatically when validation and prover checks confirm the proposal stays within an org or user-defined maximum. +3. `manual_only_locked_down` + No automatic apply; some proposals may be visible but categorically blocked from execution by policy. + +The RFC does not propose allowing an in-sandbox agent to self-approve its own policy requests. Trusted external auto-apply is **in scope**, but it is distinct from autonomous in-sandbox mutation. The minimum shippable baseline is still a strong human-in-the-loop workflow. + +### Organizational policy layering + +This RFC assumes policy layering rather than a single mutable document: + +- `org ceiling policy` + The maximum capability envelope defined by security or platform teams. 
+- `sandbox effective policy` + The currently active policy for a sandbox, always a subset of the org ceiling when one exists. +- `proposal diff` + The incremental change requested by an agent or generated from deny analysis. + +For a proposal to be auto-applied, it must satisfy all of: + +1. valid OpenShell policy schema and merge semantics +2. no violation of always-blocked destinations or other hard safety rules +3. no violation of org ceiling constraints +4. successful prover or simulation checks against declared assumptions +5. successful audit logging and attribution + +If any check fails, the proposal falls back to human review or outright rejection. + +### Durability model + +Policy changes should not all have the same lifecycle. This RFC proposes three durability classes: + +1. `ephemeral_lease` + A time-bounded grant that expires automatically unless renewed. This is the recommended default for automatically applied expansions. +2. `sandbox_durable` + A durable revision for a specific sandbox or long-lived workflow. Suitable for human-approved changes or explicit promotion from a lease. +3. `promoted_policy_artifact` + A reusable policy artifact intended for future sandboxes, templates, or org-managed defaults. + +Recommended defaults: + +- auto-applied trusted-agent changes should start as `ephemeral_lease` unless explicitly promoted +- human-approved changes may become `sandbox_durable` directly when the reviewer intends lasting behavior +- promotion into reusable artifacts should be a deliberate step + +### Reject with guidance + +Operators should be able to do more than approve or reject. The system should support a guided rejection path: + +- `approve` + Accept and apply the proposal. +- `reject` + Decline the proposal without expecting an immediate follow-up. +- `reject_with_guidance` + Decline the proposal while returning operator guidance that the agent can use to revise and resubmit. 
+ +Guidance may include free-form explanation plus structured hints such as `too_broad`, `use_l7_not_l4`, `wrong_binary_scope`, `wrong_endpoint`, `needs_time_limit`, or `outside_org_ceiling`. + +### Example: trusted daily research workflow + +One motivating workflow is a recurring research task: search X and LinkedIn for posts about a topic, summarize the results, and email the summary to the user. In that flow, the sandbox may start with minimal permissions plus an email provider, then request new outbound access to X and LinkedIn. A trusted external policy agent can prefer L7 REST rules when possible and apply them automatically when they fit within the organization's permitted research ceiling. + +### API and component changes + +#### Sandbox supervisor + +Add a local policy interaction surface: + +- Unix domain socket API +- sandbox-local CLI backed by that API + +Representative operations: + +- read effective policy and recent denials +- inspect proposal guidance and current proposal state +- submit a policy proposal + +This surface must be readable by the agent but not self-approving. 
+ +Phase 2 implementation decisions: + +- primary transport: Unix domain socket API +- ergonomic wrapper: sandbox-local CLI +- first trust model: the sandbox is treated as single-tenant, so local callers are part of the sandbox tenant; this does not grant approval rights +- first proposal format: a minimal structured request tailored to L4 and L7 REST changes + +#### Gateway / server + +Extend the gateway proposal service to support: + +- explicit agent-authored proposal submission +- richer proposal metadata +- validation result persistence +- inbox subscriptions for multiple frontends +- trusted approver identities and authorization policies +- automatic-apply decisions gated by org ceiling and validation outcomes +- enforcement-layer-aware summaries and diffing +- durability classes and lease expiration metadata +- rejection reasons and operator guidance that can feed follow-up proposals +- stronger audit records tying: + - deny event(s) + - proposal author/source + - approval decision + - resulting policy revision + +The existing gRPC policy service is the natural place to grow this. + +#### TUI, CLI, and SDK + +The TUI should evolve from the current rules panel into a richer inbox with proposal summaries, validation state, diff views, edit-before-approve flow, and a clear distinction between "awaiting you" and "already auto-applied within policy ceiling." The CLI should preserve `openshell rule` for compatibility while introducing clearer proposal-centric aliases, and CLI/SDK surfaces should expose the same approval metadata so integrators can build their own inboxes and automation. + +## Implementation plan + +### Phase 1: Productize the current Policy Advisor + +Goal: turn the existing network rule draft flow into a first-class, polished foundation. + +Deliverables: + +- Rename and frame the current draft-chunk system internally as a proposal inbox. +- Add proposal provenance fields and validation summary fields. 
+- Improve TUI and CLI language to emphasize reviewable proposals. +- Document the current approval loop as a stable workflow. +- Set explicit UX targets for proposal latency and review responsiveness. +- Add a push/subscription path for proposal and decision updates to inbox clients. +- Audit existing `PolicyChunk` and draft-chunk persistence fields, then either hydrate, deprecate, or remove hollow fields before extending the model further. + +This phase is mostly packaging and data-model hardening on top of existing code in: + +- `crates/openshell-sandbox` +- `crates/openshell-server` +- `crates/openshell-tui` +- `crates/openshell-cli` + +### Phase 2: Local agent skill and supervisor policy API + +Goal: let any agent in a sandbox intentionally inspect and draft policy changes. + +Deliverables: + +- Generated sandbox-local `SKILL.md` or equivalent instruction bundle. +- Supervisor read APIs for policy state, denials, and capabilities. +- Initial proposal submission API. +- Structured deny messages that point agents to the local policy workflow. +- Feedback path so agents can read operator rejection guidance and iterate on a proposal. + +This is the point where the feature becomes broadly useful to OpenClaw, Claude Code, Cursor, and other agents. + +### Phase 3: Validation and simulation + +Goal: make approval trustworthy and fast. + +Deliverables: + +- Policy diff generation. +- Consequence summaries for proposed changes. +- Integration with prover/simulation infrastructure where available. +- Clear validation statuses in TUI and CLI. +- Org ceiling checks and trusted-agent auto-apply eligibility. +- Clear reporting for L4-only versus L7-enforced proposals. +- Safety-aware redaction so sandbox-local introspection does not expose full ceiling internals. + +This phase is critical before broadening beyond simple endpoint approvals. + +### Phase 4: Rich L7 authoring and GitHub write flow + +Goal: demonstrate the full UX on a high-value developer workflow. 
+ +Deliverables: + +- Structured GitHub write-policy proposals from agent intent. +- Support for method/path-level rule authoring via agent workflow. +- Validation tuned for common provider/API patterns. +- Demo and tutorial flows centered on repo write access. + +This phase should produce the canonical blocked-write upgrade experience. + +### Phase 5: Generalized inbox surfaces + +Goal: expose proposal review outside the TUI. + +Deliverables: + +- Stable SDK/API for proposal feeds and decisions. +- CLI parity for all proposal operations. +- Optional Slack/web notification adapters. + +### Phase 6: Trusted automation and recurring workflows + +Goal: support safe automatic policy evolution for approved automation patterns. + +Deliverables: + +- policy ceiling model for org or platform admins +- trusted external approver identity model +- automatic apply path when proposals stay within ceiling +- audit trail and reporting for auto-applied revisions +- lease-based durability for automatically applied changes +- reference workflow for recurring research-and-email automation + +### Future phase: protocol-aware policy adapters + +Goal: extend dynamic policy management beyond REST where higher-level semantics exist, including MCP-aware policy controls, richer SQL enforcement once enforce-mode support exists, and protocol-specific adapters for common tool ecosystems. + +## Migration and compatibility + +The intended rollout is additive-first. + +- Existing `openshell rule` commands should continue to work while proposal-centric APIs and UX are introduced. +- Existing mechanistic sandboxes should remain compatible with a newer gateway during the transition. +- Database and proto evolution should prefer additive fields and compatibility shims before any cleanup of legacy draft-chunk semantics. +- If proposal semantics outgrow the current draft-chunk schema, migration should preserve existing pending, approved, and rejected records rather than discarding inbox history. 
+ +## Risks + +- Agent-authored proposals may overfit to task success and underweight least privilege. +- A local skill that teaches policy mutation could be abused if submission and approval boundaries are not crisp. +- Validation that is too weak will make approvals feel unsafe; validation that is too noisy will make the UX slow and frustrating. +- Expanding too quickly from network policy into filesystem/process policy could blur scope and delay a polished first release. +- Adding multiple proposal producers without a unified model could create duplicate or conflicting inbox entries. +- If the inbox UX is not excellent, developers may perceive OpenShell as secure but cumbersome and choose a less safe system with lower friction. +- Automatic apply under trusted-agent control could become a footgun if org ceiling semantics are vague or prover guarantees are misunderstood. + +## Alternatives + +### Keep the current Policy Advisor as-is + +This would preserve a useful feature, but it leaves the product short of the agent-native UX we want. Developers would still do too much translation work between denies, policy syntax, and human approval. + +### Rely only on a human-side coding agent outside the sandbox + +This is workable for expert users and is already partially demonstrated in tutorials, but it misses the core product insight: the in-sandbox agent has the best task context and should be the one drafting the narrowest possible change. + +### Let agents mutate policy directly without approval + +This would be faster, but it is not aligned with OpenShell's safety model and would erase the developer-control story that makes dynamic policy editing acceptable in the first place. + +### Require human approval for every policy change forever + +This is safer in a narrow sense, but it caps automation quality and makes some recurring workflows awkward or brittle. A trusted external approver model bounded by organizational ceilings provides a better long-term path. 
+ +### Treat all network expansion as generic L4 access + +This would simplify the proposal model, but it would throw away one of OpenShell's strongest differentiators. For API-driven developer workflows, L7 REST enforcement is often the right least-privilege abstraction and should be surfaced directly in the RFC and UI. + +### Move proposal generation to the gateway + +This would centralize logic, but it weakens the current architecture. Sandbox-side analysis is the right default because it scales naturally and keeps task-local context near the source of truth. + +## Open questions + +- How should developer intent be declared for validation: + - free-form text + - a structured capability request + - both +- Do we want a single proposal inbox for all policy domains eventually, or separate inboxes that share infrastructure? +- How should org ceiling policy be authored and stored: OpenShell policy syntax, a separate constraint language, or both? +- Which identities are allowed to act as trusted external approvers, and how are those permissions delegated? +- How do we present auto-applied changes so users feel informed rather than surprised? +- When L7 policy is involved, how much raw request context can be safely shown to the developer without leaking sensitive request data? +- Should MCP-aware policy be modeled as network policy enrichment, a separate policy domain, or a capability layer above both? 
From d8696864fc066fc7d0c83ee7c0af170d99ce552a Mon Sep 17 00:00:00 2001 From: Alexander Watson Date: Thu, 30 Apr 2026 14:57:09 -0700 Subject: [PATCH 02/23] docs(rfc): switch policy MVP to local API --- rfc/0001-agent-driven-policy-management.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/rfc/0001-agent-driven-policy-management.md b/rfc/0001-agent-driven-policy-management.md index 2d80b8399..1502c93de 100644 --- a/rfc/0001-agent-driven-policy-management.md +++ b/rfc/0001-agent-driven-policy-management.md @@ -44,7 +44,7 @@ This RFC proposes the next layer: make policy adaptation an intentional, agent-n The first implementation is tracked in [#1062](https://github.com/NVIDIA/OpenShell/issues/1062). It intentionally starts with the smallest agent-driven loop that can validate the product experience: - structured L7 REST deny responses for agent-readable failures -- a sandbox-local `openshell-policy` CLI backed by existing files, logs, and per-sandbox mTLS gateway calls +- a sandbox-local `policy.local` HTTP API backed by existing files, logs, and per-sandbox mTLS gateway calls - static sandbox-local agent guidance in `/etc/openshell/SKILL.md` - agent-authored proposal provenance, validation status, and rejection guidance in the existing draft policy flow - TUI/CLI review for a single sandbox, with polling as the MVP refresh path @@ -212,8 +212,8 @@ The end-to-end interaction should look like this: ```mermaid sequenceDiagram participant A as Agent in Sandbox - participant S as Local Skill / CLI - participant U as Supervisor Local API (Unix socket) + participant S as Local Skill + participant U as policy.local API participant P as Local Prover Aid participant G as Gateway Proposal Service participant X as External Validator / Trusted Approver @@ -253,7 +253,7 @@ The workspace should provide: - Recent denials and related proposal history. - Guidance for generating the narrowest change possible. 
-The first implementation can be a generated `SKILL.md` plus a local CLI or Unix-socket API exposed by the sandbox supervisor. The long-term contract is the API; the skill is the ergonomic on-ramp. +The first implementation can be a static `SKILL.md` plus a sandbox-local `policy.local` HTTP API. The long-term contract is the API; the skill is the ergonomic on-ramp. MCP can wrap this API later for agents that benefit from tool discovery, but it should not be the first load-bearing protocol or a separate implementation path. The sandbox-facing surface must also have an explicit information boundary: @@ -526,8 +526,8 @@ One motivating workflow is a recurring research task: search X and LinkedIn for Add a local policy interaction surface: -- Unix domain socket API -- sandbox-local CLI backed by that API +- sandbox-local `policy.local` HTTP API +- optional future MCP wrapper backed by that API Representative operations: @@ -539,10 +539,10 @@ This surface must be readable by the agent but not self-approving. 
Phase 2 implementation decisions: -- primary transport: Unix domain socket API -- ergonomic wrapper: sandbox-local CLI +- primary transport: sandbox-local HTTP JSON at `policy.local` +- ergonomic wrapper: defer MCP/CLI wrappers until the local API proves useful - first trust model: the sandbox is treated as single-tenant, so local callers are part of the sandbox tenant; this does not grant approval rights -- first proposal format: a minimal structured request tailored to L4 and L7 REST changes +- first proposal format: reuse the `PolicyMergeOperation` shape behind `openshell policy update`; sandbox-origin operations are stored as draft chunks for approval instead of being applied immediately #### Gateway / server From e82b535ff8480eec74d31ccbe8e47b88f2310ae9 Mon Sep 17 00:00:00 2001 From: Alexander Watson Date: Thu, 30 Apr 2026 15:20:59 -0700 Subject: [PATCH 03/23] docs(rfc): clarify policy advisor skill and local logs --- rfc/0001-agent-driven-policy-management.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/rfc/0001-agent-driven-policy-management.md b/rfc/0001-agent-driven-policy-management.md index 1502c93de..07c9bb8a0 100644 --- a/rfc/0001-agent-driven-policy-management.md +++ b/rfc/0001-agent-driven-policy-management.md @@ -45,7 +45,7 @@ The first implementation is tracked in [#1062](https://github.com/NVIDIA/OpenShe - structured L7 REST deny responses for agent-readable failures - a sandbox-local `policy.local` HTTP API backed by existing files, logs, and per-sandbox mTLS gateway calls -- static sandbox-local agent guidance in `/etc/openshell/SKILL.md` +- static sandbox-local agent guidance in `/etc/openshell/skills/policy_advisor.md` - agent-authored proposal provenance, validation status, and rejection guidance in the existing draft policy flow - TUI/CLI review for a single sandbox, with polling as the MVP refresh path @@ -91,7 +91,7 @@ This RFC is therefore network-first by design, not because other policy domains 
Every OpenShell sandbox should be able to host an agent-capable policy workflow with four core affordances: 1. A local capability description that teaches an agent how to inspect current policy state, understand the available policy language, and submit a proposal for review. -2. A sandbox-local or supervisor-adjacent API for reading effective policy, recent denials, and proposal state. +2. A sandbox-local or supervisor-adjacent API for reading effective policy, recent denials, sandbox-local activity logs, and proposal state. 3. A gateway-managed developer inbox for reviewing, editing, approving, rejecting, and auditing proposals in real time. 4. A validation pipeline that checks proposed policy changes before they are applied. @@ -250,17 +250,17 @@ The workspace should provide: - Effective sandbox policy, already resolved to the currently active version. - Policy schema and examples relevant to the sandbox's supported policy features. - A description of approval workflow semantics. -- Recent denials and related proposal history. +- Recent denials, sandbox-local activity logs, and related proposal history. - Guidance for generating the narrowest change possible. -The first implementation can be a static `SKILL.md` plus a sandbox-local `policy.local` HTTP API. The long-term contract is the API; the skill is the ergonomic on-ramp. MCP can wrap this API later for agents that benefit from tool discovery, but it should not be the first load-bearing protocol or a separate implementation path. +The first implementation can be a static `/etc/openshell/skills/policy_advisor.md` plus a sandbox-local `policy.local` HTTP API. The long-term contract is the API; the skill is the ergonomic on-ramp. MCP can wrap this API later for agents that benefit from tool discovery, but it should not be the first load-bearing protocol or a separate implementation path. 
The sandbox-facing surface must also have an explicit information boundary: Visible to the in-sandbox agent: - the current effective sandbox policy -- deny reasons and local proposal history for that sandbox +- deny reasons, sandbox-local logs, and local proposal history for that sandbox - supported policy primitives and examples - coarse proposal outcomes such as `needs_human_review`, `eligible_for_external_review`, or `rejected_for_safety` @@ -531,7 +531,7 @@ Add a local policy interaction surface: Representative operations: -- read effective policy and recent denials +- read effective policy, recent denials, and sandbox-local activity logs - inspect proposal guidance and current proposal state - submit a policy proposal @@ -542,7 +542,7 @@ Phase 2 implementation decisions: - primary transport: sandbox-local HTTP JSON at `policy.local` - ergonomic wrapper: defer MCP/CLI wrappers until the local API proves useful - first trust model: the sandbox is treated as single-tenant, so local callers are part of the sandbox tenant; this does not grant approval rights -- first proposal format: reuse the `PolicyMergeOperation` shape behind `openshell policy update`; sandbox-origin operations are stored as draft chunks for approval instead of being applied immediately +- first proposal format: reuse the `PolicyMergeOperation` shape behind `openshell policy update` inside a JSON request body; the supervisor/local service bundles those operations with intent, summary, and optional evidence references, sends them to the gateway over gRPC, and the gateway stores them as draft chunks for approval instead of applying them immediately #### Gateway / server @@ -598,8 +598,8 @@ Goal: let any agent in a sandbox intentionally inspect and draft policy changes. Deliverables: -- Generated sandbox-local `SKILL.md` or equivalent instruction bundle. -- Supervisor read APIs for policy state, denials, and capabilities. 
+- Generated sandbox-local `policy_advisor.md` or equivalent instruction bundle. +- Supervisor read APIs for policy state, denials, local activity logs, and capabilities. - Initial proposal submission API. - Structured deny messages that point agents to the local policy workflow. - Feedback path so agents can read operator rejection guidance and iterate on a proposal. From 5616786da4f6e1c0bd03eb458eec20f353c49aa9 Mon Sep 17 00:00:00 2001 From: Alexander Watson Date: Fri, 1 May 2026 10:40:43 -0700 Subject: [PATCH 04/23] feat(sandbox): add agent-driven policy proposal loop --- crates/openshell-sandbox/src/l7/relay.rs | 5 + crates/openshell-sandbox/src/l7/rest.rs | 211 +++++- crates/openshell-sandbox/src/lib.rs | 26 + crates/openshell-sandbox/src/policy_local.rs | 659 ++++++++++++++++++ crates/openshell-sandbox/src/proxy.rs | 48 +- crates/openshell-sandbox/src/skills.rs | 63 ++ .../src/skills/policy_advisor.md | 129 ++++ .../agent-driven-policy-management/README.md | 83 +++ .../agent-driven-policy-management/demo.sh | 385 ++++++++++ .../policy.template.yaml | 28 + .../sandbox-runner.sh | 142 ++++ 11 files changed, 1769 insertions(+), 10 deletions(-) create mode 100644 crates/openshell-sandbox/src/policy_local.rs create mode 100644 crates/openshell-sandbox/src/skills.rs create mode 100644 crates/openshell-sandbox/src/skills/policy_advisor.md create mode 100644 examples/agent-driven-policy-management/README.md create mode 100755 examples/agent-driven-policy-management/demo.sh create mode 100644 examples/agent-driven-policy-management/policy.template.yaml create mode 100755 examples/agent-driven-policy-management/sandbox-runner.sh diff --git a/crates/openshell-sandbox/src/l7/relay.rs b/crates/openshell-sandbox/src/l7/relay.rs index d0599ea99..ff765e354 100644 --- a/crates/openshell-sandbox/src/l7/relay.rs +++ b/crates/openshell-sandbox/src/l7/relay.rs @@ -584,6 +584,11 @@ where &reason, client, Some(&redacted_target), + Some(crate::l7::rest::DenyResponseContext { + host: 
Some(&ctx.host), + port: Some(ctx.port), + binary: Some(&ctx.binary_path), + }), ) .await?; return Ok(()); diff --git a/crates/openshell-sandbox/src/l7/rest.rs b/crates/openshell-sandbox/src/l7/rest.rs index 19acdbf32..464425435 100644 --- a/crates/openshell-sandbox/src/l7/rest.rs +++ b/crates/openshell-sandbox/src/l7/rest.rs @@ -72,10 +72,19 @@ impl L7Provider for RestProvider { reason: &str, client: &mut C, ) -> Result<()> { - send_deny_response(req, policy_name, reason, client, None).await + send_deny_response(req, policy_name, reason, client, None, None).await } } +/// Extra sandbox-side context included in agent-readable deny responses when +/// the relay has it available. +#[derive(Debug, Clone, Copy, Default)] +pub(crate) struct DenyResponseContext<'a> { + pub(crate) host: Option<&'a str>, + pub(crate) port: Option, + pub(crate) binary: Option<&'a str>, +} + impl RestProvider { /// Deny with a redacted target for the response body. pub(crate) async fn deny_with_redacted_target( @@ -85,8 +94,9 @@ impl RestProvider { reason: &str, client: &mut C, redacted_target: Option<&str>, + context: Option>, ) -> Result<()> { - send_deny_response(req, policy_name, reason, client, redacted_target).await + send_deny_response(req, policy_name, reason, client, redacted_target, context).await } } @@ -452,14 +462,9 @@ async fn send_deny_response( reason: &str, client: &mut C, redacted_target: Option<&str>, + context: Option>, ) -> Result<()> { - let target = redacted_target.unwrap_or(&req.target); - let body = serde_json::json!({ - "error": "policy_denied", - "policy": policy_name, - "rule": format!("{} {}", req.action, target), - "detail": reason - }); + let body = deny_response_body(req, policy_name, reason, redacted_target, context); let body_bytes = body.to_string(); let response = format!( "HTTP/1.1 403 Forbidden\r\n\ @@ -481,6 +486,91 @@ async fn send_deny_response( Ok(()) } +fn deny_response_body( + req: &L7Request, + policy_name: &str, + reason: &str, + redacted_target: 
Option<&str>, + context: Option>, +) -> serde_json::Value { + let target = redacted_target.unwrap_or(&req.target); + let context = context.unwrap_or_default(); + let host = non_empty(context.host); + let binary = non_empty(context.binary); + + let mut rule_missing = serde_json::Map::new(); + rule_missing.insert("type".to_string(), serde_json::json!("rest_allow")); + rule_missing.insert("layer".to_string(), serde_json::json!("l7")); + rule_missing.insert("method".to_string(), serde_json::json!(req.action)); + rule_missing.insert("path".to_string(), serde_json::json!(target)); + if let Some(host) = host { + rule_missing.insert("host".to_string(), serde_json::json!(host)); + } + if let Some(port) = context.port { + rule_missing.insert("port".to_string(), serde_json::json!(port)); + } + if let Some(binary) = binary { + rule_missing.insert("binary".to_string(), serde_json::json!(binary)); + } + + let mut body = serde_json::Map::new(); + body.insert("error".to_string(), serde_json::json!("policy_denied")); + body.insert("policy".to_string(), serde_json::json!(policy_name)); + body.insert( + "rule".to_string(), + serde_json::json!(format!("{} {}", req.action, target)), + ); + body.insert("detail".to_string(), serde_json::json!(reason)); + body.insert("layer".to_string(), serde_json::json!("l7")); + body.insert("protocol".to_string(), serde_json::json!("rest")); + body.insert("method".to_string(), serde_json::json!(req.action)); + body.insert("path".to_string(), serde_json::json!(target)); + if let Some(host) = host { + body.insert("host".to_string(), serde_json::json!(host)); + } + if let Some(port) = context.port { + body.insert("port".to_string(), serde_json::json!(port)); + } + if let Some(binary) = binary { + body.insert("binary".to_string(), serde_json::json!(binary)); + } + body.insert( + "rule_missing".to_string(), + serde_json::Value::Object(rule_missing), + ); + body.insert( + "next_steps".to_string(), + serde_json::json!([ + { + "action": "read_skill", + "path": 
"/etc/openshell/skills/policy_advisor.md" + }, + { + "action": "inspect_policy", + "method": "GET", + "url": "http://policy.local/v1/policy/current" + }, + { + "action": "inspect_recent_denials", + "method": "GET", + "url": "http://policy.local/v1/denials?last=5" + }, + { + "action": "submit_proposal", + "method": "POST", + "url": "http://policy.local/v1/proposals", + "body_type": "PolicyMergeOperation" + } + ]), + ); + + serde_json::Value::Object(body) +} + +fn non_empty(value: Option<&str>) -> Option<&str> { + value.map(str::trim).filter(|value| !value.is_empty()) +} + /// Parse Content-Length or Transfer-Encoding from HTTP headers. /// /// Per RFC 7230 Section 3.3.3, rejects requests containing both @@ -977,6 +1067,109 @@ mod tests { const TEST_POLICY: &str = include_str!("../../data/sandbox-policy.rego"); + #[test] + fn deny_response_body_is_agent_readable_and_redacted() { + let req = L7Request { + action: "PUT".to_string(), + target: "/repos/NVIDIA/OpenShell/contents/README.md?access_token=secret-token" + .to_string(), + query_params: HashMap::new(), + raw_header: Vec::new(), + body_length: BodyLength::ContentLength(128), + }; + + let body = deny_response_body( + &req, + "github-readonly", + "no matching L7 allow rule", + Some("/repos/NVIDIA/OpenShell/contents/README.md"), + Some(DenyResponseContext { + host: Some("api.github.com"), + port: Some(443), + binary: Some("/usr/bin/gh"), + }), + ); + + assert_eq!(body["error"], "policy_denied"); + assert_eq!(body["policy"], "github-readonly"); + assert_eq!(body["layer"], "l7"); + assert_eq!(body["protocol"], "rest"); + assert_eq!(body["method"], "PUT"); + assert_eq!(body["host"], "api.github.com"); + assert_eq!(body["port"], 443); + assert_eq!(body["binary"], "/usr/bin/gh"); + assert_eq!(body["path"], "/repos/NVIDIA/OpenShell/contents/README.md"); + assert_eq!( + body["rule"], + "PUT /repos/NVIDIA/OpenShell/contents/README.md" + ); + assert_eq!(body["rule_missing"]["type"], "rest_allow"); + 
assert_eq!(body["rule_missing"]["layer"], "l7"); + assert_eq!(body["rule_missing"]["method"], "PUT"); + assert_eq!( + body["rule_missing"]["path"], + "/repos/NVIDIA/OpenShell/contents/README.md" + ); + assert_eq!(body["rule_missing"]["host"], "api.github.com"); + assert_eq!(body["rule_missing"]["port"], 443); + assert_eq!(body["rule_missing"]["binary"], "/usr/bin/gh"); + assert_eq!(body["next_steps"][0]["action"], "read_skill"); + assert_eq!( + body["next_steps"][0]["path"], + "/etc/openshell/skills/policy_advisor.md" + ); + assert_eq!(body["next_steps"][3]["body_type"], "PolicyMergeOperation"); + assert!( + !body.to_string().contains("secret-token"), + "deny body must not leak query params or credential values" + ); + } + + #[tokio::test] + async fn send_deny_response_writes_structured_json_403() { + let (mut client, mut server) = tokio::io::duplex(4096); + let send = tokio::spawn(async move { + let req = L7Request { + action: "POST".to_string(), + target: "/user/repos".to_string(), + query_params: HashMap::new(), + raw_header: Vec::new(), + body_length: BodyLength::ContentLength(64), + }; + send_deny_response( + &req, + "github-readonly", + "no matching L7 allow rule", + &mut server, + None, + Some(DenyResponseContext { + host: Some("api.github.com"), + port: Some(443), + binary: Some("/usr/bin/gh"), + }), + ) + .await + .unwrap(); + }); + + let mut received = Vec::new(); + client.read_to_end(&mut received).await.unwrap(); + send.await.unwrap(); + + let response = String::from_utf8(received).unwrap(); + assert!(response.starts_with("HTTP/1.1 403 Forbidden")); + assert!(response.contains("Content-Type: application/json")); + assert!(response.contains("X-OpenShell-Policy: github-readonly")); + + let (_, body) = response.split_once("\r\n\r\n").unwrap(); + let body: serde_json::Value = serde_json::from_str(body).unwrap(); + assert_eq!(body["error"], "policy_denied"); + assert_eq!(body["method"], "POST"); + assert_eq!(body["path"], "/user/repos"); + 
assert_eq!(body["rule_missing"]["host"], "api.github.com"); + assert_eq!(body["next_steps"][2]["action"], "inspect_recent_denials"); + } + #[test] fn parse_content_length() { let headers = "POST /api HTTP/1.1\r\nHost: example.com\r\nContent-Length: 42\r\n\r\n"; diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index abbf7eb65..c2f1a4262 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -15,12 +15,14 @@ pub mod log_push; pub mod mechanistic_mapper; pub mod opa; mod policy; +mod policy_local; mod process; pub mod procfs; mod provider_credentials; pub mod proxy; mod sandbox; mod secrets; +mod skills; mod ssh; mod supervisor_session; @@ -260,6 +262,11 @@ pub async fn run_sandbox( policy_data, ) .await?; + let policy_local_ctx = Arc::new(policy_local::PolicyLocalContext::new( + retained_proto.clone(), + openshell_endpoint.clone(), + sandbox_name_for_agg.clone().or_else(|| sandbox_id.clone()), + )); // Validate that the required "sandbox" user exists in this image. // All sandbox images must include this user for privilege dropping. @@ -318,6 +325,18 @@ pub async fn run_sandbox( // Prepare filesystem: create and chown read_write directories prepare_filesystem(&policy)?; + match skills::install_static_skills() { + Ok(installed) => { + info!( + path = %installed.policy_advisor.display(), + "Installed sandbox agent skill" + ); + } + Err(error) => { + warn!(error = %error, "Failed to install sandbox agent skill"); + } + } + // Generate ephemeral CA and TLS state for HTTPS L7 inspection. // The CA cert is written to disk so sandbox processes can trust it. 
let (tls_state, ca_file_paths) = if matches!(policy.network.mode, NetworkMode::Proxy) { @@ -485,6 +504,7 @@ pub async fn run_sandbox( tls_state, inference_ctx, Some(provider_credentials.clone()), + Some(policy_local_ctx.clone()), denial_tx, ) .await?; @@ -801,6 +821,7 @@ pub async fn run_sandbox( let poll_ocsf_enabled = ocsf_enabled.clone(); let poll_pid = entrypoint_pid.clone(); let poll_provider_credentials = provider_credentials.clone(); + let poll_policy_local = policy_local_ctx.clone(); let poll_interval_secs: u64 = std::env::var("OPENSHELL_POLICY_POLL_INTERVAL_SECS") .ok() .and_then(|v| v.parse().ok()) @@ -815,6 +836,7 @@ pub async fn run_sandbox( poll_interval_secs, &poll_ocsf_enabled, poll_provider_credentials, + Some(poll_policy_local), ) .await { @@ -2159,6 +2181,7 @@ async fn run_policy_poll_loop( interval_secs: u64, ocsf_enabled: &std::sync::atomic::AtomicBool, provider_credentials: provider_credentials::ProviderCredentialState, + policy_local_ctx: Option>, ) -> Result<()> { use crate::grpc_client::CachedOpenShellClient; use openshell_core::proto::PolicySource; @@ -2276,6 +2299,9 @@ async fn run_policy_poll_loop( let pid = entrypoint_pid.load(Ordering::Acquire); match opa_engine.reload_from_proto_with_pid(policy, pid) { Ok(()) => { + if let Some(ctx) = policy_local_ctx.as_ref() { + ctx.set_current_policy(policy.clone()).await; + } if result.global_policy_version > 0 { ocsf_emit!(ConfigStateChangeBuilder::new(ocsf_ctx()) .severity(SeverityId::Informational) diff --git a/crates/openshell-sandbox/src/policy_local.rs b/crates/openshell-sandbox/src/policy_local.rs new file mode 100644 index 000000000..f44843704 --- /dev/null +++ b/crates/openshell-sandbox/src/policy_local.rs @@ -0,0 +1,659 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Sandbox-local policy advisor HTTP API. 
+ +use miette::{IntoDiagnostic, Result}; +use openshell_core::proto::{ + L7Allow, L7DenyRule, L7Rule, NetworkBinary, NetworkEndpoint, NetworkPolicyRule, PolicyChunk, + SandboxPolicy as ProtoSandboxPolicy, SubmitPolicyAnalysisRequest, +}; +use serde::Deserialize; +use std::collections::HashMap; +use std::sync::Arc; +use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; +use tokio::sync::RwLock; + +pub const POLICY_LOCAL_HOST: &str = "policy.local"; + +const MAX_POLICY_LOCAL_BODY_BYTES: usize = 64 * 1024; + +#[derive(Debug)] +pub struct PolicyLocalContext { + current_policy: Arc>>, + gateway_endpoint: Option, + sandbox_name: Option, +} + +impl PolicyLocalContext { + pub fn new( + current_policy: Option, + gateway_endpoint: Option, + sandbox_name: Option, + ) -> Self { + Self { + current_policy: Arc::new(RwLock::new(current_policy)), + gateway_endpoint, + sandbox_name, + } + } + + pub async fn set_current_policy(&self, policy: ProtoSandboxPolicy) { + *self.current_policy.write().await = Some(policy); + } +} + +pub async fn handle_forward_request( + ctx: &PolicyLocalContext, + method: &str, + path: &str, + initial_request: &[u8], + client: &mut S, +) -> Result<()> +where + S: AsyncRead + AsyncWrite + Unpin, +{ + let body = read_request_body(initial_request, client).await?; + let (status, payload) = route_request(ctx, method, path, &body).await; + write_json_response(client, status, payload).await +} + +async fn route_request( + ctx: &PolicyLocalContext, + method: &str, + path: &str, + body: &[u8], +) -> (u16, serde_json::Value) { + let route = path.split_once('?').map_or(path, |(route, _)| route); + match (method, route) { + ("GET", "/v1/policy/current") => current_policy_response(ctx).await, + ("GET", "/v1/denials") => ( + 200, + serde_json::json!({ + "denials": [], + "note": "recent-denial listing is not wired in this MVP slice; use the structured 403 body and /var/log/openshell*.log for now" + }), + ), + ("POST", "/v1/proposals") => 
submit_proposal(ctx, body).await, + _ => ( + 404, + serde_json::json!({ + "error": "not_found", + "detail": format!("policy.local route not found: {method} {route}") + }), + ), + } +} + +async fn current_policy_response(ctx: &PolicyLocalContext) -> (u16, serde_json::Value) { + let Some(policy) = ctx.current_policy.read().await.clone() else { + return ( + 404, + serde_json::json!({ + "error": "policy_unavailable", + "detail": "no current sandbox policy is loaded" + }), + ); + }; + + match openshell_policy::serialize_sandbox_policy(&policy) { + Ok(policy_yaml) => ( + 200, + serde_json::json!({ + "format": "yaml", + "policy_yaml": policy_yaml + }), + ), + Err(error) => ( + 500, + serde_json::json!({ + "error": "policy_serialize_failed", + "detail": error.to_string() + }), + ), + } +} + +async fn submit_proposal(ctx: &PolicyLocalContext, body: &[u8]) -> (u16, serde_json::Value) { + let Some(endpoint) = ctx.gateway_endpoint.as_deref() else { + return ( + 503, + serde_json::json!({ + "error": "gateway_unavailable", + "detail": "policy proposal submission requires a gateway-connected sandbox" + }), + ); + }; + let Some(sandbox_name) = ctx + .sandbox_name + .as_deref() + .map(str::trim) + .filter(|name| !name.is_empty()) + else { + return ( + 503, + serde_json::json!({ + "error": "sandbox_name_unavailable", + "detail": "policy proposal submission requires a sandbox name" + }), + ); + }; + + let chunks = match proposal_chunks_from_body(body) { + Ok(chunks) => chunks, + Err(error) => return (400, error_payload("invalid_proposal", error)), + }; + + let client = match crate::grpc_client::CachedOpenShellClient::connect(endpoint).await { + Ok(client) => client, + Err(error) => { + return ( + 502, + serde_json::json!({ + "error": "gateway_connect_failed", + "detail": error.to_string() + }), + ); + } + }; + + let mut raw_client = client.raw_client(); + let response = match raw_client + .submit_policy_analysis(SubmitPolicyAnalysisRequest { + summaries: vec![], + proposed_chunks: 
chunks, + analysis_mode: "agent".to_string(), + name: sandbox_name.to_string(), + }) + .await + { + Ok(response) => response.into_inner(), + Err(error) => { + return ( + 502, + serde_json::json!({ + "error": "proposal_submit_failed", + "detail": error.to_string() + }), + ); + } + }; + + ( + 202, + serde_json::json!({ + "status": "submitted", + "accepted_chunks": response.accepted_chunks, + "rejected_chunks": response.rejected_chunks, + "rejection_reasons": response.rejection_reasons, + "note": "the gateway assigns proposal ids; review pending proposals in the developer inbox" + }), + ) +} + +fn proposal_chunks_from_body(body: &[u8]) -> std::result::Result, String> { + let request: ProposalRequest = serde_json::from_slice(body).map_err(|e| e.to_string())?; + if request.operations.is_empty() { + return Err("proposal requires at least one operation".to_string()); + } + + let mut chunks = Vec::new(); + for operation in request.operations { + let Some(add_rule) = operation + .get("addRule") + .or_else(|| operation.get("add_rule")) + .cloned() + else { + return Err( + "this MVP accepts addRule operations; submit a full narrow NetworkPolicyRule" + .to_string(), + ); + }; + let add_rule: AddNetworkRuleJson = + serde_json::from_value(add_rule).map_err(|e| e.to_string())?; + chunks.push(policy_chunk_from_add_rule( + add_rule, + request.intent_summary.as_deref().unwrap_or_default(), + )?); + } + + Ok(chunks) +} + +fn policy_chunk_from_add_rule( + add_rule: AddNetworkRuleJson, + intent_summary: &str, +) -> std::result::Result { + let mut rule = network_rule_from_json(add_rule.rule)?; + let rule_name = add_rule + .rule_name + .as_deref() + .map(str::trim) + .filter(|name| !name.is_empty()) + .map_or_else(|| rule.name.clone(), ToString::to_string); + if rule_name.trim().is_empty() { + return Err("addRule.ruleName or rule.name is required".to_string()); + } + if rule.name.trim().is_empty() { + rule.name.clone_from(&rule_name); + } + + let binary = rule + .binaries + .first() + 
.map(|binary| binary.path.clone()) + .unwrap_or_default(); + + Ok(PolicyChunk { + id: String::new(), + status: "pending".to_string(), + rule_name, + proposed_rule: Some(rule), + rationale: intent_summary.to_string(), + security_notes: String::new(), + confidence: 0.75, + denial_summary_ids: vec![], + created_at_ms: 0, + decided_at_ms: 0, + stage: "agent".to_string(), + supersedes_chunk_id: String::new(), + hit_count: 1, + first_seen_ms: 0, + last_seen_ms: 0, + binary, + }) +} + +fn network_rule_from_json( + rule: NetworkPolicyRuleJson, +) -> std::result::Result { + if rule.endpoints.is_empty() { + return Err("rule.endpoints must contain at least one endpoint".to_string()); + } + + let endpoints = rule + .endpoints + .into_iter() + .map(network_endpoint_from_json) + .collect::, _>>()?; + let binaries = rule + .binaries + .into_iter() + .map(|binary| NetworkBinary { + path: binary.path, + ..Default::default() + }) + .collect(); + + Ok(NetworkPolicyRule { + name: rule.name.unwrap_or_default(), + endpoints, + binaries, + }) +} + +fn network_endpoint_from_json( + endpoint: NetworkEndpointJson, +) -> std::result::Result { + if endpoint.host.trim().is_empty() { + return Err("endpoint.host is required".to_string()); + } + + let mut ports = endpoint.ports; + if ports.is_empty() && endpoint.port > 0 { + ports.push(endpoint.port); + } + if ports.is_empty() { + return Err("endpoint.port or endpoint.ports is required".to_string()); + } + if endpoint + .rules + .iter() + .any(|rule| rule.allow.path.contains('?')) + { + return Err("L7 allow paths must not include query strings".to_string()); + } + + let port = ports.first().copied().unwrap_or_default(); + let rules = endpoint + .rules + .into_iter() + .map(|rule| L7Rule { + allow: Some(L7Allow { + method: rule.allow.method, + path: rule.allow.path, + command: rule.allow.command, + query: HashMap::new(), + }), + }) + .collect(); + let deny_rules = endpoint + .deny_rules + .into_iter() + .map(|rule| L7DenyRule { + method: 
rule.method, + path: rule.path, + command: rule.command, + query: HashMap::new(), + }) + .collect(); + + Ok(NetworkEndpoint { + host: endpoint.host, + port, + protocol: endpoint.protocol, + tls: endpoint.tls, + enforcement: endpoint.enforcement, + access: endpoint.access, + rules, + allowed_ips: endpoint.allowed_ips, + ports, + deny_rules, + allow_encoded_slash: endpoint.allow_encoded_slash, + }) +} + +async fn read_request_body(initial_request: &[u8], client: &mut S) -> Result> +where + S: AsyncRead + Unpin, +{ + let Some(header_end) = find_header_end(initial_request) else { + return Ok(Vec::new()); + }; + let content_length = parse_content_length(&initial_request[..header_end])?; + if content_length > MAX_POLICY_LOCAL_BODY_BYTES { + return Err(miette::miette!( + "policy.local request body exceeds {MAX_POLICY_LOCAL_BODY_BYTES} bytes" + )); + } + + let mut body = initial_request[header_end..].to_vec(); + if body.len() > content_length { + body.truncate(content_length); + } + while body.len() < content_length { + let remaining = content_length - body.len(); + let mut chunk = vec![0u8; remaining.min(8192)]; + let n = client.read(&mut chunk).await.into_diagnostic()?; + if n == 0 { + return Err(miette::miette!("policy.local request body ended early")); + } + body.extend_from_slice(&chunk[..n]); + } + + Ok(body) +} + +fn parse_content_length(headers: &[u8]) -> Result { + let headers = String::from_utf8_lossy(headers); + for line in headers.lines().skip(1) { + if let Some((name, value)) = line.split_once(':') + && name.eq_ignore_ascii_case("content-length") + { + return value + .trim() + .parse::() + .into_diagnostic() + .map_err(|_| miette::miette!("invalid policy.local Content-Length")); + } + } + Ok(0) +} + +fn find_header_end(buf: &[u8]) -> Option { + buf.windows(4) + .position(|window| window == b"\r\n\r\n") + .map(|idx| idx + 4) +} + +async fn write_json_response( + client: &mut S, + status: u16, + payload: serde_json::Value, +) -> Result<()> +where + S: AsyncWrite 
+ Unpin, +{ + let body = payload.to_string(); + let response = format!( + "HTTP/1.1 {status} {}\r\n\ + Content-Type: application/json\r\n\ + Content-Length: {}\r\n\ + Connection: close\r\n\ + \r\n\ + {}", + status_text(status), + body.len(), + body + ); + client + .write_all(response.as_bytes()) + .await + .into_diagnostic()?; + client.flush().await.into_diagnostic()?; + Ok(()) +} + +fn status_text(status: u16) -> &'static str { + match status { + 202 => "Accepted", + 400 => "Bad Request", + 404 => "Not Found", + 500 => "Internal Server Error", + 502 => "Bad Gateway", + 503 => "Service Unavailable", + _ => "OK", + } +} + +fn error_payload(error: &str, detail: String) -> serde_json::Value { + serde_json::json!({ + "error": error, + "detail": detail + }) +} + +#[derive(Debug, Deserialize)] +struct ProposalRequest { + #[serde(default)] + intent_summary: Option, + #[serde(default)] + operations: Vec, +} + +#[derive(Debug, Deserialize)] +struct AddNetworkRuleJson { + #[serde(default, rename = "ruleName", alias = "rule_name")] + rule_name: Option, + rule: NetworkPolicyRuleJson, +} + +#[derive(Debug, Deserialize)] +struct NetworkPolicyRuleJson { + #[serde(default)] + name: Option, + #[serde(default)] + endpoints: Vec, + #[serde(default)] + binaries: Vec, +} + +#[derive(Debug, Deserialize)] +struct NetworkEndpointJson { + host: String, + #[serde(default)] + port: u32, + #[serde(default)] + ports: Vec, + #[serde(default)] + protocol: String, + #[serde(default)] + tls: String, + #[serde(default)] + enforcement: String, + #[serde(default)] + access: String, + #[serde(default)] + rules: Vec, + #[serde(default)] + allowed_ips: Vec, + #[serde(default)] + deny_rules: Vec, + #[serde(default)] + allow_encoded_slash: bool, +} + +#[derive(Debug, Deserialize)] +struct NetworkBinaryJson { + path: String, +} + +#[derive(Debug, Deserialize)] +struct L7RuleJson { + allow: L7AllowJson, +} + +#[derive(Debug, Deserialize)] +struct L7AllowJson { + #[serde(default)] + method: String, + 
#[serde(default)] + path: String, + #[serde(default)] + command: String, +} + +#[derive(Debug, Deserialize)] +struct L7DenyRuleJson { + #[serde(default)] + method: String, + #[serde(default)] + path: String, + #[serde(default)] + command: String, +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn proposal_chunks_from_body_accepts_add_rule_operation() { + let body = br#"{ + "intent_summary": "Allow gh to create one repo.", + "operations": [ + { + "addRule": { + "ruleName": "github_api_repo_create", + "rule": { + "endpoints": [ + { + "host": "api.github.com", + "port": 443, + "protocol": "rest", + "tls": "terminate", + "enforcement": "enforce", + "rules": [ + { + "allow": { + "method": "POST", + "path": "/user/repos" + } + } + ] + } + ], + "binaries": [ + { + "path": "/usr/bin/gh" + } + ] + } + } + } + ] + }"#; + + let chunks = proposal_chunks_from_body(body).unwrap(); + + assert_eq!(chunks.len(), 1); + assert_eq!(chunks[0].rule_name, "github_api_repo_create"); + assert_eq!(chunks[0].rationale, "Allow gh to create one repo."); + assert_eq!(chunks[0].binary, "/usr/bin/gh"); + let rule = chunks[0].proposed_rule.as_ref().unwrap(); + assert_eq!(rule.name, "github_api_repo_create"); + assert_eq!(rule.endpoints[0].host, "api.github.com"); + assert_eq!(rule.endpoints[0].port, 443); + assert_eq!(rule.endpoints[0].ports, vec![443]); + assert_eq!(rule.endpoints[0].protocol, "rest"); + assert_eq!( + rule.endpoints[0].rules[0].allow.as_ref().unwrap().path, + "/user/repos" + ); + } + + #[test] + fn proposal_chunks_from_body_rejects_query_in_l7_path() { + let body = br#"{ + "operations": [ + { + "addRule": { + "ruleName": "bad", + "rule": { + "endpoints": [ + { + "host": "api.github.com", + "port": 443, + "rules": [ + { + "allow": { + "method": "GET", + "path": "/repos?token=secret" + } + } + ] + } + ] + } + } + } + ] + }"#; + + let error = proposal_chunks_from_body(body).unwrap_err(); + assert!(error.contains("query strings")); + assert!(!error.contains("secret")); 
+ } + + #[tokio::test] + async fn current_policy_route_returns_yaml_envelope() { + let ctx = PolicyLocalContext::new( + Some(ProtoSandboxPolicy { + version: 1, + ..Default::default() + }), + None, + None, + ); + + let (mut client, mut server) = tokio::io::duplex(4096); + let request = + b"GET http://policy.local/v1/policy/current HTTP/1.1\r\nHost: policy.local\r\n\r\n"; + let task = tokio::spawn(async move { + handle_forward_request(&ctx, "GET", "/v1/policy/current", request, &mut server) + .await + .unwrap(); + }); + + let mut received = Vec::new(); + client.read_to_end(&mut received).await.unwrap(); + task.await.unwrap(); + + let response = String::from_utf8(received).unwrap(); + assert!(response.starts_with("HTTP/1.1 200 OK")); + let (_, body) = response.split_once("\r\n\r\n").unwrap(); + let body: serde_json::Value = serde_json::from_str(body).unwrap(); + assert_eq!(body["format"], "yaml"); + assert!(body["policy_yaml"].as_str().unwrap().contains("version: 1")); + } +} diff --git a/crates/openshell-sandbox/src/proxy.rs b/crates/openshell-sandbox/src/proxy.rs index 179576d82..f20e51655 100644 --- a/crates/openshell-sandbox/src/proxy.rs +++ b/crates/openshell-sandbox/src/proxy.rs @@ -8,6 +8,7 @@ use crate::identity::BinaryIdentityCache; use crate::l7::tls::ProxyTlsState; use crate::opa::{NetworkAction, OpaEngine, PolicyGenerationGuard}; use crate::policy::ProxyPolicy; +use crate::policy_local::{POLICY_LOCAL_HOST, PolicyLocalContext}; use crate::provider_credentials::ProviderCredentialState; use crate::secrets::{SecretResolver, rewrite_header_line}; use miette::{IntoDiagnostic, Result}; @@ -157,6 +158,7 @@ impl ProxyHandle { tls_state: Option>, inference_ctx: Option>, provider_credentials: Option, + policy_local_ctx: Option>, denial_tx: Option>, ) -> Result { // Use override bind_addr, fall back to policy http_addr, then default @@ -195,13 +197,22 @@ impl ProxyHandle { let spid = entrypoint_pid.clone(); let tls = tls_state.clone(); let inf = inference_ctx.clone(); 
+ let policy_local = policy_local_ctx.clone();
 let resolver = provider_credentials
 .as_ref()
 .and_then(ProviderCredentialState::resolver);
 let dtx = denial_tx.clone();
 tokio::spawn(async move {
 if let Err(err) = handle_tcp_connection(
- stream, opa, cache, spid, tls, inf, resolver, dtx,
+ stream,
+ opa,
+ cache,
+ spid,
+ tls,
+ inf,
+ policy_local,
+ resolver,
+ dtx,
 )
 .await
 {
@@ -316,6 +327,7 @@ async fn handle_tcp_connection(
 entrypoint_pid: Arc,
 tls_state: Option>,
 inference_ctx: Option>,
+ policy_local_ctx: Option>,
 secret_resolver: Option>,
 denial_tx: Option>,
 ) -> Result<()> {
@@ -360,6 +372,7 @@ async fn handle_tcp_connection(
 opa_engine,
 identity_cache,
 entrypoint_pid,
+ policy_local_ctx,
 secret_resolver,
 denial_tx.as_ref(),
 )
@@ -2411,6 +2424,7 @@ async fn handle_forward_proxy(
 opa_engine: Arc,
 identity_cache: Arc,
 entrypoint_pid: Arc,
+ policy_local_ctx: Option>,
 secret_resolver: Option>,
 denial_tx: Option<&mpsc::UnboundedSender>,
 ) -> Result<()> {
@@ -2434,6 +2448,38 @@ async fn handle_forward_proxy(
 };
 let host_lc = host.to_ascii_lowercase();
+ if host_lc == POLICY_LOCAL_HOST {
+ if scheme != "http" || port != 80 {
+ respond(
+ client,
+ &build_json_error_response(
+ 400,
+ "Bad Request",
+ "invalid_policy_local_scheme",
+ "Use http://policy.local only",
+ ),
+ )
+ .await?;
+ return Ok(());
+ }
+ if let Some(ctx) = policy_local_ctx {
+ return crate::policy_local::handle_forward_request(
+ &ctx,
+ method,
+ &path,
+ &buf[..used],
+ client,
+ )
+ .await;
+ }
+ respond(
+ client,
+ b"HTTP/1.1 503 Service Unavailable\r\nContent-Length: 30\r\n\r\npolicy.local is not configured",
+ )
+ .await?;
+ return Ok(());
+ }
+ // 2.
Reject HTTPS — must use CONNECT for TLS if scheme == "https" { { diff --git a/crates/openshell-sandbox/src/skills.rs b/crates/openshell-sandbox/src/skills.rs new file mode 100644 index 000000000..f7a26ea71 --- /dev/null +++ b/crates/openshell-sandbox/src/skills.rs @@ -0,0 +1,63 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Static agent guidance files exposed inside the sandbox. + +use miette::{IntoDiagnostic, Result}; +use std::path::{Path, PathBuf}; + +const SKILLS_RELATIVE_DIR: &str = "etc/openshell/skills"; +const POLICY_ADVISOR_FILE: &str = "policy_advisor.md"; +const POLICY_ADVISOR_CONTENT: &str = include_str!("skills/policy_advisor.md"); + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct InstalledSkills { + pub policy_advisor: PathBuf, +} + +pub fn install_static_skills() -> Result { + install_static_skills_at(Path::new("/")) +} + +fn install_static_skills_at(root: &Path) -> Result { + let skills_dir = root.join(SKILLS_RELATIVE_DIR); + std::fs::create_dir_all(&skills_dir).into_diagnostic()?; + + let policy_advisor = skills_dir.join(POLICY_ADVISOR_FILE); + std::fs::write(&policy_advisor, POLICY_ADVISOR_CONTENT).into_diagnostic()?; + + #[cfg(unix)] + { + use std::os::unix::fs::PermissionsExt as _; + + std::fs::set_permissions(&policy_advisor, std::fs::Permissions::from_mode(0o444)) + .into_diagnostic()?; + } + + Ok(InstalledSkills { policy_advisor }) +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn install_static_skills_at_writes_policy_advisor() { + let dir = tempfile::tempdir().unwrap(); + + let installed = install_static_skills_at(dir.path()).unwrap(); + + let expected = dir + .path() + .join("etc") + .join("openshell") + .join("skills") + .join("policy_advisor.md"); + assert_eq!(installed.policy_advisor, expected); + + let content = std::fs::read_to_string(expected).unwrap(); + assert!(content.contains("# OpenShell Policy 
Advisor")); + assert!(content.contains("policy.local")); + assert!(content.contains("PolicyMergeOperation")); + } +} diff --git a/crates/openshell-sandbox/src/skills/policy_advisor.md b/crates/openshell-sandbox/src/skills/policy_advisor.md new file mode 100644 index 000000000..741759e82 --- /dev/null +++ b/crates/openshell-sandbox/src/skills/policy_advisor.md @@ -0,0 +1,129 @@ +# OpenShell Policy Advisor + +Use this when OpenShell blocks a network request and the response or logs say +`policy_denied`. + +## Goal + +Draft the smallest policy proposal that allows the user's current task without +giving the sandbox broad new network access. The developer approves or rejects +the proposal; do not try to bypass policy. + +## Local API + +Use the sandbox-local policy API: + +- `GET http://policy.local/v1/policy/current` +- `GET http://policy.local/v1/denials?last=10` +- `POST http://policy.local/v1/proposals` + +The MVP proposal endpoint accepts a JSON object containing an `intent_summary` +and one or more `PolicyMergeOperation` objects. Start with a full `addRule` +operation because the existing developer inbox reviews complete draft rules. + +You can also inspect your own logs: + +- `/var/log/openshell.YYYY-MM-DD.log` is the default shorthand log. +- `/var/log/openshell-ocsf.YYYY-MM-DD.log` is present when OCSF JSONL logging is enabled. + +## Workflow + +1. Read the denial response body. Use `layer`, `method`, `path`, `host`, + `port`, `binary`, `rule_missing`, and `detail` as evidence. +2. Fetch the current policy from `policy.local`. +3. Fetch recent denials if the response body is incomplete. +4. Prefer L7 REST rules for REST APIs. Use L4 only for non-REST protocols or + when the client tunnels opaque traffic that OpenShell cannot inspect. +5. Draft the narrowest rule: exact host, exact port, exact binary when known, + exact method, and the smallest safe path glob. +6. 
Submit the proposal, tell the developer what you proposed, and wait for + approval before retrying the denied action. + +## Proposal Shapes + +Submit a complete narrow REST-inspected rule: + +```json +{ + "intent_summary": "Allow gh to update repository contents in NVIDIA/OpenShell only.", + "operations": [ + { + "addRule": { + "ruleName": "github_api_repo_contents_write", + "rule": { + "name": "github_api_repo_contents_write", + "endpoints": [ + { + "host": "api.github.com", + "port": 443, + "protocol": "rest", + "enforcement": "enforce", + "rules": [ + { + "allow": { + "method": "PUT", + "path": "/repos/NVIDIA/OpenShell/contents/**" + } + } + ] + } + ], + "binaries": [ + { + "path": "/usr/bin/gh" + } + ] + } + } + } + ] +} +``` + +For GitHub repository creation, keep the path exact: + +```json +{ + "intent_summary": "Allow gh to create a repository for the authenticated user.", + "operations": [ + { + "addRule": { + "ruleName": "github_api_repo_create", + "rule": { + "name": "github_api_repo_create", + "endpoints": [ + { + "host": "api.github.com", + "port": 443, + "protocol": "rest", + "enforcement": "enforce", + "rules": [ + { + "allow": { + "method": "POST", + "path": "/user/repos" + } + } + ] + } + ], + "binaries": [ + { + "path": "/usr/bin/gh" + } + ] + } + } + } + ] +} +``` + +## Norms + +- Do not propose wildcard hosts such as `**` or `*.com`. +- Do not propose `access: full` to fix a single denied REST request. +- Do not include query strings, tokens, credentials, or secret values. +- Explain uncertainty in `intent_summary` instead of widening the rule. +- If pushing with `git` fails, that may be a separate L4 or protocol-specific + path from GitHub REST API access; propose it separately. 
diff --git a/examples/agent-driven-policy-management/README.md b/examples/agent-driven-policy-management/README.md new file mode 100644 index 000000000..0fca1d217 --- /dev/null +++ b/examples/agent-driven-policy-management/README.md @@ -0,0 +1,83 @@ + + + +# Agent-Driven Policy Management Demo + +Run the first policy-advisor MVP loop from one host-side script: + +1. Use the active OpenShell gateway. +2. Create a GitHub provider from a host token. +3. Start a sandbox with read-only L7 GitHub API access. +4. Attempt a GitHub contents write from inside the sandbox and capture the + structured `policy_denied` response. +5. Submit a narrow policy proposal through `http://policy.local/v1/proposals`. +6. Approve the draft rule from outside the sandbox. +7. Retry the same write and confirm it succeeds. + +The script is deterministic. It does not launch a real coding agent; it uses +the same sandbox-local interfaces that the agent will use. + +## Prerequisites + +- An active OpenShell gateway that includes the current sandbox supervisor + build. +- `curl`, `jq`, and `ssh` on the host machine. +- The GitHub CLI (`gh`) if you want to create the scratch repo with the command + below. +- A disposable or demo-safe GitHub repository. +- A GitHub token with contents write permission for that repository. + +## Create A Scratch Repo + +Use a private scratch repository with an initial README. The README matters +because GitHub does not create the default branch until the first commit exists. + +```bash +gh repo create zredlined/openshell-policy-demo \ + --private \ + --add-readme \ + --description "OpenShell policy advisor demo scratch repo" +``` + +The demo never creates repositories and refuses to overwrite an existing demo +file. Each default run writes a new timestamped file under +`openshell-policy-advisor-demo/`. 
+
+## Quick Start
+
+```bash
+export DEMO_GITHUB_OWNER=<owner>
+export DEMO_GITHUB_REPO=<repo>
+export DEMO_GITHUB_TOKEN=<token-with-contents-write>
+
+bash examples/agent-driven-policy-management/demo.sh
+```
+
+If you use the GitHub CLI, this also works:
+
+```bash
+export DEMO_GITHUB_OWNER=<owner>
+export DEMO_GITHUB_REPO=<repo>
+export DEMO_GITHUB_TOKEN="$(gh auth token)"
+
+bash examples/agent-driven-policy-management/demo.sh
+```
+
+The demo writes one markdown file under:
+
+```text
+openshell-policy-advisor-demo/<run-id>.md
+```
+
+Use a scratch repository or a demo branch if you do not want this file in a
+production repository.
+
+## Options
+
+```bash
+export OPENSHELL_BIN=/path/to/openshell
+export DEMO_BRANCH=main
+export DEMO_RUN_ID="$(date +%Y%m%d-%H%M%S)"
+export DEMO_FILE_DIR=openshell-policy-advisor-demo
+export DEMO_KEEP_SANDBOX=0
+```
diff --git a/examples/agent-driven-policy-management/demo.sh b/examples/agent-driven-policy-management/demo.sh
new file mode 100755
index 000000000..7c6dc339c
--- /dev/null
+++ b/examples/agent-driven-policy-management/demo.sh
@@ -0,0 +1,385 @@
+#!/usr/bin/env bash
+
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+REPO_ROOT="$(cd "${SCRIPT_DIR}/../.."
&& pwd)" +POLICY_TEMPLATE="${SCRIPT_DIR}/policy.template.yaml" +RUNNER_SOURCE="${SCRIPT_DIR}/sandbox-runner.sh" + +if [[ -z "${OPENSHELL_BIN:-}" ]]; then + if [[ -x "${REPO_ROOT}/target/debug/openshell" ]]; then + OPENSHELL_BIN="${REPO_ROOT}/target/debug/openshell" + else + OPENSHELL_BIN="openshell" + fi +fi + +DEMO_BRANCH="${DEMO_BRANCH:-main}" +DEMO_RUN_ID="${DEMO_RUN_ID:-$(date +%Y%m%d-%H%M%S)}" +DEMO_FILE_DIR="${DEMO_FILE_DIR:-openshell-policy-advisor-demo}" +DEMO_FILE_PATH="${DEMO_FILE_PATH:-${DEMO_FILE_DIR}/${DEMO_RUN_ID}.md}" +DEMO_SANDBOX_NAME="${DEMO_SANDBOX_NAME:-policy-agent-${DEMO_RUN_ID}}" +DEMO_GITHUB_PROVIDER_NAME="${DEMO_GITHUB_PROVIDER_NAME:-github-policy-agent-${DEMO_RUN_ID}}" +DEMO_KEEP_SANDBOX="${DEMO_KEEP_SANDBOX:-0}" +DEMO_RETRY_ATTEMPTS="${DEMO_RETRY_ATTEMPTS:-30}" +DEMO_RETRY_SLEEP="${DEMO_RETRY_SLEEP:-2}" + +TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/openshell-agent-policy.XXXXXX")" +POLICY_FILE="${TMP_DIR}/policy.yaml" +SSH_CONFIG="${TMP_DIR}/ssh_config" +SSH_HOST="" + +BOLD='\033[1m' +DIM='\033[2m' +CYAN='\033[36m' +GREEN='\033[32m' +RED='\033[31m' +YELLOW='\033[33m' +RESET='\033[0m' + +step() { + printf "\n${BOLD}${CYAN}==> %s${RESET}\n\n" "$1" +} + +info() { + printf " %b\n" "$*" +} + +fail() { + printf "\n${RED}error:${RESET} %s\n" "$*" >&2 + exit 1 +} + +cleanup() { + local status=$? 
+ + if [[ "$DEMO_KEEP_SANDBOX" != "1" ]]; then + "$OPENSHELL_BIN" sandbox delete "$DEMO_SANDBOX_NAME" >/dev/null 2>&1 || true + else + printf "\n${YELLOW}Keeping sandbox because DEMO_KEEP_SANDBOX=1: %s${RESET}\n" "$DEMO_SANDBOX_NAME" + fi + + "$OPENSHELL_BIN" provider delete "$DEMO_GITHUB_PROVIDER_NAME" >/dev/null 2>&1 || true + + if [[ $status -eq 0 ]]; then + rm -rf "$TMP_DIR" + else + printf "\n${YELLOW}Temporary files kept at: %s${RESET}\n" "$TMP_DIR" + fi +} +trap cleanup EXIT + +require_command() { + command -v "$1" >/dev/null 2>&1 || fail "missing required command: $1" +} + +validate_name() { + local label="$1" + local value="$2" + [[ "$value" =~ ^[A-Za-z0-9_.-]+$ ]] || fail "$label may contain only letters, numbers, '.', '_', and '-'" +} + +validate_path() { + local label="$1" + local value="$2" + [[ "$value" =~ ^[A-Za-z0-9._/-]+$ ]] || fail "$label may contain only letters, numbers, '.', '_', '-', and '/'" + [[ "$value" != /* ]] || fail "$label must be relative" + [[ "$value" != *..* ]] || fail "$label must not contain '..'" +} + +resolve_token() { + if [[ -z "${DEMO_GITHUB_TOKEN:-}" ]]; then + if [[ -n "${GITHUB_TOKEN:-}" ]]; then + DEMO_GITHUB_TOKEN="$GITHUB_TOKEN" + elif [[ -n "${GH_TOKEN:-}" ]]; then + DEMO_GITHUB_TOKEN="$GH_TOKEN" + elif command -v gh >/dev/null 2>&1; then + DEMO_GITHUB_TOKEN="$(gh auth token 2>/dev/null || true)" + fi + fi + + [[ -n "${DEMO_GITHUB_TOKEN:-}" ]] || fail "set DEMO_GITHUB_TOKEN, GITHUB_TOKEN, GH_TOKEN, or sign in with gh" + export GITHUB_TOKEN="$DEMO_GITHUB_TOKEN" +} + +validate_env() { + require_command curl + require_command jq + require_command ssh + require_command "$OPENSHELL_BIN" + + [[ -f "$RUNNER_SOURCE" ]] || fail "missing sandbox runner: $RUNNER_SOURCE" + [[ -n "${DEMO_GITHUB_OWNER:-}" ]] || fail "set DEMO_GITHUB_OWNER" + [[ -n "${DEMO_GITHUB_REPO:-}" ]] || fail "set DEMO_GITHUB_REPO" + [[ "$DEMO_RUN_ID" =~ ^[a-z0-9-]+$ ]] || fail "DEMO_RUN_ID may contain only lowercase letters, numbers, and '-'" + [[ 
"$DEMO_RETRY_ATTEMPTS" =~ ^[0-9]+$ ]] || fail "DEMO_RETRY_ATTEMPTS must be a number" + [[ "$DEMO_RETRY_SLEEP" =~ ^[0-9]+$ ]] || fail "DEMO_RETRY_SLEEP must be a number" + + validate_name "DEMO_GITHUB_OWNER" "$DEMO_GITHUB_OWNER" + validate_name "DEMO_GITHUB_REPO" "$DEMO_GITHUB_REPO" + validate_path "DEMO_BRANCH" "$DEMO_BRANCH" + validate_path "DEMO_FILE_PATH" "$DEMO_FILE_PATH" + + resolve_token +} + +github_api_status() { + local url="$1" + local body="$2" + curl -sS \ + -o "$body" \ + -w "%{http_code}" \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer ${DEMO_GITHUB_TOKEN}" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + "$url" +} + +check_gateway() { + step "Checking active OpenShell gateway" + if ! "$OPENSHELL_BIN" status >/dev/null 2>&1; then + fail "active OpenShell gateway is not reachable; start one separately, for example: mise run cluster" + fi + "$OPENSHELL_BIN" status | sed 's/^/ /' +} + +check_github_access() { + step "Checking GitHub repository access" + local body status branch branches_body branches_status branches + body="${TMP_DIR}/github-repo.json" + status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}" "$body")" + + if [[ "$status" != "200" ]]; then + printf '%s\n' "$(jq -r '.message // empty' "$body" 2>/dev/null)" | sed 's/^/ /' + fail "GitHub returned HTTP $status for ${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}; check the repo name and token access" + fi + + if jq -e 'has("permissions") and (.permissions.push == false and .permissions.admin == false and .permissions.maintain == false)' "$body" >/dev/null; then + fail "GitHub token can read ${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO} but does not appear to have write access" + fi + + branch="$(jq -rn --arg v "$DEMO_BRANCH" '$v|@uri')" + body="${TMP_DIR}/github-branch.json" + status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/branches/${branch}" "$body")" + if [[ "$status" != "200" ]]; 
then + branches_body="${TMP_DIR}/github-branches.json" + branches_status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/branches?per_page=20" "$branches_body")" + if [[ "$branches_status" == "200" ]]; then + branches="$(jq -r 'map(.name) | join(", ")' "$branches_body")" + if [[ -z "$branches" ]]; then + fail "GitHub repo exists but has no branches yet; add an initial README or push ${DEMO_BRANCH} before running the demo" + fi + fail "GitHub returned HTTP $status for branch ${DEMO_BRANCH}; set DEMO_BRANCH to one of: ${branches}" + fi + fail "GitHub returned HTTP $status for branch ${DEMO_BRANCH}" + fi + + body="${TMP_DIR}/github-demo-file.json" + status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/contents/${DEMO_FILE_PATH}?ref=${branch}" "$body")" + if [[ "$status" == "200" ]]; then + fail "demo output file already exists: ${DEMO_FILE_PATH}; choose a new DEMO_RUN_ID or DEMO_FILE_PATH" + fi + [[ "$status" == "404" ]] || fail "GitHub returned HTTP $status while checking demo output path ${DEMO_FILE_PATH}" + + info "${GREEN}GitHub repo, branch, and output path are safe for this run.${RESET}" +} + +create_provider() { + step "Creating temporary GitHub provider" + "$OPENSHELL_BIN" provider delete "$DEMO_GITHUB_PROVIDER_NAME" >/dev/null 2>&1 || true + "$OPENSHELL_BIN" provider create \ + --name "$DEMO_GITHUB_PROVIDER_NAME" \ + --type github \ + --credential GITHUB_TOKEN +} + +create_sandbox() { + step "Creating sandbox with read-only GitHub L7 policy" + cp "$POLICY_TEMPLATE" "$POLICY_FILE" + "$OPENSHELL_BIN" sandbox delete "$DEMO_SANDBOX_NAME" >/dev/null 2>&1 || true + "$OPENSHELL_BIN" sandbox create \ + --name "$DEMO_SANDBOX_NAME" \ + --provider "$DEMO_GITHUB_PROVIDER_NAME" \ + --policy "$POLICY_FILE" \ + --upload "${RUNNER_SOURCE}:/sandbox/policy-demo-runner.sh" \ + --no-git-ignore \ + --keep \ + --no-auto-providers \ + --no-tty \ + -- bash -lc "chmod +x 
/sandbox/policy-demo-runner.sh && echo sandbox ready" +} + +connect_ssh() { + step "Connecting to sandbox over SSH" + "$OPENSHELL_BIN" sandbox ssh-config "$DEMO_SANDBOX_NAME" > "$SSH_CONFIG" + SSH_HOST="$(awk '/^Host / { print $2; exit }' "$SSH_CONFIG")" + [[ -n "$SSH_HOST" ]] || fail "could not find Host entry in sandbox SSH config" + + local retries=30 + local i + for i in $(seq 1 "$retries"); do + if ssh -F "$SSH_CONFIG" "$SSH_HOST" true >/dev/null 2>&1; then + return + fi + sleep 2 + done + fail "SSH connection to sandbox timed out" +} + +sandbox_exec() { + ssh -F "$SSH_CONFIG" "$SSH_HOST" "$@" +} + +http_status() { + awk -F= '/^HTTP_STATUS=/ { print $2; exit }' +} + +http_body() { + sed '/^HTTP_STATUS=/d' +} + +run_policy_local_checks() { + step "Checking sandbox-local skill and policy.local" + sandbox_exec /sandbox/policy-demo-runner.sh check-skill >/dev/null + info "${GREEN}Skill installed:${RESET} /etc/openshell/skills/policy_advisor.md" + + local output + output="$(sandbox_exec /sandbox/policy-demo-runner.sh current-policy)" + local status + status="$(printf '%s\n' "$output" | http_status)" + [[ "$status" == "200" ]] || fail "policy.local current policy returned HTTP $status" + + info "${GREEN}policy.local returned the current sandbox policy.${RESET}" + info "Initial policy: read-only REST access to api.github.com for /usr/bin/curl" +} + +attempt_write() { + sandbox_exec /sandbox/policy-demo-runner.sh put-file \ + "$DEMO_GITHUB_OWNER" \ + "$DEMO_GITHUB_REPO" \ + "$DEMO_BRANCH" \ + "$DEMO_FILE_PATH" \ + "$DEMO_RUN_ID" +} + +submit_policy_proposal() { + sandbox_exec /sandbox/policy-demo-runner.sh submit-proposal \ + "$DEMO_GITHUB_OWNER" \ + "$DEMO_GITHUB_REPO" \ + "$DEMO_FILE_PATH" +} + +capture_initial_denial() { + step "Attempting GitHub contents write from inside sandbox" + local output + output="$(attempt_write)" + local status + local body + status="$(printf '%s\n' "$output" | http_status)" + body="$(printf '%s\n' "$output" | http_body)" + + [[ 
"$status" == "403" ]] || fail "expected OpenShell HTTP 403, got HTTP $status" + printf '%s\n' "$body" | jq -e '.error == "policy_denied"' >/dev/null \ + || fail "expected structured policy_denied body" + printf '%s\n' "$body" | jq -e '.layer == "l7" and .protocol == "rest" and .method == "PUT"' >/dev/null \ + || fail "expected structured L7 REST deny fields" + + printf '%s\n' "$body" | jq -r ' + "Denied: \(.method) \(.path)", + "Layer: \(.layer)/\(.protocol) host=\(.host):\(.port) binary=\(.binary)", + "Agent guidance: \(.next_steps | map(.action) | join(" -> "))" + ' | sed 's/^/ /' + info "${GREEN}Captured structured L7 policy denial.${RESET}" +} + +submit_and_approve() { + step "Submitting proposal through policy.local" + local output + output="$(submit_policy_proposal)" + local status + local body + status="$(printf '%s\n' "$output" | http_status)" + body="$(printf '%s\n' "$output" | http_body)" + + [[ "$status" == "202" ]] || fail "expected proposal submit HTTP 202, got HTTP $status" + [[ "$(printf '%s\n' "$body" | jq -r '.accepted_chunks // 0')" != "0" ]] \ + || fail "proposal was not accepted" + printf '%s\n' "$body" | jq -r '"Proposal submitted: \(.accepted_chunks) accepted, \(.rejected_chunks) rejected"' | sed 's/^/ /' + + step "Approving pending draft rule from outside the sandbox" + "$OPENSHELL_BIN" rule get "$DEMO_SANDBOX_NAME" --status pending | sed 's/^/ /' + "$OPENSHELL_BIN" rule approve-all "$DEMO_SANDBOX_NAME" | sed 's/^/ /' +} + +print_success_summary() { + jq '{ + path: .content.path, + html_url: .content.html_url, + commit: .commit.sha, + message: .commit.message + }' +} + +retry_until_allowed() { + step "Retrying GitHub contents write after approval" + local output status body attempt + + for attempt in $(seq 1 "$DEMO_RETRY_ATTEMPTS"); do + output="$(attempt_write)" + status="$(printf '%s\n' "$output" | http_status)" + body="$(printf '%s\n' "$output" | http_body)" + + if printf '%s\n' "$body" | jq -e '.error == "policy_denied"' >/dev/null 2>&1; 
then + info "${DIM}Attempt ${attempt}/${DEMO_RETRY_ATTEMPTS}: policy not loaded yet; retrying...${RESET}" + sleep "$DEMO_RETRY_SLEEP" + continue + fi + + if [[ "$status" == "200" || "$status" == "201" ]]; then + printf '%s\n' "$body" | print_success_summary | sed 's/^/ /' + info "${GREEN}GitHub write succeeded from inside the sandbox.${RESET}" + return + fi + + printf '%s\n' "$body" | jq . | sed 's/^/ /' + if [[ "$status" == "404" ]]; then + fail "policy allowed the request, but GitHub returned HTTP 404; check DEMO_GITHUB_OWNER, DEMO_GITHUB_REPO, and token access" + fi + fail "policy allowed the request, but GitHub returned HTTP $status" + done + + fail "timed out waiting for approved policy to load into the sandbox" +} + +show_logs() { + step "Policy decision trace" + "$OPENSHELL_BIN" logs "$DEMO_SANDBOX_NAME" --since 5m -n 50 2>&1 \ + | grep -E 'HTTP:PUT|CONFIG:LOADED|ReportPolicyStatus' \ + | tail -n 8 \ + | sed 's/^/ /' || true +} + +main() { + validate_env + check_gateway + check_github_access + create_provider + create_sandbox + connect_ssh + run_policy_local_checks + capture_initial_denial + submit_and_approve + retry_until_allowed + show_logs + + printf "\n${BOLD}${GREEN}✓ Demo complete.${RESET}\n\n" + printf " Sandbox: %s\n" "$DEMO_SANDBOX_NAME" + printf " Repository: https://github.com/%s/%s\n" "$DEMO_GITHUB_OWNER" "$DEMO_GITHUB_REPO" + printf " File: %s\n" "$DEMO_FILE_PATH" +} + +main "$@" diff --git a/examples/agent-driven-policy-management/policy.template.yaml b/examples/agent-driven-policy-management/policy.template.yaml new file mode 100644 index 000000000..6452cb01c --- /dev/null +++ b/examples/agent-driven-policy-management/policy.template.yaml @@ -0,0 +1,28 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +version: 1 + +filesystem_policy: + include_workdir: true + read_only: [/usr, /lib, /proc, /dev/urandom, /app, /etc, /var/log] + read_write: [/sandbox, /tmp, /dev/null] + +landlock: + compatibility: best_effort + +process: + run_as_user: sandbox + run_as_group: sandbox + +network_policies: + github_api_readonly: + name: github-api-readonly + endpoints: + - host: api.github.com + port: 443 + protocol: rest + enforcement: enforce + access: read-only + binaries: + - { path: /usr/bin/curl } diff --git a/examples/agent-driven-policy-management/sandbox-runner.sh b/examples/agent-driven-policy-management/sandbox-runner.sh new file mode 100755 index 000000000..780e85573 --- /dev/null +++ b/examples/agent-driven-policy-management/sandbox-runner.sh @@ -0,0 +1,142 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +cmd="$1" +shift + +json_status_response() { + local status="$1" + local body="$2" + printf 'HTTP_STATUS=%s\n' "$status" + cat "$body" + printf '\n' +} + +case "$cmd" in + check-skill) + test -f /etc/openshell/skills/policy_advisor.md + sed -n '1,40p' /etc/openshell/skills/policy_advisor.md + ;; + + current-policy) + body="$(mktemp)" + status="$(curl -sS -o "$body" -w "%{http_code}" http://policy.local/v1/policy/current)" + json_status_response "$status" "$body" + ;; + + put-file) + owner="$1" + repo="$2" + branch="$3" + file_path="$4" + run_id="$5" + body="$(mktemp)" + payload="$(mktemp)" + + python3 - "$branch" "$run_id" > "$payload" <<'PY' +import base64 +import json +import sys + +branch, run_id = sys.argv[1:3] +content = f"""# OpenShell policy advisor demo + +Run id: {run_id} + +This file was written from inside an OpenShell sandbox after an agent-authored +policy proposal was approved. 
+""" + +payload = { + "message": f"docs: add OpenShell policy advisor demo note {run_id}", + "branch": branch, + "content": base64.b64encode(content.encode("utf-8")).decode("ascii"), +} +print(json.dumps(payload)) +PY + + status="$(curl -sS \ + -o "$body" \ + -w "%{http_code}" \ + -X PUT \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer ${GITHUB_TOKEN}" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + -H "Content-Type: application/json" \ + --data-binary "@${payload}" \ + "https://api.github.com/repos/${owner}/${repo}/contents/${file_path}")" + json_status_response "$status" "$body" + ;; + + submit-proposal) + owner="$1" + repo="$2" + file_path="$3" + body="$(mktemp)" + payload="$(mktemp)" + + python3 - "$owner" "$repo" "$file_path" > "$payload" <<'PY' +import json +import sys + +owner, repo, file_path = sys.argv[1:4] +rule_path = f"/repos/{owner}/{repo}/contents/{file_path}" +payload = { + "intent_summary": ( + "Allow curl to write the demo note to " + f"{owner}/{repo} at {file_path} only." 
+ ), + "operations": [ + { + "addRule": { + "ruleName": "github_api_demo_contents_write", + "rule": { + "name": "github_api_demo_contents_write", + "endpoints": [ + { + "host": "api.github.com", + "port": 443, + "protocol": "rest", + "enforcement": "enforce", + "rules": [ + { + "allow": { + "method": "PUT", + "path": rule_path, + } + } + ], + } + ], + "binaries": [ + { + "path": "/usr/bin/curl", + } + ], + }, + } + } + ], +} +print(json.dumps(payload)) +PY + + status="$(curl -sS \ + -o "$body" \ + -w "%{http_code}" \ + -X POST \ + -H "Content-Type: application/json" \ + --data-binary "@${payload}" \ + http://policy.local/v1/proposals)" + json_status_response "$status" "$body" + ;; + + *) + echo "unknown command: $cmd" >&2 + exit 64 + ;; +esac From 4893a4d6f61147760984f02f0475e73a4366716d Mon Sep 17 00:00:00 2001 From: Alexander Watson Date: Mon, 4 May 2026 07:18:51 -0700 Subject: [PATCH 05/23] test(examples): add codex policy dogfood loop --- .../agent-driven-policy-management/README.md | 34 +- .../agent-driven-policy-management/dogfood.sh | 349 ++++++++++++++++++ .../policy.template.yaml | 31 ++ .../prompts/codex-dogfood.md | 47 +++ .../sandbox-runner.sh | 113 ++++++ 5 files changed, 572 insertions(+), 2 deletions(-) create mode 100755 examples/agent-driven-policy-management/dogfood.sh create mode 100644 examples/agent-driven-policy-management/prompts/codex-dogfood.md diff --git a/examples/agent-driven-policy-management/README.md b/examples/agent-driven-policy-management/README.md index 0fca1d217..2a5ee4561 100644 --- a/examples/agent-driven-policy-management/README.md +++ b/examples/agent-driven-policy-management/README.md @@ -14,8 +14,13 @@ Run the first policy-advisor MVP loop from one host-side script: 6. Approve the draft rule from outside the sandbox. 7. Retry the same write and confirm it succeeds. -The script is deterministic. It does not launch a real coding agent; it uses -the same sandbox-local interfaces that the agent will use. 
+`demo.sh` is deterministic. It does not launch a real coding agent; it uses the
+same sandbox-local interfaces that the agent will use.
+
+`dogfood.sh` runs the next loop: Codex starts inside the sandbox, observes the
+structured denial, reads `/etc/openshell/skills/policy_advisor.md`, drafts and
+submits a narrow proposal through `policy.local`, then retries after the host
+developer approves.
 
 ## Prerequisites
 
@@ -63,12 +68,36 @@ export DEMO_GITHUB_TOKEN="$(gh auth token)"
 bash examples/agent-driven-policy-management/demo.sh
 ```
 
+## Codex Dogfood
+
+Sign in to Codex locally, then run:
+
+```bash
+codex login
+
+export DEMO_GITHUB_OWNER=<your-github-user-or-org>
+export DEMO_GITHUB_REPO=<your-scratch-repo>
+export DEMO_GITHUB_TOKEN="$(gh auth token)"
+
+bash examples/agent-driven-policy-management/dogfood.sh
+```
+
+The host script only orchestrates sandbox lifecycle and developer approval. The
+policy proposal is authored by Codex inside the sandbox from the installed
+skill, structured denial response, and `policy.local` API.
+
 The demo writes one markdown file under:
 
 ```text
 openshell-policy-advisor-demo/<run-id>.md
 ```
 
+The dogfood run writes under:
+
+```text
+openshell-policy-advisor-dogfood/<run-id>.md
+```
+
 Use a scratch repository or a demo branch if you do not want this file in a
 production repository.
 
@@ -80,4 +109,5 @@ export DEMO_BRANCH=main
 export DEMO_RUN_ID="$(date +%Y%m%d-%H%M%S)"
 export DEMO_FILE_DIR=openshell-policy-advisor-demo
 export DEMO_KEEP_SANDBOX=0
+export DEMO_APPROVAL_TIMEOUT_SECS=180
 ```
diff --git a/examples/agent-driven-policy-management/dogfood.sh b/examples/agent-driven-policy-management/dogfood.sh
new file mode 100755
index 000000000..c9237d4ed
--- /dev/null
+++ b/examples/agent-driven-policy-management/dogfood.sh
@@ -0,0 +1,349 @@
+#!/usr/bin/env bash
+
+# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" +POLICY_TEMPLATE="${SCRIPT_DIR}/policy.template.yaml" +RUNNER_SOURCE="${SCRIPT_DIR}/sandbox-runner.sh" +PROMPT_SOURCE="${SCRIPT_DIR}/prompts/codex-dogfood.md" + +if [[ -z "${OPENSHELL_BIN:-}" ]]; then + if [[ -x "${REPO_ROOT}/target/debug/openshell" ]]; then + OPENSHELL_BIN="${REPO_ROOT}/target/debug/openshell" + else + OPENSHELL_BIN="openshell" + fi +fi + +DEMO_BRANCH="${DEMO_BRANCH:-main}" +DEMO_RUN_ID="${DEMO_RUN_ID:-$(date +%Y%m%d-%H%M%S)}" +DEMO_FILE_DIR="${DEMO_FILE_DIR:-openshell-policy-advisor-dogfood}" +DEMO_FILE_PATH="${DEMO_FILE_PATH:-${DEMO_FILE_DIR}/${DEMO_RUN_ID}.md}" +DEMO_SANDBOX_NAME="${DEMO_SANDBOX_NAME:-policy-agent-dogfood-${DEMO_RUN_ID}}" +DEMO_CODEX_PROVIDER_NAME="${DEMO_CODEX_PROVIDER_NAME:-codex-policy-agent-${DEMO_RUN_ID}}" +DEMO_GITHUB_PROVIDER_NAME="${DEMO_GITHUB_PROVIDER_NAME:-github-policy-agent-${DEMO_RUN_ID}}" +DEMO_APPROVAL_TIMEOUT_SECS="${DEMO_APPROVAL_TIMEOUT_SECS:-180}" +DEMO_KEEP_SANDBOX="${DEMO_KEEP_SANDBOX:-0}" + +TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/openshell-agent-policy-dogfood.XXXXXX")" +PAYLOAD_DIR="${TMP_DIR}/payload" +POLICY_FILE="${TMP_DIR}/policy.yaml" +AGENT_LOG="${TMP_DIR}/codex-dogfood.log" +PENDING_FILE="${TMP_DIR}/pending-rule.txt" +mkdir -p "${PAYLOAD_DIR}/prompts" + +BOLD='\033[1m' +DIM='\033[2m' +CYAN='\033[36m' +GREEN='\033[32m' +RED='\033[31m' +YELLOW='\033[33m' +RESET='\033[0m' + +AGENT_PID="" + +step() { + printf "\n${BOLD}${CYAN}==> %s${RESET}\n\n" "$1" +} + +info() { + printf " %b\n" "$*" +} + +redact_output() { + sed -E \ + -e 's|(download_url": "https://raw\.githubusercontent\.com[^?"]+\?token=)[^"]+|\1|g' \ + -e 's|(Authorization: Bearer )[A-Za-z0-9._-]+|\1|g' +} + +fail() { + printf "\n${RED}error:${RESET} %s\n" "$*" >&2 + if [[ -f "$AGENT_LOG" ]]; then + printf "\n${YELLOW}Agent log tail:${RESET}\n" >&2 + tail -n 120 "$AGENT_LOG" | 
redact_output | sed 's/^/ /' >&2 || true + fi + exit 1 +} + +cleanup() { + local status=$? + + if [[ "$DEMO_KEEP_SANDBOX" != "1" ]]; then + "$OPENSHELL_BIN" sandbox delete "$DEMO_SANDBOX_NAME" >/dev/null 2>&1 || true + else + printf "\n${YELLOW}Keeping sandbox because DEMO_KEEP_SANDBOX=1: %s${RESET}\n" "$DEMO_SANDBOX_NAME" + fi + + "$OPENSHELL_BIN" provider delete "$DEMO_CODEX_PROVIDER_NAME" >/dev/null 2>&1 || true + "$OPENSHELL_BIN" provider delete "$DEMO_GITHUB_PROVIDER_NAME" >/dev/null 2>&1 || true + + if [[ $status -eq 0 ]]; then + rm -rf "$TMP_DIR" + else + printf "\n${YELLOW}Temporary files kept at: %s${RESET}\n" "$TMP_DIR" + fi +} +trap cleanup EXIT + +require_command() { + command -v "$1" >/dev/null 2>&1 || fail "missing required command: $1" +} + +validate_name() { + local label="$1" + local value="$2" + [[ "$value" =~ ^[A-Za-z0-9_.-]+$ ]] || fail "$label may contain only letters, numbers, '.', '_', and '-'" +} + +validate_path() { + local label="$1" + local value="$2" + [[ "$value" =~ ^[A-Za-z0-9._/-]+$ ]] || fail "$label may contain only letters, numbers, '.', '_', '-', and '/'" + [[ "$value" != /* ]] || fail "$label must be relative" + [[ "$value" != *..* ]] || fail "$label must not contain '..'" +} + +resolve_github_token() { + if [[ -z "${DEMO_GITHUB_TOKEN:-}" ]]; then + if [[ -n "${GITHUB_TOKEN:-}" ]]; then + DEMO_GITHUB_TOKEN="$GITHUB_TOKEN" + elif [[ -n "${GH_TOKEN:-}" ]]; then + DEMO_GITHUB_TOKEN="$GH_TOKEN" + elif command -v gh >/dev/null 2>&1; then + DEMO_GITHUB_TOKEN="$(gh auth token 2>/dev/null || true)" + fi + fi + + [[ -n "${DEMO_GITHUB_TOKEN:-}" ]] || fail "set DEMO_GITHUB_TOKEN, GITHUB_TOKEN, GH_TOKEN, or sign in with gh" + export GITHUB_TOKEN="$DEMO_GITHUB_TOKEN" +} + +resolve_codex_auth() { + [[ -f "${HOME}/.codex/auth.json" ]] || fail "missing local Codex sign-in; run: codex login" + + export CODEX_AUTH_ACCESS_TOKEN + export CODEX_AUTH_REFRESH_TOKEN + export CODEX_AUTH_ACCOUNT_ID + CODEX_AUTH_ACCESS_TOKEN="$(jq -r '.tokens.access_token 
// empty' "${HOME}/.codex/auth.json")" + CODEX_AUTH_REFRESH_TOKEN="$(jq -r '.tokens.refresh_token // empty' "${HOME}/.codex/auth.json")" + CODEX_AUTH_ACCOUNT_ID="$(jq -r '.tokens.account_id // empty' "${HOME}/.codex/auth.json")" + + [[ -n "$CODEX_AUTH_ACCESS_TOKEN" ]] || fail "local Codex sign-in is missing an access token; run: codex login" + [[ -n "$CODEX_AUTH_REFRESH_TOKEN" ]] || fail "local Codex sign-in is missing a refresh token; run: codex login" + [[ -n "$CODEX_AUTH_ACCOUNT_ID" ]] || fail "local Codex sign-in is missing an account id; run: codex login" +} + +validate_env() { + require_command curl + require_command jq + require_command "$OPENSHELL_BIN" + + [[ -f "$RUNNER_SOURCE" ]] || fail "missing sandbox runner: $RUNNER_SOURCE" + [[ -f "$PROMPT_SOURCE" ]] || fail "missing Codex prompt: $PROMPT_SOURCE" + [[ -n "${DEMO_GITHUB_OWNER:-}" ]] || fail "set DEMO_GITHUB_OWNER" + [[ -n "${DEMO_GITHUB_REPO:-}" ]] || fail "set DEMO_GITHUB_REPO" + [[ "$DEMO_RUN_ID" =~ ^[a-z0-9-]+$ ]] || fail "DEMO_RUN_ID may contain only lowercase letters, numbers, and '-'" + [[ "$DEMO_APPROVAL_TIMEOUT_SECS" =~ ^[0-9]+$ ]] || fail "DEMO_APPROVAL_TIMEOUT_SECS must be a number" + + validate_name "DEMO_GITHUB_OWNER" "$DEMO_GITHUB_OWNER" + validate_name "DEMO_GITHUB_REPO" "$DEMO_GITHUB_REPO" + validate_path "DEMO_BRANCH" "$DEMO_BRANCH" + validate_path "DEMO_FILE_PATH" "$DEMO_FILE_PATH" + + resolve_github_token + resolve_codex_auth +} + +github_api_status() { + local url="$1" + local body="$2" + curl -sS \ + -o "$body" \ + -w "%{http_code}" \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer ${DEMO_GITHUB_TOKEN}" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + "$url" +} + +urlencode() { + jq -rn --arg v "$1" '$v|@uri' +} + +check_gateway() { + step "Checking active OpenShell gateway" + "$OPENSHELL_BIN" status >/dev/null 2>&1 \ + || fail "active OpenShell gateway is not reachable; start one separately" + "$OPENSHELL_BIN" status | sed 's/^/ /' +} + +check_github_access() 
{ + step "Checking GitHub repository access" + local body status branch + body="${TMP_DIR}/github-repo.json" + status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}" "$body")" + [[ "$status" == "200" ]] \ + || fail "GitHub returned HTTP $status for ${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}; check the repo name and token access" + + if jq -e 'has("permissions") and (.permissions.push == false and .permissions.admin == false and .permissions.maintain == false)' "$body" >/dev/null; then + fail "GitHub token can read ${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO} but does not appear to have write access" + fi + + branch="$(urlencode "$DEMO_BRANCH")" + body="${TMP_DIR}/github-branch.json" + status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/branches/${branch}" "$body")" + [[ "$status" == "200" ]] || fail "GitHub returned HTTP $status for branch ${DEMO_BRANCH}" + + body="${TMP_DIR}/github-demo-file.json" + status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/contents/${DEMO_FILE_PATH}?ref=${branch}" "$body")" + if [[ "$status" == "200" ]]; then + fail "dogfood output file already exists: ${DEMO_FILE_PATH}; choose a new DEMO_RUN_ID or DEMO_FILE_PATH" + fi + [[ "$status" == "404" ]] || fail "GitHub returned HTTP $status while checking output path ${DEMO_FILE_PATH}" + + info "${GREEN}GitHub repo, branch, and output path are safe for this run.${RESET}" +} + +prepare_payload() { + cp "$POLICY_TEMPLATE" "$POLICY_FILE" + cp "$RUNNER_SOURCE" "${PAYLOAD_DIR}/policy-demo-runner.sh" + cp "$PROMPT_SOURCE" "${PAYLOAD_DIR}/prompts/codex-dogfood.md" + chmod +x "${PAYLOAD_DIR}/policy-demo-runner.sh" +} + +create_providers() { + step "Creating temporary Codex and GitHub providers" + "$OPENSHELL_BIN" provider delete "$DEMO_CODEX_PROVIDER_NAME" >/dev/null 2>&1 || true + "$OPENSHELL_BIN" provider delete "$DEMO_GITHUB_PROVIDER_NAME" >/dev/null 2>&1 || true + + 
"$OPENSHELL_BIN" provider create \ + --name "$DEMO_CODEX_PROVIDER_NAME" \ + --type generic \ + --credential CODEX_AUTH_ACCESS_TOKEN \ + --credential CODEX_AUTH_REFRESH_TOKEN \ + --credential CODEX_AUTH_ACCOUNT_ID >/dev/null + + "$OPENSHELL_BIN" provider create \ + --name "$DEMO_GITHUB_PROVIDER_NAME" \ + --type github \ + --credential GITHUB_TOKEN >/dev/null + + info "${GREEN}Created provider records for this run.${RESET}" +} + +start_codex_sandbox() { + step "Starting Codex dogfood run inside the sandbox" + "$OPENSHELL_BIN" sandbox delete "$DEMO_SANDBOX_NAME" >/dev/null 2>&1 || true + ( + "$OPENSHELL_BIN" sandbox create \ + --name "$DEMO_SANDBOX_NAME" \ + --from base \ + --provider "$DEMO_CODEX_PROVIDER_NAME" \ + --provider "$DEMO_GITHUB_PROVIDER_NAME" \ + --policy "$POLICY_FILE" \ + --upload "${PAYLOAD_DIR}:/sandbox" \ + --no-git-ignore \ + --keep \ + --no-auto-providers \ + --no-tty \ + -- bash /sandbox/payload/policy-demo-runner.sh codex-dogfood \ + "$DEMO_GITHUB_OWNER" \ + "$DEMO_GITHUB_REPO" \ + "$DEMO_BRANCH" \ + "$DEMO_FILE_PATH" \ + "$DEMO_RUN_ID" + ) >"$AGENT_LOG" 2>&1 & + AGENT_PID="$!" + info "${DIM}Codex run started; log: ${AGENT_LOG}${RESET}" +} + +approve_when_pending() { + step "Waiting for Codex to submit a policy proposal" + local start now + start="$(date +%s)" + + while true; do + if ! 
kill -0 "$AGENT_PID" >/dev/null 2>&1; then + wait "$AGENT_PID" || true + fail "Codex exited before a pending proposal appeared" + fi + + "$OPENSHELL_BIN" rule get "$DEMO_SANDBOX_NAME" --status pending >"$PENDING_FILE" 2>/dev/null || true + if grep -q "Chunk:" "$PENDING_FILE" && grep -q "pending" "$PENDING_FILE"; then + info "${GREEN}Codex submitted a pending proposal.${RESET}" + sed 's/^/ /' "$PENDING_FILE" + + step "Approving pending draft rule from outside the sandbox" + "$OPENSHELL_BIN" rule approve-all "$DEMO_SANDBOX_NAME" | sed 's/^/ /' + return + fi + + now="$(date +%s)" + if (( now - start >= DEMO_APPROVAL_TIMEOUT_SECS )); then + fail "timed out waiting for Codex to submit a policy proposal" + fi + + sleep 2 + done +} + +wait_for_codex() { + step "Waiting for Codex to retry after approval" + if ! wait "$AGENT_PID"; then + fail "Codex dogfood run failed" + fi + info "${GREEN}Codex dogfood run completed.${RESET}" +} + +show_codex_final_message() { + step "Codex final message" + awk ' + /CODEX_FINAL_MESSAGE_BEGIN/ { printing = 1; next } + /CODEX_FINAL_MESSAGE_END/ { printing = 0 } + printing { print } + ' "$AGENT_LOG" | redact_output | sed 's/^/ /' +} + +verify_github_write() { + step "Verifying GitHub write" + local body status branch + branch="$(urlencode "$DEMO_BRANCH")" + body="${TMP_DIR}/github-created-file.json" + status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/contents/${DEMO_FILE_PATH}?ref=${branch}" "$body")" + [[ "$status" == "200" ]] || fail "expected demo file to exist after Codex run; GitHub returned HTTP $status" + + jq -r '"File: \(.path)", "URL: \(.html_url)"' "$body" | sed 's/^/ /' +} + +show_logs() { + step "Policy decision trace" + "$OPENSHELL_BIN" logs "$DEMO_SANDBOX_NAME" --since 10m -n 80 2>&1 \ + | grep -E 'HTTP:PUT|CONFIG:LOADED|ReportPolicyStatus' \ + | tail -n 12 \ + | sed 's/^/ /' || true +} + +main() { + validate_env + prepare_payload + check_gateway + check_github_access + 
create_providers + start_codex_sandbox + approve_when_pending + wait_for_codex + show_codex_final_message + verify_github_write + show_logs + + printf "\n${BOLD}${GREEN}✓ Codex dogfood complete.${RESET}\n\n" + printf " Sandbox: %s\n" "$DEMO_SANDBOX_NAME" + printf " Repository: https://github.com/%s/%s\n" "$DEMO_GITHUB_OWNER" "$DEMO_GITHUB_REPO" + printf " File: %s\n" "$DEMO_FILE_PATH" +} + +main "$@" diff --git a/examples/agent-driven-policy-management/policy.template.yaml b/examples/agent-driven-policy-management/policy.template.yaml index 6452cb01c..b52b94ca1 100644 --- a/examples/agent-driven-policy-management/policy.template.yaml +++ b/examples/agent-driven-policy-management/policy.template.yaml @@ -16,6 +16,37 @@ process: run_as_group: sandbox network_policies: + codex: + name: codex + endpoints: + - { host: api.openai.com, port: 443, protocol: rest, enforcement: enforce, access: full } + - { host: auth.openai.com, port: 443, protocol: rest, enforcement: enforce, access: full } + - { host: chatgpt.com, port: 443, protocol: rest, enforcement: enforce, access: full } + - { host: ab.chatgpt.com, port: 443, protocol: rest, enforcement: enforce, access: full } + binaries: + - { path: /usr/bin/codex } + - { path: /usr/bin/node } + - { path: "/usr/lib/node_modules/@openai/**" } + + codex_plugins: + name: codex-plugins + endpoints: + - host: github.com + port: 443 + protocol: rest + enforcement: enforce + rules: + - allow: + method: GET + path: "/openai/plugins.git/info/refs*" + - allow: + method: POST + path: "/openai/plugins.git/git-upload-pack" + binaries: + - { path: /usr/bin/git } + - { path: /usr/lib/git-core/git-remote-http } + - { path: "/usr/lib/node_modules/@openai/**" } + github_api_readonly: name: github-api-readonly endpoints: diff --git a/examples/agent-driven-policy-management/prompts/codex-dogfood.md b/examples/agent-driven-policy-management/prompts/codex-dogfood.md new file mode 100644 index 000000000..4e8be9977 --- /dev/null +++ 
b/examples/agent-driven-policy-management/prompts/codex-dogfood.md @@ -0,0 +1,47 @@ + + + +# Codex Policy Dogfood Task + +You are inside an OpenShell sandbox. The user asked you to write one markdown +file to GitHub using the GitHub Contents API. + +Target: + +- Repository: `{{OWNER}}/{{REPO}}` +- Branch: `{{BRANCH}}` +- File path: `{{FILE_PATH}}` +- Run id: `{{RUN_ID}}` + +Requirements: + +- Use `curl` and the `GITHUB_TOKEN` environment variable. +- Use ordinary shell commands for this demo. Do not use GitHub MCP tools or + repository-specific helper skills. +- Do not print, echo, or reveal the token. +- First try to write the file with `PUT /repos/{{OWNER}}/{{REPO}}/contents/{{FILE_PATH}}`. +- If OpenShell returns `policy_denied`, read + `/etc/openshell/skills/policy_advisor.md` and follow the local API workflow + there. +- Submit the narrowest proposal that permits only this write. +- Do not include a `tls` field in the proposed endpoint unless you are + explicitly disabling TLS inspection. +- After submitting a proposal, retry the write for up to 90 seconds. The + developer may approve while you are waiting. +- Do not print the full GitHub response body. It can include temporary + `download_url` query tokens. Extract only `content.path`, `content.html_url`, + and `commit.sha`. +- Finish with a short summary that says whether the write succeeded. Include + the GitHub file path and URL if GitHub returns them. + +Suggested file content: + +```markdown +# OpenShell policy advisor dogfood + +Run id: {{RUN_ID}} + +This file was written by Codex from inside an OpenShell sandbox after Codex read +the policy advisor skill, submitted a narrow policy proposal, and waited for +developer approval. 
+``` diff --git a/examples/agent-driven-policy-management/sandbox-runner.sh b/examples/agent-driven-policy-management/sandbox-runner.sh index 780e85573..35d7e27a0 100755 --- a/examples/agent-driven-policy-management/sandbox-runner.sh +++ b/examples/agent-driven-policy-management/sandbox-runner.sh @@ -5,6 +5,7 @@ set -euo pipefail +RUNNER_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cmd="$1" shift @@ -16,6 +17,114 @@ json_status_response() { printf '\n' } +render_template() { + local template="$1" + local owner="$2" + local repo="$3" + local branch="$4" + local file_path="$5" + local run_id="$6" + + python3 - "$template" "$owner" "$repo" "$branch" "$file_path" "$run_id" <<'PY' +from pathlib import Path +import sys + +template, owner, repo, branch, file_path, run_id = sys.argv[1:7] +text = Path(template).read_text(encoding="utf-8") +for key, value in { + "OWNER": owner, + "REPO": repo, + "BRANCH": branch, + "FILE_PATH": file_path, + "RUN_ID": run_id, +}.items(): + text = text.replace("{{" + key + "}}", value) +print(text, end="") +PY +} + +bootstrap_codex_oauth() { + mkdir -p "$HOME/.codex" + python3 - <<'PY' +from pathlib import Path +import base64 +import json +import os +import time + +def b64url_json(payload): + raw = json.dumps(payload, separators=(",", ":")).encode("utf-8") + return base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii") + +now = int(time.time()) +fake_id_token = ".".join([ + b64url_json({"alg": "none", "typ": "JWT"}), + b64url_json({ + "iss": "https://auth.openai.com", + "aud": "codex", + "sub": "openshell-placeholder", + "email": "placeholder@example.com", + "iat": now, + "exp": now + 3600, + }), + "placeholder", +]) + +path = Path.home() / ".codex" / "auth.json" +path.write_text(json.dumps({ + "auth_mode": "chatgpt", + "OPENAI_API_KEY": None, + "tokens": { + "id_token": fake_id_token, + "access_token": os.environ["CODEX_AUTH_ACCESS_TOKEN"], + "refresh_token": os.environ["CODEX_AUTH_REFRESH_TOKEN"], + "account_id": 
os.environ["CODEX_AUTH_ACCOUNT_ID"], + }, + "last_refresh": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), +}, indent=2), encoding="utf-8") +path.chmod(0o600) +PY +} + +run_codex_dogfood() { + local owner="$1" + local repo="$2" + local branch="$3" + local file_path="$4" + local run_id="$5" + local prompt final + + command -v codex >/dev/null 2>&1 || { + echo "codex is not installed in this sandbox image" >&2 + exit 69 + } + + bootstrap_codex_oauth + prompt="$(mktemp)" + final="/sandbox/codex-policy-dogfood-final.md" + render_template \ + "${RUNNER_DIR}/prompts/codex-dogfood.md" \ + "$owner" \ + "$repo" \ + "$branch" \ + "$file_path" \ + "$run_id" > "$prompt" + + codex exec \ + --skip-git-repo-check \ + --dangerously-bypass-approvals-and-sandbox \ + --ephemeral \ + --cd /sandbox \ + --color never \ + -c shell_environment_policy.inherit=all \ + --output-last-message "$final" \ + - < "$prompt" + + printf '\nCODEX_FINAL_MESSAGE_BEGIN\n' + sed 's/^/ /' "$final" + printf 'CODEX_FINAL_MESSAGE_END\n' +} + case "$cmd" in check-skill) test -f /etc/openshell/skills/policy_advisor.md @@ -135,6 +244,10 @@ PY json_status_response "$status" "$body" ;; + codex-dogfood) + run_codex_dogfood "$@" + ;; + *) echo "unknown command: $cmd" >&2 exit 64 From 43466414aa79a1b2b5267a333a57152b5c0b1449 Mon Sep 17 00:00:00 2001 From: Alexander Watson Date: Mon, 4 May 2026 07:45:31 -0700 Subject: [PATCH 06/23] refactor(examples): make policy demo agent-agnostic --- .../policy.template.yaml | 28 ++ .../agent-driven-policy-management/runner.sh | 113 ----- .../validation.sh | 385 ++++++++++++++++++ .../agent-driven-policy-management/README.md | 71 ++-- .../codex-dogfood.md => agent-task.md} | 14 +- .../agent-driven-policy-management/demo.sh | 327 +++++++-------- .../agent-driven-policy-management/dogfood.sh | 349 ---------------- .../policy.template.yaml | 31 -- 8 files changed, 584 insertions(+), 734 deletions(-) create mode 100644 e2e/agent-driven-policy-management/policy.template.yaml rename 
examples/agent-driven-policy-management/sandbox-runner.sh => e2e/agent-driven-policy-management/runner.sh (57%) create mode 100755 e2e/agent-driven-policy-management/validation.sh rename examples/agent-driven-policy-management/{prompts/codex-dogfood.md => agent-task.md} (73%) delete mode 100755 examples/agent-driven-policy-management/dogfood.sh diff --git a/e2e/agent-driven-policy-management/policy.template.yaml b/e2e/agent-driven-policy-management/policy.template.yaml new file mode 100644 index 000000000..6452cb01c --- /dev/null +++ b/e2e/agent-driven-policy-management/policy.template.yaml @@ -0,0 +1,28 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +version: 1 + +filesystem_policy: + include_workdir: true + read_only: [/usr, /lib, /proc, /dev/urandom, /app, /etc, /var/log] + read_write: [/sandbox, /tmp, /dev/null] + +landlock: + compatibility: best_effort + +process: + run_as_user: sandbox + run_as_group: sandbox + +network_policies: + github_api_readonly: + name: github-api-readonly + endpoints: + - host: api.github.com + port: 443 + protocol: rest + enforcement: enforce + access: read-only + binaries: + - { path: /usr/bin/curl } diff --git a/examples/agent-driven-policy-management/sandbox-runner.sh b/e2e/agent-driven-policy-management/runner.sh similarity index 57% rename from examples/agent-driven-policy-management/sandbox-runner.sh rename to e2e/agent-driven-policy-management/runner.sh index 35d7e27a0..780e85573 100755 --- a/examples/agent-driven-policy-management/sandbox-runner.sh +++ b/e2e/agent-driven-policy-management/runner.sh @@ -5,7 +5,6 @@ set -euo pipefail -RUNNER_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" cmd="$1" shift @@ -17,114 +16,6 @@ json_status_response() { printf '\n' } -render_template() { - local template="$1" - local owner="$2" - local repo="$3" - local branch="$4" - local file_path="$5" - local run_id="$6" - - python3 - 
"$template" "$owner" "$repo" "$branch" "$file_path" "$run_id" <<'PY' -from pathlib import Path -import sys - -template, owner, repo, branch, file_path, run_id = sys.argv[1:7] -text = Path(template).read_text(encoding="utf-8") -for key, value in { - "OWNER": owner, - "REPO": repo, - "BRANCH": branch, - "FILE_PATH": file_path, - "RUN_ID": run_id, -}.items(): - text = text.replace("{{" + key + "}}", value) -print(text, end="") -PY -} - -bootstrap_codex_oauth() { - mkdir -p "$HOME/.codex" - python3 - <<'PY' -from pathlib import Path -import base64 -import json -import os -import time - -def b64url_json(payload): - raw = json.dumps(payload, separators=(",", ":")).encode("utf-8") - return base64.urlsafe_b64encode(raw).rstrip(b"=").decode("ascii") - -now = int(time.time()) -fake_id_token = ".".join([ - b64url_json({"alg": "none", "typ": "JWT"}), - b64url_json({ - "iss": "https://auth.openai.com", - "aud": "codex", - "sub": "openshell-placeholder", - "email": "placeholder@example.com", - "iat": now, - "exp": now + 3600, - }), - "placeholder", -]) - -path = Path.home() / ".codex" / "auth.json" -path.write_text(json.dumps({ - "auth_mode": "chatgpt", - "OPENAI_API_KEY": None, - "tokens": { - "id_token": fake_id_token, - "access_token": os.environ["CODEX_AUTH_ACCESS_TOKEN"], - "refresh_token": os.environ["CODEX_AUTH_REFRESH_TOKEN"], - "account_id": os.environ["CODEX_AUTH_ACCOUNT_ID"], - }, - "last_refresh": time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), -}, indent=2), encoding="utf-8") -path.chmod(0o600) -PY -} - -run_codex_dogfood() { - local owner="$1" - local repo="$2" - local branch="$3" - local file_path="$4" - local run_id="$5" - local prompt final - - command -v codex >/dev/null 2>&1 || { - echo "codex is not installed in this sandbox image" >&2 - exit 69 - } - - bootstrap_codex_oauth - prompt="$(mktemp)" - final="/sandbox/codex-policy-dogfood-final.md" - render_template \ - "${RUNNER_DIR}/prompts/codex-dogfood.md" \ - "$owner" \ - "$repo" \ - "$branch" \ - 
"$file_path" \ - "$run_id" > "$prompt" - - codex exec \ - --skip-git-repo-check \ - --dangerously-bypass-approvals-and-sandbox \ - --ephemeral \ - --cd /sandbox \ - --color never \ - -c shell_environment_policy.inherit=all \ - --output-last-message "$final" \ - - < "$prompt" - - printf '\nCODEX_FINAL_MESSAGE_BEGIN\n' - sed 's/^/ /' "$final" - printf 'CODEX_FINAL_MESSAGE_END\n' -} - case "$cmd" in check-skill) test -f /etc/openshell/skills/policy_advisor.md @@ -244,10 +135,6 @@ PY json_status_response "$status" "$body" ;; - codex-dogfood) - run_codex_dogfood "$@" - ;; - *) echo "unknown command: $cmd" >&2 exit 64 diff --git a/e2e/agent-driven-policy-management/validation.sh b/e2e/agent-driven-policy-management/validation.sh new file mode 100755 index 000000000..526d50e86 --- /dev/null +++ b/e2e/agent-driven-policy-management/validation.sh @@ -0,0 +1,385 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." 
&& pwd)" +POLICY_TEMPLATE="${SCRIPT_DIR}/policy.template.yaml" +RUNNER_SOURCE="${SCRIPT_DIR}/runner.sh" + +if [[ -z "${OPENSHELL_BIN:-}" ]]; then + if [[ -x "${REPO_ROOT}/target/debug/openshell" ]]; then + OPENSHELL_BIN="${REPO_ROOT}/target/debug/openshell" + else + OPENSHELL_BIN="openshell" + fi +fi + +DEMO_BRANCH="${DEMO_BRANCH:-main}" +DEMO_RUN_ID="${DEMO_RUN_ID:-$(date +%Y%m%d-%H%M%S)}" +DEMO_FILE_DIR="${DEMO_FILE_DIR:-openshell-policy-advisor-validation}" +DEMO_FILE_PATH="${DEMO_FILE_PATH:-${DEMO_FILE_DIR}/${DEMO_RUN_ID}.md}" +DEMO_SANDBOX_NAME="${DEMO_SANDBOX_NAME:-policy-agent-validation-${DEMO_RUN_ID}}" +DEMO_GITHUB_PROVIDER_NAME="${DEMO_GITHUB_PROVIDER_NAME:-github-policy-validation-${DEMO_RUN_ID}}" +DEMO_KEEP_SANDBOX="${DEMO_KEEP_SANDBOX:-0}" +DEMO_RETRY_ATTEMPTS="${DEMO_RETRY_ATTEMPTS:-30}" +DEMO_RETRY_SLEEP="${DEMO_RETRY_SLEEP:-2}" + +TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/openshell-agent-policy.XXXXXX")" +POLICY_FILE="${TMP_DIR}/policy.yaml" +SSH_CONFIG="${TMP_DIR}/ssh_config" +SSH_HOST="" + +BOLD='\033[1m' +DIM='\033[2m' +CYAN='\033[36m' +GREEN='\033[32m' +RED='\033[31m' +YELLOW='\033[33m' +RESET='\033[0m' + +step() { + printf "\n${BOLD}${CYAN}==> %s${RESET}\n\n" "$1" +} + +info() { + printf " %b\n" "$*" +} + +fail() { + printf "\n${RED}error:${RESET} %s\n" "$*" >&2 + exit 1 +} + +cleanup() { + local status=$? 
+ + if [[ "$DEMO_KEEP_SANDBOX" != "1" ]]; then + "$OPENSHELL_BIN" sandbox delete "$DEMO_SANDBOX_NAME" >/dev/null 2>&1 || true + else + printf "\n${YELLOW}Keeping sandbox because DEMO_KEEP_SANDBOX=1: %s${RESET}\n" "$DEMO_SANDBOX_NAME" + fi + + "$OPENSHELL_BIN" provider delete "$DEMO_GITHUB_PROVIDER_NAME" >/dev/null 2>&1 || true + + if [[ $status -eq 0 ]]; then + rm -rf "$TMP_DIR" + else + printf "\n${YELLOW}Temporary files kept at: %s${RESET}\n" "$TMP_DIR" + fi +} +trap cleanup EXIT + +require_command() { + command -v "$1" >/dev/null 2>&1 || fail "missing required command: $1" +} + +validate_name() { + local label="$1" + local value="$2" + [[ "$value" =~ ^[A-Za-z0-9_.-]+$ ]] || fail "$label may contain only letters, numbers, '.', '_', and '-'" +} + +validate_path() { + local label="$1" + local value="$2" + [[ "$value" =~ ^[A-Za-z0-9._/-]+$ ]] || fail "$label may contain only letters, numbers, '.', '_', '-', and '/'" + [[ "$value" != /* ]] || fail "$label must be relative" + [[ "$value" != *..* ]] || fail "$label must not contain '..'" +} + +resolve_token() { + if [[ -z "${DEMO_GITHUB_TOKEN:-}" ]]; then + if [[ -n "${GITHUB_TOKEN:-}" ]]; then + DEMO_GITHUB_TOKEN="$GITHUB_TOKEN" + elif [[ -n "${GH_TOKEN:-}" ]]; then + DEMO_GITHUB_TOKEN="$GH_TOKEN" + elif command -v gh >/dev/null 2>&1; then + DEMO_GITHUB_TOKEN="$(gh auth token 2>/dev/null || true)" + fi + fi + + [[ -n "${DEMO_GITHUB_TOKEN:-}" ]] || fail "set DEMO_GITHUB_TOKEN, GITHUB_TOKEN, GH_TOKEN, or sign in with gh" + export GITHUB_TOKEN="$DEMO_GITHUB_TOKEN" +} + +validate_env() { + require_command curl + require_command jq + require_command ssh + require_command "$OPENSHELL_BIN" + + [[ -f "$RUNNER_SOURCE" ]] || fail "missing sandbox runner: $RUNNER_SOURCE" + [[ -n "${DEMO_GITHUB_OWNER:-}" ]] || fail "set DEMO_GITHUB_OWNER" + [[ -n "${DEMO_GITHUB_REPO:-}" ]] || fail "set DEMO_GITHUB_REPO" + [[ "$DEMO_RUN_ID" =~ ^[a-z0-9-]+$ ]] || fail "DEMO_RUN_ID may contain only lowercase letters, numbers, and '-'" + [[ 
"$DEMO_RETRY_ATTEMPTS" =~ ^[0-9]+$ ]] || fail "DEMO_RETRY_ATTEMPTS must be a number" + [[ "$DEMO_RETRY_SLEEP" =~ ^[0-9]+$ ]] || fail "DEMO_RETRY_SLEEP must be a number" + + validate_name "DEMO_GITHUB_OWNER" "$DEMO_GITHUB_OWNER" + validate_name "DEMO_GITHUB_REPO" "$DEMO_GITHUB_REPO" + validate_path "DEMO_BRANCH" "$DEMO_BRANCH" + validate_path "DEMO_FILE_PATH" "$DEMO_FILE_PATH" + + resolve_token +} + +github_api_status() { + local url="$1" + local body="$2" + curl -sS \ + -o "$body" \ + -w "%{http_code}" \ + -H "Accept: application/vnd.github+json" \ + -H "Authorization: Bearer ${DEMO_GITHUB_TOKEN}" \ + -H "X-GitHub-Api-Version: 2022-11-28" \ + "$url" +} + +check_gateway() { + step "Checking active OpenShell gateway" + if ! "$OPENSHELL_BIN" status >/dev/null 2>&1; then + fail "active OpenShell gateway is not reachable; start one separately, for example: mise run cluster" + fi + "$OPENSHELL_BIN" status | sed 's/^/ /' +} + +check_github_access() { + step "Checking GitHub repository access" + local body status branch branches_body branches_status branches + body="${TMP_DIR}/github-repo.json" + status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}" "$body")" + + if [[ "$status" != "200" ]]; then + printf '%s\n' "$(jq -r '.message // empty' "$body" 2>/dev/null)" | sed 's/^/ /' + fail "GitHub returned HTTP $status for ${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}; check the repo name and token access" + fi + + if jq -e 'has("permissions") and (.permissions.push == false and .permissions.admin == false and .permissions.maintain == false)' "$body" >/dev/null; then + fail "GitHub token can read ${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO} but does not appear to have write access" + fi + + branch="$(jq -rn --arg v "$DEMO_BRANCH" '$v|@uri')" + body="${TMP_DIR}/github-branch.json" + status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/branches/${branch}" "$body")" + if [[ "$status" != "200" ]]; 
then + branches_body="${TMP_DIR}/github-branches.json" + branches_status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/branches?per_page=20" "$branches_body")" + if [[ "$branches_status" == "200" ]]; then + branches="$(jq -r 'map(.name) | join(", ")' "$branches_body")" + if [[ -z "$branches" ]]; then + fail "GitHub repo exists but has no branches yet; add an initial README or push ${DEMO_BRANCH} before running the demo" + fi + fail "GitHub returned HTTP $status for branch ${DEMO_BRANCH}; set DEMO_BRANCH to one of: ${branches}" + fi + fail "GitHub returned HTTP $status for branch ${DEMO_BRANCH}" + fi + + body="${TMP_DIR}/github-demo-file.json" + status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/contents/${DEMO_FILE_PATH}?ref=${branch}" "$body")" + if [[ "$status" == "200" ]]; then + fail "validation output file already exists: ${DEMO_FILE_PATH}; choose a new DEMO_RUN_ID or DEMO_FILE_PATH" + fi + [[ "$status" == "404" ]] || fail "GitHub returned HTTP $status while checking demo output path ${DEMO_FILE_PATH}" + + info "${GREEN}GitHub repo, branch, and output path are safe for this run.${RESET}" +} + +create_provider() { + step "Creating temporary GitHub provider" + "$OPENSHELL_BIN" provider delete "$DEMO_GITHUB_PROVIDER_NAME" >/dev/null 2>&1 || true + "$OPENSHELL_BIN" provider create \ + --name "$DEMO_GITHUB_PROVIDER_NAME" \ + --type github \ + --credential GITHUB_TOKEN +} + +create_sandbox() { + step "Creating sandbox with read-only GitHub L7 policy" + cp "$POLICY_TEMPLATE" "$POLICY_FILE" + "$OPENSHELL_BIN" sandbox delete "$DEMO_SANDBOX_NAME" >/dev/null 2>&1 || true + "$OPENSHELL_BIN" sandbox create \ + --name "$DEMO_SANDBOX_NAME" \ + --provider "$DEMO_GITHUB_PROVIDER_NAME" \ + --policy "$POLICY_FILE" \ + --upload "${RUNNER_SOURCE}:/sandbox/policy-validation-runner.sh" \ + --no-git-ignore \ + --keep \ + --no-auto-providers \ + --no-tty \ + -- bash -lc "chmod +x 
/sandbox/policy-validation-runner.sh && echo sandbox ready" +} + +connect_ssh() { + step "Connecting to sandbox over SSH" + "$OPENSHELL_BIN" sandbox ssh-config "$DEMO_SANDBOX_NAME" > "$SSH_CONFIG" + SSH_HOST="$(awk '/^Host / { print $2; exit }' "$SSH_CONFIG")" + [[ -n "$SSH_HOST" ]] || fail "could not find Host entry in sandbox SSH config" + + local retries=30 + local i + for i in $(seq 1 "$retries"); do + if ssh -F "$SSH_CONFIG" "$SSH_HOST" true >/dev/null 2>&1; then + return + fi + sleep 2 + done + fail "SSH connection to sandbox timed out" +} + +sandbox_exec() { + ssh -F "$SSH_CONFIG" "$SSH_HOST" "$@" +} + +http_status() { + awk -F= '/^HTTP_STATUS=/ { print $2; exit }' +} + +http_body() { + sed '/^HTTP_STATUS=/d' +} + +run_policy_local_checks() { + step "Checking sandbox-local skill and policy.local" + sandbox_exec /sandbox/policy-validation-runner.sh check-skill >/dev/null + info "${GREEN}Skill installed:${RESET} /etc/openshell/skills/policy_advisor.md" + + local output + output="$(sandbox_exec /sandbox/policy-validation-runner.sh current-policy)" + local status + status="$(printf '%s\n' "$output" | http_status)" + [[ "$status" == "200" ]] || fail "policy.local current policy returned HTTP $status" + + info "${GREEN}policy.local returned the current sandbox policy.${RESET}" + info "Initial policy: read-only REST access to api.github.com for /usr/bin/curl" +} + +attempt_write() { + sandbox_exec /sandbox/policy-validation-runner.sh put-file \ + "$DEMO_GITHUB_OWNER" \ + "$DEMO_GITHUB_REPO" \ + "$DEMO_BRANCH" \ + "$DEMO_FILE_PATH" \ + "$DEMO_RUN_ID" +} + +submit_policy_proposal() { + sandbox_exec /sandbox/policy-validation-runner.sh submit-proposal \ + "$DEMO_GITHUB_OWNER" \ + "$DEMO_GITHUB_REPO" \ + "$DEMO_FILE_PATH" +} + +capture_initial_denial() { + step "Attempting GitHub contents write from inside sandbox" + local output + output="$(attempt_write)" + local status + local body + status="$(printf '%s\n' "$output" | http_status)" + body="$(printf '%s\n' "$output" 
| http_body)" + + [[ "$status" == "403" ]] || fail "expected OpenShell HTTP 403, got HTTP $status" + printf '%s\n' "$body" | jq -e '.error == "policy_denied"' >/dev/null \ + || fail "expected structured policy_denied body" + printf '%s\n' "$body" | jq -e '.layer == "l7" and .protocol == "rest" and .method == "PUT"' >/dev/null \ + || fail "expected structured L7 REST deny fields" + + printf '%s\n' "$body" | jq -r ' + "Denied: \(.method) \(.path)", + "Layer: \(.layer)/\(.protocol) host=\(.host):\(.port) binary=\(.binary)", + "Agent guidance: \(.next_steps | map(.action) | join(" -> "))" + ' | sed 's/^/ /' + info "${GREEN}Captured structured L7 policy denial.${RESET}" +} + +submit_and_approve() { + step "Submitting proposal through policy.local" + local output + output="$(submit_policy_proposal)" + local status + local body + status="$(printf '%s\n' "$output" | http_status)" + body="$(printf '%s\n' "$output" | http_body)" + + [[ "$status" == "202" ]] || fail "expected proposal submit HTTP 202, got HTTP $status" + [[ "$(printf '%s\n' "$body" | jq -r '.accepted_chunks // 0')" != "0" ]] \ + || fail "proposal was not accepted" + printf '%s\n' "$body" | jq -r '"Proposal submitted: \(.accepted_chunks) accepted, \(.rejected_chunks) rejected"' | sed 's/^/ /' + + step "Approving pending draft rule from outside the sandbox" + "$OPENSHELL_BIN" rule get "$DEMO_SANDBOX_NAME" --status pending | sed 's/^/ /' + "$OPENSHELL_BIN" rule approve-all "$DEMO_SANDBOX_NAME" | sed 's/^/ /' +} + +print_success_summary() { + jq '{ + path: .content.path, + html_url: .content.html_url, + commit: .commit.sha, + message: .commit.message + }' +} + +retry_until_allowed() { + step "Retrying GitHub contents write after approval" + local output status body attempt + + for attempt in $(seq 1 "$DEMO_RETRY_ATTEMPTS"); do + output="$(attempt_write)" + status="$(printf '%s\n' "$output" | http_status)" + body="$(printf '%s\n' "$output" | http_body)" + + if printf '%s\n' "$body" | jq -e '.error == 
"policy_denied"' >/dev/null 2>&1; then + info "${DIM}Attempt ${attempt}/${DEMO_RETRY_ATTEMPTS}: policy not loaded yet; retrying...${RESET}" + sleep "$DEMO_RETRY_SLEEP" + continue + fi + + if [[ "$status" == "200" || "$status" == "201" ]]; then + printf '%s\n' "$body" | print_success_summary | sed 's/^/ /' + info "${GREEN}GitHub write succeeded from inside the sandbox.${RESET}" + return + fi + + printf '%s\n' "$body" | jq . | sed 's/^/ /' + if [[ "$status" == "404" ]]; then + fail "policy allowed the request, but GitHub returned HTTP 404; check DEMO_GITHUB_OWNER, DEMO_GITHUB_REPO, and token access" + fi + fail "policy allowed the request, but GitHub returned HTTP $status" + done + + fail "timed out waiting for approved policy to load into the sandbox" +} + +show_logs() { + step "Policy decision trace" + "$OPENSHELL_BIN" logs "$DEMO_SANDBOX_NAME" --since 5m -n 50 2>&1 \ + | grep -E 'HTTP:PUT|CONFIG:LOADED|ReportPolicyStatus' \ + | tail -n 8 \ + | sed 's/^/ /' || true +} + +main() { + validate_env + check_gateway + check_github_access + create_provider + create_sandbox + connect_ssh + run_policy_local_checks + capture_initial_denial + submit_and_approve + retry_until_allowed + show_logs + + printf "\n${BOLD}${GREEN}✓ Validation complete.${RESET}\n\n" + printf " Sandbox: %s\n" "$DEMO_SANDBOX_NAME" + printf " Repository: https://github.com/%s/%s\n" "$DEMO_GITHUB_OWNER" "$DEMO_GITHUB_REPO" + printf " File: %s\n" "$DEMO_FILE_PATH" +} + +main "$@" diff --git a/examples/agent-driven-policy-management/README.md b/examples/agent-driven-policy-management/README.md index 2a5ee4561..c01b1864a 100644 --- a/examples/agent-driven-policy-management/README.md +++ b/examples/agent-driven-policy-management/README.md @@ -3,34 +3,32 @@ # Agent-Driven Policy Management Demo -Run the first policy-advisor MVP loop from one host-side script: +Run the first policy-advisor MVP loop with a real agent: 1. Use the active OpenShell gateway. 2. Create a GitHub provider from a host token. -3. 
Start a sandbox with read-only L7 GitHub API access. -4. Attempt a GitHub contents write from inside the sandbox and capture the - structured `policy_denied` response. -5. Submit a narrow policy proposal through `http://policy.local/v1/proposals`. +3. Start a sandbox with your agent command and an uploaded task file. +4. Let the agent hit an OpenShell `policy_denied` response. +5. Let the agent read `/etc/openshell/skills/policy_advisor.md` and submit a + narrow proposal through `http://policy.local/v1/proposals`. 6. Approve the draft rule from outside the sandbox. -7. Retry the same write and confirm it succeeds. +7. Let the agent retry and confirm the GitHub write succeeds. -`demo.sh` is deterministic. It does not launch a real coding agent; it uses the -same sandbox-local interfaces that the agent will use. - -`dogfood.sh` runs the next loop: Codex starts inside the sandbox, observes the -structured denial, reads `/etc/openshell/skills/policy_advisor.md`, drafts and -submits a narrow proposal through `policy.local`, then retries after the host -developer approves. +The shell script is agent-agnostic. It does not know how to sign in to a +specific coding agent. Pass the provider names and sandbox command for the +agent you want to run. ## Prerequisites - An active OpenShell gateway that includes the current sandbox supervisor build. -- `curl`, `jq`, and `ssh` on the host machine. +- `curl` and `jq` on the host machine. - The GitHub CLI (`gh`) if you want to create the scratch repo with the command below. - A disposable or demo-safe GitHub repository. - A GitHub token with contents write permission for that repository. +- An agent provider and policy that let your chosen agent run inside the + sandbox. ## Create A Scratch Repo @@ -50,40 +48,23 @@ file. 
Each default run writes a new timestamped file under ## Quick Start -```bash -export DEMO_GITHUB_OWNER= -export DEMO_GITHUB_REPO= -export DEMO_GITHUB_TOKEN= - -bash examples/agent-driven-policy-management/demo.sh -``` - -If you use the GitHub CLI, this also works: +The included `policy.template.yaml` only defines the GitHub API target for the +policy-management loop. Use `DEMO_POLICY_FILE` to point at a policy that also +allows your chosen agent to reach its model/provider endpoints. ```bash export DEMO_GITHUB_OWNER= export DEMO_GITHUB_REPO= -export DEMO_GITHUB_TOKEN="$(gh auth token)" +export DEMO_GITHUB_TOKEN= +export DEMO_POLICY_FILE=/path/to/policy-that-allows-your-agent.yaml +export DEMO_AGENT_PROVIDERS="my-agent-provider" +export DEMO_AGENT_COMMAND='' bash examples/agent-driven-policy-management/demo.sh ``` -## Codex Dogfood - -Sign in to Codex locally, then run: - -```bash -codex login - -export DEMO_GITHUB_OWNER= -export DEMO_GITHUB_REPO= -export DEMO_GITHUB_TOKEN="$(gh auth token)" - -bash examples/agent-driven-policy-management/dogfood.sh -``` - The host script only orchestrates sandbox lifecycle and developer approval. The -policy proposal is authored by Codex inside the sandbox from the installed +policy proposal is authored by the agent inside the sandbox from the installed skill, structured denial response, and `policy.local` API. The demo writes one markdown file under: @@ -92,15 +73,12 @@ The demo writes one markdown file under: openshell-policy-advisor-demo/.md ``` -The dogfood run writes under: - -```text -openshell-policy-advisor-dogfood/.md -``` - Use a scratch repository or a demo branch if you do not want this file in a production repository. +The deterministic non-model validation flow lives in +`e2e/agent-driven-policy-management/validation.sh`. 
+ ## Options ```bash @@ -109,5 +87,6 @@ export DEMO_BRANCH=main export DEMO_RUN_ID="$(date +%Y%m%d-%H%M%S)" export DEMO_FILE_DIR=openshell-policy-advisor-demo export DEMO_KEEP_SANDBOX=0 -export DEMO_APPROVAL_TIMEOUT_SECS=180 +export DEMO_APPROVAL_TIMEOUT_SECS=240 +export DEMO_AGENT_PROVIDERS="agent-provider-a agent-provider-b" ``` diff --git a/examples/agent-driven-policy-management/prompts/codex-dogfood.md b/examples/agent-driven-policy-management/agent-task.md similarity index 73% rename from examples/agent-driven-policy-management/prompts/codex-dogfood.md rename to examples/agent-driven-policy-management/agent-task.md index 4e8be9977..8ac367f5f 100644 --- a/examples/agent-driven-policy-management/prompts/codex-dogfood.md +++ b/examples/agent-driven-policy-management/agent-task.md @@ -1,7 +1,7 @@ -# Codex Policy Dogfood Task +# Agent Policy Management Demo Task You are inside an OpenShell sandbox. The user asked you to write one markdown file to GitHub using the GitHub Contents API. @@ -15,9 +15,7 @@ Target: Requirements: -- Use `curl` and the `GITHUB_TOKEN` environment variable. -- Use ordinary shell commands for this demo. Do not use GitHub MCP tools or - repository-specific helper skills. +- Use ordinary shell commands and the `GITHUB_TOKEN` environment variable. - Do not print, echo, or reveal the token. - First try to write the file with `PUT /repos/{{OWNER}}/{{REPO}}/contents/{{FILE_PATH}}`. - If OpenShell returns `policy_denied`, read @@ -26,7 +24,7 @@ Requirements: - Submit the narrowest proposal that permits only this write. - Do not include a `tls` field in the proposed endpoint unless you are explicitly disabling TLS inspection. -- After submitting a proposal, retry the write for up to 90 seconds. The +- After submitting a proposal, retry the write for up to 120 seconds. The developer may approve while you are waiting. - Do not print the full GitHub response body. It can include temporary `download_url` query tokens. 
Extract only `content.path`, `content.html_url`, @@ -37,11 +35,11 @@ Requirements: Suggested file content: ```markdown -# OpenShell policy advisor dogfood +# OpenShell policy advisor demo Run id: {{RUN_ID}} -This file was written by Codex from inside an OpenShell sandbox after Codex read -the policy advisor skill, submitted a narrow policy proposal, and waited for +This file was written from inside an OpenShell sandbox after the agent read the +policy advisor skill, submitted a narrow policy proposal, and waited for developer approval. ``` diff --git a/examples/agent-driven-policy-management/demo.sh b/examples/agent-driven-policy-management/demo.sh index 7c6dc339c..8613b1031 100755 --- a/examples/agent-driven-policy-management/demo.sh +++ b/examples/agent-driven-policy-management/demo.sh @@ -7,8 +7,8 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" -POLICY_TEMPLATE="${SCRIPT_DIR}/policy.template.yaml" -RUNNER_SOURCE="${SCRIPT_DIR}/sandbox-runner.sh" +DEFAULT_POLICY_FILE="${SCRIPT_DIR}/policy.template.yaml" +TASK_TEMPLATE="${SCRIPT_DIR}/agent-task.md" if [[ -z "${OPENSHELL_BIN:-}" ]]; then if [[ -x "${REPO_ROOT}/target/debug/openshell" ]]; then @@ -18,20 +18,24 @@ if [[ -z "${OPENSHELL_BIN:-}" ]]; then fi fi +DEMO_POLICY_FILE="${DEMO_POLICY_FILE:-$DEFAULT_POLICY_FILE}" +DEMO_SANDBOX_FROM="${DEMO_SANDBOX_FROM:-base}" DEMO_BRANCH="${DEMO_BRANCH:-main}" DEMO_RUN_ID="${DEMO_RUN_ID:-$(date +%Y%m%d-%H%M%S)}" DEMO_FILE_DIR="${DEMO_FILE_DIR:-openshell-policy-advisor-demo}" DEMO_FILE_PATH="${DEMO_FILE_PATH:-${DEMO_FILE_DIR}/${DEMO_RUN_ID}.md}" DEMO_SANDBOX_NAME="${DEMO_SANDBOX_NAME:-policy-agent-${DEMO_RUN_ID}}" DEMO_GITHUB_PROVIDER_NAME="${DEMO_GITHUB_PROVIDER_NAME:-github-policy-agent-${DEMO_RUN_ID}}" +DEMO_AGENT_PROVIDERS="${DEMO_AGENT_PROVIDERS:-}" +DEMO_APPROVAL_TIMEOUT_SECS="${DEMO_APPROVAL_TIMEOUT_SECS:-240}" DEMO_KEEP_SANDBOX="${DEMO_KEEP_SANDBOX:-0}" 
-DEMO_RETRY_ATTEMPTS="${DEMO_RETRY_ATTEMPTS:-30}" -DEMO_RETRY_SLEEP="${DEMO_RETRY_SLEEP:-2}" -TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/openshell-agent-policy.XXXXXX")" -POLICY_FILE="${TMP_DIR}/policy.yaml" -SSH_CONFIG="${TMP_DIR}/ssh_config" -SSH_HOST="" +TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/openshell-agent-policy-demo.XXXXXX")" +PAYLOAD_DIR="${TMP_DIR}/payload" +TASK_FILE="${PAYLOAD_DIR}/agent-task.md" +AGENT_LOG="${TMP_DIR}/agent.log" +PENDING_FILE="${TMP_DIR}/pending-rule.txt" +mkdir -p "$PAYLOAD_DIR" BOLD='\033[1m' DIM='\033[2m' @@ -41,6 +45,8 @@ RED='\033[31m' YELLOW='\033[33m' RESET='\033[0m' +AGENT_PID="" + step() { printf "\n${BOLD}${CYAN}==> %s${RESET}\n\n" "$1" } @@ -49,8 +55,18 @@ info() { printf " %b\n" "$*" } +redact_output() { + sed -E \ + -e 's|(download_url": "https://raw\.githubusercontent\.com[^?"]+\?token=)[^"]+|\1|g' \ + -e 's|(Authorization: Bearer )[A-Za-z0-9._-]+|\1|g' +} + fail() { printf "\n${RED}error:${RESET} %s\n" "$*" >&2 + if [[ -f "$AGENT_LOG" ]]; then + printf "\n${YELLOW}Agent log tail:${RESET}\n" >&2 + tail -n 120 "$AGENT_LOG" | redact_output | sed 's/^/ /' >&2 || true + fi exit 1 } @@ -91,7 +107,7 @@ validate_path() { [[ "$value" != *..* ]] || fail "$label must not contain '..'" } -resolve_token() { +resolve_github_token() { if [[ -z "${DEMO_GITHUB_TOKEN:-}" ]]; then if [[ -n "${GITHUB_TOKEN:-}" ]]; then DEMO_GITHUB_TOKEN="$GITHUB_TOKEN" @@ -109,22 +125,22 @@ resolve_token() { validate_env() { require_command curl require_command jq - require_command ssh require_command "$OPENSHELL_BIN" - [[ -f "$RUNNER_SOURCE" ]] || fail "missing sandbox runner: $RUNNER_SOURCE" + [[ -f "$DEMO_POLICY_FILE" ]] || fail "missing policy file: $DEMO_POLICY_FILE" + [[ -f "$TASK_TEMPLATE" ]] || fail "missing agent task template: $TASK_TEMPLATE" [[ -n "${DEMO_GITHUB_OWNER:-}" ]] || fail "set DEMO_GITHUB_OWNER" [[ -n "${DEMO_GITHUB_REPO:-}" ]] || fail "set DEMO_GITHUB_REPO" + [[ -n "${DEMO_AGENT_COMMAND:-}" ]] || fail "set DEMO_AGENT_COMMAND to a sandbox 
command that reads /sandbox/payload/agent-task.md" [[ "$DEMO_RUN_ID" =~ ^[a-z0-9-]+$ ]] || fail "DEMO_RUN_ID may contain only lowercase letters, numbers, and '-'" - [[ "$DEMO_RETRY_ATTEMPTS" =~ ^[0-9]+$ ]] || fail "DEMO_RETRY_ATTEMPTS must be a number" - [[ "$DEMO_RETRY_SLEEP" =~ ^[0-9]+$ ]] || fail "DEMO_RETRY_SLEEP must be a number" + [[ "$DEMO_APPROVAL_TIMEOUT_SECS" =~ ^[0-9]+$ ]] || fail "DEMO_APPROVAL_TIMEOUT_SECS must be a number" validate_name "DEMO_GITHUB_OWNER" "$DEMO_GITHUB_OWNER" validate_name "DEMO_GITHUB_REPO" "$DEMO_GITHUB_REPO" validate_path "DEMO_BRANCH" "$DEMO_BRANCH" validate_path "DEMO_FILE_PATH" "$DEMO_FILE_PATH" - resolve_token + resolve_github_token } github_api_status() { @@ -139,241 +155,178 @@ github_api_status() { "$url" } +urlencode() { + jq -rn --arg v "$1" '$v|@uri' +} + check_gateway() { step "Checking active OpenShell gateway" - if ! "$OPENSHELL_BIN" status >/dev/null 2>&1; then - fail "active OpenShell gateway is not reachable; start one separately, for example: mise run cluster" - fi + "$OPENSHELL_BIN" status >/dev/null 2>&1 \ + || fail "active OpenShell gateway is not reachable; start one separately" "$OPENSHELL_BIN" status | sed 's/^/ /' } check_github_access() { step "Checking GitHub repository access" - local body status branch branches_body branches_status branches + local body status branch body="${TMP_DIR}/github-repo.json" status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}" "$body")" - - if [[ "$status" != "200" ]]; then - printf '%s\n' "$(jq -r '.message // empty' "$body" 2>/dev/null)" | sed 's/^/ /' - fail "GitHub returned HTTP $status for ${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}; check the repo name and token access" - fi + [[ "$status" == "200" ]] \ + || fail "GitHub returned HTTP $status for ${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}; check the repo name and token access" if jq -e 'has("permissions") and (.permissions.push == false and .permissions.admin == false and 
.permissions.maintain == false)' "$body" >/dev/null; then fail "GitHub token can read ${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO} but does not appear to have write access" fi - branch="$(jq -rn --arg v "$DEMO_BRANCH" '$v|@uri')" + branch="$(urlencode "$DEMO_BRANCH")" body="${TMP_DIR}/github-branch.json" status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/branches/${branch}" "$body")" - if [[ "$status" != "200" ]]; then - branches_body="${TMP_DIR}/github-branches.json" - branches_status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/branches?per_page=20" "$branches_body")" - if [[ "$branches_status" == "200" ]]; then - branches="$(jq -r 'map(.name) | join(", ")' "$branches_body")" - if [[ -z "$branches" ]]; then - fail "GitHub repo exists but has no branches yet; add an initial README or push ${DEMO_BRANCH} before running the demo" - fi - fail "GitHub returned HTTP $status for branch ${DEMO_BRANCH}; set DEMO_BRANCH to one of: ${branches}" - fi - fail "GitHub returned HTTP $status for branch ${DEMO_BRANCH}" - fi + [[ "$status" == "200" ]] || fail "GitHub returned HTTP $status for branch ${DEMO_BRANCH}" body="${TMP_DIR}/github-demo-file.json" status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/contents/${DEMO_FILE_PATH}?ref=${branch}" "$body")" if [[ "$status" == "200" ]]; then fail "demo output file already exists: ${DEMO_FILE_PATH}; choose a new DEMO_RUN_ID or DEMO_FILE_PATH" fi - [[ "$status" == "404" ]] || fail "GitHub returned HTTP $status while checking demo output path ${DEMO_FILE_PATH}" + [[ "$status" == "404" ]] || fail "GitHub returned HTTP $status while checking output path ${DEMO_FILE_PATH}" info "${GREEN}GitHub repo, branch, and output path are safe for this run.${RESET}" } -create_provider() { +render_task() { + python3 - "$TASK_TEMPLATE" "$TASK_FILE" "$DEMO_GITHUB_OWNER" "$DEMO_GITHUB_REPO" "$DEMO_BRANCH" 
"$DEMO_FILE_PATH" "$DEMO_RUN_ID" <<'PY' +from pathlib import Path +import sys + +template, output, owner, repo, branch, file_path, run_id = sys.argv[1:8] +text = Path(template).read_text(encoding="utf-8") +for key, value in { + "OWNER": owner, + "REPO": repo, + "BRANCH": branch, + "FILE_PATH": file_path, + "RUN_ID": run_id, +}.items(): + text = text.replace("{{" + key + "}}", value) +Path(output).write_text(text, encoding="utf-8") +PY +} + +create_github_provider() { step "Creating temporary GitHub provider" "$OPENSHELL_BIN" provider delete "$DEMO_GITHUB_PROVIDER_NAME" >/dev/null 2>&1 || true "$OPENSHELL_BIN" provider create \ --name "$DEMO_GITHUB_PROVIDER_NAME" \ --type github \ - --credential GITHUB_TOKEN -} - -create_sandbox() { - step "Creating sandbox with read-only GitHub L7 policy" - cp "$POLICY_TEMPLATE" "$POLICY_FILE" - "$OPENSHELL_BIN" sandbox delete "$DEMO_SANDBOX_NAME" >/dev/null 2>&1 || true - "$OPENSHELL_BIN" sandbox create \ - --name "$DEMO_SANDBOX_NAME" \ - --provider "$DEMO_GITHUB_PROVIDER_NAME" \ - --policy "$POLICY_FILE" \ - --upload "${RUNNER_SOURCE}:/sandbox/policy-demo-runner.sh" \ - --no-git-ignore \ - --keep \ - --no-auto-providers \ - --no-tty \ - -- bash -lc "chmod +x /sandbox/policy-demo-runner.sh && echo sandbox ready" + --credential GITHUB_TOKEN >/dev/null + info "${GREEN}Created GitHub provider for this run.${RESET}" } -connect_ssh() { - step "Connecting to sandbox over SSH" - "$OPENSHELL_BIN" sandbox ssh-config "$DEMO_SANDBOX_NAME" > "$SSH_CONFIG" - SSH_HOST="$(awk '/^Host / { print $2; exit }' "$SSH_CONFIG")" - [[ -n "$SSH_HOST" ]] || fail "could not find Host entry in sandbox SSH config" +provider_args() { + printf '%s\n' "--provider" + printf '%s\n' "$DEMO_GITHUB_PROVIDER_NAME" - local retries=30 - local i - for i in $(seq 1 "$retries"); do - if ssh -F "$SSH_CONFIG" "$SSH_HOST" true >/dev/null 2>&1; then - return - fi - sleep 2 + local normalized="${DEMO_AGENT_PROVIDERS//,/ }" + local provider + for provider in $normalized; do + 
printf '%s\n' "--provider" + printf '%s\n' "$provider" done - fail "SSH connection to sandbox timed out" -} - -sandbox_exec() { - ssh -F "$SSH_CONFIG" "$SSH_HOST" "$@" } -http_status() { - awk -F= '/^HTTP_STATUS=/ { print $2; exit }' -} +start_agent_sandbox() { + step "Starting agent inside the sandbox" + "$OPENSHELL_BIN" sandbox delete "$DEMO_SANDBOX_NAME" >/dev/null 2>&1 || true -http_body() { - sed '/^HTTP_STATUS=/d' + local args=() + while IFS= read -r arg; do + args+=("$arg") + done < <(provider_args) + + ( + "$OPENSHELL_BIN" sandbox create \ + --name "$DEMO_SANDBOX_NAME" \ + --from "$DEMO_SANDBOX_FROM" \ + "${args[@]}" \ + --policy "$DEMO_POLICY_FILE" \ + --upload "${PAYLOAD_DIR}:/sandbox" \ + --no-git-ignore \ + --keep \ + --no-auto-providers \ + --no-tty \ + -- bash -lc "$DEMO_AGENT_COMMAND" + ) >"$AGENT_LOG" 2>&1 & + AGENT_PID="$!" + info "${DIM}Agent run started; log: ${AGENT_LOG}${RESET}" } -run_policy_local_checks() { - step "Checking sandbox-local skill and policy.local" - sandbox_exec /sandbox/policy-demo-runner.sh check-skill >/dev/null - info "${GREEN}Skill installed:${RESET} /etc/openshell/skills/policy_advisor.md" +approve_when_pending() { + step "Waiting for the agent to submit a policy proposal" + local start now + start="$(date +%s)" - local output - output="$(sandbox_exec /sandbox/policy-demo-runner.sh current-policy)" - local status - status="$(printf '%s\n' "$output" | http_status)" - [[ "$status" == "200" ]] || fail "policy.local current policy returned HTTP $status" - - info "${GREEN}policy.local returned the current sandbox policy.${RESET}" - info "Initial policy: read-only REST access to api.github.com for /usr/bin/curl" -} + while true; do + if ! 
kill -0 "$AGENT_PID" >/dev/null 2>&1; then + wait "$AGENT_PID" || true + fail "agent exited before a pending proposal appeared" + fi -attempt_write() { - sandbox_exec /sandbox/policy-demo-runner.sh put-file \ - "$DEMO_GITHUB_OWNER" \ - "$DEMO_GITHUB_REPO" \ - "$DEMO_BRANCH" \ - "$DEMO_FILE_PATH" \ - "$DEMO_RUN_ID" -} + "$OPENSHELL_BIN" rule get "$DEMO_SANDBOX_NAME" --status pending >"$PENDING_FILE" 2>/dev/null || true + if grep -q "Chunk:" "$PENDING_FILE" && grep -q "pending" "$PENDING_FILE"; then + info "${GREEN}Agent submitted a pending proposal.${RESET}" + sed 's/^/ /' "$PENDING_FILE" -submit_policy_proposal() { - sandbox_exec /sandbox/policy-demo-runner.sh submit-proposal \ - "$DEMO_GITHUB_OWNER" \ - "$DEMO_GITHUB_REPO" \ - "$DEMO_FILE_PATH" -} + step "Approving pending draft rule from outside the sandbox" + "$OPENSHELL_BIN" rule approve-all "$DEMO_SANDBOX_NAME" | sed 's/^/ /' + return + fi -capture_initial_denial() { - step "Attempting GitHub contents write from inside sandbox" - local output - output="$(attempt_write)" - local status - local body - status="$(printf '%s\n' "$output" | http_status)" - body="$(printf '%s\n' "$output" | http_body)" - - [[ "$status" == "403" ]] || fail "expected OpenShell HTTP 403, got HTTP $status" - printf '%s\n' "$body" | jq -e '.error == "policy_denied"' >/dev/null \ - || fail "expected structured policy_denied body" - printf '%s\n' "$body" | jq -e '.layer == "l7" and .protocol == "rest" and .method == "PUT"' >/dev/null \ - || fail "expected structured L7 REST deny fields" - - printf '%s\n' "$body" | jq -r ' - "Denied: \(.method) \(.path)", - "Layer: \(.layer)/\(.protocol) host=\(.host):\(.port) binary=\(.binary)", - "Agent guidance: \(.next_steps | map(.action) | join(" -> "))" - ' | sed 's/^/ /' - info "${GREEN}Captured structured L7 policy denial.${RESET}" -} + now="$(date +%s)" + if (( now - start >= DEMO_APPROVAL_TIMEOUT_SECS )); then + fail "timed out waiting for the agent to submit a policy proposal" + fi 
-submit_and_approve() { - step "Submitting proposal through policy.local" - local output - output="$(submit_policy_proposal)" - local status - local body - status="$(printf '%s\n' "$output" | http_status)" - body="$(printf '%s\n' "$output" | http_body)" - - [[ "$status" == "202" ]] || fail "expected proposal submit HTTP 202, got HTTP $status" - [[ "$(printf '%s\n' "$body" | jq -r '.accepted_chunks // 0')" != "0" ]] \ - || fail "proposal was not accepted" - printf '%s\n' "$body" | jq -r '"Proposal submitted: \(.accepted_chunks) accepted, \(.rejected_chunks) rejected"' | sed 's/^/ /' - - step "Approving pending draft rule from outside the sandbox" - "$OPENSHELL_BIN" rule get "$DEMO_SANDBOX_NAME" --status pending | sed 's/^/ /' - "$OPENSHELL_BIN" rule approve-all "$DEMO_SANDBOX_NAME" | sed 's/^/ /' + sleep 2 + done } -print_success_summary() { - jq '{ - path: .content.path, - html_url: .content.html_url, - commit: .commit.sha, - message: .commit.message - }' +wait_for_agent() { + step "Waiting for the agent to retry after approval" + if ! wait "$AGENT_PID"; then + fail "agent run failed" + fi + info "${GREEN}Agent run completed.${RESET}" } -retry_until_allowed() { - step "Retrying GitHub contents write after approval" - local output status body attempt - - for attempt in $(seq 1 "$DEMO_RETRY_ATTEMPTS"); do - output="$(attempt_write)" - status="$(printf '%s\n' "$output" | http_status)" - body="$(printf '%s\n' "$output" | http_body)" - - if printf '%s\n' "$body" | jq -e '.error == "policy_denied"' >/dev/null 2>&1; then - info "${DIM}Attempt ${attempt}/${DEMO_RETRY_ATTEMPTS}: policy not loaded yet; retrying...${RESET}" - sleep "$DEMO_RETRY_SLEEP" - continue - fi - - if [[ "$status" == "200" || "$status" == "201" ]]; then - printf '%s\n' "$body" | print_success_summary | sed 's/^/ /' - info "${GREEN}GitHub write succeeded from inside the sandbox.${RESET}" - return - fi - - printf '%s\n' "$body" | jq . 
| sed 's/^/ /' - if [[ "$status" == "404" ]]; then - fail "policy allowed the request, but GitHub returned HTTP 404; check DEMO_GITHUB_OWNER, DEMO_GITHUB_REPO, and token access" - fi - fail "policy allowed the request, but GitHub returned HTTP $status" - done +verify_github_write() { + step "Verifying GitHub write" + local body status branch + branch="$(urlencode "$DEMO_BRANCH")" + body="${TMP_DIR}/github-created-file.json" + status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/contents/${DEMO_FILE_PATH}?ref=${branch}" "$body")" + [[ "$status" == "200" ]] || fail "expected demo file to exist after agent run; GitHub returned HTTP $status" - fail "timed out waiting for approved policy to load into the sandbox" + jq -r '"File: \(.path)", "URL: \(.html_url)"' "$body" | sed 's/^/ /' } show_logs() { step "Policy decision trace" - "$OPENSHELL_BIN" logs "$DEMO_SANDBOX_NAME" --since 5m -n 50 2>&1 \ + "$OPENSHELL_BIN" logs "$DEMO_SANDBOX_NAME" --since 10m -n 80 2>&1 \ | grep -E 'HTTP:PUT|CONFIG:LOADED|ReportPolicyStatus' \ - | tail -n 8 \ + | tail -n 12 \ | sed 's/^/ /' || true } main() { validate_env + render_task check_gateway check_github_access - create_provider - create_sandbox - connect_ssh - run_policy_local_checks - capture_initial_denial - submit_and_approve - retry_until_allowed + create_github_provider + start_agent_sandbox + approve_when_pending + wait_for_agent + verify_github_write show_logs printf "\n${BOLD}${GREEN}✓ Demo complete.${RESET}\n\n" diff --git a/examples/agent-driven-policy-management/dogfood.sh b/examples/agent-driven-policy-management/dogfood.sh deleted file mode 100755 index c9237d4ed..000000000 --- a/examples/agent-driven-policy-management/dogfood.sh +++ /dev/null @@ -1,349 +0,0 @@ -#!/usr/bin/env bash - -# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
-# SPDX-License-Identifier: Apache-2.0 - -set -euo pipefail - -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" -POLICY_TEMPLATE="${SCRIPT_DIR}/policy.template.yaml" -RUNNER_SOURCE="${SCRIPT_DIR}/sandbox-runner.sh" -PROMPT_SOURCE="${SCRIPT_DIR}/prompts/codex-dogfood.md" - -if [[ -z "${OPENSHELL_BIN:-}" ]]; then - if [[ -x "${REPO_ROOT}/target/debug/openshell" ]]; then - OPENSHELL_BIN="${REPO_ROOT}/target/debug/openshell" - else - OPENSHELL_BIN="openshell" - fi -fi - -DEMO_BRANCH="${DEMO_BRANCH:-main}" -DEMO_RUN_ID="${DEMO_RUN_ID:-$(date +%Y%m%d-%H%M%S)}" -DEMO_FILE_DIR="${DEMO_FILE_DIR:-openshell-policy-advisor-dogfood}" -DEMO_FILE_PATH="${DEMO_FILE_PATH:-${DEMO_FILE_DIR}/${DEMO_RUN_ID}.md}" -DEMO_SANDBOX_NAME="${DEMO_SANDBOX_NAME:-policy-agent-dogfood-${DEMO_RUN_ID}}" -DEMO_CODEX_PROVIDER_NAME="${DEMO_CODEX_PROVIDER_NAME:-codex-policy-agent-${DEMO_RUN_ID}}" -DEMO_GITHUB_PROVIDER_NAME="${DEMO_GITHUB_PROVIDER_NAME:-github-policy-agent-${DEMO_RUN_ID}}" -DEMO_APPROVAL_TIMEOUT_SECS="${DEMO_APPROVAL_TIMEOUT_SECS:-180}" -DEMO_KEEP_SANDBOX="${DEMO_KEEP_SANDBOX:-0}" - -TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/openshell-agent-policy-dogfood.XXXXXX")" -PAYLOAD_DIR="${TMP_DIR}/payload" -POLICY_FILE="${TMP_DIR}/policy.yaml" -AGENT_LOG="${TMP_DIR}/codex-dogfood.log" -PENDING_FILE="${TMP_DIR}/pending-rule.txt" -mkdir -p "${PAYLOAD_DIR}/prompts" - -BOLD='\033[1m' -DIM='\033[2m' -CYAN='\033[36m' -GREEN='\033[32m' -RED='\033[31m' -YELLOW='\033[33m' -RESET='\033[0m' - -AGENT_PID="" - -step() { - printf "\n${BOLD}${CYAN}==> %s${RESET}\n\n" "$1" -} - -info() { - printf " %b\n" "$*" -} - -redact_output() { - sed -E \ - -e 's|(download_url": "https://raw\.githubusercontent\.com[^?"]+\?token=)[^"]+|\1|g' \ - -e 's|(Authorization: Bearer )[A-Za-z0-9._-]+|\1|g' -} - -fail() { - printf "\n${RED}error:${RESET} %s\n" "$*" >&2 - if [[ -f "$AGENT_LOG" ]]; then - printf "\n${YELLOW}Agent log tail:${RESET}\n" >&2 - tail -n 120 "$AGENT_LOG" | 
redact_output | sed 's/^/ /' >&2 || true - fi - exit 1 -} - -cleanup() { - local status=$? - - if [[ "$DEMO_KEEP_SANDBOX" != "1" ]]; then - "$OPENSHELL_BIN" sandbox delete "$DEMO_SANDBOX_NAME" >/dev/null 2>&1 || true - else - printf "\n${YELLOW}Keeping sandbox because DEMO_KEEP_SANDBOX=1: %s${RESET}\n" "$DEMO_SANDBOX_NAME" - fi - - "$OPENSHELL_BIN" provider delete "$DEMO_CODEX_PROVIDER_NAME" >/dev/null 2>&1 || true - "$OPENSHELL_BIN" provider delete "$DEMO_GITHUB_PROVIDER_NAME" >/dev/null 2>&1 || true - - if [[ $status -eq 0 ]]; then - rm -rf "$TMP_DIR" - else - printf "\n${YELLOW}Temporary files kept at: %s${RESET}\n" "$TMP_DIR" - fi -} -trap cleanup EXIT - -require_command() { - command -v "$1" >/dev/null 2>&1 || fail "missing required command: $1" -} - -validate_name() { - local label="$1" - local value="$2" - [[ "$value" =~ ^[A-Za-z0-9_.-]+$ ]] || fail "$label may contain only letters, numbers, '.', '_', and '-'" -} - -validate_path() { - local label="$1" - local value="$2" - [[ "$value" =~ ^[A-Za-z0-9._/-]+$ ]] || fail "$label may contain only letters, numbers, '.', '_', '-', and '/'" - [[ "$value" != /* ]] || fail "$label must be relative" - [[ "$value" != *..* ]] || fail "$label must not contain '..'" -} - -resolve_github_token() { - if [[ -z "${DEMO_GITHUB_TOKEN:-}" ]]; then - if [[ -n "${GITHUB_TOKEN:-}" ]]; then - DEMO_GITHUB_TOKEN="$GITHUB_TOKEN" - elif [[ -n "${GH_TOKEN:-}" ]]; then - DEMO_GITHUB_TOKEN="$GH_TOKEN" - elif command -v gh >/dev/null 2>&1; then - DEMO_GITHUB_TOKEN="$(gh auth token 2>/dev/null || true)" - fi - fi - - [[ -n "${DEMO_GITHUB_TOKEN:-}" ]] || fail "set DEMO_GITHUB_TOKEN, GITHUB_TOKEN, GH_TOKEN, or sign in with gh" - export GITHUB_TOKEN="$DEMO_GITHUB_TOKEN" -} - -resolve_codex_auth() { - [[ -f "${HOME}/.codex/auth.json" ]] || fail "missing local Codex sign-in; run: codex login" - - export CODEX_AUTH_ACCESS_TOKEN - export CODEX_AUTH_REFRESH_TOKEN - export CODEX_AUTH_ACCOUNT_ID - CODEX_AUTH_ACCESS_TOKEN="$(jq -r '.tokens.access_token 
// empty' "${HOME}/.codex/auth.json")" - CODEX_AUTH_REFRESH_TOKEN="$(jq -r '.tokens.refresh_token // empty' "${HOME}/.codex/auth.json")" - CODEX_AUTH_ACCOUNT_ID="$(jq -r '.tokens.account_id // empty' "${HOME}/.codex/auth.json")" - - [[ -n "$CODEX_AUTH_ACCESS_TOKEN" ]] || fail "local Codex sign-in is missing an access token; run: codex login" - [[ -n "$CODEX_AUTH_REFRESH_TOKEN" ]] || fail "local Codex sign-in is missing a refresh token; run: codex login" - [[ -n "$CODEX_AUTH_ACCOUNT_ID" ]] || fail "local Codex sign-in is missing an account id; run: codex login" -} - -validate_env() { - require_command curl - require_command jq - require_command "$OPENSHELL_BIN" - - [[ -f "$RUNNER_SOURCE" ]] || fail "missing sandbox runner: $RUNNER_SOURCE" - [[ -f "$PROMPT_SOURCE" ]] || fail "missing Codex prompt: $PROMPT_SOURCE" - [[ -n "${DEMO_GITHUB_OWNER:-}" ]] || fail "set DEMO_GITHUB_OWNER" - [[ -n "${DEMO_GITHUB_REPO:-}" ]] || fail "set DEMO_GITHUB_REPO" - [[ "$DEMO_RUN_ID" =~ ^[a-z0-9-]+$ ]] || fail "DEMO_RUN_ID may contain only lowercase letters, numbers, and '-'" - [[ "$DEMO_APPROVAL_TIMEOUT_SECS" =~ ^[0-9]+$ ]] || fail "DEMO_APPROVAL_TIMEOUT_SECS must be a number" - - validate_name "DEMO_GITHUB_OWNER" "$DEMO_GITHUB_OWNER" - validate_name "DEMO_GITHUB_REPO" "$DEMO_GITHUB_REPO" - validate_path "DEMO_BRANCH" "$DEMO_BRANCH" - validate_path "DEMO_FILE_PATH" "$DEMO_FILE_PATH" - - resolve_github_token - resolve_codex_auth -} - -github_api_status() { - local url="$1" - local body="$2" - curl -sS \ - -o "$body" \ - -w "%{http_code}" \ - -H "Accept: application/vnd.github+json" \ - -H "Authorization: Bearer ${DEMO_GITHUB_TOKEN}" \ - -H "X-GitHub-Api-Version: 2022-11-28" \ - "$url" -} - -urlencode() { - jq -rn --arg v "$1" '$v|@uri' -} - -check_gateway() { - step "Checking active OpenShell gateway" - "$OPENSHELL_BIN" status >/dev/null 2>&1 \ - || fail "active OpenShell gateway is not reachable; start one separately" - "$OPENSHELL_BIN" status | sed 's/^/ /' -} - -check_github_access() 
{ - step "Checking GitHub repository access" - local body status branch - body="${TMP_DIR}/github-repo.json" - status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}" "$body")" - [[ "$status" == "200" ]] \ - || fail "GitHub returned HTTP $status for ${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}; check the repo name and token access" - - if jq -e 'has("permissions") and (.permissions.push == false and .permissions.admin == false and .permissions.maintain == false)' "$body" >/dev/null; then - fail "GitHub token can read ${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO} but does not appear to have write access" - fi - - branch="$(urlencode "$DEMO_BRANCH")" - body="${TMP_DIR}/github-branch.json" - status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/branches/${branch}" "$body")" - [[ "$status" == "200" ]] || fail "GitHub returned HTTP $status for branch ${DEMO_BRANCH}" - - body="${TMP_DIR}/github-demo-file.json" - status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/contents/${DEMO_FILE_PATH}?ref=${branch}" "$body")" - if [[ "$status" == "200" ]]; then - fail "dogfood output file already exists: ${DEMO_FILE_PATH}; choose a new DEMO_RUN_ID or DEMO_FILE_PATH" - fi - [[ "$status" == "404" ]] || fail "GitHub returned HTTP $status while checking output path ${DEMO_FILE_PATH}" - - info "${GREEN}GitHub repo, branch, and output path are safe for this run.${RESET}" -} - -prepare_payload() { - cp "$POLICY_TEMPLATE" "$POLICY_FILE" - cp "$RUNNER_SOURCE" "${PAYLOAD_DIR}/policy-demo-runner.sh" - cp "$PROMPT_SOURCE" "${PAYLOAD_DIR}/prompts/codex-dogfood.md" - chmod +x "${PAYLOAD_DIR}/policy-demo-runner.sh" -} - -create_providers() { - step "Creating temporary Codex and GitHub providers" - "$OPENSHELL_BIN" provider delete "$DEMO_CODEX_PROVIDER_NAME" >/dev/null 2>&1 || true - "$OPENSHELL_BIN" provider delete "$DEMO_GITHUB_PROVIDER_NAME" >/dev/null 2>&1 || true - - 
"$OPENSHELL_BIN" provider create \ - --name "$DEMO_CODEX_PROVIDER_NAME" \ - --type generic \ - --credential CODEX_AUTH_ACCESS_TOKEN \ - --credential CODEX_AUTH_REFRESH_TOKEN \ - --credential CODEX_AUTH_ACCOUNT_ID >/dev/null - - "$OPENSHELL_BIN" provider create \ - --name "$DEMO_GITHUB_PROVIDER_NAME" \ - --type github \ - --credential GITHUB_TOKEN >/dev/null - - info "${GREEN}Created provider records for this run.${RESET}" -} - -start_codex_sandbox() { - step "Starting Codex dogfood run inside the sandbox" - "$OPENSHELL_BIN" sandbox delete "$DEMO_SANDBOX_NAME" >/dev/null 2>&1 || true - ( - "$OPENSHELL_BIN" sandbox create \ - --name "$DEMO_SANDBOX_NAME" \ - --from base \ - --provider "$DEMO_CODEX_PROVIDER_NAME" \ - --provider "$DEMO_GITHUB_PROVIDER_NAME" \ - --policy "$POLICY_FILE" \ - --upload "${PAYLOAD_DIR}:/sandbox" \ - --no-git-ignore \ - --keep \ - --no-auto-providers \ - --no-tty \ - -- bash /sandbox/payload/policy-demo-runner.sh codex-dogfood \ - "$DEMO_GITHUB_OWNER" \ - "$DEMO_GITHUB_REPO" \ - "$DEMO_BRANCH" \ - "$DEMO_FILE_PATH" \ - "$DEMO_RUN_ID" - ) >"$AGENT_LOG" 2>&1 & - AGENT_PID="$!" - info "${DIM}Codex run started; log: ${AGENT_LOG}${RESET}" -} - -approve_when_pending() { - step "Waiting for Codex to submit a policy proposal" - local start now - start="$(date +%s)" - - while true; do - if ! 
kill -0 "$AGENT_PID" >/dev/null 2>&1; then - wait "$AGENT_PID" || true - fail "Codex exited before a pending proposal appeared" - fi - - "$OPENSHELL_BIN" rule get "$DEMO_SANDBOX_NAME" --status pending >"$PENDING_FILE" 2>/dev/null || true - if grep -q "Chunk:" "$PENDING_FILE" && grep -q "pending" "$PENDING_FILE"; then - info "${GREEN}Codex submitted a pending proposal.${RESET}" - sed 's/^/ /' "$PENDING_FILE" - - step "Approving pending draft rule from outside the sandbox" - "$OPENSHELL_BIN" rule approve-all "$DEMO_SANDBOX_NAME" | sed 's/^/ /' - return - fi - - now="$(date +%s)" - if (( now - start >= DEMO_APPROVAL_TIMEOUT_SECS )); then - fail "timed out waiting for Codex to submit a policy proposal" - fi - - sleep 2 - done -} - -wait_for_codex() { - step "Waiting for Codex to retry after approval" - if ! wait "$AGENT_PID"; then - fail "Codex dogfood run failed" - fi - info "${GREEN}Codex dogfood run completed.${RESET}" -} - -show_codex_final_message() { - step "Codex final message" - awk ' - /CODEX_FINAL_MESSAGE_BEGIN/ { printing = 1; next } - /CODEX_FINAL_MESSAGE_END/ { printing = 0 } - printing { print } - ' "$AGENT_LOG" | redact_output | sed 's/^/ /' -} - -verify_github_write() { - step "Verifying GitHub write" - local body status branch - branch="$(urlencode "$DEMO_BRANCH")" - body="${TMP_DIR}/github-created-file.json" - status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/contents/${DEMO_FILE_PATH}?ref=${branch}" "$body")" - [[ "$status" == "200" ]] || fail "expected demo file to exist after Codex run; GitHub returned HTTP $status" - - jq -r '"File: \(.path)", "URL: \(.html_url)"' "$body" | sed 's/^/ /' -} - -show_logs() { - step "Policy decision trace" - "$OPENSHELL_BIN" logs "$DEMO_SANDBOX_NAME" --since 10m -n 80 2>&1 \ - | grep -E 'HTTP:PUT|CONFIG:LOADED|ReportPolicyStatus' \ - | tail -n 12 \ - | sed 's/^/ /' || true -} - -main() { - validate_env - prepare_payload - check_gateway - check_github_access - 
create_providers - start_codex_sandbox - approve_when_pending - wait_for_codex - show_codex_final_message - verify_github_write - show_logs - - printf "\n${BOLD}${GREEN}✓ Codex dogfood complete.${RESET}\n\n" - printf " Sandbox: %s\n" "$DEMO_SANDBOX_NAME" - printf " Repository: https://github.com/%s/%s\n" "$DEMO_GITHUB_OWNER" "$DEMO_GITHUB_REPO" - printf " File: %s\n" "$DEMO_FILE_PATH" -} - -main "$@" diff --git a/examples/agent-driven-policy-management/policy.template.yaml b/examples/agent-driven-policy-management/policy.template.yaml index b52b94ca1..6452cb01c 100644 --- a/examples/agent-driven-policy-management/policy.template.yaml +++ b/examples/agent-driven-policy-management/policy.template.yaml @@ -16,37 +16,6 @@ process: run_as_group: sandbox network_policies: - codex: - name: codex - endpoints: - - { host: api.openai.com, port: 443, protocol: rest, enforcement: enforce, access: full } - - { host: auth.openai.com, port: 443, protocol: rest, enforcement: enforce, access: full } - - { host: chatgpt.com, port: 443, protocol: rest, enforcement: enforce, access: full } - - { host: ab.chatgpt.com, port: 443, protocol: rest, enforcement: enforce, access: full } - binaries: - - { path: /usr/bin/codex } - - { path: /usr/bin/node } - - { path: "/usr/lib/node_modules/@openai/**" } - - codex_plugins: - name: codex-plugins - endpoints: - - host: github.com - port: 443 - protocol: rest - enforcement: enforce - rules: - - allow: - method: GET - path: "/openai/plugins.git/info/refs*" - - allow: - method: POST - path: "/openai/plugins.git/git-upload-pack" - binaries: - - { path: /usr/bin/git } - - { path: /usr/lib/git-core/git-remote-http } - - { path: "/usr/lib/node_modules/@openai/**" } - github_api_readonly: name: github-api-readonly endpoints: From 1557551f8748bd3b0dc207f820b4c3ece5668b0e Mon Sep 17 00:00:00 2001 From: Alexander Watson Date: Mon, 4 May 2026 07:58:45 -0700 Subject: [PATCH 07/23] refactor(examples): colocate policy validation harness --- 
examples/agent-driven-policy-management/README.md | 2 +- .../validation}/policy.template.yaml | 0 .../agent-driven-policy-management/validation}/runner.sh | 0 .../agent-driven-policy-management/validation}/validation.sh | 2 +- 4 files changed, 2 insertions(+), 2 deletions(-) rename {e2e/agent-driven-policy-management => examples/agent-driven-policy-management/validation}/policy.template.yaml (100%) rename {e2e/agent-driven-policy-management => examples/agent-driven-policy-management/validation}/runner.sh (100%) rename {e2e/agent-driven-policy-management => examples/agent-driven-policy-management/validation}/validation.sh (99%) diff --git a/examples/agent-driven-policy-management/README.md b/examples/agent-driven-policy-management/README.md index c01b1864a..265df9172 100644 --- a/examples/agent-driven-policy-management/README.md +++ b/examples/agent-driven-policy-management/README.md @@ -77,7 +77,7 @@ Use a scratch repository or a demo branch if you do not want this file in a production repository. The deterministic non-model validation flow lives in -`e2e/agent-driven-policy-management/validation.sh`. +`examples/agent-driven-policy-management/validation/validation.sh`. 
## Options diff --git a/e2e/agent-driven-policy-management/policy.template.yaml b/examples/agent-driven-policy-management/validation/policy.template.yaml similarity index 100% rename from e2e/agent-driven-policy-management/policy.template.yaml rename to examples/agent-driven-policy-management/validation/policy.template.yaml diff --git a/e2e/agent-driven-policy-management/runner.sh b/examples/agent-driven-policy-management/validation/runner.sh similarity index 100% rename from e2e/agent-driven-policy-management/runner.sh rename to examples/agent-driven-policy-management/validation/runner.sh diff --git a/e2e/agent-driven-policy-management/validation.sh b/examples/agent-driven-policy-management/validation/validation.sh similarity index 99% rename from e2e/agent-driven-policy-management/validation.sh rename to examples/agent-driven-policy-management/validation/validation.sh index 526d50e86..2b8187444 100755 --- a/e2e/agent-driven-policy-management/validation.sh +++ b/examples/agent-driven-policy-management/validation/validation.sh @@ -6,7 +6,7 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." 
&& pwd)" POLICY_TEMPLATE="${SCRIPT_DIR}/policy.template.yaml" RUNNER_SOURCE="${SCRIPT_DIR}/runner.sh" From fce1500c84a40d9a782c50e2e5a75fc0dfc7875b Mon Sep 17 00:00:00 2001 From: Alexander Watson Date: Mon, 4 May 2026 08:04:23 -0700 Subject: [PATCH 08/23] docs(examples): add policy demo env sample --- .gitignore | 1 + .../.env.sample | 29 +++++++++++++++++++ .../agent-driven-policy-management/README.md | 12 ++++---- 3 files changed, 36 insertions(+), 6 deletions(-) create mode 100644 examples/agent-driven-policy-management/.env.sample diff --git a/.gitignore b/.gitignore index 1b37bfd49..634de4ba2 100644 --- a/.gitignore +++ b/.gitignore @@ -159,6 +159,7 @@ Desktop.ini .env .env.* !.env.example +!.env.sample # Logs *.log diff --git a/examples/agent-driven-policy-management/.env.sample b/examples/agent-driven-policy-management/.env.sample new file mode 100644 index 000000000..c04da617e --- /dev/null +++ b/examples/agent-driven-policy-management/.env.sample @@ -0,0 +1,29 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Copy this file to .env.agent-policy-demo, fill in the values, then source it +# from the repository root before running demo.sh. + +DEMO_GITHUB_OWNER=zredlined +DEMO_GITHUB_REPO=openshell-policy-demo + +# Optional when GITHUB_TOKEN, GH_TOKEN, or `gh auth token` is available. +# DEMO_GITHUB_TOKEN= + +# Optional defaults. +# DEMO_BRANCH=main +# DEMO_FILE_DIR=openshell-policy-advisor-demo +# DEMO_KEEP_SANDBOX=0 + +# Point this at a policy that allows your chosen agent to reach its model or +# provider endpoints. The checked-in policy.template.yaml only allows the +# GitHub API read-only path needed to trigger the policy proposal loop. +# DEMO_POLICY_FILE=/absolute/path/to/policy-that-allows-your-agent.yaml + +# Provider names already registered in OpenShell for your chosen agent. +# Use spaces or commas for multiple providers. 
+# DEMO_AGENT_PROVIDERS="my-agent-provider" + +# The sandbox command that runs your agent and has it read the rendered task. +# Keep this agent-specific setup outside demo.sh so the demo remains portable. +# DEMO_AGENT_COMMAND='agent exec "$(cat /sandbox/payload/agent-task.md)"' diff --git a/examples/agent-driven-policy-management/README.md b/examples/agent-driven-policy-management/README.md index 265df9172..9fbc743cc 100644 --- a/examples/agent-driven-policy-management/README.md +++ b/examples/agent-driven-policy-management/README.md @@ -53,12 +53,12 @@ policy-management loop. Use `DEMO_POLICY_FILE` to point at a policy that also allows your chosen agent to reach its model/provider endpoints. ```bash -export DEMO_GITHUB_OWNER= -export DEMO_GITHUB_REPO= -export DEMO_GITHUB_TOKEN= -export DEMO_POLICY_FILE=/path/to/policy-that-allows-your-agent.yaml -export DEMO_AGENT_PROVIDERS="my-agent-provider" -export DEMO_AGENT_COMMAND='' +cp examples/agent-driven-policy-management/.env.sample .env.agent-policy-demo +$EDITOR .env.agent-policy-demo + +set -a +source .env.agent-policy-demo +set +a bash examples/agent-driven-policy-management/demo.sh ``` From 5a0b5285e728cda8bc8886ed5dc2b837f0597f2c Mon Sep 17 00:00:00 2001 From: Alexander Watson Date: Mon, 4 May 2026 08:10:50 -0700 Subject: [PATCH 09/23] docs(examples): use placeholder env example --- .gitignore | 1 - .../{.env.sample => .env.example} | 10 +++++----- examples/agent-driven-policy-management/README.md | 6 +++--- 3 files changed, 8 insertions(+), 9 deletions(-) rename examples/agent-driven-policy-management/{.env.sample => .env.example} (81%) diff --git a/.gitignore b/.gitignore index 634de4ba2..1b37bfd49 100644 --- a/.gitignore +++ b/.gitignore @@ -159,7 +159,6 @@ Desktop.ini .env .env.* !.env.example -!.env.sample # Logs *.log diff --git a/examples/agent-driven-policy-management/.env.sample b/examples/agent-driven-policy-management/.env.example similarity index 81% rename from 
examples/agent-driven-policy-management/.env.sample rename to examples/agent-driven-policy-management/.env.example index c04da617e..6b491d4d9 100644 --- a/examples/agent-driven-policy-management/.env.sample +++ b/examples/agent-driven-policy-management/.env.example @@ -1,14 +1,14 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -# Copy this file to .env.agent-policy-demo, fill in the values, then source it -# from the repository root before running demo.sh. +# Copy this file to .env from the repository root, fill in the values, then +# source it before running demo.sh. -DEMO_GITHUB_OWNER=zredlined -DEMO_GITHUB_REPO=openshell-policy-demo +DEMO_GITHUB_OWNER= +DEMO_GITHUB_REPO= # Optional when GITHUB_TOKEN, GH_TOKEN, or `gh auth token` is available. -# DEMO_GITHUB_TOKEN= +# DEMO_GITHUB_TOKEN= # Optional defaults. # DEMO_BRANCH=main diff --git a/examples/agent-driven-policy-management/README.md b/examples/agent-driven-policy-management/README.md index 9fbc743cc..e9752fd2a 100644 --- a/examples/agent-driven-policy-management/README.md +++ b/examples/agent-driven-policy-management/README.md @@ -53,11 +53,11 @@ policy-management loop. Use `DEMO_POLICY_FILE` to point at a policy that also allows your chosen agent to reach its model/provider endpoints. 
```bash -cp examples/agent-driven-policy-management/.env.sample .env.agent-policy-demo -$EDITOR .env.agent-policy-demo +cp examples/agent-driven-policy-management/.env.example .env +$EDITOR .env set -a -source .env.agent-policy-demo +source .env set +a bash examples/agent-driven-policy-management/demo.sh From 688906cce8789d06783a9f25764bb46cc926ffbc Mon Sep 17 00:00:00 2001 From: Alexander Watson Date: Mon, 4 May 2026 11:19:05 -0700 Subject: [PATCH 10/23] feat(sandbox): wire policy.local denials to OCSF JSONL log Wires GET /v1/denials?last=N on the sandbox-local policy advisor API to read recent OCSF JSONL events from /var/log/openshell-ocsf.YYYY-MM-DD.log, filter to network/L7 denials (action_id=2, class_uid 4001/4002), and return a compact summary newest-first. Default limit is 10, capped at 100. Ran inside spawn_blocking so file I/O does not block the policy.local handler. Other cleanup: - POST /v1/proposals now uses the typed grpc_client wrapper instead of raw_client, so accepted/rejected counts surface to the agent uniformly. Wrapper return type extended to the response struct. - Drop the 'add_rule' snake_case alias in the proposal JSON; canonical form is camelCase 'addRule', matching the PolicyMergeOperation convention used elsewhere. - skills/policy_advisor.md updated to match: documents the now-real /v1/denials?last=10 endpoint and uses 'addRule' consistently. - skills.rs test asserts on the canonical 'addRule' phrase rather than the removed 'PolicyMergeOperation' substring. 
--- crates/openshell-sandbox/src/grpc_client.rs | 17 +- crates/openshell-sandbox/src/policy_local.rs | 316 ++++++++++++++++-- crates/openshell-sandbox/src/skills.rs | 2 +- .../src/skills/policy_advisor.md | 90 ++--- 4 files changed, 331 insertions(+), 94 deletions(-) diff --git a/crates/openshell-sandbox/src/grpc_client.rs b/crates/openshell-sandbox/src/grpc_client.rs index cc35f67b5..1cb15f929 100644 --- a/crates/openshell-sandbox/src/grpc_client.rs +++ b/crates/openshell-sandbox/src/grpc_client.rs @@ -11,8 +11,8 @@ use miette::{IntoDiagnostic, Result, WrapErr}; use openshell_core::proto::{ DenialSummary, GetInferenceBundleRequest, GetInferenceBundleResponse, GetSandboxConfigRequest, GetSandboxProviderEnvironmentRequest, PolicySource, PolicyStatus, ReportPolicyStatusRequest, - SandboxPolicy as ProtoSandboxPolicy, SubmitPolicyAnalysisRequest, UpdateConfigRequest, - inference_client::InferenceClient, open_shell_client::OpenShellClient, + SandboxPolicy as ProtoSandboxPolicy, SubmitPolicyAnalysisRequest, SubmitPolicyAnalysisResponse, + UpdateConfigRequest, inference_client::InferenceClient, open_shell_client::OpenShellClient, }; use tonic::service::interceptor::InterceptedService; use tonic::transport::{Certificate, Channel, ClientTlsConfig, Endpoint, Identity}; @@ -329,15 +329,20 @@ impl CachedOpenShellClient { }) } - /// Submit denial summaries for policy analysis. + /// Submit denial summaries and/or agent-authored proposals for policy analysis. + /// + /// Returns the gateway response so callers can surface accepted/rejected + /// counts and rejection reasons (e.g., the `policy.local` API forwards + /// these to the in-sandbox agent). 
pub async fn submit_policy_analysis( &self, sandbox_name: &str, summaries: Vec, proposed_chunks: Vec, analysis_mode: &str, - ) -> Result<()> { - self.client + ) -> Result { + let response = self + .client .clone() .submit_policy_analysis(SubmitPolicyAnalysisRequest { name: sandbox_name.to_string(), @@ -348,7 +353,7 @@ impl CachedOpenShellClient { .await .into_diagnostic()?; - Ok(()) + Ok(response.into_inner()) } /// Report policy load status back to the server. diff --git a/crates/openshell-sandbox/src/policy_local.rs b/crates/openshell-sandbox/src/policy_local.rs index f44843704..0efca2721 100644 --- a/crates/openshell-sandbox/src/policy_local.rs +++ b/crates/openshell-sandbox/src/policy_local.rs @@ -6,10 +6,11 @@ use miette::{IntoDiagnostic, Result}; use openshell_core::proto::{ L7Allow, L7DenyRule, L7Rule, NetworkBinary, NetworkEndpoint, NetworkPolicyRule, PolicyChunk, - SandboxPolicy as ProtoSandboxPolicy, SubmitPolicyAnalysisRequest, + SandboxPolicy as ProtoSandboxPolicy, }; use serde::Deserialize; use std::collections::HashMap; +use std::path::{Path, PathBuf}; use std::sync::Arc; use tokio::io::{AsyncRead, AsyncReadExt, AsyncWrite, AsyncWriteExt}; use tokio::sync::RwLock; @@ -17,12 +18,20 @@ use tokio::sync::RwLock; pub const POLICY_LOCAL_HOST: &str = "policy.local"; const MAX_POLICY_LOCAL_BODY_BYTES: usize = 64 * 1024; +const DEFAULT_DENIALS_LIMIT: usize = 10; +const MAX_DENIALS_LIMIT: usize = 100; +/// OCSF rolling appender keeps three files (daily rotation); read the most +/// recent two so a request just past midnight still has yesterday's denials. 
+const DENIAL_LOG_FILES_TO_SCAN: usize = 2; +const OCSF_LOG_DIR: &str = "/var/log"; +const OCSF_LOG_PREFIX: &str = "openshell-ocsf"; #[derive(Debug)] pub struct PolicyLocalContext { current_policy: Arc>>, gateway_endpoint: Option, sandbox_name: Option, + ocsf_log_dir: PathBuf, } impl PolicyLocalContext { @@ -30,11 +39,26 @@ impl PolicyLocalContext { current_policy: Option, gateway_endpoint: Option, sandbox_name: Option, + ) -> Self { + Self::with_log_dir( + current_policy, + gateway_endpoint, + sandbox_name, + PathBuf::from(OCSF_LOG_DIR), + ) + } + + fn with_log_dir( + current_policy: Option, + gateway_endpoint: Option, + sandbox_name: Option, + ocsf_log_dir: PathBuf, ) -> Self { Self { current_policy: Arc::new(RwLock::new(current_policy)), gateway_endpoint, sandbox_name, + ocsf_log_dir, } } @@ -64,16 +88,10 @@ async fn route_request( path: &str, body: &[u8], ) -> (u16, serde_json::Value) { - let route = path.split_once('?').map_or(path, |(route, _)| route); + let (route, query) = path.split_once('?').map_or((path, ""), |(r, q)| (r, q)); match (method, route) { ("GET", "/v1/policy/current") => current_policy_response(ctx).await, - ("GET", "/v1/denials") => ( - 200, - serde_json::json!({ - "denials": [], - "note": "recent-denial listing is not wired in this MVP slice; use the structured 403 body and /var/log/openshell*.log for now" - }), - ), + ("GET", "/v1/denials") => recent_denials_response(ctx, query).await, ("POST", "/v1/proposals") => submit_proposal(ctx, body).await, _ => ( 404, @@ -114,6 +132,159 @@ async fn current_policy_response(ctx: &PolicyLocalContext) -> (u16, serde_json:: } } +async fn recent_denials_response( + ctx: &PolicyLocalContext, + query: &str, +) -> (u16, serde_json::Value) { + let limit = parse_last_query(query).unwrap_or(DEFAULT_DENIALS_LIMIT); + let log_dir = ctx.ocsf_log_dir.clone(); + + let denials = tokio::task::spawn_blocking(move || read_recent_denials(&log_dir, limit)) + .await + .unwrap_or_else(|_| Vec::new()); + + (200, 
serde_json::json!({ "denials": denials })) +} + +fn parse_last_query(query: &str) -> Option { + if query.is_empty() { + return None; + } + for pair in query.split('&') { + let Some((key, value)) = pair.split_once('=') else { + continue; + }; + if key == "last" { + return value + .parse::() + .ok() + .map(|n| n.clamp(1, MAX_DENIALS_LIMIT)); + } + } + None +} + +/// Walk the OCSF JSONL log files (most-recent first) and return up to `limit` +/// summarized denial events in newest-first order. +/// +/// Reads files synchronously and is intended to run inside `spawn_blocking`. +fn read_recent_denials(log_dir: &Path, limit: usize) -> Vec { + let Ok(files) = collect_ocsf_log_files(log_dir, DENIAL_LOG_FILES_TO_SCAN) else { + return Vec::new(); + }; + + let mut summaries: Vec = Vec::with_capacity(limit); + for path in files { + let Ok(contents) = std::fs::read_to_string(&path) else { + continue; + }; + // Walk lines newest-first. Within a single file, last line written is + // the freshest event. + for line in contents.lines().rev() { + if line.is_empty() { + continue; + } + let Ok(value) = serde_json::from_str::(line) else { + continue; + }; + let Some(summary) = denial_summary_from_event(&value) else { + continue; + }; + summaries.push(summary); + if summaries.len() >= limit { + return summaries; + } + } + } + summaries +} + +fn collect_ocsf_log_files(log_dir: &Path, max_files: usize) -> std::io::Result> { + let mut entries: Vec<(std::time::SystemTime, PathBuf)> = std::fs::read_dir(log_dir)? 
+ .filter_map(std::result::Result::ok) + .filter_map(|entry| { + let path = entry.path(); + let name = entry.file_name(); + let name = name.to_string_lossy(); + if !name.starts_with(OCSF_LOG_PREFIX) { + return None; + } + let modified = entry.metadata().and_then(|m| m.modified()).ok()?; + Some((modified, path)) + }) + .collect(); + + entries.sort_by_key(|entry| std::cmp::Reverse(entry.0)); + Ok(entries.into_iter().take(max_files).map(|(_, p)| p).collect()) +} + +/// Convert an OCSF event into a compact denial summary, or `None` if the event +/// is not a network/HTTP denial we want to surface to the agent. +fn denial_summary_from_event(value: &serde_json::Value) -> Option { + // OCSF action_id 2 = Denied. Filter aggressively to avoid leaking unrelated + // events (allowed connections, app lifecycle, etc.) into the agent's view. + if value.get("action_id").and_then(serde_json::Value::as_u64) != Some(2) { + return None; + } + + let class_uid = value.get("class_uid").and_then(serde_json::Value::as_u64)?; + let layer = match class_uid { + 4001 => "l4", + 4002 => "l7", + _ => return None, + }; + + let mut summary = serde_json::Map::new(); + summary.insert("layer".to_string(), serde_json::json!(layer)); + + if let Some(time) = value.get("time").and_then(serde_json::Value::as_i64) { + summary.insert("time_ms".to_string(), serde_json::json!(time)); + } + if let Some(message) = value.get("message").and_then(serde_json::Value::as_str) { + summary.insert("message".to_string(), serde_json::json!(message)); + } + if let Some(dst) = value.get("dst_endpoint") { + if let Some(host) = dst + .get("hostname") + .and_then(serde_json::Value::as_str) + .or_else(|| dst.get("ip").and_then(serde_json::Value::as_str)) + { + summary.insert("host".to_string(), serde_json::json!(host)); + } + if let Some(port) = dst.get("port").and_then(serde_json::Value::as_u64) { + summary.insert("port".to_string(), serde_json::json!(port)); + } + } + if let Some(req) = value.get("http_request") { + if let 
Some(method) = req.get("http_method").and_then(serde_json::Value::as_str) { + summary.insert("method".to_string(), serde_json::json!(method)); + } + if let Some(url) = req.get("url") + && let Some(path) = url.get("path").and_then(serde_json::Value::as_str) + { + summary.insert("path".to_string(), serde_json::json!(path)); + } + } + if let Some(binary) = value + .get("actor") + .and_then(|a| a.get("process")) + .and_then(|p| p.get("file")) + .and_then(|f| f.get("path")) + .and_then(serde_json::Value::as_str) + { + summary.insert("binary".to_string(), serde_json::json!(binary)); + } + if let Some(rule) = value + .get("firewall_rule") + .and_then(|r| r.get("name")) + .and_then(serde_json::Value::as_str) + { + summary.insert("policy".to_string(), serde_json::json!(rule)); + } + + Some(serde_json::Value::Object(summary)) +} + async fn submit_proposal(ctx: &PolicyLocalContext, body: &[u8]) -> (u16, serde_json::Value) { let Some(endpoint) = ctx.gateway_endpoint.as_deref() else { return ( @@ -157,17 +328,11 @@ async fn submit_proposal(ctx: &PolicyLocalContext, body: &[u8]) -> (u16, serde_j } }; - let mut raw_client = client.raw_client(); - let response = match raw_client - .submit_policy_analysis(SubmitPolicyAnalysisRequest { - summaries: vec![], - proposed_chunks: chunks, - analysis_mode: "agent".to_string(), - name: sandbox_name.to_string(), - }) + let response = match client + .submit_policy_analysis(sandbox_name, vec![], chunks, "agent_authored") .await { - Ok(response) => response.into_inner(), + Ok(response) => response, Err(error) => { return ( 502, @@ -186,7 +351,6 @@ async fn submit_proposal(ctx: &PolicyLocalContext, body: &[u8]) -> (u16, serde_j "accepted_chunks": response.accepted_chunks, "rejected_chunks": response.rejected_chunks, "rejection_reasons": response.rejection_reasons, - "note": "the gateway assigns proposal ids; review pending proposals in the developer inbox" }), ) } @@ -199,13 +363,9 @@ fn proposal_chunks_from_body(body: &[u8]) -> 
std::result::Result, rule: NetworkPolicyRuleJson, } @@ -625,6 +785,110 @@ mod tests { assert!(!error.contains("secret")); } + #[test] + fn parse_last_query_clamps_to_max() { + assert_eq!(parse_last_query("last=5"), Some(5)); + assert_eq!(parse_last_query("foo=bar&last=20"), Some(20)); + assert_eq!(parse_last_query("last=999"), Some(MAX_DENIALS_LIMIT)); + assert_eq!(parse_last_query("last=0"), Some(1)); + assert_eq!(parse_last_query(""), None); + assert_eq!(parse_last_query("other=1"), None); + } + + #[test] + fn denial_summary_filters_to_l4_l7_denied_only() { + let allowed = serde_json::json!({ + "class_uid": 4001, + "action_id": 1, + "dst_endpoint": {"hostname": "api.github.com", "port": 443} + }); + assert!(denial_summary_from_event(&allowed).is_none()); + + let unrelated = serde_json::json!({ + "class_uid": 6002, + "action_id": 2, + "message": "supervisor lifecycle" + }); + assert!(denial_summary_from_event(&unrelated).is_none()); + + let l4_denied = serde_json::json!({ + "class_uid": 4001, + "action_id": 2, + "time": 1_742_054_400_000_i64, + "message": "CONNECT denied api.github.com:443", + "dst_endpoint": {"hostname": "api.github.com", "port": 443}, + "actor": {"process": {"file": {"path": "/usr/bin/curl"}}}, + "firewall_rule": {"name": "github-readonly"} + }); + let summary = denial_summary_from_event(&l4_denied).unwrap(); + assert_eq!(summary["layer"], "l4"); + assert_eq!(summary["host"], "api.github.com"); + assert_eq!(summary["port"], 443); + assert_eq!(summary["binary"], "/usr/bin/curl"); + assert_eq!(summary["policy"], "github-readonly"); + assert_eq!(summary["time_ms"], 1_742_054_400_000_i64); + + let l7_denied = serde_json::json!({ + "class_uid": 4002, + "action_id": 2, + "message": "FORWARD denied PUT /repos/foo/bar/contents/x", + "dst_endpoint": {"hostname": "api.github.com", "port": 443}, + "http_request": { + "http_method": "PUT", + "url": {"path": "/repos/foo/bar/contents/x"} + } + }); + let summary = 
denial_summary_from_event(&l7_denied).unwrap(); + assert_eq!(summary["layer"], "l7"); + assert_eq!(summary["method"], "PUT"); + assert_eq!(summary["path"], "/repos/foo/bar/contents/x"); + } + + #[tokio::test] + async fn recent_denials_returns_newest_first_from_jsonl_files() { + let dir = tempfile::tempdir().unwrap(); + let log_path = dir.path().join("openshell-ocsf.2026-05-04.log"); + let lines = [ + serde_json::json!({ + "class_uid": 4001, + "action_id": 2, + "time": 1, + "message": "first", + "dst_endpoint": {"hostname": "first.example", "port": 443} + }), + // An allowed event mixed in — must be filtered out. + serde_json::json!({ + "class_uid": 4001, + "action_id": 1, + "time": 2, + "dst_endpoint": {"hostname": "ok.example", "port": 443} + }), + serde_json::json!({ + "class_uid": 4002, + "action_id": 2, + "time": 3, + "message": "second", + "dst_endpoint": {"hostname": "second.example", "port": 443}, + "http_request": {"http_method": "PUT", "url": {"path": "/x"}} + }), + ]; + let body: String = lines + .iter() + .map(|v| format!("{v}\n")) + .collect::<Vec<_>>() + .concat(); + std::fs::write(&log_path, body).unwrap(); + + let ctx = PolicyLocalContext::with_log_dir(None, None, None, dir.path().to_path_buf()); + let (status, payload) = recent_denials_response(&ctx, "last=10").await; + assert_eq!(status, 200); + let denials = payload["denials"].as_array().unwrap(); + assert_eq!(denials.len(), 2); + // Newest first.
+ assert_eq!(denials[0]["message"], "second"); + assert_eq!(denials[1]["message"], "first"); + } + #[tokio::test] async fn current_policy_route_returns_yaml_envelope() { let ctx = PolicyLocalContext::new( diff --git a/crates/openshell-sandbox/src/skills.rs b/crates/openshell-sandbox/src/skills.rs index f7a26ea71..91654699f 100644 --- a/crates/openshell-sandbox/src/skills.rs +++ b/crates/openshell-sandbox/src/skills.rs @@ -58,6 +58,6 @@ mod tests { let content = std::fs::read_to_string(expected).unwrap(); assert!(content.contains("# OpenShell Policy Advisor")); assert!(content.contains("policy.local")); - assert!(content.contains("PolicyMergeOperation")); + assert!(content.contains("addRule")); } } diff --git a/crates/openshell-sandbox/src/skills/policy_advisor.md b/crates/openshell-sandbox/src/skills/policy_advisor.md index 741759e82..c6552de2d 100644 --- a/crates/openshell-sandbox/src/skills/policy_advisor.md +++ b/crates/openshell-sandbox/src/skills/policy_advisor.md @@ -11,37 +11,32 @@ the proposal; do not try to bypass policy. ## Local API -Use the sandbox-local policy API: +The sandbox-local policy API is reachable at `http://policy.local`: -- `GET http://policy.local/v1/policy/current` -- `GET http://policy.local/v1/denials?last=10` -- `POST http://policy.local/v1/proposals` +- `GET /v1/policy/current` — current effective policy as YAML. +- `GET /v1/denials?last=10` — most recent network/L7 denials seen by this + sandbox (newest first). +- `POST /v1/proposals` — submit a proposal for developer approval. -The MVP proposal endpoint accepts a JSON object containing an `intent_summary` -and one or more `PolicyMergeOperation` objects. Start with a full `addRule` -operation because the existing developer inbox reviews complete draft rules. - -You can also inspect your own logs: - -- `/var/log/openshell.YYYY-MM-DD.log` is the default shorthand log. -- `/var/log/openshell-ocsf.YYYY-MM-DD.log` is present when OCSF JSONL logging is enabled. 
+The proposal body takes an `intent_summary` and one or more `addRule` +operations. Each `addRule` carries a complete narrow `NetworkPolicyRule`. ## Workflow 1. Read the denial response body. Use `layer`, `method`, `path`, `host`, `port`, `binary`, `rule_missing`, and `detail` as evidence. -2. Fetch the current policy from `policy.local`. -3. Fetch recent denials if the response body is incomplete. +2. Fetch the current policy from `/v1/policy/current`. +3. Fetch recent denials from `/v1/denials` if the response body is incomplete. 4. Prefer L7 REST rules for REST APIs. Use L4 only for non-REST protocols or when the client tunnels opaque traffic that OpenShell cannot inspect. 5. Draft the narrowest rule: exact host, exact port, exact binary when known, - exact method, and the smallest safe path glob. -6. Submit the proposal, tell the developer what you proposed, and wait for - approval before retrying the denied action. + exact method, and the smallest safe path. +6. Submit the proposal, tell the developer what you proposed, and retry the + denied action only after approval. -## Proposal Shapes +## Proposal shape -Submit a complete narrow REST-inspected rule: +A complete narrow REST-inspected rule looks like this: ```json { @@ -80,50 +75,23 @@ Submit a complete narrow REST-inspected rule: } ``` -For GitHub repository creation, keep the path exact: - -```json -{ - "intent_summary": "Allow gh to create a repository for the authenticated user.", - "operations": [ - { - "addRule": { - "ruleName": "github_api_repo_create", - "rule": { - "name": "github_api_repo_create", - "endpoints": [ - { - "host": "api.github.com", - "port": 443, - "protocol": "rest", - "enforcement": "enforce", - "rules": [ - { - "allow": { - "method": "POST", - "path": "/user/repos" - } - } - ] - } - ], - "binaries": [ - { - "path": "/usr/bin/gh" - } - ] - } - } - } - ] -} -``` - ## Norms - Do not propose wildcard hosts such as `**` or `*.com`. 
- Do not propose `access: full` to fix a single denied REST request. -- Do not include query strings, tokens, credentials, or secret values. +- Do not include query strings, tokens, credentials, or secret values in + paths. - Explain uncertainty in `intent_summary` instead of widening the rule. -- If pushing with `git` fails, that may be a separate L4 or protocol-specific - path from GitHub REST API access; propose it separately. +- If pushing with `git` fails, that is a separate L4 or protocol-specific + path from GitHub REST API access. Propose it separately. + +## Local logs (read-only) + +Two local files complement the API and are useful when debugging policy +behavior: + +- `/var/log/openshell.YYYY-MM-DD.log` — shorthand log of sandbox activity. +- `/var/log/openshell-ocsf.YYYY-MM-DD.log` — OCSF JSONL events when enabled. + +The `/v1/denials` endpoint reads these structured events for you; the files +are listed here only as a fallback for inspection. From 2dbfec98151129c4f1e67974ad62a98eb7ebe90b Mon Sep 17 00:00:00 2001 From: Alexander Watson Date: Mon, 4 May 2026 11:19:16 -0700 Subject: [PATCH 11/23] feat(cli): show L7 protocol/method/path in rule get output format_endpoint() previously rendered only host:port, dropping protocol, access, and the L7 rules array. That made openshell rule get text output unable to distinguish a broad L4 grant from a method/path-scoped L7 REST rule -- exactly the distinction a developer needs at approval time. New rendering tags each endpoint with its enforcement layer and surfaces allow/deny rules: bare L4: api.example:443 [L4] L7 read-only: api.example:443 [L7 rest, access=read-only] L7 method/path: api.example:443 [L7 rest, allow PUT /v1/foo/bar] Pure display change: no proto, gateway, or behavior changes. Unit test covers all three rendering cases with synthetic fixtures. 
--- crates/openshell-cli/src/run.rs | 107 +++++++++++++++++++++++++++++--- 1 file changed, 99 insertions(+), 8 deletions(-) diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index fc30b03d6..45702f2e6 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -5422,17 +5422,61 @@ pub async fn sandbox_draft_history(server: &str, name: &str, tls: &TlsOptions) - fn format_endpoints(rule: &openshell_core::proto::NetworkPolicyRule) -> String { rule.endpoints .iter() - .map(|e| { - if e.port > 0 { - format!("{}:{}", e.host, e.port) - } else { - e.host.clone() - } - }) + .map(format_endpoint) .collect::<Vec<_>>() .join(", ") } +/// Render an endpoint as `host:port [layer, …allows…, …denies…]` so a reader +/// can tell L4-only access apart from a method/path-scoped L7 grant. The L7 +/// fields (`protocol: rest`, `rules`, `access`) materially change what gets +/// allowed; surfacing them in the default text output is what makes +/// `openshell rule get` useful for approval review.
+fn format_endpoint(endpoint: &openshell_core::proto::NetworkEndpoint) -> String { + let host_port = if endpoint.port > 0 { + format!("{}:{}", endpoint.host, endpoint.port) + } else { + endpoint.host.clone() + }; + + let mut tags: Vec<String> = Vec::new(); + let layer_tag = if endpoint.protocol.eq_ignore_ascii_case("rest") { + "L7 rest" + } else if endpoint.protocol.is_empty() { + "L4" + } else { + endpoint.protocol.as_str() + }; + tags.push(layer_tag.to_string()); + + if !endpoint.access.is_empty() { + tags.push(format!("access={}", endpoint.access)); + } + + for r in &endpoint.rules { + if let Some(allow) = &r.allow { + let method = non_empty_or(&allow.method, "*"); + let path = non_empty_or(&allow.path, "*"); + tags.push(format!("allow {method} {path}")); + } + } + for r in &endpoint.deny_rules { + let method = non_empty_or(&r.method, "*"); + let path = non_empty_or(&r.path, "*"); + tags.push(format!("deny {method} {path}")); + } + + format!("{host_port} [{}]", tags.join(", ")) +} + +fn non_empty_or<'a>(value: &'a str, fallback: &'a str) -> &'a str { + if value.is_empty() { + fallback + } else { + value + } +} + /// Format a millisecond timestamp into a readable string.
fn format_timestamp_ms(ms: i64) -> String { if ms <= 0 { @@ -5452,7 +5496,8 @@ fn format_timestamp_ms(ms: i64) -> String { #[cfg(test)] mod tests { use super::{ - TlsOptions, dockerfile_sources_supported_for_gateway, format_gateway_select_header, + TlsOptions, dockerfile_sources_supported_for_gateway, format_endpoint, + format_gateway_select_header, format_gateway_select_items, format_provider_attachment_table, gateway_add, gateway_auth_label, gateway_env_override_warning, gateway_select_with, gateway_type_label, git_sync_files, http_health_check, image_requests_gpu, import_local_package_mtls_bundle, @@ -6254,4 +6299,50 @@ mod tests { server.join().expect("server thread"); assert_eq!(status, Some(StatusCode::OK)); } + #[test] + fn format_endpoint_distinguishes_l4_from_l7_rest() { + use openshell_core::proto::{L7Allow, L7DenyRule, L7Rule, NetworkEndpoint}; + + let l4 = NetworkEndpoint { + host: "host.example.test".to_string(), + port: 443, + ..Default::default() + }; + assert_eq!(format_endpoint(&l4), "host.example.test:443 [L4]"); + + let l7_readonly = NetworkEndpoint { + host: "host.example.test".to_string(), + port: 443, + protocol: "rest".to_string(), + access: "read-only".to_string(), + ..Default::default() + }; + assert_eq!( + format_endpoint(&l7_readonly), + "host.example.test:443 [L7 rest, access=read-only]" + ); + + let l7_scoped = NetworkEndpoint { + host: "host.example.test".to_string(), + port: 443, + protocol: "rest".to_string(), + rules: vec![L7Rule { + allow: Some(L7Allow { + method: "PUT".to_string(), + path: "/v1/example/resource".to_string(), + ..Default::default() + }), + }], + deny_rules: vec![L7DenyRule { + method: "DELETE".to_string(), + path: "/v1/example/resource".to_string(), + ..Default::default() + }], + ..Default::default() + }; + assert_eq!( + format_endpoint(&l7_scoped), + "host.example.test:443 [L7 rest, allow PUT /v1/example/resource, deny DELETE /v1/example/resource]" + ); + } } From 85fc4db7099c0c39d6ead28ef2386be33f0b68a2 Mon Sep 
17 00:00:00 2001 From: Alexander Watson Date: Mon, 4 May 2026 11:19:41 -0700 Subject: [PATCH 12/23] refactor(examples): rewrite policy demo as Codex-default loop Re-shape examples/agent-driven-policy-management/ to be a single, clean end-to-end demonstration of the agent-driven policy loop. A Codex agent inside an OpenShell sandbox attempts a GitHub Contents API write, hits a structured 403 from the L7 proxy, reads the policy_advisor skill, drafts a narrow addRule proposal via http://policy.local/v1/proposals, the host auto-approves, the sandbox hot-reloads policy, and the agent's retry succeeds. Whole loop runs in roughly two minutes. Demo cleanup: - Drop .env file ceremony. Defaults resolve from gh: owner via 'gh api user --jq .login', repo defaults to 'openshell-policy-demo', token from gh auth token / GITHUB_TOKEN / GH_TOKEN. With gh auth login and codex login already done, 'bash demo.sh' Just Works. - Codex-specific. Bootstraps ~/.codex/auth.json from credentials injected by the OpenShell provider, runs codex exec --sandbox danger-full-access (OpenShell is the actual security boundary; bwrap nesting cannot create user namespaces inside the sandbox container). - Tighter narrative output: a single 'Preflight' step, a run summary banner before launch, an inline narration of what's happening inside the sandbox while we poll for the proposal (including the literal structured 403 body the agent acts on), and an OCSF trace at the end filtered to the three events that tell the story (DENY, RELOAD, ALLOW). - Replace Python heredoc templating with sed; uploads use the single-flag pattern (--upload "${PAYLOAD_DIR}:/sandbox") with files referenced at the basename-prefixed path that #952 / #1028 established. - README documents the trust model honestly: structured rule is the contract, agent rationale is a hint, prover validation badge in progress per RFC 0001. 
Move the deterministic no-LLM regression harness out of examples/ into e2e/policy-advisor/ -- it was a parallel demo, not an example. Same loop without the LLM, useful for iterating on the proxy and policy.local API. --- e2e/policy-advisor/README.md | 29 ++ .../policy-advisor}/policy.template.yaml | 0 .../policy-advisor/sandbox-runner.sh | 0 .../policy-advisor/test.sh | 4 +- .../.env.example | 29 -- .../agent-driven-policy-management/README.md | 147 ++++--- .../agent-task.md | 73 ++-- .../agent-driven-policy-management/demo.sh | 378 ++++++++++-------- .../policy.template.yaml | 41 ++ .../sandbox-agent.sh | 73 ++++ 10 files changed, 473 insertions(+), 301 deletions(-) create mode 100644 e2e/policy-advisor/README.md rename {examples/agent-driven-policy-management/validation => e2e/policy-advisor}/policy.template.yaml (100%) rename examples/agent-driven-policy-management/validation/runner.sh => e2e/policy-advisor/sandbox-runner.sh (100%) rename examples/agent-driven-policy-management/validation/validation.sh => e2e/policy-advisor/test.sh (99%) delete mode 100644 examples/agent-driven-policy-management/.env.example create mode 100755 examples/agent-driven-policy-management/sandbox-agent.sh diff --git a/e2e/policy-advisor/README.md b/e2e/policy-advisor/README.md new file mode 100644 index 000000000..b9796e1a6 --- /dev/null +++ b/e2e/policy-advisor/README.md @@ -0,0 +1,29 @@ + + + +# Policy Advisor end-to-end test + +Deterministic, no-LLM exercise of the agent-driven policy loop: + +1. Start a sandbox with a read-only GitHub L7 policy. +2. From inside the sandbox, attempt a GitHub contents PUT and assert OpenShell + returns a structured `policy_denied` 403. +3. Submit a narrow `addRule` proposal through `http://policy.local/v1/proposals`. +4. Approve the draft from the host and retry until the write succeeds. 
+ +This proves the proxy, the structured deny body, the `policy.local` HTTP API, +the gateway proposal path, and the hot-reload of approved rules — without +involving an LLM. The user-facing demo (`examples/agent-driven-policy-management/`) +runs the same loop with Codex driving from inside the sandbox. + +## Run it + +```bash +DEMO_GITHUB_OWNER= \ +DEMO_GITHUB_REPO=openshell-policy-demo \ +bash e2e/policy-advisor/test.sh +``` + +Requires an active OpenShell gateway (`openshell gateway start`) and a GitHub +token with contents write on the repository (auto-resolved from `gh auth token`, +`GITHUB_TOKEN`, or `GH_TOKEN`). diff --git a/examples/agent-driven-policy-management/validation/policy.template.yaml b/e2e/policy-advisor/policy.template.yaml similarity index 100% rename from examples/agent-driven-policy-management/validation/policy.template.yaml rename to e2e/policy-advisor/policy.template.yaml diff --git a/examples/agent-driven-policy-management/validation/runner.sh b/e2e/policy-advisor/sandbox-runner.sh similarity index 100% rename from examples/agent-driven-policy-management/validation/runner.sh rename to e2e/policy-advisor/sandbox-runner.sh diff --git a/examples/agent-driven-policy-management/validation/validation.sh b/e2e/policy-advisor/test.sh similarity index 99% rename from examples/agent-driven-policy-management/validation/validation.sh rename to e2e/policy-advisor/test.sh index 2b8187444..8f956eb0e 100755 --- a/examples/agent-driven-policy-management/validation/validation.sh +++ b/e2e/policy-advisor/test.sh @@ -6,9 +6,9 @@ set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -REPO_ROOT="$(cd "${SCRIPT_DIR}/../../.." && pwd)" +REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." 
&& pwd)" POLICY_TEMPLATE="${SCRIPT_DIR}/policy.template.yaml" -RUNNER_SOURCE="${SCRIPT_DIR}/runner.sh" +RUNNER_SOURCE="${SCRIPT_DIR}/sandbox-runner.sh" if [[ -z "${OPENSHELL_BIN:-}" ]]; then if [[ -x "${REPO_ROOT}/target/debug/openshell" ]]; then diff --git a/examples/agent-driven-policy-management/.env.example b/examples/agent-driven-policy-management/.env.example deleted file mode 100644 index 6b491d4d9..000000000 --- a/examples/agent-driven-policy-management/.env.example +++ /dev/null @@ -1,29 +0,0 @@ -# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 - -# Copy this file to .env from the repository root, fill in the values, then -# source it before running demo.sh. - -DEMO_GITHUB_OWNER= -DEMO_GITHUB_REPO= - -# Optional when GITHUB_TOKEN, GH_TOKEN, or `gh auth token` is available. -# DEMO_GITHUB_TOKEN= - -# Optional defaults. -# DEMO_BRANCH=main -# DEMO_FILE_DIR=openshell-policy-advisor-demo -# DEMO_KEEP_SANDBOX=0 - -# Point this at a policy that allows your chosen agent to reach its model or -# provider endpoints. The checked-in policy.template.yaml only allows the -# GitHub API read-only path needed to trigger the policy proposal loop. -# DEMO_POLICY_FILE=/absolute/path/to/policy-that-allows-your-agent.yaml - -# Provider names already registered in OpenShell for your chosen agent. -# Use spaces or commas for multiple providers. -# DEMO_AGENT_PROVIDERS="my-agent-provider" - -# The sandbox command that runs your agent and has it read the rendered task. -# Keep this agent-specific setup outside demo.sh so the demo remains portable. 
-# DEMO_AGENT_COMMAND='agent exec "$(cat /sandbox/payload/agent-task.md)"' diff --git a/examples/agent-driven-policy-management/README.md b/examples/agent-driven-policy-management/README.md index e9752fd2a..7ff9a7780 100644 --- a/examples/agent-driven-policy-management/README.md +++ b/examples/agent-driven-policy-management/README.md @@ -3,90 +3,85 @@ # Agent-Driven Policy Management Demo -Run the first policy-advisor MVP loop with a real agent: +Run the full agent-driven policy loop end-to-end: -1. Use the active OpenShell gateway. -2. Create a GitHub provider from a host token. -3. Start a sandbox with your agent command and an uploaded task file. -4. Let the agent hit an OpenShell `policy_denied` response. -5. Let the agent read `/etc/openshell/skills/policy_advisor.md` and submit a - narrow proposal through `http://policy.local/v1/proposals`. -6. Approve the draft rule from outside the sandbox. -7. Let the agent retry and confirm the GitHub write succeeds. +1. A Codex agent inside an OpenShell sandbox tries to write a markdown file to + GitHub via the Contents API. +2. OpenShell denies the request with a structured `policy_denied` 403 because + the initial policy only allows read-only access to `api.github.com`. +3. The agent reads `/etc/openshell/skills/policy_advisor.md`, drafts the + narrowest rule needed, and submits it to `http://policy.local/v1/proposals`. +4. You approve the proposal from the host with one keystroke. +5. The sandbox hot-reloads the merged policy and the agent's retry succeeds. -The shell script is agent-agnostic. It does not know how to sign in to a -specific coding agent. Pass the provider names and sandbox command for the -agent you want to run. +The whole loop usually finishes in under two minutes. ## Prerequisites -- An active OpenShell gateway that includes the current sandbox supervisor - build. -- `curl` and `jq` on the host machine. -- The GitHub CLI (`gh`) if you want to create the scratch repo with the command - below. 
-- A disposable or demo-safe GitHub repository. -- A GitHub token with contents write permission for that repository. -- An agent provider and policy that let your chosen agent run inside the - sandbox. - -## Create A Scratch Repo - -Use a private scratch repository with an initial README. The README matters -because GitHub does not create the default branch until the first commit exists. - -```bash -gh repo create zredlined/openshell-policy-demo \ - --private \ - --add-readme \ - --description "OpenShell policy advisor demo scratch repo" -``` - -The demo never creates repositories and refuses to overwrite an existing demo -file. Each default run writes a new timestamped file under -`openshell-policy-advisor-demo/`. - -## Quick Start +- An active OpenShell gateway (`openshell gateway start`). +- `gh auth login` (or a `GITHUB_TOKEN` env var with contents-write on a + scratch repo). +- `codex login` on the host. +- A scratch GitHub repository with at least one commit on the default branch. + If you don't have one yet: -The included `policy.template.yaml` only defines the GitHub API target for the -policy-management loop. Use `DEMO_POLICY_FILE` to point at a policy that also -allows your chosen agent to reach its model/provider endpoints. + ```shell + gh repo create "$(gh api user --jq .login)/openshell-policy-demo" \ + --private --add-readme \ + --description "OpenShell policy advisor demo scratch repo" + ``` -```bash -cp examples/agent-driven-policy-management/.env.example .env -$EDITOR .env - -set -a -source .env -set +a +## Run it +```shell bash examples/agent-driven-policy-management/demo.sh ``` -The host script only orchestrates sandbox lifecycle and developer approval. The -policy proposal is authored by the agent inside the sandbox from the installed -skill, structured denial response, and `policy.local` API. 
- -The demo writes one markdown file under: - -```text -openshell-policy-advisor-demo/.md -``` - -Use a scratch repository or a demo branch if you do not want this file in a -production repository. - -The deterministic non-model validation flow lives in -`examples/agent-driven-policy-management/validation/validation.sh`. - -## Options - -```bash -export OPENSHELL_BIN=/path/to/openshell -export DEMO_BRANCH=main -export DEMO_RUN_ID="$(date +%Y%m%d-%H%M%S)" -export DEMO_FILE_DIR=openshell-policy-advisor-demo -export DEMO_KEEP_SANDBOX=0 -export DEMO_APPROVAL_TIMEOUT_SECS=240 -export DEMO_AGENT_PROVIDERS="agent-provider-a agent-provider-b" -``` +That's the whole thing. The demo resolves your GitHub handle from `gh`, picks +`openshell-policy-demo` as the repo, and writes one timestamped markdown file +under `openshell-policy-advisor-demo/` per run. + +## Overrides (all optional) + +| Env var | Default | +|---|---| +| `DEMO_GITHUB_OWNER` | `gh api user --jq .login` | +| `DEMO_GITHUB_REPO` | `openshell-policy-demo` | +| `DEMO_BRANCH` | `main` | +| `DEMO_RUN_ID` | timestamp | +| `DEMO_GITHUB_TOKEN` | falls back to `GITHUB_TOKEN`, `GH_TOKEN`, or `gh auth token` | +| `DEMO_KEEP_SANDBOX` | `0` (set `1` to inspect the sandbox after the demo) | +| `DEMO_APPROVAL_TIMEOUT_SECS` | `240` | +| `OPENSHELL_BIN` | `target/debug/openshell` if present, else `openshell` on `PATH` | + +## What the agent sees + +`policy.template.yaml` is the initial restrictive policy: a read-only L7 REST +rule for `api.github.com` plus the binary set Codex needs. The agent has to +ask for the additional `PUT /repos/.../contents/...` write itself — that's the +proposal you approve. 
+ +## What gets approved (trust model) + +Every proposal lands in the gateway as a `PolicyChunk` — a structured object +with three parts, each with a different trust level: + +| Field | Source | Trust | +|---|---|---| +| `proposed_rule` (host, port, method, path, binary) | agent, schema-validated by the gateway | structured contract — *this* is what you're approving | +| `rationale` (free-form prose) | agent | hint only — a compromised agent can lie here | +| `validation_result` (prover output) | gateway-side prover | trust signal — but this surface is in progress (see [RFC 0001](../../rfc/0001-agent-driven-policy-management.md)) | + +The MVP today shows the structured rule plus the agent's rationale in +`openshell rule get` and the TUI inbox panel. The demo's `openshell rule +approve-all` auto-approves to keep the loop short — in a real session a +developer reviews the structured grant before pressing `a`. Prover-backed +validation badges, computed reachability deltas, and a richer "this is what +the rule actually permits" summary are the next phase. For now, **always +approve based on the structured rule, not the agent's rationale.** + +## Going further + +`e2e/policy-advisor/test.sh` runs the same loop deterministically without an +LLM (curl + the `policy.local` API directly). Use it to validate the proxy and +proposal pipeline when iterating on the sandbox or gateway code. diff --git a/examples/agent-driven-policy-management/agent-task.md b/examples/agent-driven-policy-management/agent-task.md index 8ac367f5f..6aa56b5a3 100644 --- a/examples/agent-driven-policy-management/agent-task.md +++ b/examples/agent-driven-policy-management/agent-task.md @@ -1,45 +1,48 @@ -# Agent Policy Management Demo Task +You are running inside an OpenShell sandbox. Your job is to write one +markdown file to GitHub via the GitHub Contents API. -You are inside an OpenShell sandbox. The user asked you to write one markdown -file to GitHub using the GitHub Contents API. 
- -Target: +## Target - Repository: `{{OWNER}}/{{REPO}}` - Branch: `{{BRANCH}}` - File path: `{{FILE_PATH}}` - Run id: `{{RUN_ID}}` -Requirements: - -- Use ordinary shell commands and the `GITHUB_TOKEN` environment variable. -- Do not print, echo, or reveal the token. -- First try to write the file with `PUT /repos/{{OWNER}}/{{REPO}}/contents/{{FILE_PATH}}`. -- If OpenShell returns `policy_denied`, read - `/etc/openshell/skills/policy_advisor.md` and follow the local API workflow - there. -- Submit the narrowest proposal that permits only this write. -- Do not include a `tls` field in the proposed endpoint unless you are - explicitly disabling TLS inspection. -- After submitting a proposal, retry the write for up to 120 seconds. The - developer may approve while you are waiting. -- Do not print the full GitHub response body. It can include temporary - `download_url` query tokens. Extract only `content.path`, `content.html_url`, - and `commit.sha`. -- Finish with a short summary that says whether the write succeeded. Include - the GitHub file path and URL if GitHub returns them. - -Suggested file content: - -```markdown -# OpenShell policy advisor demo - -Run id: {{RUN_ID}} - -This file was written from inside an OpenShell sandbox after the agent read the -policy advisor skill, submitted a narrow policy proposal, and waited for -developer approval. -``` +## What to do + +1. PUT the file with `curl` against + `https://api.github.com/repos/{{OWNER}}/{{REPO}}/contents/{{FILE_PATH}}`. + Authenticate with the `GITHUB_TOKEN` environment variable. Do not echo the + token. Body content (base64-encoded into the JSON `content` field): + + ``` + # OpenShell policy advisor demo + + Run id: {{RUN_ID}} + + Written from inside an OpenShell sandbox after a narrowly-scoped policy + proposal was approved by the developer. + ``` + +2. The first attempt will fail with HTTP 403 and a JSON body containing + `"error": "policy_denied"`. 
Read the body — it tells you which layer denied + you (`l7`/`rest`), which method/path was missing, and where to learn more. + +3. Read `/etc/openshell/skills/policy_advisor.md` and follow it. Submit the + narrowest possible proposal to `http://policy.local/v1/proposals` — exact + host, exact port, exact method, exact path, binary `/usr/bin/curl`. Do not + include query strings. Do not propose wildcard hosts. + +4. After submitting, retry the PUT every few seconds for up to 120 seconds. + The developer is approving from outside the sandbox; once approved, the + sandbox hot-reloads policy and the same PUT will succeed. + +5. Stop as soon as the PUT returns HTTP 200 or 201. Print a short summary + showing whether it succeeded, plus `content.path` and `content.html_url` + from the GitHub response. Do not print the full response body. + +If anything is unclear, prefer making a narrower proposal and asking for +approval again over widening the rule. diff --git a/examples/agent-driven-policy-management/demo.sh b/examples/agent-driven-policy-management/demo.sh index 8613b1031..efeb2f6f5 100755 --- a/examples/agent-driven-policy-management/demo.sh +++ b/examples/agent-driven-policy-management/demo.sh @@ -3,14 +3,22 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +# Agent-driven policy management demo. +# +# Runs the full loop: a Codex agent inside a sandbox hits an OpenShell policy +# block, reads the policy advisor skill, drafts a narrow rule via policy.local, +# the developer approves from the host, and the agent retries successfully. + set -euo pipefail SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" REPO_ROOT="$(cd "${SCRIPT_DIR}/../.." 
&& pwd)" -DEFAULT_POLICY_FILE="${SCRIPT_DIR}/policy.template.yaml" +POLICY_TEMPLATE="${SCRIPT_DIR}/policy.template.yaml" TASK_TEMPLATE="${SCRIPT_DIR}/agent-task.md" +SANDBOX_AGENT="${SCRIPT_DIR}/sandbox-agent.sh" -if [[ -z "${OPENSHELL_BIN:-}" ]]; then +OPENSHELL_BIN="${OPENSHELL_BIN:-}" +if [[ -z "$OPENSHELL_BIN" ]]; then if [[ -x "${REPO_ROOT}/target/debug/openshell" ]]; then OPENSHELL_BIN="${REPO_ROOT}/target/debug/openshell" else @@ -18,54 +26,45 @@ if [[ -z "${OPENSHELL_BIN:-}" ]]; then fi fi -DEMO_POLICY_FILE="${DEMO_POLICY_FILE:-$DEFAULT_POLICY_FILE}" -DEMO_SANDBOX_FROM="${DEMO_SANDBOX_FROM:-base}" +DEMO_GITHUB_OWNER="${DEMO_GITHUB_OWNER:-}" +DEMO_GITHUB_REPO="${DEMO_GITHUB_REPO:-openshell-policy-demo}" DEMO_BRANCH="${DEMO_BRANCH:-main}" DEMO_RUN_ID="${DEMO_RUN_ID:-$(date +%Y%m%d-%H%M%S)}" DEMO_FILE_DIR="${DEMO_FILE_DIR:-openshell-policy-advisor-demo}" -DEMO_FILE_PATH="${DEMO_FILE_PATH:-${DEMO_FILE_DIR}/${DEMO_RUN_ID}.md}" -DEMO_SANDBOX_NAME="${DEMO_SANDBOX_NAME:-policy-agent-${DEMO_RUN_ID}}" -DEMO_GITHUB_PROVIDER_NAME="${DEMO_GITHUB_PROVIDER_NAME:-github-policy-agent-${DEMO_RUN_ID}}" -DEMO_AGENT_PROVIDERS="${DEMO_AGENT_PROVIDERS:-}" +DEMO_FILE_PATH="${DEMO_FILE_DIR}/${DEMO_RUN_ID}.md" +DEMO_SANDBOX_NAME="${DEMO_SANDBOX_NAME:-policy-demo-${DEMO_RUN_ID}}" +DEMO_CODEX_PROVIDER_NAME="${DEMO_CODEX_PROVIDER_NAME:-codex-policy-demo-${DEMO_RUN_ID}}" +DEMO_GITHUB_PROVIDER_NAME="${DEMO_GITHUB_PROVIDER_NAME:-github-policy-demo-${DEMO_RUN_ID}}" DEMO_APPROVAL_TIMEOUT_SECS="${DEMO_APPROVAL_TIMEOUT_SECS:-240}" DEMO_KEEP_SANDBOX="${DEMO_KEEP_SANDBOX:-0}" -TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/openshell-agent-policy-demo.XXXXXX")" +TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/openshell-policy-demo.XXXXXX")" PAYLOAD_DIR="${TMP_DIR}/payload" -TASK_FILE="${PAYLOAD_DIR}/agent-task.md" +POLICY_FILE="${TMP_DIR}/policy.yaml" AGENT_LOG="${TMP_DIR}/agent.log" -PENDING_FILE="${TMP_DIR}/pending-rule.txt" mkdir -p "$PAYLOAD_DIR" -BOLD='\033[1m' -DIM='\033[2m' -CYAN='\033[36m' 
-GREEN='\033[32m' -RED='\033[31m' -YELLOW='\033[33m' -RESET='\033[0m' +# Use ANSI-C quoting so the variables hold the actual ESC byte rather than a +# literal backslash sequence. This lets `cat`, heredocs, and any non-printf +# emitter render colors correctly without per-call interpretation. +BOLD=$'\033[1m' +DIM=$'\033[2m' +CYAN=$'\033[36m' +GREEN=$'\033[32m' +RED=$'\033[31m' +YELLOW=$'\033[33m' +RESET=$'\033[0m' AGENT_PID="" -step() { - printf "\n${BOLD}${CYAN}==> %s${RESET}\n\n" "$1" -} - -info() { - printf " %b\n" "$*" -} - -redact_output() { - sed -E \ - -e 's|(download_url": "https://raw\.githubusercontent\.com[^?"]+\?token=)[^"]+|\1|g' \ - -e 's|(Authorization: Bearer )[A-Za-z0-9._-]+|\1|g' -} +step() { printf "\n${BOLD}${CYAN}==> %s${RESET}\n\n" "$1"; } +info() { printf " %b\n" "$*"; } fail() { printf "\n${RED}error:${RESET} %s\n" "$*" >&2 if [[ -f "$AGENT_LOG" ]]; then printf "\n${YELLOW}Agent log tail:${RESET}\n" >&2 - tail -n 120 "$AGENT_LOG" | redact_output | sed 's/^/ /' >&2 || true + tail -n 80 "$AGENT_LOG" | sed 's/^/ /' >&2 || true fi exit 1 } @@ -73,12 +72,17 @@ fail() { cleanup() { local status=$? 
+ if [[ -n "$AGENT_PID" ]] && kill -0 "$AGENT_PID" >/dev/null 2>&1; then + kill "$AGENT_PID" >/dev/null 2>&1 || true + wait "$AGENT_PID" 2>/dev/null || true + fi + if [[ "$DEMO_KEEP_SANDBOX" != "1" ]]; then "$OPENSHELL_BIN" sandbox delete "$DEMO_SANDBOX_NAME" >/dev/null 2>&1 || true else printf "\n${YELLOW}Keeping sandbox because DEMO_KEEP_SANDBOX=1: %s${RESET}\n" "$DEMO_SANDBOX_NAME" fi - + "$OPENSHELL_BIN" provider delete "$DEMO_CODEX_PROVIDER_NAME" >/dev/null 2>&1 || true "$OPENSHELL_BIN" provider delete "$DEMO_GITHUB_PROVIDER_NAME" >/dev/null 2>&1 || true if [[ $status -eq 0 ]]; then @@ -93,33 +97,34 @@ require_command() { command -v "$1" >/dev/null 2>&1 || fail "missing required command: $1" } -validate_name() { - local label="$1" - local value="$2" - [[ "$value" =~ ^[A-Za-z0-9_.-]+$ ]] || fail "$label may contain only letters, numbers, '.', '_', and '-'" -} - -validate_path() { - local label="$1" - local value="$2" - [[ "$value" =~ ^[A-Za-z0-9._/-]+$ ]] || fail "$label may contain only letters, numbers, '.', '_', '-', and '/'" - [[ "$value" != /* ]] || fail "$label must be relative" - [[ "$value" != *..* ]] || fail "$label must not contain '..'" +resolve_github_owner() { + if [[ -n "$DEMO_GITHUB_OWNER" ]]; then + return + fi + if command -v gh >/dev/null 2>&1; then + DEMO_GITHUB_OWNER="$(gh api user --jq .login 2>/dev/null || true)" + fi + [[ -n "$DEMO_GITHUB_OWNER" ]] || fail "set DEMO_GITHUB_OWNER, or sign in with: gh auth login" } resolve_github_token() { - if [[ -z "${DEMO_GITHUB_TOKEN:-}" ]]; then - if [[ -n "${GITHUB_TOKEN:-}" ]]; then - DEMO_GITHUB_TOKEN="$GITHUB_TOKEN" - elif [[ -n "${GH_TOKEN:-}" ]]; then - DEMO_GITHUB_TOKEN="$GH_TOKEN" - elif command -v gh >/dev/null 2>&1; then - DEMO_GITHUB_TOKEN="$(gh auth token 2>/dev/null || true)" - fi + DEMO_GITHUB_TOKEN="${DEMO_GITHUB_TOKEN:-${GITHUB_TOKEN:-${GH_TOKEN:-}}}" + if [[ -z "$DEMO_GITHUB_TOKEN" ]] && command -v gh >/dev/null 2>&1; then + DEMO_GITHUB_TOKEN="$(gh auth token 2>/dev/null || true)" fi + 
[[ -n "$DEMO_GITHUB_TOKEN" ]] || fail "set DEMO_GITHUB_TOKEN, GITHUB_TOKEN, GH_TOKEN, or sign in with: gh auth login" + export DEMO_GITHUB_TOKEN +} - [[ -n "${DEMO_GITHUB_TOKEN:-}" ]] || fail "set DEMO_GITHUB_TOKEN, GITHUB_TOKEN, GH_TOKEN, or sign in with gh" - export GITHUB_TOKEN="$DEMO_GITHUB_TOKEN" +resolve_codex_auth() { + [[ -f "${HOME}/.codex/auth.json" ]] || fail "missing local Codex sign-in; run: codex login" + export CODEX_AUTH_ACCESS_TOKEN CODEX_AUTH_REFRESH_TOKEN CODEX_AUTH_ACCOUNT_ID + CODEX_AUTH_ACCESS_TOKEN="$(jq -r '.tokens.access_token // empty' "${HOME}/.codex/auth.json")" + CODEX_AUTH_REFRESH_TOKEN="$(jq -r '.tokens.refresh_token // empty' "${HOME}/.codex/auth.json")" + CODEX_AUTH_ACCOUNT_ID="$(jq -r '.tokens.account_id // empty' "${HOME}/.codex/auth.json")" + [[ -n "$CODEX_AUTH_ACCESS_TOKEN" ]] || fail "Codex sign-in is missing an access token; run: codex login" + [[ -n "$CODEX_AUTH_REFRESH_TOKEN" ]] || fail "Codex sign-in is missing a refresh token; run: codex login" + [[ -n "$CODEX_AUTH_ACCOUNT_ID" ]] || fail "Codex sign-in is missing an account id; run: codex login" } validate_env() { @@ -127,157 +132,208 @@ validate_env() { require_command jq require_command "$OPENSHELL_BIN" - [[ -f "$DEMO_POLICY_FILE" ]] || fail "missing policy file: $DEMO_POLICY_FILE" + [[ -f "$POLICY_TEMPLATE" ]] || fail "missing policy template: $POLICY_TEMPLATE" [[ -f "$TASK_TEMPLATE" ]] || fail "missing agent task template: $TASK_TEMPLATE" - [[ -n "${DEMO_GITHUB_OWNER:-}" ]] || fail "set DEMO_GITHUB_OWNER" - [[ -n "${DEMO_GITHUB_REPO:-}" ]] || fail "set DEMO_GITHUB_REPO" - [[ -n "${DEMO_AGENT_COMMAND:-}" ]] || fail "set DEMO_AGENT_COMMAND to a sandbox command that reads /sandbox/payload/agent-task.md" - [[ "$DEMO_RUN_ID" =~ ^[a-z0-9-]+$ ]] || fail "DEMO_RUN_ID may contain only lowercase letters, numbers, and '-'" - [[ "$DEMO_APPROVAL_TIMEOUT_SECS" =~ ^[0-9]+$ ]] || fail "DEMO_APPROVAL_TIMEOUT_SECS must be a number" + [[ -f "$SANDBOX_AGENT" ]] || fail "missing sandbox 
agent script: $SANDBOX_AGENT" - validate_name "DEMO_GITHUB_OWNER" "$DEMO_GITHUB_OWNER" - validate_name "DEMO_GITHUB_REPO" "$DEMO_GITHUB_REPO" - validate_path "DEMO_BRANCH" "$DEMO_BRANCH" - validate_path "DEMO_FILE_PATH" "$DEMO_FILE_PATH" + [[ "$DEMO_GITHUB_REPO" =~ ^[A-Za-z0-9_.-]+$ ]] || fail "DEMO_GITHUB_REPO contains unsupported characters" + [[ "$DEMO_BRANCH" =~ ^[A-Za-z0-9._/-]+$ ]] || fail "DEMO_BRANCH contains unsupported characters" + [[ "$DEMO_RUN_ID" =~ ^[A-Za-z0-9_.-]+$ ]] || fail "DEMO_RUN_ID contains unsupported characters" + + resolve_github_owner + [[ "$DEMO_GITHUB_OWNER" =~ ^[A-Za-z0-9_.-]+$ ]] || fail "DEMO_GITHUB_OWNER contains unsupported characters" resolve_github_token + resolve_codex_auth } github_api_status() { - local url="$1" - local body="$2" - curl -sS \ - -o "$body" \ - -w "%{http_code}" \ + local url="$1" body="$2" + curl -sS -o "$body" -w "%{http_code}" \ -H "Accept: application/vnd.github+json" \ -H "Authorization: Bearer ${DEMO_GITHUB_TOKEN}" \ -H "X-GitHub-Api-Version: 2022-11-28" \ "$url" } -urlencode() { - jq -rn --arg v "$1" '$v|@uri' +check_gateway() { + local raw version + # `openshell status` colorizes labels with ANSI even when piped, so strip + # escapes before parsing. Use NO_COLOR as a belt-and-suspenders hint for + # libraries that respect it. 
+ raw="$(NO_COLOR=1 "$OPENSHELL_BIN" status 2>/dev/null \ + | sed 's/\x1b\[[0-9;]*m//g')" + version="$(awk -F': *' '/Version:/ { print $2; exit }' <<<"$raw")" + [[ -n "$version" ]] \ + || fail "active OpenShell gateway is not reachable; start one with: openshell gateway start" + info "gateway: connected · ${version}" } -check_gateway() { - step "Checking active OpenShell gateway" - "$OPENSHELL_BIN" status >/dev/null 2>&1 \ - || fail "active OpenShell gateway is not reachable; start one separately" - "$OPENSHELL_BIN" status | sed 's/^/ /' +show_run_summary() { + step "Run summary" + printf " %-9s %s/%s\n" "repo:" "$DEMO_GITHUB_OWNER" "$DEMO_GITHUB_REPO" + printf " %-9s %s\n" "branch:" "$DEMO_BRANCH" + printf " %-9s %s\n" "target:" "$DEMO_FILE_PATH" + printf " %-9s %s\n" "sandbox:" "$DEMO_SANDBOX_NAME" } check_github_access() { - step "Checking GitHub repository access" - local body status branch + local body status branch sha body="${TMP_DIR}/github-repo.json" status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}" "$body")" - [[ "$status" == "200" ]] \ - || fail "GitHub returned HTTP $status for ${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}; check the repo name and token access" - - if jq -e 'has("permissions") and (.permissions.push == false and .permissions.admin == false and .permissions.maintain == false)' "$body" >/dev/null; then - fail "GitHub token can read ${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO} but does not appear to have write access" + if [[ "$status" != "200" ]]; then + info "${RED}Repo not found:${RESET} ${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}" + info "Create a private scratch repo first, then re-run:" + info " ${DIM}gh repo create ${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO} --private --add-readme \\${RESET}" + info " ${DIM} --description 'OpenShell policy advisor demo scratch repo'${RESET}" + fail "GitHub returned HTTP $status for ${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}" + fi + if jq -e '.permissions.push == 
false and .permissions.admin == false and .permissions.maintain == false' "$body" >/dev/null; then + fail "GitHub token does not have write access to ${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}" fi - branch="$(urlencode "$DEMO_BRANCH")" + branch="$(jq -rn --arg v "$DEMO_BRANCH" '$v|@uri')" body="${TMP_DIR}/github-branch.json" status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/branches/${branch}" "$body")" [[ "$status" == "200" ]] || fail "GitHub returned HTTP $status for branch ${DEMO_BRANCH}" + sha="$(jq -r '.commit.sha[0:7]' "$body")" + info "github: ${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO} @ ${DEMO_BRANCH} (${sha})" - body="${TMP_DIR}/github-demo-file.json" + body="${TMP_DIR}/github-target.json" status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/contents/${DEMO_FILE_PATH}?ref=${branch}" "$body")" if [[ "$status" == "200" ]]; then - fail "demo output file already exists: ${DEMO_FILE_PATH}; choose a new DEMO_RUN_ID or DEMO_FILE_PATH" + fail "demo output file already exists: ${DEMO_FILE_PATH}; choose a new DEMO_RUN_ID" fi - [[ "$status" == "404" ]] || fail "GitHub returned HTTP $status while checking output path ${DEMO_FILE_PATH}" - - info "${GREEN}GitHub repo, branch, and output path are safe for this run.${RESET}" + [[ "$status" == "404" ]] || fail "GitHub returned HTTP $status while checking output path" } -render_task() { - python3 - "$TASK_TEMPLATE" "$TASK_FILE" "$DEMO_GITHUB_OWNER" "$DEMO_GITHUB_REPO" "$DEMO_BRANCH" "$DEMO_FILE_PATH" "$DEMO_RUN_ID" <<'PY' -from pathlib import Path -import sys - -template, output, owner, repo, branch, file_path, run_id = sys.argv[1:8] -text = Path(template).read_text(encoding="utf-8") -for key, value in { - "OWNER": owner, - "REPO": repo, - "BRANCH": branch, - "FILE_PATH": file_path, - "RUN_ID": run_id, -}.items(): - text = text.replace("{{" + key + "}}", value) -Path(output).write_text(text, encoding="utf-8") -PY +render_payload() { + 
sed \ + -e "s|{{OWNER}}|${DEMO_GITHUB_OWNER}|g" \ + -e "s|{{REPO}}|${DEMO_GITHUB_REPO}|g" \ + -e "s|{{BRANCH}}|${DEMO_BRANCH}|g" \ + -e "s|{{FILE_PATH}}|${DEMO_FILE_PATH}|g" \ + -e "s|{{RUN_ID}}|${DEMO_RUN_ID}|g" \ + "$TASK_TEMPLATE" > "${PAYLOAD_DIR}/agent-task.md" + cp "$SANDBOX_AGENT" "${PAYLOAD_DIR}/sandbox-agent.sh" + cp "$POLICY_TEMPLATE" "$POLICY_FILE" } -create_github_provider() { - step "Creating temporary GitHub provider" +create_providers() { + "$OPENSHELL_BIN" provider delete "$DEMO_CODEX_PROVIDER_NAME" >/dev/null 2>&1 || true "$OPENSHELL_BIN" provider delete "$DEMO_GITHUB_PROVIDER_NAME" >/dev/null 2>&1 || true + "$OPENSHELL_BIN" provider create \ - --name "$DEMO_GITHUB_PROVIDER_NAME" \ - --type github \ - --credential GITHUB_TOKEN >/dev/null - info "${GREEN}Created GitHub provider for this run.${RESET}" -} + --name "$DEMO_CODEX_PROVIDER_NAME" \ + --type generic \ + --credential CODEX_AUTH_ACCESS_TOKEN \ + --credential CODEX_AUTH_REFRESH_TOKEN \ + --credential CODEX_AUTH_ACCOUNT_ID >/dev/null -provider_args() { - printf '%s\n' "--provider" - printf '%s\n' "$DEMO_GITHUB_PROVIDER_NAME" + "$OPENSHELL_BIN" provider create \ + --name "$DEMO_GITHUB_PROVIDER_NAME" \ + --type generic \ + --credential DEMO_GITHUB_TOKEN >/dev/null - local normalized="${DEMO_AGENT_PROVIDERS//,/ }" - local provider - for provider in $normalized; do - printf '%s\n' "--provider" - printf '%s\n' "$provider" - done + info "providers created (codex, github) — credentials injected as env vars only" } start_agent_sandbox() { - step "Starting agent inside the sandbox" + step "Launching sandbox; agent will hit a policy block and draft a proposal" "$OPENSHELL_BIN" sandbox delete "$DEMO_SANDBOX_NAME" >/dev/null 2>&1 || true - local args=() - while IFS= read -r arg; do - args+=("$arg") - done < <(provider_args) + info "initial policy: read-only access to api.github.com (no PUT)" + info "agent task: PUT /repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/contents/${DEMO_FILE_PATH}" + info "live 
log: ${AGENT_LOG}" + # `--upload :/sandbox` preserves the source directory basename + # (matches `scp -r`/`cp -r`, see PRs #952 / #1028), so `${PAYLOAD_DIR}` + # (basename `payload`) lands at `/sandbox/payload/...`. `--upload` accepts + # a single value, so we ship both files in one directory. ( "$OPENSHELL_BIN" sandbox create \ --name "$DEMO_SANDBOX_NAME" \ - --from "$DEMO_SANDBOX_FROM" \ - "${args[@]}" \ - --policy "$DEMO_POLICY_FILE" \ + --from base \ + --provider "$DEMO_CODEX_PROVIDER_NAME" \ + --provider "$DEMO_GITHUB_PROVIDER_NAME" \ + --policy "$POLICY_FILE" \ --upload "${PAYLOAD_DIR}:/sandbox" \ --no-git-ignore \ - --keep \ --no-auto-providers \ --no-tty \ - -- bash -lc "$DEMO_AGENT_COMMAND" + -- bash /sandbox/payload/sandbox-agent.sh ) >"$AGENT_LOG" 2>&1 & AGENT_PID="$!" - info "${DIM}Agent run started; log: ${AGENT_LOG}${RESET}" +} + +# Strip the rule_get output down to the lines a developer needs to make an +# informed approve/reject decision: rationale, binary, endpoint. Filters the +# noisy fields (UUID, agent-generated rule_name, hardcoded confidence, +# duplicate Binaries) until `openshell rule get` learns to print L7 +# method/path itself (tracked separately). +# +# `openshell rule get` colorizes labels with ANSI escapes; strip them before +# parsing so the field-name match works in piped contexts. 
+summarize_pending() { + local pending="$1" + sed 's/\x1b\[[0-9;]*m//g' "$pending" \ + | awk ' + /Rationale:/ { sub(/^[[:space:]]*/, ""); print " " $0; next } + /Binary:/ { sub(/^[[:space:]]*/, ""); print " " $0; next } + /Endpoints:/ { sub(/^[[:space:]]*/, ""); print " " $0; next } + ' +} + +narrate_sandbox_workflow() { + info "Inside the sandbox right now:" + info "" + info " ${BOLD}[1]${RESET} agent: ${DIM}curl -X PUT https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/contents/...${RESET}" + info " ${BOLD}[2]${RESET} L7 proxy denies the write and returns a structured 403 the" + info " agent can parse and act on:" + cat </dev/null 2>&1; then wait "$AGENT_PID" || true + AGENT_PID="" fail "agent exited before a pending proposal appeared" fi - "$OPENSHELL_BIN" rule get "$DEMO_SANDBOX_NAME" --status pending >"$PENDING_FILE" 2>/dev/null || true - if grep -q "Chunk:" "$PENDING_FILE" && grep -q "pending" "$PENDING_FILE"; then - info "${GREEN}Agent submitted a pending proposal.${RESET}" - sed 's/^/ /' "$PENDING_FILE" + if "$OPENSHELL_BIN" rule get "$DEMO_SANDBOX_NAME" --status pending >"$pending" 2>/dev/null \ + && grep -q "Chunk:" "$pending" && grep -q "pending" "$pending"; then + info "" + info "${GREEN}proposal received:${RESET}" + summarize_pending "$pending" - step "Approving pending draft rule from outside the sandbox" - "$OPENSHELL_BIN" rule approve-all "$DEMO_SANDBOX_NAME" | sed 's/^/ /' + step "Approving and waiting for the agent to retry" + "$OPENSHELL_BIN" rule approve-all "$DEMO_SANDBOX_NAME" \ + | awk '/approved/ { print " " $0 }' return fi @@ -285,54 +341,58 @@ approve_when_pending() { if (( now - start >= DEMO_APPROVAL_TIMEOUT_SECS )); then fail "timed out waiting for the agent to submit a policy proposal" fi - sleep 2 done } wait_for_agent() { - step "Waiting for the agent to retry after approval" if ! 
wait "$AGENT_PID"; then + AGENT_PID="" fail "agent run failed" fi - info "${GREEN}Agent run completed.${RESET}" + AGENT_PID="" + info "agent retried after policy hot-reload — write succeeded" } verify_github_write() { step "Verifying GitHub write" local body status branch - branch="$(urlencode "$DEMO_BRANCH")" - body="${TMP_DIR}/github-created-file.json" + branch="$(jq -rn --arg v "$DEMO_BRANCH" '$v|@uri')" + body="${TMP_DIR}/github-result.json" status="$(github_api_status "https://api.github.com/repos/${DEMO_GITHUB_OWNER}/${DEMO_GITHUB_REPO}/contents/${DEMO_FILE_PATH}?ref=${branch}" "$body")" [[ "$status" == "200" ]] || fail "expected demo file to exist after agent run; GitHub returned HTTP $status" - - jq -r '"File: \(.path)", "URL: \(.html_url)"' "$body" | sed 's/^/ /' + jq -r '" file: \(.path)", " url: \(.html_url)"' "$body" } +# Print the OCSF JSONL trace, filtered to the three events that *are* the +# demo's story: the L7 PUT deny, the policy hot-reload, and the L7 PUT allow. +# The native OCSF shorthand is informative and consistent with the rest of +# OpenShell's logging — keep it as-is rather than re-formatting. 
show_logs() { - step "Policy decision trace" - "$OPENSHELL_BIN" logs "$DEMO_SANDBOX_NAME" --since 10m -n 80 2>&1 \ - | grep -E 'HTTP:PUT|CONFIG:LOADED|ReportPolicyStatus' \ - | tail -n 12 \ + step "Policy decision trace (OCSF)" + "$OPENSHELL_BIN" logs "$DEMO_SANDBOX_NAME" --since 10m -n 200 2>&1 \ + | grep -E 'HTTP:PUT.*(DENIED|ALLOWED)|CONFIG:LOADED.*Policy reloaded' \ | sed 's/^/ /' || true } main() { validate_env - render_task + + step "Preflight" check_gateway check_github_access - create_github_provider + render_payload + create_providers + + show_run_summary + start_agent_sandbox approve_when_pending wait_for_agent verify_github_write show_logs - printf "\n${BOLD}${GREEN}✓ Demo complete.${RESET}\n\n" - printf " Sandbox: %s\n" "$DEMO_SANDBOX_NAME" - printf " Repository: https://github.com/%s/%s\n" "$DEMO_GITHUB_OWNER" "$DEMO_GITHUB_REPO" - printf " File: %s\n" "$DEMO_FILE_PATH" + printf "\n${BOLD}${GREEN}✓ Demo complete.${RESET}\n" } main "$@" diff --git a/examples/agent-driven-policy-management/policy.template.yaml b/examples/agent-driven-policy-management/policy.template.yaml index 6452cb01c..e920277b5 100644 --- a/examples/agent-driven-policy-management/policy.template.yaml +++ b/examples/agent-driven-policy-management/policy.template.yaml @@ -1,6 +1,16 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 +# Initial sandbox policy for the agent-driven policy demo. +# +# The agent inside the sandbox can: +# - reach Codex's model and auth endpoints (codex) +# - clone Codex plugin repos read-only (codex_plugins) +# - read api.github.com via curl (github_api_readonly) +# +# The agent CANNOT write to GitHub yet. That's the proposal it has to draft +# and ask the developer to approve. 
+ version: 1 filesystem_policy: @@ -16,6 +26,37 @@ process: run_as_group: sandbox network_policies: + codex: + name: codex + endpoints: + - { host: api.openai.com, port: 443, protocol: rest, enforcement: enforce, access: full } + - { host: auth.openai.com, port: 443, protocol: rest, enforcement: enforce, access: full } + - { host: chatgpt.com, port: 443, protocol: rest, enforcement: enforce, access: full } + - { host: ab.chatgpt.com, port: 443, protocol: rest, enforcement: enforce, access: full } + binaries: + - { path: /usr/bin/codex } + - { path: /usr/bin/node } + - { path: "/usr/lib/node_modules/@openai/**" } + + codex_plugins: + name: codex-plugins + endpoints: + - host: github.com + port: 443 + protocol: rest + enforcement: enforce + rules: + - allow: + method: GET + path: "/openai/plugins.git/info/refs*" + - allow: + method: POST + path: "/openai/plugins.git/git-upload-pack" + binaries: + - { path: /usr/bin/git } + - { path: /usr/lib/git-core/git-remote-http } + - { path: "/usr/lib/node_modules/@openai/**" } + github_api_readonly: name: github-api-readonly endpoints: diff --git a/examples/agent-driven-policy-management/sandbox-agent.sh b/examples/agent-driven-policy-management/sandbox-agent.sh new file mode 100755 index 000000000..b10a00fe4 --- /dev/null +++ b/examples/agent-driven-policy-management/sandbox-agent.sh @@ -0,0 +1,73 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Runs inside the sandbox. Bootstraps Codex from the credentials injected by +# the openshell provider, then drives the agent-task prompt to completion. 
+ +set -euo pipefail + +require_env() { + local name="$1" + [[ -n "${!name:-}" ]] || { echo "missing required env: $name" >&2; exit 1; } +} + +require_env CODEX_AUTH_ACCESS_TOKEN +require_env CODEX_AUTH_REFRESH_TOKEN +require_env CODEX_AUTH_ACCOUNT_ID +require_env DEMO_GITHUB_TOKEN + +# Make the GitHub token visible to Codex's tool loop under the conventional name. +export GITHUB_TOKEN="$DEMO_GITHUB_TOKEN" + +# Codex looks for ~/.codex/auth.json. The OpenShell provider only injects env +# vars, so we materialize the file Codex expects from those credentials. +mkdir -p "$HOME/.codex" +node - <<'NODE' +const fs = require("fs"); +const path = `${process.env.HOME}/.codex/auth.json`; +const b64u = (obj) => Buffer.from(JSON.stringify(obj)).toString("base64url"); +const now = Math.floor(Date.now() / 1000); +// Placeholder id_token is required by Codex but never validated against an +// upstream JWKS in this flow. +const idToken = [ + b64u({ alg: "none", typ: "JWT" }), + b64u({ + iss: "https://auth.openai.com", + aud: "codex", + sub: "openshell-policy-demo", + email: "demo@openshell.local", + iat: now, + exp: now + 3600, + }), + "placeholder", +].join("."); +fs.writeFileSync(path, JSON.stringify({ + auth_mode: "chatgpt", + OPENAI_API_KEY: null, + tokens: { + id_token: idToken, + access_token: process.env.CODEX_AUTH_ACCESS_TOKEN, + refresh_token: process.env.CODEX_AUTH_REFRESH_TOKEN, + account_id: process.env.CODEX_AUTH_ACCOUNT_ID, + }, + last_refresh: new Date().toISOString(), +}, null, 2)); +NODE +chmod 600 "$HOME/.codex/auth.json" + +# Codex needs a writable cwd; /sandbox is uploaded read-only-ish, so work in /tmp. +WORK="$(mktemp -d)" +cd "$WORK" + +# Disable Codex's internal bubblewrap sandbox — OpenShell is already the +# security boundary, and bwrap can't create nested user namespaces inside the +# OpenShell sandbox container without extra capabilities. 
The "danger" framing +# is from Codex's perspective on a developer host; here the OpenShell network +# policy and filesystem constraints are doing the actual containment. +exec codex exec \ + --skip-git-repo-check \ + --sandbox danger-full-access \ + --ephemeral \ + "$(cat /sandbox/payload/agent-task.md)" From fbd5cf65cf1fc1bc75e468a1153373bb055fec3a Mon Sep 17 00:00:00 2001 From: Alexander Watson Date: Mon, 4 May 2026 11:56:50 -0700 Subject: [PATCH 13/23] style(sandbox,cli): apply rustfmt Whitespace-only fixups caught by mise run pre-commit. No functional change. --- crates/openshell-cli/src/run.rs | 6 +----- crates/openshell-sandbox/src/policy_local.rs | 6 +++++- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index 45702f2e6..678494556 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -5470,11 +5470,7 @@ fn format_endpoint(endpoint: &openshell_core::proto::NetworkEndpoint) -> String } fn non_empty_or<'a>(value: &'a str, fallback: &'a str) -> &'a str { - if value.is_empty() { - fallback - } else { - value - } + if value.is_empty() { fallback } else { value } } /// Format a millisecond timestamp into a readable string. diff --git a/crates/openshell-sandbox/src/policy_local.rs b/crates/openshell-sandbox/src/policy_local.rs index 0efca2721..0573aab3c 100644 --- a/crates/openshell-sandbox/src/policy_local.rs +++ b/crates/openshell-sandbox/src/policy_local.rs @@ -215,7 +215,11 @@ fn collect_ocsf_log_files(log_dir: &Path, max_files: usize) -> std::io::Result Date: Mon, 4 May 2026 11:56:54 -0700 Subject: [PATCH 14/23] perf(examples): cap Codex reasoning at 'low' in policy demo The demo task is mechanical (one HTTP request, parse a structured 403, post a JSON proposal, retry). Codex's default high-effort reasoning roughly doubles the demo's wall time without improving outcomes; running at 'low' lands the same minimal L7 grant in roughly half the time. 
Override with DEMO_CODEX_REASONING=medium (or higher) to compare runs. --- examples/agent-driven-policy-management/sandbox-agent.sh | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/examples/agent-driven-policy-management/sandbox-agent.sh b/examples/agent-driven-policy-management/sandbox-agent.sh index b10a00fe4..052535c35 100755 --- a/examples/agent-driven-policy-management/sandbox-agent.sh +++ b/examples/agent-driven-policy-management/sandbox-agent.sh @@ -66,8 +66,17 @@ cd "$WORK" # OpenShell sandbox container without extra capabilities. The "danger" framing # is from Codex's perspective on a developer host; here the OpenShell network # policy and filesystem constraints are doing the actual containment. +# +# Cap Codex's reasoning effort at the lower end. The demo task is mechanical +# (one HTTP request, parse a structured 403, post a JSON proposal, retry); the +# default high-effort reasoning roughly doubles the demo's wall time without +# improving outcomes. Override with DEMO_CODEX_REASONING if you want to +# compare runs. +DEMO_CODEX_REASONING="${DEMO_CODEX_REASONING:-low}" + exec codex exec \ --skip-git-repo-check \ --sandbox danger-full-access \ --ephemeral \ + -c "model_reasoning_effort=\"${DEMO_CODEX_REASONING}\"" \ "$(cat /sandbox/payload/agent-task.md)" From 5d1c40ea62c49a6da19ab0a2e03b82e5e3bbd248 Mon Sep 17 00:00:00 2001 From: Alexander Watson Date: Mon, 4 May 2026 13:00:49 -0700 Subject: [PATCH 15/23] fix(sandbox): harden policy.local denials endpoint Three changes addressing review feedback before merging the agent-driven policy management MVP: - Distinguish "OCSF JSONL enabled, no denials" from "OCSF JSONL disabled, nothing to read." The endpoint now returns a `log_available` flag and an explanatory `note` when the log file is missing, so the in-sandbox agent can give the developer an accurate hint instead of a misleading empty list. - Stop echoing the OCSF `message` field in the per-denial summary. 
The proxy's denial messages can include the request path with query string (e.g., `?access_token=...`); the structured `host`/`port`/`method`/ `path`/`binary` fields carry everything the agent needs to draft a proposal, and `path` is sourced from `http_request.url.path` which already excludes the query string. - Cap `read_request_body` at a 15s timeout. Bounds slowloris-style stalls from a misbehaving in-sandbox process. The proxy listener only accepts loopback connections so practical impact is small, but this is cheap defense-in-depth. New tests cover the missing-log signal and the message-redaction guarantee. --- crates/openshell-sandbox/src/policy_local.rs | 103 ++++++++++++++++--- 1 file changed, 89 insertions(+), 14 deletions(-) diff --git a/crates/openshell-sandbox/src/policy_local.rs b/crates/openshell-sandbox/src/policy_local.rs index 0573aab3c..80b82db37 100644 --- a/crates/openshell-sandbox/src/policy_local.rs +++ b/crates/openshell-sandbox/src/policy_local.rs @@ -18,6 +18,11 @@ use tokio::sync::RwLock; pub const POLICY_LOCAL_HOST: &str = "policy.local"; const MAX_POLICY_LOCAL_BODY_BYTES: usize = 64 * 1024; +/// Hard ceiling on how long a single request body read can stall. Bounds a +/// slowloris-style upload from an in-sandbox process; the proxy listener only +/// accepts loopback connections, so practical impact is limited, but this is +/// cheap defense-in-depth. +const POLICY_LOCAL_BODY_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(15); const DEFAULT_DENIALS_LIMIT: usize = 10; const MAX_DENIALS_LIMIT: usize = 100; /// OCSF rolling appender keeps three files (daily rotation); read the most @@ -139,11 +144,29 @@ async fn recent_denials_response( let limit = parse_last_query(query).unwrap_or(DEFAULT_DENIALS_LIMIT); let log_dir = ctx.ocsf_log_dir.clone(); + // Distinguish "OCSF JSONL is enabled and no denials happened" from "OCSF + // JSONL is disabled, so we have nothing to read." 
Without this flag the + // agent sees `[]` in both cases and cannot tell the difference. + let log_available = matches!( + collect_ocsf_log_files(&log_dir, 1), + Ok(files) if !files.is_empty() + ); + let denials = tokio::task::spawn_blocking(move || read_recent_denials(&log_dir, limit)) .await .unwrap_or_else(|_| Vec::new()); - (200, serde_json::json!({ "denials": denials })) + let mut payload = serde_json::json!({ + "denials": denials, + "log_available": log_available, + }); + if !log_available { + payload["note"] = serde_json::json!( + "no OCSF JSONL log file is present; enable the `ocsf_json_enabled` sandbox setting to populate" + ); + } + + (200, payload) } fn parse_last_query(query: &str) -> Option { @@ -244,9 +267,14 @@ fn denial_summary_from_event(value: &serde_json::Value) -> Option content_length { body.truncate(content_length); } - while body.len() < content_length { - let remaining = content_length - body.len(); - let mut chunk = vec![0u8; remaining.min(8192)]; - let n = client.read(&mut chunk).await.into_diagnostic()?; - if n == 0 { - return Err(miette::miette!("policy.local request body ended early")); + let read_loop = async { + while body.len() < content_length { + let remaining = content_length - body.len(); + let mut chunk = vec![0u8; remaining.min(8192)]; + let n = client.read(&mut chunk).await.into_diagnostic()?; + if n == 0 { + return Err(miette::miette!("policy.local request body ended early")); + } + body.extend_from_slice(&chunk[..n]); } - body.extend_from_slice(&chunk[..n]); - } + Ok::<(), miette::Report>(()) + }; + tokio::time::timeout(POLICY_LOCAL_BODY_READ_TIMEOUT, read_loop) + .await + .map_err(|_| miette::miette!("policy.local request body read timed out"))??; Ok(body) } @@ -886,11 +920,52 @@ mod tests { let ctx = PolicyLocalContext::with_log_dir(None, None, None, dir.path().to_path_buf()); let (status, payload) = recent_denials_response(&ctx, "last=10").await; assert_eq!(status, 200); + assert_eq!(payload["log_available"], true); let 
denials = payload["denials"].as_array().unwrap(); assert_eq!(denials.len(), 2); // Newest first. - assert_eq!(denials[0]["message"], "second"); - assert_eq!(denials[1]["message"], "first"); + assert_eq!(denials[0]["host"], "second.example"); + assert_eq!(denials[1]["host"], "first.example"); + assert!( + denials[0].get("message").is_none(), + "denial summaries must not echo the OCSF `message` field; it can leak credentials in query strings" + ); + } + + #[tokio::test] + async fn recent_denials_signals_when_log_is_missing() { + let dir = tempfile::tempdir().unwrap(); + let ctx = PolicyLocalContext::with_log_dir(None, None, None, dir.path().to_path_buf()); + let (status, payload) = recent_denials_response(&ctx, "").await; + assert_eq!(status, 200); + assert_eq!(payload["log_available"], false); + assert_eq!(payload["denials"].as_array().unwrap().len(), 0); + assert!( + payload["note"] + .as_str() + .unwrap() + .contains("ocsf_json_enabled") + ); + } + + #[test] + fn denial_summary_does_not_leak_message_field() { + // OCSF `message` strings can include the request path with query + // (e.g., `?access_token=…`); the summary must drop them. 
+ let evt = serde_json::json!({ + "class_uid": 4002, + "action_id": 2, + "message": "FORWARD denied PUT api.github.com:443/x?access_token=secret-token", + "dst_endpoint": {"hostname": "api.github.com", "port": 443}, + "http_request": {"http_method": "PUT", "url": {"path": "/x"}} + }); + let summary = denial_summary_from_event(&evt).unwrap(); + assert_eq!(summary["path"], "/x"); + assert!(summary.get("message").is_none()); + assert!( + !summary.to_string().contains("secret-token"), + "summary must not include credentials from the source message" + ); } #[tokio::test] From 03ad245c9d802b0000c3a43a399e45c648f5be3c Mon Sep 17 00:00:00 2001 From: Alexander Watson Date: Mon, 4 May 2026 13:00:57 -0700 Subject: [PATCH 16/23] fix(examples): redact tokens in agent log tail and validate DEMO_FILE_DIR Two small hardening passes on the policy management demo: - `fail()` now pipes the agent log tail through a redactor that masks the GitHub token and Codex credential triple before printing. Codex itself is well-behaved about not echoing the token, but a misbehaving tool call could leak it; this is a final safety net before the log hits the developer's terminal (and any clipboard or chat history that follows). - `validate_env` now regex-checks DEMO_FILE_DIR with the same allow-list the other path-shaped variables use. The value is interpolated through sed with `|` as the delimiter when rendering the agent task; rejecting unsupported characters keeps the templating predictable and stops a user-supplied value from breaking out into a shell context. 
--- .../agent-driven-policy-management/demo.sh | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/examples/agent-driven-policy-management/demo.sh b/examples/agent-driven-policy-management/demo.sh index efeb2f6f5..bbec37612 100755 --- a/examples/agent-driven-policy-management/demo.sh +++ b/examples/agent-driven-policy-management/demo.sh @@ -60,11 +60,23 @@ AGENT_PID="" step() { printf "\n${BOLD}${CYAN}==> %s${RESET}\n\n" "$1"; } info() { printf " %b\n" "$*"; } +# Redact host-side credentials from the agent log tail before printing on +# failure. Codex shouldn't echo the token, but a misbehaving tool call (e.g., +# `curl -v`) could leak it; sanitize before showing the log. +redact_log() { + local replacement='[redacted]' + sed \ + -e "s|${DEMO_GITHUB_TOKEN:-__no_github_token__}|${replacement}|g" \ + -e "s|${CODEX_AUTH_ACCESS_TOKEN:-__no_codex_access__}|${replacement}|g" \ + -e "s|${CODEX_AUTH_REFRESH_TOKEN:-__no_codex_refresh__}|${replacement}|g" \ + -e "s|${CODEX_AUTH_ACCOUNT_ID:-__no_codex_account__}|${replacement}|g" +} + fail() { printf "\n${RED}error:${RESET} %s\n" "$*" >&2 if [[ -f "$AGENT_LOG" ]]; then printf "\n${YELLOW}Agent log tail:${RESET}\n" >&2 - tail -n 80 "$AGENT_LOG" | sed 's/^/ /' >&2 || true + tail -n 80 "$AGENT_LOG" | redact_log | sed 's/^/ /' >&2 || true fi exit 1 } @@ -139,6 +151,10 @@ validate_env() { [[ "$DEMO_GITHUB_REPO" =~ ^[A-Za-z0-9_.-]+$ ]] || fail "DEMO_GITHUB_REPO contains unsupported characters" [[ "$DEMO_BRANCH" =~ ^[A-Za-z0-9._/-]+$ ]] || fail "DEMO_BRANCH contains unsupported characters" [[ "$DEMO_RUN_ID" =~ ^[A-Za-z0-9_.-]+$ ]] || fail "DEMO_RUN_ID contains unsupported characters" + # DEMO_FILE_DIR is interpolated through `sed` with `|` as the delimiter + # when rendering the agent task; reject any character that would break + # the substitution or escape into a shell context. 
+ [[ "$DEMO_FILE_DIR" =~ ^[A-Za-z0-9._/-]+$ ]] || fail "DEMO_FILE_DIR contains unsupported characters" resolve_github_owner [[ "$DEMO_GITHUB_OWNER" =~ ^[A-Za-z0-9_.-]+$ ]] || fail "DEMO_GITHUB_OWNER contains unsupported characters" From 69fb629a34e7af7eb5c8d676b397ab1b4ffb16cd Mon Sep 17 00:00:00 2001 From: Alexander Watson Date: Mon, 4 May 2026 13:11:35 -0700 Subject: [PATCH 17/23] refactor(sandbox): centralize policy.local routes and skill path Addresses review feedback that the deny body's `next_steps` array and the route table could drift apart. The route paths and skill location now live as `pub const`s in `policy_local.rs` and feed both: - the dispatcher in `route_request` that matches against them - a new `agent_next_steps()` helper that builds the JSON the L7 deny body embeds `l7/rest.rs::deny_response_body` calls `policy_local::agent_next_steps()` instead of inlining the array, so adding or renaming a route is a one-line change in `policy_local.rs` and the agent contract follows automatically. --- crates/openshell-sandbox/src/l7/rest.rs | 27 ++-------- crates/openshell-sandbox/src/policy_local.rs | 52 ++++++++++++++++++-- 2 files changed, 54 insertions(+), 25 deletions(-) diff --git a/crates/openshell-sandbox/src/l7/rest.rs b/crates/openshell-sandbox/src/l7/rest.rs index 464425435..0db017738 100644 --- a/crates/openshell-sandbox/src/l7/rest.rs +++ b/crates/openshell-sandbox/src/l7/rest.rs @@ -538,30 +538,13 @@ fn deny_response_body( "rule_missing".to_string(), serde_json::Value::Object(rule_missing), ); + // `next_steps` is generated by the policy_local module so the wire URLs + // and the on-disk skill path stay in sync with the route table. Adding + // or renaming a route only requires touching the constants in that + // module; this side picks up the change automatically. 
body.insert( "next_steps".to_string(), - serde_json::json!([ - { - "action": "read_skill", - "path": "/etc/openshell/skills/policy_advisor.md" - }, - { - "action": "inspect_policy", - "method": "GET", - "url": "http://policy.local/v1/policy/current" - }, - { - "action": "inspect_recent_denials", - "method": "GET", - "url": "http://policy.local/v1/denials?last=5" - }, - { - "action": "submit_proposal", - "method": "POST", - "url": "http://policy.local/v1/proposals", - "body_type": "PolicyMergeOperation" - } - ]), + crate::policy_local::agent_next_steps(), ); serde_json::Value::Object(body) diff --git a/crates/openshell-sandbox/src/policy_local.rs b/crates/openshell-sandbox/src/policy_local.rs index 80b82db37..f00233b8d 100644 --- a/crates/openshell-sandbox/src/policy_local.rs +++ b/crates/openshell-sandbox/src/policy_local.rs @@ -17,6 +17,21 @@ use tokio::sync::RwLock; pub const POLICY_LOCAL_HOST: &str = "policy.local"; +/// Filesystem path of the static agent guidance bundle inside the sandbox. +/// Single source of truth: the skill installer writes here, the L7 deny body +/// references this path in `next_steps`, and the skill's own documentation +/// renders the same path. Changing the location is a one-line update here. +pub const SKILL_PATH: &str = "/etc/openshell/skills/policy_advisor.md"; + +/// Routes served by the in-sandbox policy advisor API. Held in one place so +/// the L7 deny `next_steps` array, the route dispatcher, the skill content, +/// and tests all stay in sync — change the wire path here and every caller +/// follows. See `agent_next_steps()` for the consumer that surfaces these +/// to the agent on a 403. +pub const ROUTE_POLICY_CURRENT: &str = "/v1/policy/current"; +pub const ROUTE_DENIALS: &str = "/v1/denials"; +pub const ROUTE_PROPOSALS: &str = "/v1/proposals"; + const MAX_POLICY_LOCAL_BODY_BYTES: usize = 64 * 1024; /// Hard ceiling on how long a single request body read can stall. 
Bounds a /// slowloris-style upload from an in-sandbox process; the proxy listener only @@ -95,9 +110,9 @@ async fn route_request( ) -> (u16, serde_json::Value) { let (route, query) = path.split_once('?').map_or((path, ""), |(r, q)| (r, q)); match (method, route) { - ("GET", "/v1/policy/current") => current_policy_response(ctx).await, - ("GET", "/v1/denials") => recent_denials_response(ctx, query).await, - ("POST", "/v1/proposals") => submit_proposal(ctx, body).await, + ("GET", ROUTE_POLICY_CURRENT) => current_policy_response(ctx).await, + ("GET", ROUTE_DENIALS) => recent_denials_response(ctx, query).await, + ("POST", ROUTE_PROPOSALS) => submit_proposal(ctx, body).await, _ => ( 404, serde_json::json!({ @@ -108,6 +123,37 @@ async fn route_request( } } +/// Build the `next_steps` array embedded in the L7 deny body so the agent has +/// machine-readable pointers to this API. Centralizes the shape here to keep +/// the deny body and the actual route table from drifting — adding or +/// renaming a route only requires touching the route constants above. 
+#[must_use] +pub fn agent_next_steps() -> serde_json::Value { + let host = POLICY_LOCAL_HOST; + serde_json::json!([ + { + "action": "read_skill", + "path": SKILL_PATH, + }, + { + "action": "inspect_policy", + "method": "GET", + "url": format!("http://{host}{ROUTE_POLICY_CURRENT}"), + }, + { + "action": "inspect_recent_denials", + "method": "GET", + "url": format!("http://{host}{ROUTE_DENIALS}?last=5"), + }, + { + "action": "submit_proposal", + "method": "POST", + "url": format!("http://{host}{ROUTE_PROPOSALS}"), + "body_type": "PolicyMergeOperation", + }, + ]) +} + async fn current_policy_response(ctx: &PolicyLocalContext) -> (u16, serde_json::Value) { let Some(policy) = ctx.current_policy.read().await.clone() else { return ( From 3997e31822125006ef1ca0ef30aaa442619202ae Mon Sep 17 00:00:00 2001 From: Alexander Watson Date: Wed, 6 May 2026 11:03:10 -0700 Subject: [PATCH 18/23] feat(sandbox): switch /v1/denials to shorthand log pass-through MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously /v1/denials parsed `/var/log/openshell-ocsf.*.log` (OCSF JSONL) and returned structured per-event objects. JSONL is opt-in via `ocsf_json_enabled`, so the endpoint returned an empty list with a "log not enabled" hint by default — agents had to navigate a setup step before the inspect-recent-denials guidance was useful. Switch to reading the shorthand log at `/var/log/openshell.*.log`, which is always-on and the same human-readable format `openshell logs` displays. The endpoint now returns raw shorthand lines (newest first) — the agent reads them directly, no field parsing. Tradeoffs: - Removes the JSONL-on-by-default debate: shorthand is already on, no defaults change. - Updating shorthand is a single-file change in this repo; no schema rev needed when we want to add fields. 
Implementation: - `read_recent_denial_lines` walks shorthand log files newest-first, filters lines with ` OCSF ` AND ` DENIED ` (the OCSF action label, uppercase, space-bounded). - `collect_shorthand_log_files` matches `openshell..log`; the trailing dot in `SHORTHAND_LOG_PREFIX = "openshell."` excludes `openshell-ocsf..log` so JSONL-on doesn't bleed into responses. - 4096-byte cap per surfaced line as defense against pathological inputs. - Skill doc updated to reflect that `/v1/denials` returns raw shorthand lines, not structured fields. Defense-in-depth on query-string secrets: - `redact_query_strings` strips `?` to `?[redacted]` from each surfaced line. The L7 relay path emits OCSF events using `redacted_target` (secret-placeholder redaction), but the FORWARD deny path in `proxy.rs` populates `OcsfUrl::new("http", host, path, port)` and `.message(...)` with the raw request path — query string included. Stripping queries at the consumer guards `/v1/denials` regardless of whether the upstream emit sites are tightened. The on-disk log is not rewritten by this change; that is a separate hardening task tracked for the FORWARD path emit sites in proxy.rs. - `truncate_at_char_boundary` is UTF-8 safe; redaction runs before truncation so a cut cannot slice mid-secret. Tests: - `recent_denials_returns_newest_first_from_shorthand_lines` covers the happy path with mixed allowed/denied/non-OCSF lines. - `recent_denials_skips_jsonl_log_files` confirms JSONL files don't surface even if present. - `recent_denials_truncates_pathological_lines` covers the cap. - `is_ocsf_denial_line_filters_correctly` covers the line-level filter. - `redact_query_strings_removes_query_from_url_token` and `redact_query_strings_removes_query_in_reason_tag` cover the redaction in both URL token and `[reason:...]` contexts. - `truncate_at_char_boundary_does_not_panic_on_multibyte` covers the UTF-8 safety. 
--- crates/openshell-sandbox/src/policy_local.rs | 434 +++++++++--------- .../src/skills/policy_advisor.md | 13 +- 2 files changed, 236 insertions(+), 211 deletions(-) diff --git a/crates/openshell-sandbox/src/policy_local.rs b/crates/openshell-sandbox/src/policy_local.rs index f00233b8d..da04a7eb0 100644 --- a/crates/openshell-sandbox/src/policy_local.rs +++ b/crates/openshell-sandbox/src/policy_local.rs @@ -40,18 +40,27 @@ const MAX_POLICY_LOCAL_BODY_BYTES: usize = 64 * 1024; const POLICY_LOCAL_BODY_READ_TIMEOUT: std::time::Duration = std::time::Duration::from_secs(15); const DEFAULT_DENIALS_LIMIT: usize = 10; const MAX_DENIALS_LIMIT: usize = 100; -/// OCSF rolling appender keeps three files (daily rotation); read the most -/// recent two so a request just past midnight still has yesterday's denials. +/// The shorthand rolling appender keeps three files (daily rotation); read the +/// most recent two so a request just past midnight still has yesterday's +/// denials. const DENIAL_LOG_FILES_TO_SCAN: usize = 2; -const OCSF_LOG_DIR: &str = "/var/log"; -const OCSF_LOG_PREFIX: &str = "openshell-ocsf"; +const LOG_DIR: &str = "/var/log"; +/// Shorthand log filenames are `openshell.YYYY-MM-DD.log`. The trailing dot in +/// the prefix is intentional: it disambiguates from the OCSF JSONL appender's +/// `openshell-ocsf.YYYY-MM-DD.log`, which we never want to surface here (the +/// JSONL is opt-in via `ocsf_json_enabled` and not the source of truth for +/// `/v1/denials`). +const SHORTHAND_LOG_PREFIX: &str = "openshell."; +/// Defensive cap on per-line length returned to the agent so a pathological +/// log entry (very long URL path, etc.) cannot blow up the response. 
+const MAX_DENIAL_LINE_BYTES: usize = 4096; #[derive(Debug)] pub struct PolicyLocalContext { current_policy: Arc>>, gateway_endpoint: Option, sandbox_name: Option, - ocsf_log_dir: PathBuf, + shorthand_log_dir: PathBuf, } impl PolicyLocalContext { @@ -64,7 +73,7 @@ impl PolicyLocalContext { current_policy, gateway_endpoint, sandbox_name, - PathBuf::from(OCSF_LOG_DIR), + PathBuf::from(LOG_DIR), ) } @@ -72,13 +81,13 @@ impl PolicyLocalContext { current_policy: Option, gateway_endpoint: Option, sandbox_name: Option, - ocsf_log_dir: PathBuf, + shorthand_log_dir: PathBuf, ) -> Self { Self { current_policy: Arc::new(RwLock::new(current_policy)), gateway_endpoint, sandbox_name, - ocsf_log_dir, + shorthand_log_dir, } } @@ -188,19 +197,22 @@ async fn recent_denials_response( query: &str, ) -> (u16, serde_json::Value) { let limit = parse_last_query(query).unwrap_or(DEFAULT_DENIALS_LIMIT); - let log_dir = ctx.ocsf_log_dir.clone(); - - // Distinguish "OCSF JSONL is enabled and no denials happened" from "OCSF - // JSONL is disabled, so we have nothing to read." Without this flag the - // agent sees `[]` in both cases and cannot tell the difference. + let log_dir = ctx.shorthand_log_dir.clone(); + + // Distinguish "shorthand log exists and no denials happened" from "no log + // file yet, so we have nothing to read." Without this flag the agent sees + // `[]` in both cases and cannot tell the difference. The shorthand log is + // always-on (no setting gates it), so the only way `log_available=false` + // happens in practice is if the supervisor has not flushed any events to + // disk yet, or `/var/log` is not writable in this image. 
let log_available = matches!( - collect_ocsf_log_files(&log_dir, 1), + collect_shorthand_log_files(&log_dir, 1), Ok(files) if !files.is_empty() ); - let denials = tokio::task::spawn_blocking(move || read_recent_denials(&log_dir, limit)) + let denials = tokio::task::spawn_blocking(move || read_recent_denial_lines(&log_dir, limit)) .await - .unwrap_or_else(|_| Vec::new()); + .unwrap_or_default(); let mut payload = serde_json::json!({ "denials": denials, @@ -208,7 +220,7 @@ async fn recent_denials_response( }); if !log_available { payload["note"] = serde_json::json!( - "no OCSF JSONL log file is present; enable the `ocsf_json_enabled` sandbox setting to populate" + "no shorthand log file is present yet at /var/log/openshell.YYYY-MM-DD.log; the supervisor may not have emitted any events to disk yet" ); } @@ -233,49 +245,114 @@ fn parse_last_query(query: &str) -> Option { None } -/// Walk the OCSF JSONL log files (most-recent first) and return up to `limit` -/// summarized denial events in newest-first order. +/// Walk the shorthand log files (most-recent first) and return up to `limit` +/// raw denial lines in newest-first order. The agent receives the same +/// human-readable text that `openshell logs` displays — no parsing back into +/// structured form. Updating the shorthand format adds fields automatically; +/// no schema rev required. /// /// Reads files synchronously and is intended to run inside `spawn_blocking`. -fn read_recent_denials(log_dir: &Path, limit: usize) -> Vec { - let Ok(files) = collect_ocsf_log_files(log_dir, DENIAL_LOG_FILES_TO_SCAN) else { +fn read_recent_denial_lines(log_dir: &Path, limit: usize) -> Vec { + let Ok(files) = collect_shorthand_log_files(log_dir, DENIAL_LOG_FILES_TO_SCAN) else { return Vec::new(); }; - let mut summaries: Vec = Vec::with_capacity(limit); + let mut lines: Vec = Vec::with_capacity(limit); for path in files { let Ok(contents) = std::fs::read_to_string(&path) else { continue; }; - // Walk lines newest-first. 
Within a single file, last line written is - // the freshest event. + // Walk lines newest-first. Within a single file, the last line written + // is the freshest event. for line in contents.lines().rev() { - if line.is_empty() { + if !is_ocsf_denial_line(line) { continue; } - let Ok(value) = serde_json::from_str::(line) else { - continue; - }; - let Some(summary) = denial_summary_from_event(&value) else { - continue; - }; - summaries.push(summary); - if summaries.len() >= limit { - return summaries; + // Defense-in-depth: redact query strings before truncation. The + // FORWARD deny path in `proxy.rs` populates the OCSF `message` + // and URL with the raw request path including `?query=...`, which + // the shorthand layer then renders verbatim. Stripping queries + // here means the agent never sees the secret even if an upstream + // emit site forgets to redact (TODO: harden the emit sites in + // proxy.rs FORWARD path so the on-disk shorthand log itself is + // clean — tracked separately). Redact first so truncation cannot + // slice mid-secret. + let redacted = redact_query_strings(line); + let surfaced = truncate_at_char_boundary(&redacted, MAX_DENIAL_LINE_BYTES); + lines.push(surfaced); + if lines.len() >= limit { + return lines; } } } - summaries + lines +} + +/// Replace any `?` substring with `?[redacted]` to keep query-string +/// secrets out of the agent's view. Walks per Unicode scalar value so multi-byte +/// content is safe. A query is everything from `?` until the next whitespace or +/// `]` (the shorthand format uses `[...]` for context tags). +fn redact_query_strings(line: &str) -> String { + let mut out = String::with_capacity(line.len()); + let mut chars = line.chars(); + while let Some(c) = chars.next() { + if c == '?' { + out.push('?'); + out.push_str("[redacted]"); + // Consume until whitespace or `]` (preserved as the next token's + // boundary by writing it back out). 
+ for next in chars.by_ref() { + if next.is_whitespace() || next == ']' { + out.push(next); + break; + } + } + } else { + out.push(c); + } + } + out +} + +/// Truncate `s` at the largest UTF-8 char boundary <= `max_bytes`, appending a +/// `...[truncated]` suffix. Returning a `String` (not `&str`) avoids surprising +/// callers about lifetime relationships with `s`. +fn truncate_at_char_boundary(s: &str, max_bytes: usize) -> String { + if s.len() <= max_bytes { + return s.to_string(); + } + let mut end = max_bytes; + while end > 0 && !s.is_char_boundary(end) { + end -= 1; + } + let mut out = String::with_capacity(end + "...[truncated]".len()); + out.push_str(&s[..end]); + out.push_str("...[truncated]"); + out } -fn collect_ocsf_log_files(log_dir: &Path, max_files: usize) -> std::io::Result> { +/// True for OCSF denial events as rendered by the shorthand layer. The format +/// is ` OCSF <[SEV]> ...`. The literal +/// ` OCSF ` substring identifies an OCSF event (vs. a non-OCSF tracing line); +/// ` DENIED ` is the OCSF action label uppercased and surrounded by spaces, so +/// matching it is safe against substring collisions in URLs or hostnames. +fn is_ocsf_denial_line(line: &str) -> bool { + line.contains(" OCSF ") && line.contains(" DENIED ") +} + +fn collect_shorthand_log_files( + log_dir: &Path, + max_files: usize, +) -> std::io::Result> { let mut entries: Vec<(std::time::SystemTime, PathBuf)> = std::fs::read_dir(log_dir)? .filter_map(std::result::Result::ok) .filter_map(|entry| { let path = entry.path(); let name = entry.file_name(); let name = name.to_string_lossy(); - if !name.starts_with(OCSF_LOG_PREFIX) { + // `openshell.YYYY-MM-DD.log` only — the trailing dot in the prefix + // disambiguates from `openshell-ocsf.YYYY-MM-DD.log`. 
+ if !name.starts_with(SHORTHAND_LOG_PREFIX) || !name.ends_with(".log") { return None; } let modified = entry.metadata().and_then(|m| m.modified()).ok()?; @@ -291,78 +368,6 @@ fn collect_ocsf_log_files(log_dir: &Path, max_files: usize) -> std::io::Result Option { - // OCSF action_id 2 = Denied. Filter aggressively to avoid leaking unrelated - // events (allowed connections, app lifecycle, etc.) into the agent's view. - if value.get("action_id").and_then(serde_json::Value::as_u64) != Some(2) { - return None; - } - - let class_uid = value.get("class_uid").and_then(serde_json::Value::as_u64)?; - let layer = match class_uid { - 4001 => "l4", - 4002 => "l7", - _ => return None, - }; - - let mut summary = serde_json::Map::new(); - summary.insert("layer".to_string(), serde_json::json!(layer)); - - if let Some(time) = value.get("time").and_then(serde_json::Value::as_i64) { - summary.insert("time_ms".to_string(), serde_json::json!(time)); - } - // Deliberately do NOT echo `message` from the OCSF event. The proxy's - // shorthand denial messages can include the request path with query - // string (e.g., `?access_token=…`), which would expose secrets back to an - // in-sandbox agent that is by definition outside the trust boundary - // protecting that token. The structured fields below (host, port, method, - // path, binary, policy) carry everything the agent needs to draft a - // proposal, and `path` is sourced from `http_request.url.path` which - // already excludes the query string. 
- if let Some(dst) = value.get("dst_endpoint") { - if let Some(host) = dst - .get("hostname") - .and_then(serde_json::Value::as_str) - .or_else(|| dst.get("ip").and_then(serde_json::Value::as_str)) - { - summary.insert("host".to_string(), serde_json::json!(host)); - } - if let Some(port) = dst.get("port").and_then(serde_json::Value::as_u64) { - summary.insert("port".to_string(), serde_json::json!(port)); - } - } - if let Some(req) = value.get("http_request") { - if let Some(method) = req.get("http_method").and_then(serde_json::Value::as_str) { - summary.insert("method".to_string(), serde_json::json!(method)); - } - if let Some(url) = req.get("url") - && let Some(path) = url.get("path").and_then(serde_json::Value::as_str) - { - summary.insert("path".to_string(), serde_json::json!(path)); - } - } - if let Some(binary) = value - .get("actor") - .and_then(|a| a.get("process")) - .and_then(|p| p.get("file")) - .and_then(|f| f.get("path")) - .and_then(serde_json::Value::as_str) - { - summary.insert("binary".to_string(), serde_json::json!(binary)); - } - if let Some(rule) = value - .get("firewall_rule") - .and_then(|r| r.get("name")) - .and_then(serde_json::Value::as_str) - { - summary.insert("policy".to_string(), serde_json::json!(rule)); - } - - Some(serde_json::Value::Object(summary)) -} - async fn submit_proposal(ctx: &PolicyLocalContext, body: &[u8]) -> (u16, serde_json::Value) { let Some(endpoint) = ctx.gateway_endpoint.as_deref() else { return ( @@ -880,87 +885,42 @@ mod tests { } #[test] - fn denial_summary_filters_to_l4_l7_denied_only() { - let allowed = serde_json::json!({ - "class_uid": 4001, - "action_id": 1, - "dst_endpoint": {"hostname": "api.github.com", "port": 443} - }); - assert!(denial_summary_from_event(&allowed).is_none()); + fn is_ocsf_denial_line_filters_correctly() { + // OCSF denial — match. 
+ assert!(is_ocsf_denial_line( + "2026-05-06T17:02:00.000Z OCSF HTTP:PUT [MED] DENIED PUT http://api.github.com:443/x [policy:p engine:l7]" + )); + assert!(is_ocsf_denial_line( + "2026-05-06T17:02:00.000Z OCSF NET:OPEN [MED] DENIED curl(42) -> blocked.com:443 [policy:- engine:opa]" + )); - let unrelated = serde_json::json!({ - "class_uid": 6002, - "action_id": 2, - "message": "supervisor lifecycle" - }); - assert!(denial_summary_from_event(&unrelated).is_none()); - - let l4_denied = serde_json::json!({ - "class_uid": 4001, - "action_id": 2, - "time": 1_742_054_400_000_i64, - "message": "CONNECT denied api.github.com:443", - "dst_endpoint": {"hostname": "api.github.com", "port": 443}, - "actor": {"process": {"file": {"path": "/usr/bin/curl"}}}, - "firewall_rule": {"name": "github-readonly"} - }); - let summary = denial_summary_from_event(&l4_denied).unwrap(); - assert_eq!(summary["layer"], "l4"); - assert_eq!(summary["host"], "api.github.com"); - assert_eq!(summary["port"], 443); - assert_eq!(summary["binary"], "/usr/bin/curl"); - assert_eq!(summary["policy"], "github-readonly"); - assert_eq!(summary["time_ms"], 1_742_054_400_000_i64); - - let l7_denied = serde_json::json!({ - "class_uid": 4002, - "action_id": 2, - "message": "FORWARD denied PUT /repos/foo/bar/contents/x", - "dst_endpoint": {"hostname": "api.github.com", "port": 443}, - "http_request": { - "http_method": "PUT", - "url": {"path": "/repos/foo/bar/contents/x"} - } - }); - let summary = denial_summary_from_event(&l7_denied).unwrap(); - assert_eq!(summary["layer"], "l7"); - assert_eq!(summary["method"], "PUT"); - assert_eq!(summary["path"], "/repos/foo/bar/contents/x"); + // OCSF allowed — must not match. + assert!(!is_ocsf_denial_line( + "2026-05-06T17:02:00.000Z OCSF NET:OPEN [INFO] ALLOWED curl(42) -> api.example.com:443" + )); + + // Non-OCSF tracing line — must not match even if it contains the word DENIED. 
+ assert!(!is_ocsf_denial_line( + "2026-05-06T17:02:00.000Z INFO some::module: request DENIED in upstream" + )); + + // Empty line — must not match. + assert!(!is_ocsf_denial_line("")); } #[tokio::test] - async fn recent_denials_returns_newest_first_from_jsonl_files() { + async fn recent_denials_returns_newest_first_from_shorthand_lines() { let dir = tempfile::tempdir().unwrap(); - let log_path = dir.path().join("openshell-ocsf.2026-05-04.log"); - let lines = [ - serde_json::json!({ - "class_uid": 4001, - "action_id": 2, - "time": 1, - "message": "first", - "dst_endpoint": {"hostname": "first.example", "port": 443} - }), - // An allowed event mixed in — must be filtered out. - serde_json::json!({ - "class_uid": 4001, - "action_id": 1, - "time": 2, - "dst_endpoint": {"hostname": "ok.example", "port": 443} - }), - serde_json::json!({ - "class_uid": 4002, - "action_id": 2, - "time": 3, - "message": "second", - "dst_endpoint": {"hostname": "second.example", "port": 443}, - "http_request": {"http_method": "PUT", "url": {"path": "/x"}} - }), - ]; - let body: String = lines - .iter() - .map(|v| format!("{v}\n")) - .collect::>() - .concat(); + let log_path = dir.path().join("openshell.2026-05-06.log"); + // Mixed file: allowed events, non-OCSF info lines, two denials. + // Lines are written in chronological order; reader walks newest-first. 
+ let body = "\ +2026-05-06T17:02:00.000Z OCSF NET:OPEN [INFO] ALLOWED curl(10) -> api.example.com:443 [policy:default engine:opa] +2026-05-06T17:02:01.000Z INFO some::module: routine status check +2026-05-06T17:02:02.000Z OCSF HTTP:GET [MED] DENIED GET http://blocked.example/v1/data [policy:default-deny engine:l7] +2026-05-06T17:02:03.000Z OCSF NET:OPEN [INFO] ALLOWED curl(11) -> api.example.com:443 +2026-05-06T17:02:04.000Z OCSF HTTP:PUT [MED] DENIED PUT http://api.github.com:443/repos/x/y/contents/z [policy:gh_readonly engine:l7] +"; std::fs::write(&log_path, body).unwrap(); let ctx = PolicyLocalContext::with_log_dir(None, None, None, dir.path().to_path_buf()); @@ -970,12 +930,35 @@ mod tests { let denials = payload["denials"].as_array().unwrap(); assert_eq!(denials.len(), 2); // Newest first. - assert_eq!(denials[0]["host"], "second.example"); - assert_eq!(denials[1]["host"], "first.example"); + assert!(denials[0].as_str().unwrap().contains("HTTP:PUT")); assert!( - denials[0].get("message").is_none(), - "denial summaries must not echo the OCSF `message` field; it can leak credentials in query strings" + denials[0] + .as_str() + .unwrap() + .contains("/repos/x/y/contents/z") ); + assert!(denials[1].as_str().unwrap().contains("HTTP:GET")); + assert!(denials[1].as_str().unwrap().contains("blocked.example")); + } + + #[tokio::test] + async fn recent_denials_skips_jsonl_log_files() { + // The shorthand reader must not surface `openshell-ocsf.*.log` content + // even if a deny-looking line is present, so the response stays + // independent of the JSONL appender's enabled state. 
+ let dir = tempfile::tempdir().unwrap(); + let jsonl = dir.path().join("openshell-ocsf.2026-05-06.log"); + std::fs::write( + &jsonl, + r#"{"class_uid":4002,"action_id":2,"message":"DENIED","time":1}"#, + ) + .unwrap(); + + let ctx = PolicyLocalContext::with_log_dir(None, None, None, dir.path().to_path_buf()); + let (status, payload) = recent_denials_response(&ctx, "").await; + assert_eq!(status, 200); + assert_eq!(payload["log_available"], false); + assert_eq!(payload["denials"].as_array().unwrap().len(), 0); } #[tokio::test] @@ -990,28 +973,67 @@ mod tests { payload["note"] .as_str() .unwrap() - .contains("ocsf_json_enabled") + .contains("/var/log/openshell.") ); } #[test] - fn denial_summary_does_not_leak_message_field() { - // OCSF `message` strings can include the request path with query - // (e.g., `?access_token=…`); the summary must drop them. - let evt = serde_json::json!({ - "class_uid": 4002, - "action_id": 2, - "message": "FORWARD denied PUT api.github.com:443/x?access_token=secret-token", - "dst_endpoint": {"hostname": "api.github.com", "port": 443}, - "http_request": {"http_method": "PUT", "url": {"path": "/x"}} - }); - let summary = denial_summary_from_event(&evt).unwrap(); - assert_eq!(summary["path"], "/x"); - assert!(summary.get("message").is_none()); - assert!( - !summary.to_string().contains("secret-token"), - "summary must not include credentials from the source message" + fn redact_query_strings_removes_query_from_url_token() { + let line = "2026-05-06T17:02:00.000Z OCSF HTTP:PUT [MED] DENIED PUT http://api.github.com/x?access_token=secret-token-1234 [policy:p engine:l7]"; + let redacted = redact_query_strings(line); + assert!(!redacted.contains("secret-token-1234")); + assert!(!redacted.contains("access_token")); + assert!(redacted.contains("?[redacted]")); + // Bracketed tag after the URL preserved. 
+ assert!(redacted.contains("[policy:p engine:l7]"));
+ }
+
+ #[test]
+ fn redact_query_strings_removes_query_in_reason_tag() {
+ // The FORWARD deny path's `message` becomes `[reason:...]` and may
+ // include a path with query string lacking a `://` prefix.
+ let line = "2026-05-06T17:02:00.000Z OCSF HTTP:PUT [MED] DENIED PUT http://api.github.com/x [policy:p engine:opa] [reason:FORWARD denied PUT api.github.com:443/x?token=secret-456]";
+ let redacted = redact_query_strings(line);
+ assert!(!redacted.contains("secret-456"));
+ assert!(!redacted.contains("token=secret"));
+ assert!(redacted.contains("?[redacted]]"));
+ }
+
+ #[test]
+ fn redact_query_strings_handles_multibyte_chars() {
+ let line = "ÜLÅUTF8 ? secret-x [policy:p]";
+ // Multibyte characters around the `?` — redaction must not panic
+ // on a non-ASCII char boundary.
+ let _ = redact_query_strings(line);
+ }
+
+ #[test]
+ fn truncate_at_char_boundary_does_not_panic_on_multibyte() {
+ // 4-byte emoji sequence so byte-naive slicing would panic.
+ let s = "🚀".repeat(2000); // 8000 bytes
+ let truncated = truncate_at_char_boundary(&s, 4096);
+ assert!(truncated.len() <= 4096 + "...[truncated]".len());
+ assert!(truncated.ends_with("...[truncated]"));
+ // Result must be valid UTF-8 — implicit if we return without panic.
+ }
+
+ #[tokio::test]
+ async fn recent_denials_truncates_pathological_lines() {
+ let dir = tempfile::tempdir().unwrap();
+ let log_path = dir.path().join("openshell.2026-05-06.log");
+ // A single OCSF denial line exceeding MAX_DENIAL_LINE_BYTES. 
+ let huge_path = "/".to_string() + &"a".repeat(MAX_DENIAL_LINE_BYTES + 100); + let line = format!( + "2026-05-06T17:02:00.000Z OCSF HTTP:PUT [MED] DENIED PUT http://x{huge_path} [policy:p engine:l7]\n" ); + std::fs::write(&log_path, line).unwrap(); + + let ctx = PolicyLocalContext::with_log_dir(None, None, None, dir.path().to_path_buf()); + let (_, payload) = recent_denials_response(&ctx, "last=1").await; + let denials = payload["denials"].as_array().unwrap(); + assert_eq!(denials.len(), 1); + let surfaced = denials[0].as_str().unwrap(); + assert!(surfaced.len() <= MAX_DENIAL_LINE_BYTES + "...[truncated]".len()); + assert!(surfaced.ends_with("...[truncated]")); } #[tokio::test] diff --git a/crates/openshell-sandbox/src/skills/policy_advisor.md b/crates/openshell-sandbox/src/skills/policy_advisor.md index c6552de2d..57546145c 100644 --- a/crates/openshell-sandbox/src/skills/policy_advisor.md +++ b/crates/openshell-sandbox/src/skills/policy_advisor.md @@ -15,7 +15,10 @@ The sandbox-local policy API is reachable at `http://policy.local`: - `GET /v1/policy/current` — current effective policy as YAML. - `GET /v1/denials?last=10` — most recent network/L7 denials seen by this - sandbox (newest first). + sandbox (newest first), returned as raw shorthand log lines. Each line + carries the timestamp, class, severity, action, host/port, binary, policy + name, and (for denied events) a short reason. Read the lines directly; you + do not need to parse them into structured fields. - `POST /v1/proposals` — submit a proposal for developer approval. The proposal body takes an `intent_summary` and one or more `addRule` @@ -91,7 +94,7 @@ Two local files complement the API and are useful when debugging policy behavior: - `/var/log/openshell.YYYY-MM-DD.log` — shorthand log of sandbox activity. -- `/var/log/openshell-ocsf.YYYY-MM-DD.log` — OCSF JSONL events when enabled. 
- -The `/v1/denials` endpoint reads these structured events for you; the files -are listed here only as a fallback for inspection. + This is what `/v1/denials` reads from. +- `/var/log/openshell-ocsf.YYYY-MM-DD.log` — full OCSF JSON events, only + written when the `ocsf_json_enabled` setting is on. Not used by + `/v1/denials`; useful for SIEM ingestion. From 05dffb60e1c41510ccdca148b85dd636cb4ac666 Mon Sep 17 00:00:00 2001 From: Alexander Watson Date: Wed, 6 May 2026 22:36:54 -0700 Subject: [PATCH 19/23] chore(sandbox): align proto inits with main's L7 GraphQL additions Post-rebase fixups after #1083 (GraphQL L7 inspection) landed on main and introduced new fields on the proto types this branch constructs: - `crates/openshell-sandbox/src/l7/relay.rs`: two `deny_with_redacted_target` call sites (REST and GraphQL relay deny paths) now pass the `DenyResponseContext` argument that `rest::send_deny_response` expects. Both sites pass `host`, `port`, and `binary` from the existing `L7EvalContext`, matching the pattern used at the primary deny site. - `crates/openshell-sandbox/src/policy_local.rs`: `L7Allow`, `L7DenyRule`, and `NetworkEndpoint` proto initializers now populate the new GraphQL and path-scoping fields with empty defaults. Agent-authored proposals via `policy.local` target REST/SQL/L4 today; GraphQL operation matching is set on the gateway side or via direct YAML, so empty defaults are correct here. No behavior change. `cargo test -p openshell-sandbox --lib` (650 tests) and `cargo clippy -p openshell-sandbox --lib --tests -- -D warnings` clean. 
--- crates/openshell-sandbox/src/l7/relay.rs | 10 ++++++++++ crates/openshell-sandbox/src/policy_local.rs | 15 +++++++++++++++ 2 files changed, 25 insertions(+) diff --git a/crates/openshell-sandbox/src/l7/relay.rs b/crates/openshell-sandbox/src/l7/relay.rs index ff765e354..f099c3558 100644 --- a/crates/openshell-sandbox/src/l7/relay.rs +++ b/crates/openshell-sandbox/src/l7/relay.rs @@ -305,6 +305,11 @@ where &reason, client, Some(&redacted_target), + Some(crate::l7::rest::DenyResponseContext { + host: Some(&ctx.host), + port: Some(ctx.port), + binary: Some(&ctx.binary_path), + }), ) .await?; return Ok(()); @@ -794,6 +799,11 @@ where &reason, client, Some(&redacted_target), + Some(crate::l7::rest::DenyResponseContext { + host: Some(&ctx.host), + port: Some(ctx.port), + binary: Some(&ctx.binary_path), + }), ) .await?; return Ok(()); diff --git a/crates/openshell-sandbox/src/policy_local.rs b/crates/openshell-sandbox/src/policy_local.rs index da04a7eb0..5be3efffb 100644 --- a/crates/openshell-sandbox/src/policy_local.rs +++ b/crates/openshell-sandbox/src/policy_local.rs @@ -567,6 +567,12 @@ fn network_endpoint_from_json( path: rule.allow.path, command: rule.allow.command, query: HashMap::new(), + // GraphQL fields default empty — agent-authored proposals from + // policy.local target REST/SQL/L4 endpoints; GraphQL operation + // matching is set on the policy server side or via direct YAML. + operation_type: String::new(), + operation_name: String::new(), + fields: Vec::new(), }), }) .collect(); @@ -578,6 +584,9 @@ fn network_endpoint_from_json( path: rule.path, command: rule.command, query: HashMap::new(), + operation_type: String::new(), + operation_name: String::new(), + fields: Vec::new(), }) .collect(); @@ -593,6 +602,12 @@ fn network_endpoint_from_json( ports, deny_rules, allow_encoded_slash: endpoint.allow_encoded_slash, + // GraphQL persisted-query knobs and path scoping default empty — + // agent proposals don't author them today. 
+ persisted_queries: String::new(), + graphql_persisted_queries: HashMap::new(), + graphql_max_body_bytes: 0, + path: String::new(), }) } From 872dcc0ecc0a5c12229fd8b64f93609156358b3d Mon Sep 17 00:00:00 2001 From: Alexander Watson Date: Thu, 7 May 2026 21:52:32 -0700 Subject: [PATCH 20/23] feat(sandbox): gate agent policy proposals behind opt-in feature flag MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The agent-driven policy proposal surface delivered by this PR (skill install, `policy.local` API, `next_steps` array on L7 deny bodies) is now opt-in via the new `agent_policy_proposals_enabled` setting. Default false. Same shape as `providers_v2_enabled`: registered in `openshell-core::settings`, sandbox-level, hot-toggleable via the existing settings poll loop. Why: the surface is a novel agent-controlled mutation point in every sandbox. The per-proposal developer approval gate is a correctness control, but it doesn't address "should this sandbox have an agent-authoring API at all" — compliance teams may want that question closed. The flag is the second gate. Implementation: - New registry entry + `AGENT_POLICY_PROPOSALS_ENABLED_KEY` constant in `openshell-core::settings`. - `lib.rs`: process-wide `OnceLock>` mirroring the `OCSF_CTX` pattern. `agent_proposals_enabled()` is the single read point. - Initial settings fetch added to `run_sandbox` so skill install honors the flag at startup (not just on the poll loop's first tick). - Skill install in `run_sandbox` is gated on the flag. - `policy_local::route_request` returns `404 feature_disabled` for all routes when the flag is off — including the otherwise-public `current_policy` and `denials` routes. When the surface is off it's off entirely. - `policy_local::agent_next_steps` returns an empty array when the flag is off so deny bodies don't advertise routes that 404. 
- Poll loop updates the atomic on each tick, lazily installs the skill on a false→true transition (no claw-back on true→false; stale skill on disk is harmless because route + next_steps gate on the live atom). Tests: - Shared `test_helpers::ProposalsFlagGuard` mutex+atomic guard for the process-wide flag, used across `policy_local::tests` and `l7::rest::tests`. - New: `agent_next_steps_returns_empty_when_flag_off`, `agent_next_steps_returns_full_array_when_flag_on`, `route_request_returns_feature_disabled_when_flag_off`. - Updated existing tests that exercise the deny body or the route dispatcher to set the flag on first. - Full sandbox lib test suite: 653 pass, clippy clean. Demo and e2e: - `examples/agent-driven-policy-management/demo.sh` and `e2e/policy-advisor/test.sh` now snapshot the prior global value of the setting, set it to true before sandbox creation (so the supervisor's initial poll picks it up), and restore on exit (delete if previously unset, otherwise write the prior value back). Docs: - RFC 0001 MVP-implementation note documents the flag, default, and intended soft-launch posture. --- crates/openshell-core/src/settings.rs | 15 ++ crates/openshell-sandbox/src/l7/rest.rs | 4 + crates/openshell-sandbox/src/lib.rs | 146 ++++++++++++++++-- crates/openshell-sandbox/src/policy_local.rs | 75 +++++++++ e2e/policy-advisor/test.sh | 28 ++++ .../agent-driven-policy-management/demo.sh | 28 ++++ rfc/0001-agent-driven-policy-management.md | 2 + 7 files changed, 289 insertions(+), 9 deletions(-) diff --git a/crates/openshell-core/src/settings.rs b/crates/openshell-core/src/settings.rs index 2765ebeda..897317a5a 100644 --- a/crates/openshell-core/src/settings.rs +++ b/crates/openshell-core/src/settings.rs @@ -50,6 +50,15 @@ pub struct RegisteredSetting { /// 5. Add a unit test in this module's `tests` section to cover the new key. 
pub const PROVIDERS_V2_ENABLED_KEY: &str = "providers_v2_enabled"; +/// Sandbox-level opt-in for the agent-driven policy proposal surface. +/// +/// When true, the supervisor installs the `policy_advisor` skill, serves +/// the `policy.local` API routes, and includes `next_steps` in L7 deny +/// bodies. See `crates/openshell-sandbox/src/policy_local.rs`. Defaults to +/// false. Independent of the per-proposal developer approval gate, which +/// still applies when this flag is on. +pub const AGENT_POLICY_PROPOSALS_ENABLED_KEY: &str = "agent_policy_proposals_enabled"; + pub const REGISTERED_SETTINGS: &[RegisteredSetting] = &[ // Gateway-level opt-in for provider profile policy composition. Defaults // to false when unset. @@ -64,6 +73,12 @@ pub const REGISTERED_SETTINGS: &[RegisteredSetting] = &[ key: "ocsf_json_enabled", kind: SettingValueKind::Bool, }, + // Sandbox-level opt-in for the agent-driven policy proposal surface. + // See AGENT_POLICY_PROPOSALS_ENABLED_KEY for details. Defaults to false. + RegisteredSetting { + key: AGENT_POLICY_PROPOSALS_ENABLED_KEY, + kind: SettingValueKind::Bool, + }, // Test-only keys live behind the `dev-settings` feature flag so they // don't appear in production builds. #[cfg(feature = "dev-settings")] diff --git a/crates/openshell-sandbox/src/l7/rest.rs b/crates/openshell-sandbox/src/l7/rest.rs index 0db017738..4d488f2b9 100644 --- a/crates/openshell-sandbox/src/l7/rest.rs +++ b/crates/openshell-sandbox/src/l7/rest.rs @@ -1052,6 +1052,8 @@ mod tests { #[test] fn deny_response_body_is_agent_readable_and_redacted() { + // Agent-readable next_steps is gated on the proposals feature flag. 
+ let _proposals = crate::test_helpers::ProposalsFlagGuard::set(true); let req = L7Request { action: "PUT".to_string(), target: "/repos/NVIDIA/OpenShell/contents/README.md?access_token=secret-token" @@ -1110,6 +1112,8 @@ mod tests { #[tokio::test] async fn send_deny_response_writes_structured_json_403() { + // Agent-readable next_steps is gated on the proposals feature flag. + let _proposals = crate::test_helpers::ProposalsFlagGuard::set(true); let (mut client, mut server) = tokio::io::duplex(4096); let send = tokio::spawn(async move { let req = L7Request { diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index c2f1a4262..0d7e09763 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -90,6 +90,70 @@ pub(crate) fn ocsf_ctx() -> &'static SandboxContext { OCSF_CTX.get().unwrap_or(&OCSF_CTX_FALLBACK) } +/// Process-wide flag for the agent-driven policy proposal surface. +/// Set once during `run_sandbox()` startup and updated by the settings poll +/// loop when `agent_policy_proposals_enabled` changes. Read by the +/// `policy.local` route handler and the L7 deny body's `next_steps` builder +/// to gate the agent-controlled mutation surface. Exposed `pub(crate)` so +/// unit tests in sibling modules can flip the flag through a serialized +/// guard (see `policy_local::tests::ProposalsFlagGuard`). +pub(crate) static AGENT_PROPOSALS_ENABLED: + OnceLock> = OnceLock::new(); + +/// Read the current value of the agent proposals feature flag. +/// +/// Returns `false` if `run_sandbox()` has not initialized the flag (e.g. +/// during unit tests), matching the documented default for the setting. +pub(crate) fn agent_proposals_enabled() -> bool { + AGENT_PROPOSALS_ENABLED + .get() + .is_some_and(|flag| flag.load(Ordering::Relaxed)) +} + +/// Test-only helpers shared across sibling test modules. 
+#[cfg(test)] +pub(crate) mod test_helpers { + #![allow(clippy::redundant_pub_crate, reason = "intentional crate-private module")] + use std::sync::Arc; + use std::sync::Mutex; + use std::sync::MutexGuard; + use std::sync::atomic::{AtomicBool, Ordering}; + + /// Guard for tests that toggle the process-wide + /// `AGENT_PROPOSALS_ENABLED` flag. Acquires a process-wide mutex on + /// construction so concurrent tests don't race on the atomic, swaps in + /// the requested value, and restores the previous value on drop. Hold + /// the guard for the duration of any code that reads + /// `agent_proposals_enabled()`. + pub(crate) struct ProposalsFlagGuard { + prev: bool, + flag: Arc, + _lock: MutexGuard<'static, ()>, + } + + impl ProposalsFlagGuard { + pub(crate) fn set(enabled: bool) -> Self { + static LOCK: Mutex<()> = Mutex::new(()); + let lock = LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner); + let flag = super::AGENT_PROPOSALS_ENABLED + .get_or_init(|| Arc::new(AtomicBool::new(false))) + .clone(); + let prev = flag.swap(enabled, Ordering::Relaxed); + Self { + prev, + flag, + _lock: lock, + } + } + } + + impl Drop for ProposalsFlagGuard { + fn drop(&mut self) { + self.flag.store(self.prev, Ordering::Relaxed); + } + } +} + use crate::identity::BinaryIdentityCache; use crate::l7::tls::{ CertCache, ProxyTlsState, SandboxCa, build_upstream_client_config, read_system_ca_bundle, @@ -325,16 +389,46 @@ pub async fn run_sandbox( // Prepare filesystem: create and chown read_write directories prepare_filesystem(&policy)?; - match skills::install_static_skills() { - Ok(installed) => { - info!( - path = %installed.policy_advisor.display(), - "Installed sandbox agent skill" - ); - } - Err(error) => { - warn!(error = %error, "Failed to install sandbox agent skill"); + // Initialize the agent-proposals feature flag. Default false until the + // initial settings fetch (or the poll loop) tells us otherwise. 
The flag + // gates the skill install, the policy.local route handler, and the L7 + // deny body's `next_steps` field — see `agent_proposals_enabled()`. + let proposals_enabled = Arc::new(std::sync::atomic::AtomicBool::new(false)); + if AGENT_PROPOSALS_ENABLED.set(proposals_enabled.clone()).is_err() { + debug!("agent proposals flag already initialized, keeping existing"); + } + + // Eagerly fetch the initial settings so skill install can honor the flag + // at startup rather than waiting for the poll loop's first tick. In + // offline/file-mode there is no gateway, so the flag stays false. + if let (Some(id), Some(endpoint)) = (&sandbox_id, &openshell_endpoint) + && let Ok(client) = grpc_client::CachedOpenShellClient::connect(endpoint).await + && let Ok(result) = client.poll_settings(id).await + { + let initial = extract_bool_setting( + &result.settings, + openshell_core::settings::AGENT_POLICY_PROPOSALS_ENABLED_KEY, + ) + .unwrap_or(false); + proposals_enabled.store(initial, Ordering::Relaxed); + } + + if agent_proposals_enabled() { + match skills::install_static_skills() { + Ok(installed) => { + info!( + path = %installed.policy_advisor.display(), + "Installed sandbox agent skill" + ); + } + Err(error) => { + warn!(error = %error, "Failed to install sandbox agent skill"); + } } + } else { + debug!( + "agent_policy_proposals_enabled is false at startup; skipping skill install" + ); } // Generate ephemeral CA and TLS state for HTTPS L7 inspection. @@ -2369,6 +2463,40 @@ async fn run_policy_poll_loop( info!(ocsf_json_enabled = new_ocsf, "OCSF JSONL logging toggled"); } + // Apply the agent-proposals feature toggle. On a false→true transition + // we lazily install the skill so a sandbox that started with the flag + // off picks up the surface without a recreate. 
We never uninstall on + // a true→false transition: stale skill content on disk is harmless + // because route_request and agent_next_steps both gate on the live + // atomic, so the agent that reads the skill will see 404s and an + // empty `next_steps` array regardless. + if let Some(flag) = AGENT_PROPOSALS_ENABLED.get() { + let new_proposals = extract_bool_setting( + &result.settings, + openshell_core::settings::AGENT_POLICY_PROPOSALS_ENABLED_KEY, + ) + .unwrap_or(false); + let prev_proposals = flag.swap(new_proposals, Ordering::Relaxed); + if new_proposals != prev_proposals { + info!( + agent_policy_proposals_enabled = new_proposals, + "agent-driven policy proposals toggled" + ); + if new_proposals && !prev_proposals { + match skills::install_static_skills() { + Ok(installed) => info!( + path = %installed.policy_advisor.display(), + "Installed sandbox agent skill on toggle-on" + ), + Err(error) => warn!( + error = %error, + "Failed to install sandbox agent skill on toggle-on" + ), + } + } + } + } + current_config_revision = result.config_revision; current_policy_hash = result.policy_hash; current_settings = result.settings; diff --git a/crates/openshell-sandbox/src/policy_local.rs b/crates/openshell-sandbox/src/policy_local.rs index 5be3efffb..a5ffed911 100644 --- a/crates/openshell-sandbox/src/policy_local.rs +++ b/crates/openshell-sandbox/src/policy_local.rs @@ -118,6 +118,19 @@ async fn route_request( body: &[u8], ) -> (u16, serde_json::Value) { let (route, query) = path.split_once('?').map_or((path, ""), |(r, q)| (r, q)); + // Gate every route on the feature flag so the agent surface is fully off + // when the flag is off — including the diagnostic `current_policy` and + // `denials` routes. The skill is also not installed in that mode, so a + // disabled sandbox has no entry point into this API at all. 
+ if !crate::agent_proposals_enabled() { + return ( + 404, + serde_json::json!({ + "error": "feature_disabled", + "detail": "agent-driven policy proposals are not enabled in this sandbox; set the `agent_policy_proposals_enabled` setting to true to enable" + }), + ); + } match (method, route) { ("GET", ROUTE_POLICY_CURRENT) => current_policy_response(ctx).await, ("GET", ROUTE_DENIALS) => recent_denials_response(ctx, query).await, @@ -136,8 +149,15 @@ async fn route_request( /// machine-readable pointers to this API. Centralizes the shape here to keep /// the deny body and the actual route table from drifting — adding or /// renaming a route only requires touching the route constants above. +/// +/// Returns an empty array when `agent_proposals_enabled()` is false so a +/// disabled sandbox doesn't advertise a surface that 404s. The deny body +/// caller still emits the field (with `[]`) so the wire shape is stable. #[must_use] pub fn agent_next_steps() -> serde_json::Value { + if !crate::agent_proposals_enabled() { + return serde_json::json!([]); + } let host = POLICY_LOCAL_HOST; serde_json::json!([ { @@ -1051,8 +1071,63 @@ mod tests { assert!(surfaced.ends_with("...[truncated]")); } + use crate::test_helpers::ProposalsFlagGuard; + + #[test] + fn agent_next_steps_returns_empty_when_flag_off() { + let _guard = ProposalsFlagGuard::set(false); + let steps = agent_next_steps(); + let arr = steps.as_array().expect("agent_next_steps is an array"); + assert!( + arr.is_empty(), + "expected empty next_steps when feature is off, got {steps}" + ); + } + + #[test] + fn agent_next_steps_returns_full_array_when_flag_on() { + let _guard = ProposalsFlagGuard::set(true); + let steps = agent_next_steps(); + let arr = steps.as_array().expect("agent_next_steps is an array"); + assert_eq!(arr.len(), 4, "expected 4 next_steps when feature is on"); + let actions: Vec<&str> = arr + .iter() + .filter_map(|v| v.get("action").and_then(serde_json::Value::as_str)) + .collect(); + 
assert!(actions.contains(&"read_skill")); + assert!(actions.contains(&"submit_proposal")); + } + + #[tokio::test] + async fn route_request_returns_feature_disabled_when_flag_off() { + let _guard = ProposalsFlagGuard::set(false); + let ctx = PolicyLocalContext::new( + Some(ProtoSandboxPolicy { + version: 1, + ..Default::default() + }), + None, + None, + ); + + // Even the otherwise-public `current_policy` route returns 404 with + // a feature_disabled error: when the surface is off it's off + // entirely, not selectively. + let (status, payload) = route_request(&ctx, "GET", ROUTE_POLICY_CURRENT, &[]).await; + assert_eq!(status, 404); + assert_eq!(payload["error"], "feature_disabled"); + assert!( + payload["detail"] + .as_str() + .unwrap() + .contains("agent_policy_proposals_enabled"), + "feature_disabled detail must name the setting key for actionability" + ); + } + #[tokio::test] async fn current_policy_route_returns_yaml_envelope() { + let _guard = ProposalsFlagGuard::set(true); let ctx = PolicyLocalContext::new( Some(ProtoSandboxPolicy { version: 1, diff --git a/e2e/policy-advisor/test.sh b/e2e/policy-advisor/test.sh index 8f956eb0e..18bf01e62 100755 --- a/e2e/policy-advisor/test.sh +++ b/e2e/policy-advisor/test.sh @@ -63,6 +63,18 @@ cleanup() { printf "\n${YELLOW}Keeping sandbox because DEMO_KEEP_SANDBOX=1: %s${RESET}\n" "$DEMO_SANDBOX_NAME" fi + # Restore the agent_policy_proposals_enabled setting to what it was + # before this run. We saved the prior value in $PRIOR_PROPOSALS_FLAG. 
+ if [[ -n "${PRIOR_PROPOSALS_FLAG:-}" ]]; then + if [[ "$PRIOR_PROPOSALS_FLAG" == "(unset)" ]]; then + "$OPENSHELL_BIN" settings delete --global --key agent_policy_proposals_enabled \ + >/dev/null 2>&1 || true + else + "$OPENSHELL_BIN" settings set --global --key agent_policy_proposals_enabled \ + --value "$PRIOR_PROPOSALS_FLAG" >/dev/null 2>&1 || true + fi + fi + "$OPENSHELL_BIN" provider delete "$DEMO_GITHUB_PROVIDER_NAME" >/dev/null 2>&1 || true if [[ $status -eq 0 ]]; then @@ -197,6 +209,21 @@ create_provider() { --credential GITHUB_TOKEN } +enable_agent_proposals() { + step "Enabling agent-driven policy proposals" + # Snapshot the prior value so cleanup() can restore it. Use a sentinel + # for "unset" so we can distinguish from an explicit false on restore. + local prior + prior="$("$OPENSHELL_BIN" settings get --global --json 2>/dev/null \ + | grep -o '"agent_policy_proposals_enabled"[^,}]*' \ + | grep -o 'true\|false' | head -1)" + PRIOR_PROPOSALS_FLAG="${prior:-(unset)}" + info " Prior global value: $PRIOR_PROPOSALS_FLAG" + "$OPENSHELL_BIN" settings set --global \ + --key agent_policy_proposals_enabled --value true >/dev/null \ + || fail "could not set agent_policy_proposals_enabled globally" +} + create_sandbox() { step "Creating sandbox with read-only GitHub L7 policy" cp "$POLICY_TEMPLATE" "$POLICY_FILE" @@ -368,6 +395,7 @@ main() { check_gateway check_github_access create_provider + enable_agent_proposals create_sandbox connect_ssh run_policy_local_checks diff --git a/examples/agent-driven-policy-management/demo.sh b/examples/agent-driven-policy-management/demo.sh index bbec37612..6c3c60cfa 100755 --- a/examples/agent-driven-policy-management/demo.sh +++ b/examples/agent-driven-policy-management/demo.sh @@ -97,6 +97,18 @@ cleanup() { "$OPENSHELL_BIN" provider delete "$DEMO_CODEX_PROVIDER_NAME" >/dev/null 2>&1 || true "$OPENSHELL_BIN" provider delete "$DEMO_GITHUB_PROVIDER_NAME" >/dev/null 2>&1 || true + # Restore the agent_policy_proposals_enabled 
setting to what it was + # before this run. + if [[ -n "${PRIOR_PROPOSALS_FLAG:-}" ]]; then + if [[ "$PRIOR_PROPOSALS_FLAG" == "(unset)" ]]; then + "$OPENSHELL_BIN" settings delete --global --key agent_policy_proposals_enabled \ + >/dev/null 2>&1 || true + else + "$OPENSHELL_BIN" settings set --global --key agent_policy_proposals_enabled \ + --value "$PRIOR_PROPOSALS_FLAG" >/dev/null 2>&1 || true + fi + fi + if [[ $status -eq 0 ]]; then rm -rf "$TMP_DIR" else @@ -391,6 +403,21 @@ show_logs() { | sed 's/^/ /' || true } +enable_agent_proposals() { + # The agent-driven proposal surface (skill, policy.local routes, deny + # next_steps) is opt-in. Snapshot the prior global value so cleanup() + # can restore it; the sentinel "(unset)" round-trips through `settings + # delete` rather than a value write. + local prior + prior="$("$OPENSHELL_BIN" settings get --global --json 2>/dev/null \ + | grep -o '"agent_policy_proposals_enabled"[^,}]*' \ + | grep -o 'true\|false' | head -1)" + PRIOR_PROPOSALS_FLAG="${prior:-(unset)}" + "$OPENSHELL_BIN" settings set --global \ + --key agent_policy_proposals_enabled --value true >/dev/null \ + || fail "could not enable agent_policy_proposals_enabled globally" +} + main() { validate_env @@ -399,6 +426,7 @@ main() { check_github_access render_payload create_providers + enable_agent_proposals show_run_summary diff --git a/rfc/0001-agent-driven-policy-management.md b/rfc/0001-agent-driven-policy-management.md index 07c9bb8a0..6816d1331 100644 --- a/rfc/0001-agent-driven-policy-management.md +++ b/rfc/0001-agent-driven-policy-management.md @@ -51,6 +51,8 @@ The first implementation is tracked in [#1062](https://github.com/NVIDIA/OpenShe The MVP deliberately defers the supervisor Unix-socket API, server-streaming multi-sandbox inbox, Slack/web adapters, org ceilings, trusted auto-apply, and in-process prover optimization. Those remain aligned with the RFC direction, but they are not required to prove the initial loop. 
+The entire MVP surface is gated behind the `agent_policy_proposals_enabled` setting (see `crates/openshell-core/src/settings.rs`), default false. When disabled, the supervisor does not install the skill, the `policy.local` routes return `404 feature_disabled`, and L7 deny bodies omit the `next_steps` array. The flag is independent of the per-proposal developer approval gate; both apply when the feature is on. Treat this as a soft launch: enable per-sandbox or globally once the loop is validated, and leave it off in environments where agent-authored proposals should not be available at all. + ## Non-goals - Allowing an in-sandbox agent to self-approve or unilaterally apply its own policy changes. From 6f79bbbf668c8326f561dd8f63424df462f48d5c Mon Sep 17 00:00:00 2001 From: Alexander Watson Date: Fri, 8 May 2026 10:40:05 -0700 Subject: [PATCH 21/23] test(policy-advisor): require proposal opt-in for e2e --- crates/openshell-cli/src/run.rs | 8 +-- crates/openshell-sandbox/src/l7/rest.rs | 4 +- crates/openshell-sandbox/src/lib.rs | 46 ++++++++++------ crates/openshell-sandbox/src/policy_local.rs | 13 ++--- e2e/policy-advisor/README.md | 31 ++++++++++- e2e/policy-advisor/test.sh | 55 +++++++++---------- .../agent-task.md | 2 + 7 files changed, 97 insertions(+), 62 deletions(-) diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index 678494556..165713b6e 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -5493,10 +5493,10 @@ fn format_timestamp_ms(ms: i64) -> String { mod tests { use super::{ TlsOptions, dockerfile_sources_supported_for_gateway, format_endpoint, - format_gateway_select_header, - format_gateway_select_items, format_provider_attachment_table, gateway_add, - gateway_auth_label, gateway_env_override_warning, gateway_select_with, gateway_type_label, - git_sync_files, http_health_check, image_requests_gpu, import_local_package_mtls_bundle, + format_gateway_select_header, format_gateway_select_items, 
+ format_provider_attachment_table, gateway_add, gateway_auth_label, + gateway_env_override_warning, gateway_select_with, gateway_type_label, git_sync_files, + http_health_check, image_requests_gpu, import_local_package_mtls_bundle, inferred_provider_type, package_managed_tls_dirs, parse_cli_setting_value, parse_credential_pairs, plaintext_gateway_is_remote, provisioning_timeout_message, ready_false_condition_message, resolve_from, sandbox_should_persist, diff --git a/crates/openshell-sandbox/src/l7/rest.rs b/crates/openshell-sandbox/src/l7/rest.rs index 4d488f2b9..85ae01290 100644 --- a/crates/openshell-sandbox/src/l7/rest.rs +++ b/crates/openshell-sandbox/src/l7/rest.rs @@ -1053,7 +1053,7 @@ mod tests { #[test] fn deny_response_body_is_agent_readable_and_redacted() { // Agent-readable next_steps is gated on the proposals feature flag. - let _proposals = crate::test_helpers::ProposalsFlagGuard::set(true); + let _proposals = crate::test_helpers::ProposalsFlagGuard::set_blocking(true); let req = L7Request { action: "PUT".to_string(), target: "/repos/NVIDIA/OpenShell/contents/README.md?access_token=secret-token" @@ -1113,7 +1113,7 @@ mod tests { #[tokio::test] async fn send_deny_response_writes_structured_json_403() { // Agent-readable next_steps is gated on the proposals feature flag. - let _proposals = crate::test_helpers::ProposalsFlagGuard::set(true); + let _proposals = crate::test_helpers::ProposalsFlagGuard::set(true).await; let (mut client, mut server) = tokio::io::duplex(4096); let send = tokio::spawn(async move { let req = L7Request { diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index 0d7e09763..1d47bb450 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -97,8 +97,8 @@ pub(crate) fn ocsf_ctx() -> &'static SandboxContext { /// to gate the agent-controlled mutation surface. 
Exposed `pub(crate)` so /// unit tests in sibling modules can flip the flag through a serialized /// guard (see `policy_local::tests::ProposalsFlagGuard`). -pub(crate) static AGENT_PROPOSALS_ENABLED: - OnceLock> = OnceLock::new(); +pub(crate) static AGENT_PROPOSALS_ENABLED: OnceLock> = + OnceLock::new(); /// Read the current value of the agent proposals feature flag. /// @@ -113,17 +113,22 @@ pub(crate) fn agent_proposals_enabled() -> bool { /// Test-only helpers shared across sibling test modules. #[cfg(test)] pub(crate) mod test_helpers { - #![allow(clippy::redundant_pub_crate, reason = "intentional crate-private module")] + #![allow( + clippy::redundant_pub_crate, + reason = "intentional crate-private module" + )] use std::sync::Arc; - use std::sync::Mutex; - use std::sync::MutexGuard; + use std::sync::LazyLock; use std::sync::atomic::{AtomicBool, Ordering}; + use tokio::sync::MutexGuard; + + static PROPOSALS_FLAG_LOCK: LazyLock> = + LazyLock::new(|| tokio::sync::Mutex::new(())); /// Guard for tests that toggle the process-wide - /// `AGENT_PROPOSALS_ENABLED` flag. Acquires a process-wide mutex on - /// construction so concurrent tests don't race on the atomic, swaps in - /// the requested value, and restores the previous value on drop. Hold - /// the guard for the duration of any code that reads + /// `AGENT_PROPOSALS_ENABLED` flag. Acquires a process-wide async mutex, + /// swaps in the requested value, and restores the previous value on drop. + /// Hold the guard for the duration of any code that reads /// `agent_proposals_enabled()`. 
pub(crate) struct ProposalsFlagGuard { prev: bool, @@ -132,9 +137,17 @@ pub(crate) mod test_helpers { } impl ProposalsFlagGuard { - pub(crate) fn set(enabled: bool) -> Self { - static LOCK: Mutex<()> = Mutex::new(()); - let lock = LOCK.lock().unwrap_or_else(std::sync::PoisonError::into_inner); + pub(crate) async fn set(enabled: bool) -> Self { + let lock = PROPOSALS_FLAG_LOCK.lock().await; + Self::with_lock(enabled, lock) + } + + pub(crate) fn set_blocking(enabled: bool) -> Self { + let lock = PROPOSALS_FLAG_LOCK.blocking_lock(); + Self::with_lock(enabled, lock) + } + + fn with_lock(enabled: bool, lock: MutexGuard<'static, ()>) -> Self { let flag = super::AGENT_PROPOSALS_ENABLED .get_or_init(|| Arc::new(AtomicBool::new(false))) .clone(); @@ -394,7 +407,10 @@ pub async fn run_sandbox( // gates the skill install, the policy.local route handler, and the L7 // deny body's `next_steps` field — see `agent_proposals_enabled()`. let proposals_enabled = Arc::new(std::sync::atomic::AtomicBool::new(false)); - if AGENT_PROPOSALS_ENABLED.set(proposals_enabled.clone()).is_err() { + if AGENT_PROPOSALS_ENABLED + .set(proposals_enabled.clone()) + .is_err() + { debug!("agent proposals flag already initialized, keeping existing"); } @@ -426,9 +442,7 @@ pub async fn run_sandbox( } } } else { - debug!( - "agent_policy_proposals_enabled is false at startup; skipping skill install" - ); + debug!("agent_policy_proposals_enabled is false at startup; skipping skill install"); } // Generate ephemeral CA and TLS state for HTTPS L7 inspection. 
diff --git a/crates/openshell-sandbox/src/policy_local.rs b/crates/openshell-sandbox/src/policy_local.rs index a5ffed911..21556ec6a 100644 --- a/crates/openshell-sandbox/src/policy_local.rs +++ b/crates/openshell-sandbox/src/policy_local.rs @@ -360,10 +360,7 @@ fn is_ocsf_denial_line(line: &str) -> bool { line.contains(" OCSF ") && line.contains(" DENIED ") } -fn collect_shorthand_log_files( - log_dir: &Path, - max_files: usize, -) -> std::io::Result> { +fn collect_shorthand_log_files(log_dir: &Path, max_files: usize) -> std::io::Result> { let mut entries: Vec<(std::time::SystemTime, PathBuf)> = std::fs::read_dir(log_dir)? .filter_map(std::result::Result::ok) .filter_map(|entry| { @@ -1075,7 +1072,7 @@ mod tests { #[test] fn agent_next_steps_returns_empty_when_flag_off() { - let _guard = ProposalsFlagGuard::set(false); + let _guard = ProposalsFlagGuard::set_blocking(false); let steps = agent_next_steps(); let arr = steps.as_array().expect("agent_next_steps is an array"); assert!( @@ -1086,7 +1083,7 @@ mod tests { #[test] fn agent_next_steps_returns_full_array_when_flag_on() { - let _guard = ProposalsFlagGuard::set(true); + let _guard = ProposalsFlagGuard::set_blocking(true); let steps = agent_next_steps(); let arr = steps.as_array().expect("agent_next_steps is an array"); assert_eq!(arr.len(), 4, "expected 4 next_steps when feature is on"); @@ -1100,7 +1097,7 @@ mod tests { #[tokio::test] async fn route_request_returns_feature_disabled_when_flag_off() { - let _guard = ProposalsFlagGuard::set(false); + let _guard = ProposalsFlagGuard::set(false).await; let ctx = PolicyLocalContext::new( Some(ProtoSandboxPolicy { version: 1, @@ -1127,7 +1124,7 @@ mod tests { #[tokio::test] async fn current_policy_route_returns_yaml_envelope() { - let _guard = ProposalsFlagGuard::set(true); + let _guard = ProposalsFlagGuard::set(true).await; let ctx = PolicyLocalContext::new( Some(ProtoSandboxPolicy { version: 1, diff --git a/e2e/policy-advisor/README.md b/e2e/policy-advisor/README.md 
index b9796e1a6..79f496e3e 100644 --- a/e2e/policy-advisor/README.md +++ b/e2e/policy-advisor/README.md @@ -18,12 +18,37 @@ runs the same loop with Codex driving from inside the sandbox. ## Run it +Run against an ephemeral Docker gateway: + +```bash +DEMO_GITHUB_OWNER= \ +DEMO_GITHUB_REPO=openshell-policy-demo \ +e2e/with-docker-gateway.sh bash -lc ' + target/debug/openshell settings set --global \ + --key agent_policy_proposals_enabled \ + --value true \ + --yes + OPENSHELL_BIN="$PWD/target/debug/openshell" bash e2e/policy-advisor/test.sh +' +``` + +To keep the sandbox for debugging, start a local gateway first with +`mise run gateway:docker`, then run: + ```bash +target/debug/openshell settings set --global \ + --key agent_policy_proposals_enabled \ + --value true \ + --yes + +OPENSHELL_GATEWAY=docker-dev \ +OPENSHELL_BIN="$PWD/target/debug/openshell" \ +DEMO_KEEP_SANDBOX=1 \ DEMO_GITHUB_OWNER= \ DEMO_GITHUB_REPO=openshell-policy-demo \ bash e2e/policy-advisor/test.sh ``` -Requires an active OpenShell gateway (`openshell gateway start`) and a GitHub -token with contents write on the repository (auto-resolved from `gh auth token`, -`GITHUB_TOKEN`, or `GH_TOKEN`). +Requires Docker, `agent_policy_proposals_enabled=true`, and a GitHub token with +contents write on the repository. The test auto-resolves the token from +`DEMO_GITHUB_TOKEN`, `GITHUB_TOKEN`, `GH_TOKEN`, or `gh auth token`. 
diff --git a/e2e/policy-advisor/test.sh b/e2e/policy-advisor/test.sh index 18bf01e62..cef09d1ed 100755 --- a/e2e/policy-advisor/test.sh +++ b/e2e/policy-advisor/test.sh @@ -28,9 +28,9 @@ DEMO_KEEP_SANDBOX="${DEMO_KEEP_SANDBOX:-0}" DEMO_RETRY_ATTEMPTS="${DEMO_RETRY_ATTEMPTS:-30}" DEMO_RETRY_SLEEP="${DEMO_RETRY_SLEEP:-2}" -TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/openshell-agent-policy.XXXXXX")" -POLICY_FILE="${TMP_DIR}/policy.yaml" -SSH_CONFIG="${TMP_DIR}/ssh_config" +TMP_DIR="" +POLICY_FILE="" +SSH_CONFIG="" SSH_HOST="" BOLD='\033[1m' @@ -63,20 +63,12 @@ cleanup() { printf "\n${YELLOW}Keeping sandbox because DEMO_KEEP_SANDBOX=1: %s${RESET}\n" "$DEMO_SANDBOX_NAME" fi - # Restore the agent_policy_proposals_enabled setting to what it was - # before this run. We saved the prior value in $PRIOR_PROPOSALS_FLAG. - if [[ -n "${PRIOR_PROPOSALS_FLAG:-}" ]]; then - if [[ "$PRIOR_PROPOSALS_FLAG" == "(unset)" ]]; then - "$OPENSHELL_BIN" settings delete --global --key agent_policy_proposals_enabled \ - >/dev/null 2>&1 || true - else - "$OPENSHELL_BIN" settings set --global --key agent_policy_proposals_enabled \ - --value "$PRIOR_PROPOSALS_FLAG" >/dev/null 2>&1 || true - fi - fi - "$OPENSHELL_BIN" provider delete "$DEMO_GITHUB_PROVIDER_NAME" >/dev/null 2>&1 || true + if [[ -z "$TMP_DIR" ]]; then + return + fi + if [[ $status -eq 0 ]]; then rm -rf "$TMP_DIR" else @@ -209,19 +201,23 @@ create_provider() { --credential GITHUB_TOKEN } -enable_agent_proposals() { - step "Enabling agent-driven policy proposals" - # Snapshot the prior value so cleanup() can restore it. Use a sentinel - # for "unset" so we can distinguish from an explicit false on restore. 
- local prior - prior="$("$OPENSHELL_BIN" settings get --global --json 2>/dev/null \ - | grep -o '"agent_policy_proposals_enabled"[^,}]*' \ - | grep -o 'true\|false' | head -1)" - PRIOR_PROPOSALS_FLAG="${prior:-(unset)}" - info " Prior global value: $PRIOR_PROPOSALS_FLAG" - "$OPENSHELL_BIN" settings set --global \ - --key agent_policy_proposals_enabled --value true >/dev/null \ - || fail "could not set agent_policy_proposals_enabled globally" +check_agent_proposals_enabled() { + step "Checking agent-driven policy proposal opt-in" + local value + value="$("$OPENSHELL_BIN" settings get --global --json 2>/dev/null \ + | jq -r '.settings.agent_policy_proposals_enabled // ""')" + if [[ "$value" != "true" ]]; then + fail "agent_policy_proposals_enabled must be true before running this test. +Enable it with: + $OPENSHELL_BIN settings set --global --key agent_policy_proposals_enabled --value true --yes" + fi + info "${GREEN}agent_policy_proposals_enabled=true${RESET}" +} + +create_temp_workspace() { + TMP_DIR="$(mktemp -d "${TMPDIR:-/tmp}/openshell-agent-policy.XXXXXX")" + POLICY_FILE="${TMP_DIR}/policy.yaml" + SSH_CONFIG="${TMP_DIR}/ssh_config" } create_sandbox() { @@ -393,9 +389,10 @@ show_logs() { main() { validate_env check_gateway + check_agent_proposals_enabled + create_temp_workspace check_github_access create_provider - enable_agent_proposals create_sandbox connect_ssh run_policy_local_checks diff --git a/examples/agent-driven-policy-management/agent-task.md b/examples/agent-driven-policy-management/agent-task.md index 6aa56b5a3..9c7588181 100644 --- a/examples/agent-driven-policy-management/agent-task.md +++ b/examples/agent-driven-policy-management/agent-task.md @@ -1,6 +1,8 @@ +# Agent Task + You are running inside an OpenShell sandbox. Your job is to write one markdown file to GitHub via the GitHub Contents API. 
From 65ed2b9fb7d9fd7629a5d98482cc1c430c0b50f1 Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Fri, 8 May 2026 13:56:42 -0700 Subject: [PATCH 22/23] refactor(sandbox): group policy poll loop state --- crates/openshell-sandbox/src/lib.rs | 72 ++++++++++++++++------------- 1 file changed, 39 insertions(+), 33 deletions(-) diff --git a/crates/openshell-sandbox/src/lib.rs b/crates/openshell-sandbox/src/lib.rs index 1d47bb450..25a28af54 100644 --- a/crates/openshell-sandbox/src/lib.rs +++ b/crates/openshell-sandbox/src/lib.rs @@ -934,20 +934,19 @@ pub async fn run_sandbox( .ok() .and_then(|v| v.parse().ok()) .unwrap_or(10); + let poll_ctx = PolicyPollLoopContext { + endpoint: poll_endpoint, + sandbox_id: poll_id, + opa_engine: poll_engine, + entrypoint_pid: poll_pid, + interval_secs: poll_interval_secs, + ocsf_enabled: poll_ocsf_enabled, + provider_credentials: poll_provider_credentials, + policy_local_ctx: Some(poll_policy_local), + }; tokio::spawn(async move { - if let Err(e) = run_policy_poll_loop( - &poll_endpoint, - &poll_id, - &poll_engine, - &poll_pid, - poll_interval_secs, - &poll_ocsf_enabled, - poll_provider_credentials, - Some(poll_policy_local), - ) - .await - { + if let Err(e) = run_policy_poll_loop(poll_ctx).await { ocsf_emit!( AppLifecycleBuilder::new(ocsf_ctx()) .activity(ActivityId::Fail) @@ -2281,23 +2280,25 @@ async fn flush_proposals_to_gateway( /// /// When the entrypoint PID is available, policy reloads include symlink /// resolution for binary paths via the container filesystem. 
-async fn run_policy_poll_loop( - endpoint: &str, - sandbox_id: &str, - opa_engine: &Arc, - entrypoint_pid: &Arc, +struct PolicyPollLoopContext { + endpoint: String, + sandbox_id: String, + opa_engine: Arc, + entrypoint_pid: Arc, interval_secs: u64, - ocsf_enabled: &std::sync::atomic::AtomicBool, + ocsf_enabled: Arc, provider_credentials: provider_credentials::ProviderCredentialState, policy_local_ctx: Option>, -) -> Result<()> { +} + +async fn run_policy_poll_loop(ctx: PolicyPollLoopContext) -> Result<()> { use crate::grpc_client::CachedOpenShellClient; use openshell_core::proto::PolicySource; use std::sync::atomic::Ordering; - let client = CachedOpenShellClient::connect(endpoint).await?; + let client = CachedOpenShellClient::connect(&ctx.endpoint).await?; let mut current_config_revision: u64 = 0; - let mut current_provider_env_revision: u64 = provider_credentials.snapshot().revision; + let mut current_provider_env_revision: u64 = ctx.provider_credentials.snapshot().revision; let mut current_policy_hash = String::new(); let mut current_settings: std::collections::HashMap< String, @@ -2305,7 +2306,7 @@ async fn run_policy_poll_loop( > = std::collections::HashMap::new(); // Initialize revision from the first poll. 
- match client.poll_settings(sandbox_id).await { + match client.poll_settings(&ctx.sandbox_id).await { Ok(result) => { current_config_revision = result.config_revision; current_policy_hash = result.policy_hash.clone(); @@ -2320,11 +2321,11 @@ async fn run_policy_poll_loop( } } - let interval = Duration::from_secs(interval_secs); + let interval = Duration::from_secs(ctx.interval_secs); loop { tokio::time::sleep(interval).await; - let result = match client.poll_settings(sandbox_id).await { + let result = match client.poll_settings(&ctx.sandbox_id).await { Ok(r) => r, Err(e) => { debug!(error = %e, "Settings poll: server unreachable, will retry"); @@ -2357,9 +2358,9 @@ async fn run_policy_poll_loop( .build()); if provider_env_changed { - match grpc_client::fetch_provider_environment(endpoint, sandbox_id).await { + match grpc_client::fetch_provider_environment(&ctx.endpoint, &ctx.sandbox_id).await { Ok(env_result) => { - let env_count = provider_credentials.install_environment( + let env_count = ctx.provider_credentials.install_environment( env_result.provider_env_revision, env_result.environment, ); @@ -2404,11 +2405,11 @@ async fn run_policy_poll_loop( continue; }; - let pid = entrypoint_pid.load(Ordering::Acquire); - match opa_engine.reload_from_proto_with_pid(policy, pid) { + let pid = ctx.entrypoint_pid.load(Ordering::Acquire); + match ctx.opa_engine.reload_from_proto_with_pid(policy, pid) { Ok(()) => { - if let Some(ctx) = policy_local_ctx.as_ref() { - ctx.set_current_policy(policy.clone()).await; + if let Some(policy_local_ctx) = ctx.policy_local_ctx.as_ref() { + policy_local_ctx.set_current_policy(policy.clone()).await; } if result.global_policy_version > 0 { ocsf_emit!(ConfigStateChangeBuilder::new(ocsf_ctx()) @@ -2440,7 +2441,7 @@ async fn run_policy_poll_loop( if result.version > 0 && result.policy_source == PolicySource::Sandbox && let Err(e) = client - .report_policy_status(sandbox_id, result.version, true, "") + .report_policy_status(&ctx.sandbox_id, 
result.version, true, "") .await { warn!(error = %e, "Failed to report policy load success"); @@ -2461,7 +2462,12 @@ async fn run_policy_poll_loop( if result.version > 0 && result.policy_source == PolicySource::Sandbox && let Err(report_err) = client - .report_policy_status(sandbox_id, result.version, false, &e.to_string()) + .report_policy_status( + &ctx.sandbox_id, + result.version, + false, + &e.to_string(), + ) .await { warn!(error = %report_err, "Failed to report policy load failure"); @@ -2472,7 +2478,7 @@ async fn run_policy_poll_loop( // Apply OCSF JSON toggle from the `ocsf_json_enabled` setting. let new_ocsf = extract_bool_setting(&result.settings, "ocsf_json_enabled").unwrap_or(false); - let prev_ocsf = ocsf_enabled.swap(new_ocsf, Ordering::Relaxed); + let prev_ocsf = ctx.ocsf_enabled.swap(new_ocsf, Ordering::Relaxed); if new_ocsf != prev_ocsf { info!(ocsf_json_enabled = new_ocsf, "OCSF JSONL logging toggled"); } From 2b3c64e50d495c3d7c25bbd4965d8273370ced40 Mon Sep 17 00:00:00 2001 From: John Myers <9696606+johntmyers@users.noreply.github.com> Date: Fri, 8 May 2026 14:21:59 -0700 Subject: [PATCH 23/23] test(e2e): isolate Kubernetes user namespace test --- e2e/rust/Cargo.toml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/e2e/rust/Cargo.toml b/e2e/rust/Cargo.toml index 57bc1ff68..0da2e417b 100644 --- a/e2e/rust/Cargo.toml +++ b/e2e/rust/Cargo.toml @@ -19,6 +19,7 @@ publish = false e2e = [] e2e-docker = ["e2e"] e2e-docker-gpu = ["e2e-docker"] +e2e-kubernetes = ["e2e"] [[test]] name = "custom_image" @@ -40,6 +41,11 @@ name = "gateway_resume" path = "tests/gateway_resume.rs" required-features = ["e2e-docker"] +[[test]] +name = "user_namespaces" +path = "tests/user_namespaces.rs" +required-features = ["e2e-kubernetes"] + [dependencies] tokio = { version = "1.43", features = ["full"] } tempfile = "3"