diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 8c96345..34f07bb 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -44,9 +44,30 @@ jobs:
- name: GitHub Action Bridge Smoke
run: target/debug/deepseek github fixture-smoke --json
+ - name: Shell Supervisor Fixture Smoke
+ run: |
+ if command -v timeout >/dev/null 2>&1; then
+ timeout 20s target/debug/deepseek agents shell-fixture-smoke --json
+ else
+ target/debug/deepseek agents shell-fixture-smoke --json
+ fi
+
+ - name: Local Runtime Service Smoke
+ run: |
+ smoke_workdir="/tmp/deepseek-service-smoke-${GITHUB_RUN_ID:-local}"
+ mkdir -p "$smoke_workdir"
+ if command -v timeout >/dev/null 2>&1; then
+ timeout 25s target/debug/deepseek agents service-smoke --workdir "$smoke_workdir" --bin target/debug/deepseek --json
+ else
+ target/debug/deepseek agents service-smoke --workdir "$smoke_workdir" --bin target/debug/deepseek --json
+ fi
+
- name: TUI Entrypoint Smoke
run: target/debug/deepseek tui --entrypoint-smoke --smoke-bin target/debug/deepseek
+ - name: Multi-file External Fixture Scaffold Smoke
+ run: scripts/create-multifile-external-fixture.sh "$RUNNER_TEMP/deepseek-external-fixture"
+
- name: Secret Scan
run: node scripts/check-secrets.js
@@ -106,6 +127,30 @@ jobs:
- name: GitHub Action Bridge Smoke
run: ${{ matrix.binary }} github fixture-smoke --json
+ - name: Shell Supervisor Fixture Smoke
+ if: runner.os != 'Windows'
+ run: |
+ if command -v timeout >/dev/null 2>&1; then
+ timeout 20s ${{ matrix.binary }} agents shell-fixture-smoke --json
+ else
+ ${{ matrix.binary }} agents shell-fixture-smoke --json
+ fi
+
+ - name: Local Runtime Service Smoke
+ if: runner.os != 'Windows'
+ run: |
+ smoke_workdir="/tmp/deepseek-service-smoke-${GITHUB_RUN_ID:-local}"
+ mkdir -p "$smoke_workdir"
+ if command -v timeout >/dev/null 2>&1; then
+ timeout 25s ${{ matrix.binary }} agents service-smoke --workdir "$smoke_workdir" --bin ${{ matrix.binary }} --json
+ else
+ ${{ matrix.binary }} agents service-smoke --workdir "$smoke_workdir" --bin ${{ matrix.binary }} --json
+ fi
+
+ - name: Multi-file External Fixture Scaffold Smoke
+ if: runner.os != 'Windows'
+ run: scripts/create-multifile-external-fixture.sh "$RUNNER_TEMP/deepseek-external-fixture"
+
- name: Windows Shell Supervisor ConPTY Smoke
if: runner.os == 'Windows'
shell: pwsh
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index 61c86a6..3aecd8f 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -74,6 +74,30 @@ jobs:
- name: Verify Release Binary GitHub Action Bridge
run: ${{ matrix.binary }} github fixture-smoke --json
+ - name: Verify Release Binary Shell Supervisor Fixture
+ if: runner.os != 'Windows'
+ run: |
+ if command -v timeout >/dev/null 2>&1; then
+ timeout 20s ${{ matrix.binary }} agents shell-fixture-smoke --json
+ else
+ ${{ matrix.binary }} agents shell-fixture-smoke --json
+ fi
+
+ - name: Verify Release Binary Local Runtime Service Smoke
+ if: runner.os != 'Windows'
+ run: |
+ smoke_workdir="/tmp/deepseek-service-smoke-${GITHUB_RUN_ID:-local}"
+ mkdir -p "$smoke_workdir"
+ if command -v timeout >/dev/null 2>&1; then
+ timeout 25s ${{ matrix.binary }} agents service-smoke --workdir "$smoke_workdir" --bin ${{ matrix.binary }} --json
+ else
+ ${{ matrix.binary }} agents service-smoke --workdir "$smoke_workdir" --bin ${{ matrix.binary }} --json
+ fi
+
+ - name: Verify Multi-file External Fixture Scaffold
+ if: runner.os != 'Windows'
+ run: scripts/create-multifile-external-fixture.sh "$RUNNER_TEMP/deepseek-external-fixture"
+
- name: Verify Release Binary TUI Snapshot
if: runner.os != 'Windows'
env:
diff --git a/README.ja-JP.md b/README.ja-JP.md
index e81b454..105dc09 100644
--- a/README.ja-JP.md
+++ b/README.ja-JP.md
@@ -10,8 +10,8 @@ DeepSeekCode は DeepSeek-first のターミナル向けコーディングエー
> 状態: dogfood とリポジトリ内の開発作業には利用できます。`v0.1.1` では
> GitHub Release のバイナリと検証済み GHCR イメージを公開済みです。bare
> `deepseek` TUI entrypoint は Linux、macOS、Windows CI で smoke 済みです。
-> npm と Homebrew の公開には registry/tap の資格情報がまだ必要で、より厚い
-> hosted IDE/GitHub evidence は継続中です。
+> hosted GitHub write workflow evidence は記録済みです。hosted IDE evidence と
+> npm/Homebrew の公開には外部の資格情報や実行環境がまだ必要です。
@@ -120,15 +120,19 @@ deepseek tui --runtime-url http://127.0.0.1:13000
## 現在の差分
DeepSeekCode は自身の開発に使える段階ですが、Claude Code CLI / Codex CLI
-ほどの製品成熟度にはまだ届いていません。大きな残差は次の通りです。
-
-- compile-check 済み backend、現行 Linux PTY fd handoff、bounded interactive
- attach、CI-smoked default TUI entrypoint を超える Windows shell-supervisor
- ConPTY/TCP daemon runtime proof。
-- hosted GitHub/VS Code workflow evidence と、よりリッチな multi-file external fixture サンプル。
-- npm registry 公開と Homebrew tap。どちらも資格情報が未設定です。
+ほどの製品成熟度にはまだ届いていません。Linux/macOS のローカル coding-agent
+CLI に絞ると、残差は主に evidence depth と配布面の polish です。
+
+- macOS shell/runtime evidence を entrypoint smoke 以上に厚くすること。CI/release
+ matrix は `agents shell-fixture-smoke` と `agents service-smoke` を実行します。
+- よりリッチな multi-file external fixture サンプル。disposable Python invoice
+ fixture の scaffold script を追加しています。
+- Homebrew 公開。tap 資格情報が未設定です。
- コミット済み model-backed SVG を超える、任意の polish 済み GIF/MP4 キャプチャ。
+Windows ConPTY/service proof、hosted IDE evidence、npm publishing はより広い
+product hardening ですが、Linux/macOS ローカル CLI milestone の blocker ではありません。
+
現在の状態、次の作業、最終目標は
[docs/current-status.md](./docs/current-status.md) にまとめています。
@@ -188,6 +192,7 @@ deepseek update publish-status --dist dist-assets --npm-dist npm-dist --strict
deepseek update publish-status --json
deepseek agents service-doctor --kind all --workdir "$PWD" --bin "$(command -v deepseek)" --json
deepseek agents service-smoke --workdir "$PWD" --bin "$(command -v deepseek)" --json
+deepseek agents shell-fixture-smoke --json
deepseek tui --entrypoint-smoke --smoke-bin "$(command -v deepseek)"
```
@@ -204,6 +209,7 @@ repository を使います。まず dry-run で preflight し、その後 isolat
実行して dogfood report に記録します。
```bash
+scripts/create-multifile-external-fixture.sh /tmp/deepseek-external-fixtures/python-invoice-multifile
deepseek dogfood external-fixture --workdir /tmp/disposable-repo --dry-run \
'replace `a - b` with `a + b` in src/lib.rs and validate with cargo test'
deepseek dogfood external-fixture --workdir /tmp/disposable-repo --benchmark-gate \
diff --git a/README.md b/README.md
index 1cd3348..b860c77 100644
--- a/README.md
+++ b/README.md
@@ -9,9 +9,9 @@ iterating from the same terminal.
> Status: usable for dogfooding and repository work. `v0.1.1` has GitHub
> Release binaries and a verified GHCR image; the bare `deepseek` TUI entrypoint
-> is CI-smoked on Linux, macOS, and Windows. npm and Homebrew publishing still
-> need registry/tap credentials, and hosted IDE/GitHub evidence remains in
-> progress.
+> is CI-smoked on Linux, macOS, and Windows. Hosted GitHub write workflow
+> evidence is recorded; hosted IDE evidence and npm/Homebrew publishing still
+> need external credentials or machines.
@@ -119,16 +119,21 @@ git.
## Current Gap
DeepSeekCode is close enough to use as its own coding CLI, but it is not yet at
-Claude Code CLI / Codex CLI polish. The largest remaining gaps are:
-
-- Windows shell-supervisor ConPTY/TCP daemon runtime proof beyond the new
- compile-checked backend, current Linux PTY fd handoff, bounded interactive
- attach, and CI-smoked default TUI entrypoint;
-- hosted GitHub/VS Code workflow evidence and richer multi-file external
- fixture samples;
-- npm registry publishing and a Homebrew tap, both blocked on credentials;
+Claude Code CLI / Codex CLI polish. For a Linux/macOS local coding-agent CLI,
+the remaining gaps are mostly evidence depth and distribution polish:
+
+- recorded macOS shell/runtime CI and release-binary evidence beyond the
+ entrypoint smoke: the `agents shell-fixture-smoke` and `agents service-smoke`
+ workflow gates are now wired in, but their hosted runs are not yet recorded;
+- richer multi-file external fixture samples; a scaffold script for a
+ disposable Python invoice fixture exists, but its online run is still pending;
+- Homebrew publishing, still blocked on tap credentials;
- optional polished GIF/MP4 capture beyond the committed model-backed SVG.
+Windows ConPTY/service proof, hosted IDE evidence, and npm publishing remain
+broader product-hardening work, but they are not blockers for the Linux/macOS
+local CLI milestone.
+
See [docs/current-status.md](./docs/current-status.md) for the current Chinese
status, roadmap, and final target.
@@ -191,6 +196,7 @@ deepseek update publish-status --json
deepseek agents service-doctor --kind all --workdir "$PWD" --bin "$(command -v deepseek)" --json
mkdir -p /tmp/dsc-smk
deepseek agents service-smoke --workdir /tmp/dsc-smk --bin "$(command -v deepseek)" --json
+deepseek agents shell-fixture-smoke --json
deepseek tui --entrypoint-smoke --smoke-bin "$(command -v deepseek)"
```
@@ -207,6 +213,7 @@ outside this checkout. The command dry-runs preflight first, then runs against
an isolated copy and records the result in the dogfood report:
```bash
+scripts/create-multifile-external-fixture.sh /tmp/deepseek-external-fixtures/python-invoice-multifile # scaffolds a multi-file Python fixture and prints its own dry-run/evidence commands
deepseek dogfood external-fixture --workdir /tmp/disposable-repo --dry-run \
'replace `a - b` with `a + b` in src/lib.rs and validate with cargo test'
deepseek dogfood external-fixture --workdir /tmp/disposable-repo --benchmark-gate \
diff --git a/README.zh-CN.md b/README.zh-CN.md
index da790c1..c60d370 100644
--- a/README.zh-CN.md
+++ b/README.zh-CN.md
@@ -8,8 +8,8 @@ TUI/runtime workbench。它面向真实写代码循环:阅读仓库、修改
> 当前状态:已经可以用于 dogfood 和仓库内编码任务。`v0.1.1` 已有 GitHub
> Release 二进制包和实测可用的 GHCR 镜像;裸 `deepseek` TUI 入口已经在
-> Linux、macOS、Windows CI 里做真实 smoke。npm 与 Homebrew 发布还需要
-> registry/tap 凭据,hosted IDE/GitHub 证据仍在推进中。
+> Linux、macOS、Windows CI 里做真实 smoke。hosted GitHub 写入 workflow
+> 证据已经记录;hosted IDE 证据和 npm/Homebrew 发布仍需要外部凭据或机器。
@@ -115,15 +115,19 @@ deepseek tui --runtime-url http://127.0.0.1:13000
## 当前差距
DeepSeekCode 已经可以直接拿来写自己的代码,但还没有达到 Claude Code CLI /
-Codex CLI 的产品成熟度。最大差距集中在:
-
-- 新增 compile-checked backend、当前 Linux PTY fd handoff、bounded interactive
- attach 和 CI 已验证默认 TUI 入口之外,Windows shell-supervisor ConPTY/TCP
- daemon 运行证明;
-- hosted GitHub/VS Code workflow 证据,以及更丰富的 multi-file external fixture 样本;
-- npm registry 发布和 Homebrew tap,这两项还缺少对应凭据;
+Codex CLI 的产品成熟度。如果只看 Linux/macOS 本地 coding-agent CLI,剩余差距主要是
+证据厚度和分发打磨:
+
+- macOS shell/runtime 证据需要超过入口 smoke;CI/release matrix 现在会跑
+ `agents shell-fixture-smoke` 和 `agents service-smoke`;
+- 更丰富的 multi-file external fixture 样本;仓库已提供 disposable Python invoice
+ fixture 脚手架;
+- Homebrew 发布仍缺 tap 凭据;
- 已提交 model-backed SVG 之外,可选的更精致 GIF/MP4 录屏素材。
+Windows ConPTY/service proof、hosted IDE 证据和 npm 发布属于更大的产品硬化目标,
+不再阻塞 Linux/macOS 本地 CLI milestone。
+
当前状态、下一步路线和最终目标见 [docs/current-status.md](./docs/current-status.md)。
## Demo 素材
@@ -181,6 +185,7 @@ deepseek update publish-status --dist dist-assets --npm-dist npm-dist --strict
deepseek update publish-status --json
deepseek agents service-doctor --kind all --workdir "$PWD" --bin "$(command -v deepseek)" --json
deepseek agents service-smoke --workdir "$PWD" --bin "$(command -v deepseek)" --json
+deepseek agents shell-fixture-smoke --json
deepseek tui --entrypoint-smoke --smoke-bin "$(command -v deepseek)"
```
@@ -196,6 +201,7 @@ deepseek pr live-status owner/repo#42 --json
命令会先 dry-run 检查,然后在 isolated copy 中执行,并把结果写入 dogfood report:
```bash
+scripts/create-multifile-external-fixture.sh /tmp/deepseek-external-fixtures/python-invoice-multifile
deepseek dogfood external-fixture --workdir /tmp/disposable-repo --dry-run \
'replace `a - b` with `a + b` in src/lib.rs and validate with cargo test'
deepseek dogfood external-fixture --workdir /tmp/disposable-repo --benchmark-gate \
diff --git a/docs/current-status.md b/docs/current-status.md
index 1667a54..411ad98 100644
--- a/docs/current-status.md
+++ b/docs/current-status.md
@@ -8,6 +8,12 @@ DeepSeekCode 的目标是成为一个 DeepSeek-first 的 code agent CLI:用户
`deepseek` 后,可以像使用 Claude Code CLI、Codex CLI 或 DeepSeek-TUI 一样,完成真实
仓库里的读代码、改代码、跑命令、查看 diff、继续修复、恢复会话和发布前验证。
+当前执行口径先收敛到 Linux/macOS 本地 code agent CLI:只要用户能在 Linux/macOS
+安装并运行 `deepseek`,稳定进入 TUI/REPL,完成模型读写代码、shell 验证、diff
+review、resume 和本地 runtime/shell-supervisor 工作流,就可以认为这个 milestone
+成立。Windows ConPTY/service proof、hosted IDE 证据和 npm 发布属于后续跨平台/集成
+硬化,不再阻塞 Linux/macOS 本地 CLI 目标。
+
最终验收口径不是“功能列表看起来很多”,而是:
- 裸 `deepseek` 在真实 TTY 里稳定进入 coding-agent TUI;
@@ -31,6 +37,10 @@ DeepSeekCode 的目标是成为一个 DeepSeek-first 的 code agent CLI:用户
- 模型协议:OpenAI-compatible tool calls,同轮 batch tool calls,DeepSeek provider/model alias 兼容。
- 审批与安全:approve-once、approve-for-session、deny fingerprint、secret scan、shell/network policy、rollback snapshot。
- Shell/PTY:后台 shell job、wait/replay/attach/stdin/resize/cancel;Linux native-supervisor PTY;workspace shell-supervisor protocol bridge。
+- Linux/macOS CLI readiness:CI 和 Release Matrix 会在非 Windows 平台跑
+ `agents shell-fixture-smoke --json`、`agents service-smoke --json`、TUI entrypoint
+ smoke、task worktree smoke 和 GitHub bridge smoke;这把 Linux/macOS 本地 CLI 的入口、
+ runtime、shell-supervisor 和后台 worktree 基线纳入同一类 release gate。
- 本轮新增:`deepseek chat` / `deepseek repl` / `deepseek interactive` 的真实 TTY 输入现在走内置 raw-mode line editor,补齐 Claude Code-like REPL 的 Up/Down history、history draft restore、左右移动、Home/End、Backspace/Delete、Ctrl+A/E/U/K/W、Tab slash/session completion、空行 Ctrl+D 和 prompt Ctrl+C 退出;运行中的 REPL turn 也会把 SIGINT 接到 `AgentLoopOptions.cancel_check`,让模型 stream 和 cancel-aware tools 协作取消,并在取消后恢复本轮 transcript/snapshot 指针,避免半截 prompt 污染后续上下文;`/sessions [prefix]` 可以列出保存的 REPL session,`/load ` 后 Tab 可补全 session 名;非交互测试路径仍保留 buffered reader,不需要真实终端。
- 本轮新增:Phase 12E background worktree runner 第一片。新增 `deepseek task start/list/show/stop/diff/merge/reject` 和 `deepseek task fixture-smoke --json`:`task start` 会在当前 git repo 的 `.dscode/task-runner/worktrees/` 创建隔离 worktree 和默认 `deepseek-task/` 分支,把记录写到 `.dscode/task-runner/records/`,stdout/stderr 写到 `.dscode/task-runner/logs/`,并在该 worktree 中启动 `deepseek exec --json`;父 CLI 退出后 child 进程仍可继续。`--no-run` 可只创建 worktree/record,用于无 API key 的本地 gate;`task diff` 展示 task worktree 的 tracked patch/stat 和 untracked files,`task merge --check` dry-run 验证,`task merge` 要求原 worktree 干净后把 patch 和 untracked regular files 合回原 repo,`task reject` 默认删除受管 task worktree 并把记录标记为 rejected。`deepseek github action --background-task` 现在也可把解析出的 GitHub PR review/fix/patch 请求委派到同一 task runner,`--task-id` 支持 workflow 稳定 id,`--task-no-run` 支持无凭据本地 workflow gate。当前 `task fixture-smoke --json` 实测 `ok=true`、`worktree_created=true`、`record_listed=true`、`merge_check_ok=true`、`merge_apply_ok=true`、`reject_ok=true`、`cleanup_ok=true`;CI 已把该 smoke 接到 Linux/macOS/Windows debug binary,Release Matrix 也会在各平台 release binary packaging 前运行。
- 本轮新增:`deepseek agents shell attach --interactive` / `--takeover`。它会进入本地 raw mode,把按键转发到 supervisor `stdin`,把 resize 转发到 supervisor `resize`,并把 output 事件的 raw bytes replay 回当前终端;Linux 集成 smoke 已覆盖 raw-mode PTY 启动、`tty=true` job、stdin、resize、replay 和 bounded detach。它是可用的 bounded interactive attach,不是字节级 PTY fd 直连代理。
@@ -49,6 +59,12 @@ DeepSeekCode 的目标是成为一个 DeepSeek-first 的 code agent CLI:用户
- 本轮新增:`deepseek dogfood live-plan` 的推荐命令改为 `deepseek dogfood live-run ...`,文本和 JSON 都同时输出 dry-run 与 `--execute` 命令,避免 release operator 为 model-backed 证据误走 offline-friendly `replay-benchmark` 路径。`deepseek dogfood live-plan` 和 `deepseek dogfood live-run --json` 现在还输出 `post_run_report_command` / `evidence_gate`,直接给出 `dogfood report --require-live-runs ... --require-live-category ...` 的后置验收命令,让真实 online 执行后的 model-backed 证据可以 fail closed。`deepseek dogfood live-run --json` 保持机器可读 dry-run plan,包含 selected cases、online readiness、execute blocker 和 follow-up `--execute` command;它故意不和 `--execute` 混用,避免在线执行日志污染 JSON。`dogfood live-run` 还支持 `--api-key-file`/`--key-file` 指向仓库外 key 文件,只把 key 注入当前进程的 `model.api_key_env` 并在返回时恢复,JSON 只记录 `credential_source` 和文件路径,不输出 key 值。`dogfood live-run --execute --evidence-out ` 现在会在批次结束或首个失败后写出 `deepseek.dogfood.live_run_evidence.v1` JSON,记录 before/after ledger live counts、每个 case 追加的 model-backed ledger 行、benchmark gate 结果、同一条 post-run report gate,以及当前 ledger 文件的 `fnv1a64` fingerprint,仍不写入 API key 值。`deepseek dogfood live-evidence --file ` 现在可验证该 evidence 文件,默认要求 completed、online、至少 1 条 appended model-backed row;`--require-benchmark-gate` 可把 benchmark gate 也纳入 release fail-closed 检查,`--require-report-gate` 会读取 evidence 的 structured `evidence_gate` 和 ledger path,用 `dogfood report` 同一套 live requirement 逻辑验证 full live gate,重新计算 ledger fingerprint 并逐条核对 evidence 中 appended case 的 timestamp/outcome/model_transport/category 能在 ledger 中找到匹配记录,而不是执行 JSON 里的 shell command;`--json` 输出 `deepseek.dogfood.live_evidence_verification.v1`,`--out ` 可把 verification JSON 落盘作为 release evidence artifact。`dogfood external-fixture` 真实执行现在也默认要求 `model_transport=online`,离线只能 dry-run 或显式 `--allow-offline` 做 rehearsal,避免把 offline disposable repo 样本误计为 release evidence;`--evidence-out` 会写出 `deepseek.dogfood.external_fixture_evidence.v1`,包含 appended external fixture row、release-evidence readiness 和 ledger fingerprint,便于上传发布证据。
- 本轮新增:在线 DeepSeek dogfood 从 smoke 推进到完整 release gate。使用当前进程注入的 DeepSeek key 执行 `dogfood live-run --execute --evidence-out ...`,最终 `deepseek dogfood report --limit 100 --require-live-runs 100 --require-live-success-rate 90 --require-live-category write_validate:25:90 --require-live-category recovery:25:90 --require-live-category pr_workflow:25:90` 通过;外部 fixture 跑完后 `live-plan` 显示 `105` 条 online run、`99` 条 success,分类为 `write_validate 29/30`、`recovery 23/25`、`pr_workflow 47/50`。执行过程中又修掉两类真实模型卡点:Python pytest retry readback 现在能识别 `def test_` / `assert ` 测试文件,并从错误的 `a * b` 回退到 `a + b`;空搜索恢复任务在看到 no matches 后完成 repository layout inspection 会 clean finish,不再重复列目录。release evidence verification 落在 `.dscode/dogfood/live-evidence-final-total-pr-4-release-verification.json`,`report_gate_passed=true`。
- 本轮新增:外部 disposable repo write-fixture 证据第一批。已在 `/tmp/deepseek-external-fixtures/` 下构造 Rust、Python、JavaScript 三个独立 git repo,初始测试均失败,然后用真实 online DeepSeek 跑 `dogfood external-fixture --workdir ... --evidence-out ...`,三条都完成 `read_file -> apply_patch -> validation -> finish`,并分别通过 `dogfood external-evidence --require-successful-external-fixtures 1`:`.dscode/dogfood/external-fixture-rust-add-v3-verification.json`、`.dscode/dogfood/external-fixture-python-add-verification.json`、`.dscode/dogfood/external-fixture-js-add-verification.json`。本轮还修复了 external fixture evidence record 缺少 `model_backed` 字段导致 verifier 无法和 ledger online row 对齐的问题。
+- 本轮新增:multi-file external fixture scaffold。`scripts/create-multifile-external-fixture.sh`
+ 会在 checkout 之外创建 disposable Python invoice repo,初始 `python -m unittest discover -s tests`
+ 按预期失败,并输出 dry-run/evidence 两条 `deepseek dogfood external-fixture` 命令;任务要求同时修改
+ `src/invoice_math/pricing.py` 和 `src/invoice_math/summary.py`。CI/Release Matrix 在 Linux/macOS
+ 上会执行该脚手架,先保证更真实的 multi-file 样本可重复构造;真实 online evidence 仍由带 key 的
+ `dogfood external-fixture --evidence-out ...` 产生。
- 本轮新增:README 真实 model-backed demo SVG。`docs/demo/record-model-backed-demo.sh` 使用当前 DeepSeek key 录制了 disposable Rust crate 的 failure -> `deepseek exec` -> patch -> passing `cargo test` -> diff transcript,`docs/demo/verify-model-backed-demo.js` 验证通过后由 `docs/demo/render-model-backed-demo-svg.js` 渲染为 `docs/demo/deepseek-code-model-demo.svg`。本轮还修复了 explicit edit parser 对 `in src/lib.rs, validate ...` 的路径截断问题,以及 renderer 把 `test result: ok ... 0 failed` 误标红的问题;README 英文、中文、日文都已引用该真实模型 SVG。
- 本轮新增:`deepseek update publish-status` 现在支持 `--live-evidence-verification `(别名 `--live-evidence`),会读取 `dogfood live-evidence --out` 生成的 `deepseek.dogfood.live_evidence_verification.v1`,要求 `ok=true`、completed、online、appended model-backed row、report gate required/passed、ledger fingerprint/current ledger fingerprint 都成立。`--strict` 因此会把缺失或无效的 online dogfood verification artifact 计入 not-ready,`public_install` 对 GitHub Release、npm、Homebrew 和 GHCR 的 `ready_to_publish` 也不再只看包材料,还要求 release evidence 已验证。
- 本轮新增:Windows target warning cleanup。Unix-only shell byte-stream/PTY helpers、hook fixture helpers、rollback Unix metadata helpers 和相关测试 fixture 现在只在对应 Unix cfg 下编译;`cargo check --target x86_64-pc-windows-gnu --all-targets` 当前已无 warnings 通过。这让 Windows ConPTY/TCP runtime proof 的编译面更接近 release-quality,而不是只做到“能编过但带一串条件编译噪音”。
@@ -72,6 +88,11 @@ deepseek agents shell-fixture-smoke --json
当前距离 Claude Code CLI / Codex CLI / DeepSeek-TUI 的成熟产品形态,主要差在以下几类:
+如果只看 Linux/macOS 本地 CLI milestone,核心交互能力已经成立;剩下主要是
+Homebrew 发布凭据、macOS CI/release smoke 证据落地、更多 online multi-file external
+fixture 样本和文档压缩。Windows/IDE/hosted 发布证据继续保留在更大产品目标里,但不是
+这个 milestone 的 blocker。
+
1. Shell/PTY 深水区
- 已有 bounded interactive attach、duplex `byte_stream` raw-output proxy slice、human `agents shell proxy` raw-mode wrapper、Windows `native-supervisor` ConPTY backend compile gate,以及 Linux 本地 `pty_fd` / SCM_RIGHTS PTY master fd handoff slice。
- `deepseek agents shell-fixture-smoke --json` 已把 Linux native PTY、duplex `byte_stream`、`raw_proxy`、`pty_fd` fd handoff 和 human `agents shell proxy` wrapper 纳入本地单命令 gate;direct `pty_fd` 与 CLI `fd-proxy` 测试已覆盖交还后 supervisor stdin/resize/replay 恢复,CLI `fd-proxy` Ctrl-C、Ctrl-D/PTY EOF、SIGWINCH resize 和异常 client 退出恢复也已有集成测试。
@@ -104,14 +125,18 @@ deepseek agents shell-fixture-smoke --json
当前执行 spec:`docs/superpowers/specs/2026-05-23-final-parity-execution-spec.md`。
该 spec 固化了本轮重新核对后的剩余差距、可执行命令、外部阻塞项和停止条件。
-1. 做 Shell/PTY 跨平台和安装态证明
- - 在现有 `raw_base64` terminal event、`attach_stream` frame channel、duplex `byte_stream` proxy slice、human `agents shell proxy` wrapper、Windows ConPTY/TCP daemon smoke wiring 和 Linux `pty_fd` fd handoff edge coverage 基础上,收集 Windows CI ConPTY/TCP smoke 结果和 installed service smoke。
- - Windows shell-supervisor 下一步是拿到 CI runner 的 TCP daemon/client、真实二进制 shell fixture、targeted start/resize smoke 证据;如果 loopback TCP 不能满足安装态要求,再评估 named pipe。
+1. 固化 Linux/macOS CLI release gate
+ - CI 和 Release Matrix 已新增非 Windows `agents shell-fixture-smoke --json`、
+ `agents service-smoke --json` 和 multi-file external fixture scaffold smoke。
+ - 下一步等 GitHub Actions 跑出 Linux/macOS debug/release 证据后,把对应 run 链接写回
+ status/spec;Windows shell-supervisor 继续作为后续跨平台目标。
2. 补外部 model-backed 证据和真实 demo
- 先轮换任何已经泄漏到聊天记录里的 key。
- 保留 `.dscode/dogfood/live-evidence-final-total-pr-4-release-verification.json` 作为当前 online dogfood release 证据。
- - 已完成 3 个 disposable repo/write-fixture 样本;下一步可以扩到 5 个,并补一个 multi-file 或 dependency-backed 的真实项目样本。
+ - 已完成 3 个 disposable repo/write-fixture 样本;下一步用
+ `scripts/create-multifile-external-fixture.sh` 生成 Python invoice multi-file 样本并跑一次
+ online `dogfood external-fixture --evidence-out ...`。
3. 补 README 真实录屏
- 已完成 CLI 版真实模型 SVG:失败测试、模型修改、通过测试和 diff。
@@ -132,6 +157,11 @@ deepseek agents shell-fixture-smoke --json
DeepSeekCode 现在已经是一个可以实际使用的 code agent CLI,尤其适合在本仓库继续 dogfood。
但它还不是“可以公开宣称等同 Claude Code CLI / Codex CLI”的成熟产品。
+如果目标限定为 Linux/macOS 本地 code agent CLI,则当前判断更强:Linux 本机已经通过
+TUI entrypoint、shell fixture、service smoke 和在线 dogfood release gate;macOS 入口
+smoke 已有,shell/runtime smoke 已加入 CI/Release Matrix 等待 hosted run 产出。Windows
+不再影响这个限定目标。
+
最准确的公开表述是:
-> DeepSeekCode is usable today for dogfooding and repository work, with a full-screen TUI, durable runtime, permissioned tools, release binaries, cross-platform entrypoint smoke, a 100-run online dogfood release gate, initial external disposable-repo write-fixture evidence, real hosted GitHub workflow evidence, and a committed real model-backed README demo SVG. The remaining work is hosted IDE evidence, Windows/service proof, optional richer demo media, and public package-channel publishing.
+> DeepSeekCode is usable today for Linux/macOS dogfooding and repository work, with a full-screen TUI, REPL, durable runtime, permissioned tools, shell-supervisor smoke gates, release binaries, a 100-run online dogfood release gate, initial external disposable-repo write-fixture evidence, real hosted GitHub workflow evidence, and a committed real model-backed README demo SVG. The remaining Linux/macOS CLI work is Homebrew publishing, richer online multi-file fixture evidence, and documentation polish; hosted IDE, Windows/service proof, npm publishing, and optional richer demo media remain broader product-hardening work.
diff --git a/docs/superpowers/specs/2026-05-23-final-parity-execution-spec.md b/docs/superpowers/specs/2026-05-23-final-parity-execution-spec.md
index 4108a4c..be50dcb 100644
--- a/docs/superpowers/specs/2026-05-23-final-parity-execution-spec.md
+++ b/docs/superpowers/specs/2026-05-23-final-parity-execution-spec.md
@@ -15,6 +15,12 @@ let the model inspect and edit code, run validation, review the diff, recover
from failures, resume context, and use external integrations without hidden
manual glue.
+Scope update from the 2026-05-23 goal review: the immediate milestone is the
+Linux/macOS local code-agent CLI. Windows ConPTY/service proof, hosted IDE
+evidence, and npm publishing remain broader product-hardening work, but they do
+not block the Linux/macOS CLI milestone. The Linux/macOS gate is entrypoint +
+REPL/TUI + local runtime + shell-supervisor + task worktree + dogfood evidence.
+
## Current Evidence Snapshot
Local checks run during this execution pass:
@@ -85,8 +91,9 @@ Live execution update from this pass:
| Area | Current state | Gap to close | Gate |
|---|---|---|---|
| Core CLI/TUI coding loop | Usable; full tests and 82-case benchmark baseline are green in existing reports | Mostly evidence depth, not missing local primitives | Full test + default benchmark + recent no-stuck dogfood |
+| Linux/macOS local CLI gate | TUI entrypoint, task worktree, GitHub fixture smoke, online dogfood, and Linux shell/runtime smoke are available | CI/release matrix now needs to publish macOS shell/runtime smoke results | Non-Windows `agents shell-fixture-smoke --json`, `agents service-smoke --json`, and TUI entrypoint smoke |
| Model-backed dogfood | Release live gate passed; current live plan reports `105` online runs and `99` successes, with categories `write_validate 29/30`, `recovery 23/25`, `pr_workflow 47/50` | Preserve verified evidence and keep the gate fail-closed in release status | `dogfood report --require-live-runs 100 --require-live-success-rate 90 --require-live-category write_validate:25:90 --require-live-category recovery:25:90 --require-live-category pr_workflow:25:90` |
-| External write fixtures | `3` disposable real repo online write-fixture samples verified for Rust, Python, and JavaScript | Optionally expand to 5 samples and add a multi-file/dependency-backed fixture | `dogfood external-fixture ... --evidence-out` plus `dogfood external-evidence --require-successful-external-fixtures 1` |
+| External write fixtures | `3` disposable real repo online write-fixture samples verified for Rust, Python, and JavaScript; multi-file Python invoice fixture scaffold is now repo-native | Run the new multi-file fixture with an online model and verify evidence | `scripts/create-multifile-external-fixture.sh`, then `dogfood external-fixture ... --evidence-out` plus `dogfood external-evidence --require-successful-external-fixtures 1` |
| README real demo | Committed model-backed SVG exists at `docs/demo/deepseek-code-model-demo.svg`, generated from a verified online transcript | Optional polish: TUI/GIF/MP4 capture for launch pages | `record-model-backed-demo.sh`, verifier, rendered media committed |
| Windows Shell/PTY proof | Linux PTY fd/proxy path is strong; Windows ConPTY/TCP compile and workflow wiring exist | Need actual Windows runner evidence for ConPTY/TCP shell supervisor and fixture smoke | Windows CI/release job logs and artifact summary |
| Installed service proof | service-doctor/service-smoke local gates exist | Need clean-machine installed systemd/launchd smoke evidence | `agents service-smoke --installed ... --json` on real install |
@@ -138,10 +145,14 @@ Live execution update from this pass:
- This is blocked on VS Code CLI availability and hosted GitHub credentials or
a fixture repository.
-5. Close shell/service platform proof.
- - Preserve Linux PTY/fd/proxy evidence.
- - Collect Windows ConPTY/TCP shell fixture CI evidence.
- - Run installed service smoke on clean Linux/macOS machines.
+5. Close Linux/macOS shell/service platform proof.
+ - Done locally: Linux `agents shell-fixture-smoke --json` and
+ `agents service-smoke --json` pass with native PTY/fd/proxy coverage.
+ - Added to CI/release: non-Windows debug/release binaries now run
+ `agents shell-fixture-smoke --json`, `agents service-smoke --json`, and
+ the multi-file external fixture scaffold smoke.
+ - Remaining for this milestone: record the hosted macOS CI/release run links.
+ - Windows ConPTY/TCP remains a later cross-platform proof item.
6. Publish and update final public docs.
- Configure `NPM_TOKEN` or `NODE_AUTH_TOKEN`.
@@ -161,15 +172,17 @@ Cleared in this pass:
Do not claim the 5% target while any of these remaining conditions are true:
-- VS Code and GitHub hosted evidence is only local/headless;
-- npm/Homebrew publish checks remain credential-skipped;
-- Windows shell-supervisor ConPTY/TCP evidence has not completed on a real
- Windows runner.
+- For the broad product target: VS Code evidence is only local/headless,
+ npm/Homebrew publish checks remain credential-skipped, or Windows
+ shell-supervisor ConPTY/TCP evidence has not completed on a real Windows
+ runner.
+- For the narrower Linux/macOS local CLI milestone: do not claim closure until
+ the non-Windows shell/runtime CI/release gates have passed and at least one
+ online multi-file external fixture evidence artifact is recorded.
## Next Local Action
-The next unblocked local action is to keep the repo green and preserve the
-fail-closed gates while collecting the remaining external evidence: hosted
-GitHub workflow runs, VS Code CLI evidence, Windows ConPTY/TCP CI evidence,
-optional richer external fixtures/demo media, and release-channel publish
-artifacts.
+The next unblocked local action is to run the new multi-file external fixture
+with an online model key, verify the evidence artifact, and then record the
+Linux/macOS CI/release run links once GitHub Actions has executed the new
+non-Windows shell/runtime gates.
diff --git a/scripts/create-multifile-external-fixture.sh b/scripts/create-multifile-external-fixture.sh
new file mode 100755
index 0000000..cafbb61
--- /dev/null
+++ b/scripts/create-multifile-external-fixture.sh
@@ -0,0 +1,122 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+root="${1:-/tmp/deepseek-external-fixtures/python-invoice-multifile}"
+force="${2:-}"
+
+if [[ -e "$root" ]]; then
+ if [[ "$force" != "--force" ]]; then
+ echo "fixture path already exists: $root" >&2
+ echo "rerun with --force to replace it" >&2
+ exit 2
+ fi
+ rm -rf "$root"
+fi
+
+python_bin="${PYTHON:-}"
+if [[ -z "$python_bin" ]]; then
+ if command -v python3 >/dev/null 2>&1; then
+ python_bin="python3"
+ elif command -v python >/dev/null 2>&1; then
+ python_bin="python"
+ else
+ echo "python3 or python is required to create the fixture" >&2
+ exit 1
+ fi
+fi
+
+mkdir -p "$root/src/invoice_math" "$root/tests"
+
+cat > "$root/src/invoice_math/__init__.py" <<'PY'
+"""Small invoice fixture for DeepSeekCode external dogfood."""
+PY
+
+cat > "$root/src/invoice_math/pricing.py" <<'PY'
+def subtotal(items):
+ return sum(item["quantity"] * item["unit_price"] for item in items)
+
+
+def apply_discount(amount, discount):
+ return amount - discount
+PY
+
+cat > "$root/src/invoice_math/summary.py" <<'PY'
+from .pricing import apply_discount, subtotal
+
+
+def render_invoice(items, discount=0.0):
+ total = apply_discount(subtotal(items), discount)
+ return f"Invoice total: {total:.2f}"
+PY
+
+cat > "$root/tests/test_invoice.py" <<'PY'
+import pathlib
+import sys
+import unittest
+
+sys.path.insert(0, str(pathlib.Path(__file__).resolve().parents[1] / "src"))
+
+from invoice_math.summary import render_invoice
+
+
+class InvoiceSummaryTests(unittest.TestCase):
+ def test_discount_is_capped_at_zero(self):
+ items = [{"quantity": 1, "unit_price": 8.0}]
+ self.assertEqual(render_invoice(items, discount=10.0), "Final total: 0.00")
+
+ def test_summary_uses_final_total_label(self):
+ items = [{"quantity": 2, "unit_price": 7.75}]
+ self.assertEqual(render_invoice(items), "Final total: 15.50")
+
+
+if __name__ == "__main__":
+ unittest.main()
+PY
+
+cat > "$root/README.md" <<'MD'
+# Invoice Multi-file Fixture
+
+Disposable external dogfood fixture for a two-file edit:
+
+- cap discounts at zero in `src/invoice_math/pricing.py`
+- rename the rendered invoice label in `src/invoice_math/summary.py`
+
+Validation command:
+
+```bash
+python -m unittest discover -s tests
+```
+MD
+
+(
+ cd "$root"
+ git init -q
+ git config user.email "deepseek-fixture@example.invalid"
+ git config user.name "DeepSeek Fixture"
+ git add README.md src tests
+ git commit -q -m "Create invoice multi-file fixture"
+)
+
+set +e
+(
+ cd "$root"
+ "$python_bin" -m unittest discover -s tests >/tmp/deepseek-multifile-fixture-test.log 2>&1
+)
+test_status=$?
+set -e
+
+if [[ "$test_status" -eq 0 ]]; then
+ echo "expected initial fixture tests to fail, but they passed" >&2
+ cat /tmp/deepseek-multifile-fixture-test.log >&2
+ exit 1
+fi
+
+task='replace `return amount - discount` with `return max(amount - discount, 0.0)` in src/invoice_math/pricing.py and replace `Invoice total` with `Final total` in src/invoice_math/summary.py, validate with python -m unittest discover -s tests'
+
+cat < PathBuf {
.unwrap_or_default()
.as_millis()
% 100_000;
- std::env::temp_dir().join(format!("dsc-shell-fixture-{}-{suffix}", std::process::id()))
+ short_temp_root().join(format!("dsc-shell-fixture-{}-{suffix}", std::process::id()))
+}
+
+fn short_temp_root() -> PathBuf { // Picks the parent directory under which the shell-fixture temp dir is created.
+ let tmp = PathBuf::from("/tmp"); // Candidate: the literal `/tmp` path.
+ if tmp.is_dir() { // Use `/tmp` only when it exists and is a directory.
+ return tmp;
+ }
+ std::env::temp_dir() // Otherwise fall back to the platform temp dir. NOTE(review): `/tmp` is presumably preferred because it is shorter than some platform temp dirs (e.g. to keep socket paths short) — confirm.
}
fn run_shell_fixture_smoke_checks(report: &mut ShellFixtureSmokeReport) {
@@ -7180,11 +7188,15 @@ fn shell_supervisor_control_smoke(
) -> AppResult {
let tty = cfg!(all(unix, target_os = "linux"));
let wait_timeout = timeout_ms.min(5000);
- let start_request = format!(
- "{{\"method\":\"start\",\"arguments\":{{\"command\":\"echo deepseek-shell-supervisor-smoke\",\"tty\":{},\"tty_rows\":24,\"tty_cols\":80,\"timeout_ms\":{}}}}}\n",
- if tty { "true" } else { "false" },
- wait_timeout
- );
+ let start_request = if tty {
+ format!(
+ "{{\"method\":\"start\",\"arguments\":{{\"command\":\"echo deepseek-shell-supervisor-smoke\",\"tty\":true,\"tty_rows\":24,\"tty_cols\":80,\"timeout_ms\":{wait_timeout}}}}}\n"
+ )
+ } else {
+ format!(
+ "{{\"method\":\"start\",\"arguments\":{{\"command\":\"echo deepseek-shell-supervisor-smoke\",\"tty\":false,\"timeout_ms\":{wait_timeout}}}}}\n"
+ )
+ };
let start_response = shell_supervisor_request_raw(socket, "start", &start_request)?;
let task_id = shell_supervisor_response_string(&start_response, "task_id")
.ok_or_else(|| app_error("shell supervisor start smoke response missing task_id"))?;
diff --git a/src/tools/exec_shell.rs b/src/tools/exec_shell.rs
index e8396ca..bfdb31a 100644
--- a/src/tools/exec_shell.rs
+++ b/src/tools/exec_shell.rs
@@ -12,7 +12,9 @@ use std::error::Error;
#[cfg(unix)]
use std::fs::File;
use std::fs::{self, OpenOptions};
-use std::io::{Read, Write};
+#[cfg(any(all(unix, target_os = "linux"), windows))]
+use std::io::Read;
+use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::{Child, ChildStdin, Command, Stdio};
#[cfg(all(unix, target_os = "linux"))]