diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8c96345..34f07bb 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,9 +44,30 @@ jobs: - name: GitHub Action Bridge Smoke run: target/debug/deepseek github fixture-smoke --json + - name: Shell Supervisor Fixture Smoke + run: | + if command -v timeout >/dev/null 2>&1; then + timeout 20s target/debug/deepseek agents shell-fixture-smoke --json + else + target/debug/deepseek agents shell-fixture-smoke --json + fi + + - name: Local Runtime Service Smoke + run: | + smoke_workdir="/tmp/deepseek-service-smoke-${GITHUB_RUN_ID:-local}" + mkdir -p "$smoke_workdir" + if command -v timeout >/dev/null 2>&1; then + timeout 25s target/debug/deepseek agents service-smoke --workdir "$smoke_workdir" --bin target/debug/deepseek --json + else + target/debug/deepseek agents service-smoke --workdir "$smoke_workdir" --bin target/debug/deepseek --json + fi + - name: TUI Entrypoint Smoke run: target/debug/deepseek tui --entrypoint-smoke --smoke-bin target/debug/deepseek + - name: Multi-file External Fixture Scaffold Smoke + run: scripts/create-multifile-external-fixture.sh "$RUNNER_TEMP/deepseek-external-fixture" + - name: Secret Scan run: node scripts/check-secrets.js @@ -106,6 +127,30 @@ jobs: - name: GitHub Action Bridge Smoke run: ${{ matrix.binary }} github fixture-smoke --json + - name: Shell Supervisor Fixture Smoke + if: runner.os != 'Windows' + run: | + if command -v timeout >/dev/null 2>&1; then + timeout 20s ${{ matrix.binary }} agents shell-fixture-smoke --json + else + ${{ matrix.binary }} agents shell-fixture-smoke --json + fi + + - name: Local Runtime Service Smoke + if: runner.os != 'Windows' + run: | + smoke_workdir="/tmp/deepseek-service-smoke-${GITHUB_RUN_ID:-local}" + mkdir -p "$smoke_workdir" + if command -v timeout >/dev/null 2>&1; then + timeout 25s ${{ matrix.binary }} agents service-smoke --workdir "$smoke_workdir" --bin ${{ matrix.binary }} --json + else + ${{ 
matrix.binary }} agents service-smoke --workdir "$smoke_workdir" --bin ${{ matrix.binary }} --json + fi + + - name: Multi-file External Fixture Scaffold Smoke + if: runner.os != 'Windows' + run: scripts/create-multifile-external-fixture.sh "$RUNNER_TEMP/deepseek-external-fixture" + - name: Windows Shell Supervisor ConPTY Smoke if: runner.os == 'Windows' shell: pwsh diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 61c86a6..3aecd8f 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -74,6 +74,30 @@ jobs: - name: Verify Release Binary GitHub Action Bridge run: ${{ matrix.binary }} github fixture-smoke --json + - name: Verify Release Binary Shell Supervisor Fixture + if: runner.os != 'Windows' + run: | + if command -v timeout >/dev/null 2>&1; then + timeout 20s ${{ matrix.binary }} agents shell-fixture-smoke --json + else + ${{ matrix.binary }} agents shell-fixture-smoke --json + fi + + - name: Verify Release Binary Local Runtime Service Smoke + if: runner.os != 'Windows' + run: | + smoke_workdir="/tmp/deepseek-service-smoke-${GITHUB_RUN_ID:-local}" + mkdir -p "$smoke_workdir" + if command -v timeout >/dev/null 2>&1; then + timeout 25s ${{ matrix.binary }} agents service-smoke --workdir "$smoke_workdir" --bin ${{ matrix.binary }} --json + else + ${{ matrix.binary }} agents service-smoke --workdir "$smoke_workdir" --bin ${{ matrix.binary }} --json + fi + + - name: Verify Multi-file External Fixture Scaffold + if: runner.os != 'Windows' + run: scripts/create-multifile-external-fixture.sh "$RUNNER_TEMP/deepseek-external-fixture" + - name: Verify Release Binary TUI Snapshot if: runner.os != 'Windows' env: diff --git a/README.ja-JP.md b/README.ja-JP.md index e81b454..105dc09 100644 --- a/README.ja-JP.md +++ b/README.ja-JP.md @@ -10,8 +10,8 @@ DeepSeekCode は DeepSeek-first のターミナル向けコーディングエー > 状態: dogfood とリポジトリ内の開発作業には利用できます。`v0.1.1` では > GitHub Release のバイナリと検証済み GHCR イメージを公開済みです。bare > `deepseek` TUI entrypoint は 
Linux、macOS、Windows CI で smoke 済みです。 -> npm と Homebrew の公開には registry/tap の資格情報がまだ必要で、より厚い -> hosted IDE/GitHub evidence は継続中です。 +> hosted GitHub write workflow evidence は記録済みです。hosted IDE evidence と +> npm/Homebrew の公開には外部の資格情報や実行環境がまだ必要です。

DeepSeekCode animated TUI demo recording @@ -120,15 +120,19 @@ deepseek tui --runtime-url http://127.0.0.1:13000 ## 現在の差分 DeepSeekCode は自身の開発に使える段階ですが、Claude Code CLI / Codex CLI -ほどの製品成熟度にはまだ届いていません。大きな残差は次の通りです。 - -- compile-check 済み backend、現行 Linux PTY fd handoff、bounded interactive - attach、CI-smoked default TUI entrypoint を超える Windows shell-supervisor - ConPTY/TCP daemon runtime proof。 -- hosted GitHub/VS Code workflow evidence と、よりリッチな multi-file external fixture サンプル。 -- npm registry 公開と Homebrew tap。どちらも資格情報が未設定です。 +ほどの製品成熟度にはまだ届いていません。Linux/macOS のローカル coding-agent +CLI に絞ると、残差は主に evidence depth と配布面の polish です。 + +- macOS shell/runtime evidence を entrypoint smoke 以上に厚くすること。CI/release + matrix は `agents shell-fixture-smoke` と `agents service-smoke` を実行します。 +- よりリッチな multi-file external fixture サンプル。disposable Python invoice + fixture の scaffold script を追加しています。 +- Homebrew 公開。tap 資格情報が未設定です。 - コミット済み model-backed SVG を超える、任意の polish 済み GIF/MP4 キャプチャ。 +Windows ConPTY/service proof、hosted IDE evidence、npm publishing はより広い +product hardening ですが、Linux/macOS ローカル CLI milestone の blocker ではありません。 + 現在の状態、次の作業、最終目標は [docs/current-status.md](./docs/current-status.md) にまとめています。 @@ -188,6 +192,7 @@ deepseek update publish-status --dist dist-assets --npm-dist npm-dist --strict deepseek update publish-status --json deepseek agents service-doctor --kind all --workdir "$PWD" --bin "$(command -v deepseek)" --json deepseek agents service-smoke --workdir "$PWD" --bin "$(command -v deepseek)" --json +deepseek agents shell-fixture-smoke --json deepseek tui --entrypoint-smoke --smoke-bin "$(command -v deepseek)" ``` @@ -204,6 +209,7 @@ repository を使います。まず dry-run で preflight し、その後 isolat 実行して dogfood report に記録します。 ```bash +scripts/create-multifile-external-fixture.sh /tmp/deepseek-external-fixtures/python-invoice-multifile deepseek dogfood external-fixture --workdir /tmp/disposable-repo --dry-run \ 'replace `a - b` with `a + b` in src/lib.rs and validate with cargo test' 
deepseek dogfood external-fixture --workdir /tmp/disposable-repo --benchmark-gate \ diff --git a/README.md b/README.md index 1cd3348..b860c77 100644 --- a/README.md +++ b/README.md @@ -9,9 +9,9 @@ iterating from the same terminal. > Status: usable for dogfooding and repository work. `v0.1.1` has GitHub > Release binaries and a verified GHCR image; the bare `deepseek` TUI entrypoint -> is CI-smoked on Linux, macOS, and Windows. npm and Homebrew publishing still -> need registry/tap credentials, and hosted IDE/GitHub evidence remains in -> progress. +> is CI-smoked on Linux, macOS, and Windows. Hosted GitHub write workflow +> evidence is recorded; hosted IDE evidence and npm/Homebrew publishing still +> need external credentials or machines.

DeepSeekCode animated TUI demo recording @@ -119,16 +119,21 @@ git. ## Current Gap DeepSeekCode is close enough to use as its own coding CLI, but it is not yet at -Claude Code CLI / Codex CLI polish. The largest remaining gaps are: - -- Windows shell-supervisor ConPTY/TCP daemon runtime proof beyond the new - compile-checked backend, current Linux PTY fd handoff, bounded interactive - attach, and CI-smoked default TUI entrypoint; -- hosted GitHub/VS Code workflow evidence and richer multi-file external - fixture samples; -- npm registry publishing and a Homebrew tap, both blocked on credentials; +Claude Code CLI / Codex CLI polish. For a Linux/macOS local coding-agent CLI, +the remaining gaps are mostly evidence depth and distribution polish: + +- macOS shell/runtime CI and release-binary evidence beyond the entrypoint + smoke: the `agents shell-fixture-smoke` and `agents service-smoke` workflow + gates are now wired in, but hosted run results are not yet recorded; +- richer multi-file external fixture samples, with a scaffold script for a + disposable Python invoice fixture; +- Homebrew publishing, still blocked on tap credentials; - optional polished GIF/MP4 capture beyond the committed model-backed SVG. +Windows ConPTY/service proof, hosted IDE evidence, and npm publishing remain +broader product-hardening work, but they are not blockers for the Linux/macOS +local CLI milestone. + See [docs/current-status.md](./docs/current-status.md) for the current Chinese status, roadmap, and final target. @@ -191,6 +196,7 @@ deepseek update publish-status --json deepseek agents service-doctor --kind all --workdir "$PWD" --bin "$(command -v deepseek)" --json mkdir -p /tmp/dsc-smk deepseek agents service-smoke --workdir /tmp/dsc-smk --bin "$(command -v deepseek)" --json +deepseek agents shell-fixture-smoke --json deepseek tui --entrypoint-smoke --smoke-bin "$(command -v deepseek)" ``` @@ -207,6 +213,7 @@ outside this checkout. 
The command dry-runs preflight first, then runs against an isolated copy and records the result in the dogfood report: ```bash +scripts/create-multifile-external-fixture.sh /tmp/deepseek-external-fixtures/python-invoice-multifile deepseek dogfood external-fixture --workdir /tmp/disposable-repo --dry-run \ 'replace `a - b` with `a + b` in src/lib.rs and validate with cargo test' deepseek dogfood external-fixture --workdir /tmp/disposable-repo --benchmark-gate \ diff --git a/README.zh-CN.md b/README.zh-CN.md index da790c1..c60d370 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -8,8 +8,8 @@ TUI/runtime workbench。它面向真实写代码循环:阅读仓库、修改 > 当前状态:已经可以用于 dogfood 和仓库内编码任务。`v0.1.1` 已有 GitHub > Release 二进制包和实测可用的 GHCR 镜像;裸 `deepseek` TUI 入口已经在 -> Linux、macOS、Windows CI 里做真实 smoke。npm 与 Homebrew 发布还需要 -> registry/tap 凭据,hosted IDE/GitHub 证据仍在推进中。 +> Linux、macOS、Windows CI 里做真实 smoke。hosted GitHub 写入 workflow +> 证据已经记录;hosted IDE 证据和 npm/Homebrew 发布仍需要外部凭据或机器。

DeepSeekCode animated TUI demo recording @@ -115,15 +115,19 @@ deepseek tui --runtime-url http://127.0.0.1:13000 ## 当前差距 DeepSeekCode 已经可以直接拿来写自己的代码,但还没有达到 Claude Code CLI / -Codex CLI 的产品成熟度。最大差距集中在: - -- 新增 compile-checked backend、当前 Linux PTY fd handoff、bounded interactive - attach 和 CI 已验证默认 TUI 入口之外,Windows shell-supervisor ConPTY/TCP - daemon 运行证明; -- hosted GitHub/VS Code workflow 证据,以及更丰富的 multi-file external fixture 样本; -- npm registry 发布和 Homebrew tap,这两项还缺少对应凭据; +Codex CLI 的产品成熟度。如果只看 Linux/macOS 本地 coding-agent CLI,剩余差距主要是 +证据厚度和分发打磨: + +- macOS shell/runtime 证据需要超过入口 smoke;CI/release matrix 现在会跑 + `agents shell-fixture-smoke` 和 `agents service-smoke`; +- 更丰富的 multi-file external fixture 样本;仓库已提供 disposable Python invoice + fixture 脚手架; +- Homebrew 发布仍缺 tap 凭据; - 已提交 model-backed SVG 之外,可选的更精致 GIF/MP4 录屏素材。 +Windows ConPTY/service proof、hosted IDE 证据和 npm 发布属于更大的产品硬化目标, +不再阻塞 Linux/macOS 本地 CLI milestone。 + 当前状态、下一步路线和最终目标见 [docs/current-status.md](./docs/current-status.md)。 ## Demo 素材 @@ -181,6 +185,7 @@ deepseek update publish-status --dist dist-assets --npm-dist npm-dist --strict deepseek update publish-status --json deepseek agents service-doctor --kind all --workdir "$PWD" --bin "$(command -v deepseek)" --json deepseek agents service-smoke --workdir "$PWD" --bin "$(command -v deepseek)" --json +deepseek agents shell-fixture-smoke --json deepseek tui --entrypoint-smoke --smoke-bin "$(command -v deepseek)" ``` @@ -196,6 +201,7 @@ deepseek pr live-status owner/repo#42 --json 命令会先 dry-run 检查,然后在 isolated copy 中执行,并把结果写入 dogfood report: ```bash +scripts/create-multifile-external-fixture.sh /tmp/deepseek-external-fixtures/python-invoice-multifile deepseek dogfood external-fixture --workdir /tmp/disposable-repo --dry-run \ 'replace `a - b` with `a + b` in src/lib.rs and validate with cargo test' deepseek dogfood external-fixture --workdir /tmp/disposable-repo --benchmark-gate \ diff --git a/docs/current-status.md b/docs/current-status.md index 
1667a54..411ad98 100644 --- a/docs/current-status.md +++ b/docs/current-status.md @@ -8,6 +8,12 @@ DeepSeekCode 的目标是成为一个 DeepSeek-first 的 code agent CLI:用户 `deepseek` 后,可以像使用 Claude Code CLI、Codex CLI 或 DeepSeek-TUI 一样,完成真实 仓库里的读代码、改代码、跑命令、查看 diff、继续修复、恢复会话和发布前验证。 +当前执行口径先收敛到 Linux/macOS 本地 code agent CLI:只要用户能在 Linux/macOS +安装并运行 `deepseek`,稳定进入 TUI/REPL,完成模型读写代码、shell 验证、diff +review、resume 和本地 runtime/shell-supervisor 工作流,就可以认为这个 milestone +成立。Windows ConPTY/service proof、hosted IDE 证据和 npm 发布属于后续跨平台/集成 +硬化,不再阻塞 Linux/macOS 本地 CLI 目标。 + 最终验收口径不是“功能列表看起来很多”,而是: - 裸 `deepseek` 在真实 TTY 里稳定进入 coding-agent TUI; @@ -31,6 +37,10 @@ DeepSeekCode 的目标是成为一个 DeepSeek-first 的 code agent CLI:用户 - 模型协议:OpenAI-compatible tool calls,同轮 batch tool calls,DeepSeek provider/model alias 兼容。 - 审批与安全:approve-once、approve-for-session、deny fingerprint、secret scan、shell/network policy、rollback snapshot。 - Shell/PTY:后台 shell job、wait/replay/attach/stdin/resize/cancel;Linux native-supervisor PTY;workspace shell-supervisor protocol bridge。 +- Linux/macOS CLI readiness:CI 和 Release Matrix 会在非 Windows 平台跑 + `agents shell-fixture-smoke --json`、`agents service-smoke --json`、TUI entrypoint + smoke、task worktree smoke 和 GitHub bridge smoke;这把 Linux/macOS 本地 CLI 的入口、 + runtime、shell-supervisor 和后台 worktree 基线纳入同一类 release gate。 - 本轮新增:`deepseek chat` / `deepseek repl` / `deepseek interactive` 的真实 TTY 输入现在走内置 raw-mode line editor,补齐 Claude Code-like REPL 的 Up/Down history、history draft restore、左右移动、Home/End、Backspace/Delete、Ctrl+A/E/U/K/W、Tab slash/session completion、空行 Ctrl+D 和 prompt Ctrl+C 退出;运行中的 REPL turn 也会把 SIGINT 接到 `AgentLoopOptions.cancel_check`,让模型 stream 和 cancel-aware tools 协作取消,并在取消后恢复本轮 transcript/snapshot 指针,避免半截 prompt 污染后续上下文;`/sessions [prefix]` 可以列出保存的 REPL session,`/load ` 后 Tab 可补全 session 名;非交互测试路径仍保留 buffered reader,不需要真实终端。 - 本轮新增:Phase 12E background worktree runner 第一片。新增 `deepseek task start/list/show/stop/diff/merge/reject` 和 `deepseek task fixture-smoke --json`:`task 
start` 会在当前 git repo 的 `.dscode/task-runner/worktrees/` 创建隔离 worktree 和默认 `deepseek-task/` 分支,把记录写到 `.dscode/task-runner/records/`,stdout/stderr 写到 `.dscode/task-runner/logs/`,并在该 worktree 中启动 `deepseek exec --json`;父 CLI 退出后 child 进程仍可继续。`--no-run` 可只创建 worktree/record,用于无 API key 的本地 gate;`task diff` 展示 task worktree 的 tracked patch/stat 和 untracked files,`task merge --check` dry-run 验证,`task merge` 要求原 worktree 干净后把 patch 和 untracked regular files 合回原 repo,`task reject` 默认删除受管 task worktree 并把记录标记为 rejected。`deepseek github action --background-task` 现在也可把解析出的 GitHub PR review/fix/patch 请求委派到同一 task runner,`--task-id` 支持 workflow 稳定 id,`--task-no-run` 支持无凭据本地 workflow gate。当前 `task fixture-smoke --json` 实测 `ok=true`、`worktree_created=true`、`record_listed=true`、`merge_check_ok=true`、`merge_apply_ok=true`、`reject_ok=true`、`cleanup_ok=true`;CI 已把该 smoke 接到 Linux/macOS/Windows debug binary,Release Matrix 也会在各平台 release binary packaging 前运行。 - 本轮新增:`deepseek agents shell attach --interactive` / `--takeover`。它会进入本地 raw mode,把按键转发到 supervisor `stdin`,把 resize 转发到 supervisor `resize`,并把 output 事件的 raw bytes replay 回当前终端;Linux 集成 smoke 已覆盖 raw-mode PTY 启动、`tty=true` job、stdin、resize、replay 和 bounded detach。它是可用的 bounded interactive attach,不是字节级 PTY fd 直连代理。 @@ -49,6 +59,12 @@ DeepSeekCode 的目标是成为一个 DeepSeek-first 的 code agent CLI:用户 - 本轮新增:`deepseek dogfood live-plan` 的推荐命令改为 `deepseek dogfood live-run ...`,文本和 JSON 都同时输出 dry-run 与 `--execute` 命令,避免 release operator 为 model-backed 证据误走 offline-friendly `replay-benchmark` 路径。`deepseek dogfood live-plan` 和 `deepseek dogfood live-run --json` 现在还输出 `post_run_report_command` / `evidence_gate`,直接给出 `dogfood report --require-live-runs ... 
--require-live-category ...` 的后置验收命令,让真实 online 执行后的 model-backed 证据可以 fail closed。`deepseek dogfood live-run --json` 保持机器可读 dry-run plan,包含 selected cases、online readiness、execute blocker 和 follow-up `--execute` command;它故意不和 `--execute` 混用,避免在线执行日志污染 JSON。`dogfood live-run` 还支持 `--api-key-file`/`--key-file` 指向仓库外 key 文件,只把 key 注入当前进程的 `model.api_key_env` 并在返回时恢复,JSON 只记录 `credential_source` 和文件路径,不输出 key 值。`dogfood live-run --execute --evidence-out ` 现在会在批次结束或首个失败后写出 `deepseek.dogfood.live_run_evidence.v1` JSON,记录 before/after ledger live counts、每个 case 追加的 model-backed ledger 行、benchmark gate 结果、同一条 post-run report gate,以及当前 ledger 文件的 `fnv1a64` fingerprint,仍不写入 API key 值。`deepseek dogfood live-evidence --file ` 现在可验证该 evidence 文件,默认要求 completed、online、至少 1 条 appended model-backed row;`--require-benchmark-gate` 可把 benchmark gate 也纳入 release fail-closed 检查,`--require-report-gate` 会读取 evidence 的 structured `evidence_gate` 和 ledger path,用 `dogfood report` 同一套 live requirement 逻辑验证 full live gate,重新计算 ledger fingerprint 并逐条核对 evidence 中 appended case 的 timestamp/outcome/model_transport/category 能在 ledger 中找到匹配记录,而不是执行 JSON 里的 shell command;`--json` 输出 `deepseek.dogfood.live_evidence_verification.v1`,`--out ` 可把 verification JSON 落盘作为 release evidence artifact。`dogfood external-fixture` 真实执行现在也默认要求 `model_transport=online`,离线只能 dry-run 或显式 `--allow-offline` 做 rehearsal,避免把 offline disposable repo 样本误计为 release evidence;`--evidence-out` 会写出 `deepseek.dogfood.external_fixture_evidence.v1`,包含 appended external fixture row、release-evidence readiness 和 ledger fingerprint,便于上传发布证据。 - 本轮新增:在线 DeepSeek dogfood 从 smoke 推进到完整 release gate。使用当前进程注入的 DeepSeek key 执行 `dogfood live-run --execute --evidence-out ...`,最终 `deepseek dogfood report --limit 100 --require-live-runs 100 --require-live-success-rate 90 --require-live-category write_validate:25:90 --require-live-category recovery:25:90 --require-live-category pr_workflow:25:90` 通过;外部 fixture 跑完后 `live-plan` 显示 `105` 条 online 
run、`99` 条 success,分类为 `write_validate 29/30`、`recovery 23/25`、`pr_workflow 47/50`。执行过程中又修掉两类真实模型卡点:Python pytest retry readback 现在能识别 `def test_` / `assert ` 测试文件,并从错误的 `a * b` 回退到 `a + b`;空搜索恢复任务在看到 no matches 后完成 repository layout inspection 会 clean finish,不再重复列目录。release evidence verification 落在 `.dscode/dogfood/live-evidence-final-total-pr-4-release-verification.json`,`report_gate_passed=true`。 - 本轮新增:外部 disposable repo write-fixture 证据第一批。已在 `/tmp/deepseek-external-fixtures/` 下构造 Rust、Python、JavaScript 三个独立 git repo,初始测试均失败,然后用真实 online DeepSeek 跑 `dogfood external-fixture --workdir ... --evidence-out ...`,三条都完成 `read_file -> apply_patch -> validation -> finish`,并分别通过 `dogfood external-evidence --require-successful-external-fixtures 1`:`.dscode/dogfood/external-fixture-rust-add-v3-verification.json`、`.dscode/dogfood/external-fixture-python-add-verification.json`、`.dscode/dogfood/external-fixture-js-add-verification.json`。本轮还修复了 external fixture evidence record 缺少 `model_backed` 字段导致 verifier 无法和 ledger online row 对齐的问题。 +- 本轮新增:multi-file external fixture scaffold。`scripts/create-multifile-external-fixture.sh` + 会在 checkout 之外创建 disposable Python invoice repo,初始 `python -m unittest discover -s tests` + 按预期失败,并输出 dry-run/evidence 两条 `deepseek dogfood external-fixture` 命令;任务要求同时修改 + `src/invoice_math/pricing.py` 和 `src/invoice_math/summary.py`。CI/Release Matrix 在 Linux/macOS + 上会执行该脚手架,先保证更真实的 multi-file 样本可重复构造;真实 online evidence 仍由带 key 的 + `dogfood external-fixture --evidence-out ...` 产生。 - 本轮新增:README 真实 model-backed demo SVG。`docs/demo/record-model-backed-demo.sh` 使用当前 DeepSeek key 录制了 disposable Rust crate 的 failure -> `deepseek exec` -> patch -> passing `cargo test` -> diff transcript,`docs/demo/verify-model-backed-demo.js` 验证通过后由 `docs/demo/render-model-backed-demo-svg.js` 渲染为 `docs/demo/deepseek-code-model-demo.svg`。本轮还修复了 explicit edit parser 对 `in src/lib.rs, validate ...` 的路径截断问题,以及 renderer 把 `test result: ok ... 
0 failed` 误标红的问题;README 英文、中文、日文都已引用该真实模型 SVG。 - 本轮新增:`deepseek update publish-status` 现在支持 `--live-evidence-verification `(别名 `--live-evidence`),会读取 `dogfood live-evidence --out` 生成的 `deepseek.dogfood.live_evidence_verification.v1`,要求 `ok=true`、completed、online、appended model-backed row、report gate required/passed、ledger fingerprint/current ledger fingerprint 都成立。`--strict` 因此会把缺失或无效的 online dogfood verification artifact 计入 not-ready,`public_install` 对 GitHub Release、npm、Homebrew 和 GHCR 的 `ready_to_publish` 也不再只看包材料,还要求 release evidence 已验证。 - 本轮新增:Windows target warning cleanup。Unix-only shell byte-stream/PTY helpers、hook fixture helpers、rollback Unix metadata helpers 和相关测试 fixture 现在只在对应 Unix cfg 下编译;`cargo check --target x86_64-pc-windows-gnu --all-targets` 当前已无 warnings 通过。这让 Windows ConPTY/TCP runtime proof 的编译面更接近 release-quality,而不是只做到“能编过但带一串条件编译噪音”。 @@ -72,6 +88,11 @@ deepseek agents shell-fixture-smoke --json 当前距离 Claude Code CLI / Codex CLI / DeepSeek-TUI 的成熟产品形态,主要差在以下几类: +如果只看 Linux/macOS 本地 CLI milestone,核心交互能力已经成立;剩下主要是 +Homebrew 发布凭据、macOS CI/release smoke 证据落地、更多 online multi-file external +fixture 样本和文档压缩。Windows/IDE/hosted 发布证据继续保留在更大产品目标里,但不是 +这个 milestone 的 blocker。 + 1. Shell/PTY 深水区 - 已有 bounded interactive attach、duplex `byte_stream` raw-output proxy slice、human `agents shell proxy` raw-mode wrapper、Windows `native-supervisor` ConPTY backend compile gate,以及 Linux 本地 `pty_fd` / SCM_RIGHTS PTY master fd handoff slice。 - `deepseek agents shell-fixture-smoke --json` 已把 Linux native PTY、duplex `byte_stream`、`raw_proxy`、`pty_fd` fd handoff 和 human `agents shell proxy` wrapper 纳入本地单命令 gate;direct `pty_fd` 与 CLI `fd-proxy` 测试已覆盖交还后 supervisor stdin/resize/replay 恢复,CLI `fd-proxy` Ctrl-C、Ctrl-D/PTY EOF、SIGWINCH resize 和异常 client 退出恢复也已有集成测试。 @@ -104,14 +125,18 @@ deepseek agents shell-fixture-smoke --json 当前执行 spec:`docs/superpowers/specs/2026-05-23-final-parity-execution-spec.md`。 该 spec 固化了本轮重新核对后的剩余差距、可执行命令、外部阻塞项和停止条件。 -1. 
做 Shell/PTY 跨平台和安装态证明 - - 在现有 `raw_base64` terminal event、`attach_stream` frame channel、duplex `byte_stream` proxy slice、human `agents shell proxy` wrapper、Windows ConPTY/TCP daemon smoke wiring 和 Linux `pty_fd` fd handoff edge coverage 基础上,收集 Windows CI ConPTY/TCP smoke 结果和 installed service smoke。 - - Windows shell-supervisor 下一步是拿到 CI runner 的 TCP daemon/client、真实二进制 shell fixture、targeted start/resize smoke 证据;如果 loopback TCP 不能满足安装态要求,再评估 named pipe。 +1. 固化 Linux/macOS CLI release gate + - CI 和 Release Matrix 已新增非 Windows `agents shell-fixture-smoke --json`、 + `agents service-smoke --json` 和 multi-file external fixture scaffold smoke。 + - 下一步等 GitHub Actions 跑出 Linux/macOS debug/release 证据后,把对应 run 链接写回 + status/spec;Windows shell-supervisor 继续作为后续跨平台目标。 2. 补外部 model-backed 证据和真实 demo - 先轮换任何已经泄漏到聊天记录里的 key。 - 保留 `.dscode/dogfood/live-evidence-final-total-pr-4-release-verification.json` 作为当前 online dogfood release 证据。 - - 已完成 3 个 disposable repo/write-fixture 样本;下一步可以扩到 5 个,并补一个 multi-file 或 dependency-backed 的真实项目样本。 + - 已完成 3 个 disposable repo/write-fixture 样本;下一步用 + `scripts/create-multifile-external-fixture.sh` 生成 Python invoice multi-file 样本并跑一次 + online `dogfood external-fixture --evidence-out ...`。 3. 
补 README 真实录屏 - 已完成 CLI 版真实模型 SVG:失败测试、模型修改、通过测试和 diff。 @@ -132,6 +157,11 @@ deepseek agents shell-fixture-smoke --json DeepSeekCode 现在已经是一个可以实际使用的 code agent CLI,尤其适合在本仓库继续 dogfood。 但它还不是“可以公开宣称等同 Claude Code CLI / Codex CLI”的成熟产品。 +如果目标限定为 Linux/macOS 本地 code agent CLI,则当前判断更强:Linux 本机已经通过 +TUI entrypoint、shell fixture、service smoke 和在线 dogfood release gate;macOS 入口 +smoke 已有,shell/runtime smoke 已加入 CI/Release Matrix 等待 hosted run 产出。Windows +不再影响这个限定目标。 + 最准确的公开表述是: -> DeepSeekCode is usable today for dogfooding and repository work, with a full-screen TUI, durable runtime, permissioned tools, release binaries, cross-platform entrypoint smoke, a 100-run online dogfood release gate, initial external disposable-repo write-fixture evidence, real hosted GitHub workflow evidence, and a committed real model-backed README demo SVG. The remaining work is hosted IDE evidence, Windows/service proof, optional richer demo media, and public package-channel publishing. +> DeepSeekCode is usable today for Linux/macOS dogfooding and repository work, with a full-screen TUI, REPL, durable runtime, permissioned tools, shell-supervisor smoke gates, release binaries, a 100-run online dogfood release gate, initial external disposable-repo write-fixture evidence, real hosted GitHub workflow evidence, and a committed real model-backed README demo SVG. The remaining Linux/macOS CLI work is Homebrew publishing, richer online multi-file fixture evidence, and documentation polish; hosted IDE, Windows/service proof, npm publishing, and optional richer demo media remain broader product-hardening work. 
diff --git a/docs/superpowers/specs/2026-05-23-final-parity-execution-spec.md b/docs/superpowers/specs/2026-05-23-final-parity-execution-spec.md index 4108a4c..be50dcb 100644 --- a/docs/superpowers/specs/2026-05-23-final-parity-execution-spec.md +++ b/docs/superpowers/specs/2026-05-23-final-parity-execution-spec.md @@ -15,6 +15,12 @@ let the model inspect and edit code, run validation, review the diff, recover from failures, resume context, and use external integrations without hidden manual glue. +Scope update from the 2026-05-23 goal review: the immediate milestone is the +Linux/macOS local code-agent CLI. Windows ConPTY/service proof, hosted IDE +evidence, and npm publishing remain broader product-hardening work, but they do +not block the Linux/macOS CLI milestone. The Linux/macOS gate is entrypoint + +REPL/TUI + local runtime + shell-supervisor + task worktree + dogfood evidence. + ## Current Evidence Snapshot Local checks run during this execution pass: @@ -85,8 +91,9 @@ Live execution update from this pass: | Area | Current state | Gap to close | Gate | |---|---|---|---| | Core CLI/TUI coding loop | Usable; full tests and 82-case benchmark baseline are green in existing reports | Mostly evidence depth, not missing local primitives | Full test + default benchmark + recent no-stuck dogfood | +| Linux/macOS local CLI gate | TUI entrypoint, task worktree, GitHub fixture smoke, online dogfood, and Linux shell/runtime smoke are available | CI/release matrix now needs to publish macOS shell/runtime smoke results | Non-Windows `agents shell-fixture-smoke --json`, `agents service-smoke --json`, and TUI entrypoint smoke | | Model-backed dogfood | Release live gate passed; current live plan reports `105` online runs and `99` successes, with categories `write_validate 29/30`, `recovery 23/25`, `pr_workflow 47/50` | Preserve verified evidence and keep the gate fail-closed in release status | `dogfood report --require-live-runs 100 --require-live-success-rate 90 
--require-live-category write_validate:25:90 --require-live-category recovery:25:90 --require-live-category pr_workflow:25:90` | -| External write fixtures | `3` disposable real repo online write-fixture samples verified for Rust, Python, and JavaScript | Optionally expand to 5 samples and add a multi-file/dependency-backed fixture | `dogfood external-fixture ... --evidence-out` plus `dogfood external-evidence --require-successful-external-fixtures 1` | +| External write fixtures | `3` disposable real repo online write-fixture samples verified for Rust, Python, and JavaScript; multi-file Python invoice fixture scaffold is now repo-native | Run the new multi-file fixture with an online model and verify evidence | `scripts/create-multifile-external-fixture.sh`, then `dogfood external-fixture ... --evidence-out` plus `dogfood external-evidence --require-successful-external-fixtures 1` | | README real demo | Committed model-backed SVG exists at `docs/demo/deepseek-code-model-demo.svg`, generated from a verified online transcript | Optional polish: TUI/GIF/MP4 capture for launch pages | `record-model-backed-demo.sh`, verifier, rendered media committed | | Windows Shell/PTY proof | Linux PTY fd/proxy path is strong; Windows ConPTY/TCP compile and workflow wiring exist | Need actual Windows runner evidence for ConPTY/TCP shell supervisor and fixture smoke | Windows CI/release job logs and artifact summary | | Installed service proof | service-doctor/service-smoke local gates exist | Need clean-machine installed systemd/launchd smoke evidence | `agents service-smoke --installed ... --json` on real install | @@ -138,10 +145,14 @@ Live execution update from this pass: - This is blocked on VS Code CLI availability and hosted GitHub credentials or a fixture repository. -5. Close shell/service platform proof. - - Preserve Linux PTY/fd/proxy evidence. - - Collect Windows ConPTY/TCP shell fixture CI evidence. - - Run installed service smoke on clean Linux/macOS machines. +5. 
Close Linux/macOS shell/service platform proof. + - Done locally: Linux `agents shell-fixture-smoke --json` and + `agents service-smoke --json` pass with native PTY/fd/proxy coverage. + - Added to CI/release: non-Windows debug/release binaries now run + `agents shell-fixture-smoke --json`, `agents service-smoke --json`, and + the multi-file external fixture scaffold smoke. + - Remaining for this milestone: record the hosted macOS CI/release run links. + - Windows ConPTY/TCP remains a later cross-platform proof item. 6. Publish and update final public docs. - Configure `NPM_TOKEN` or `NODE_AUTH_TOKEN`. @@ -161,15 +172,17 @@ Cleared in this pass: Do not claim the 5% target while any of these remaining conditions are true: -- VS Code and GitHub hosted evidence is only local/headless; -- npm/Homebrew publish checks remain credential-skipped; -- Windows shell-supervisor ConPTY/TCP evidence has not completed on a real - Windows runner. +- For the broad product target: VS Code evidence is only local/headless, + npm/Homebrew publish checks remain credential-skipped, or Windows + shell-supervisor ConPTY/TCP evidence has not completed on a real Windows + runner. +- For the narrower Linux/macOS local CLI milestone: do not claim closure until + the non-Windows shell/runtime CI/release gates have passed and at least one + online multi-file external fixture evidence artifact is recorded. ## Next Local Action -The next unblocked local action is to keep the repo green and preserve the -fail-closed gates while collecting the remaining external evidence: hosted -GitHub workflow runs, VS Code CLI evidence, Windows ConPTY/TCP CI evidence, -optional richer external fixtures/demo media, and release-channel publish -artifacts. +The next unblocked local action is to run the new multi-file external fixture +with an online model key, verify the evidence artifact, and then record the +Linux/macOS CI/release run links once GitHub Actions has executed the new +non-Windows shell/runtime gates. 
diff --git a/scripts/create-multifile-external-fixture.sh b/scripts/create-multifile-external-fixture.sh new file mode 100755 index 0000000..cafbb61 --- /dev/null +++ b/scripts/create-multifile-external-fixture.sh @@ -0,0 +1,122 @@ +#!/usr/bin/env bash +set -euo pipefail + +root="${1:-/tmp/deepseek-external-fixtures/python-invoice-multifile}" +force="${2:-}" + +if [[ -e "$root" ]]; then + if [[ "$force" != "--force" ]]; then + echo "fixture path already exists: $root" >&2 + echo "rerun with --force to replace it" >&2 + exit 2 + fi + rm -rf "$root" +fi + +python_bin="${PYTHON:-}" +if [[ -z "$python_bin" ]]; then + if command -v python3 >/dev/null 2>&1; then + python_bin="python3" + elif command -v python >/dev/null 2>&1; then + python_bin="python" + else + echo "python3 or python is required to create the fixture" >&2 + exit 1 + fi +fi + +mkdir -p "$root/src/invoice_math" "$root/tests" + +cat > "$root/src/invoice_math/__init__.py" <<'PY' +"""Small invoice fixture for DeepSeekCode external dogfood.""" +PY + +cat > "$root/src/invoice_math/pricing.py" <<'PY' +def subtotal(items): + return sum(item["quantity"] * item["unit_price"] for item in items) + + +def apply_discount(amount, discount): + return amount - discount +PY + +cat > "$root/src/invoice_math/summary.py" <<'PY' +from .pricing import apply_discount, subtotal + + +def render_invoice(items, discount=0.0): + total = apply_discount(subtotal(items), discount) + return f"Invoice total: {total:.2f}" +PY + +cat > "$root/tests/test_invoice.py" <<'PY' +import pathlib +import sys +import unittest + +sys.path.insert(0, str(pathlib.Path(__file__).resolve().parents[1] / "src")) + +from invoice_math.summary import render_invoice + + +class InvoiceSummaryTests(unittest.TestCase): + def test_discount_is_capped_at_zero(self): + items = [{"quantity": 1, "unit_price": 8.0}] + self.assertEqual(render_invoice(items, discount=10.0), "Final total: 0.00") + + def test_summary_uses_final_total_label(self): + items = [{"quantity": 2, 
"unit_price": 7.75}] + self.assertEqual(render_invoice(items), "Final total: 15.50") + + +if __name__ == "__main__": + unittest.main() +PY + +cat > "$root/README.md" <<'MD' +# Invoice Multi-file Fixture + +Disposable external dogfood fixture for a two-file edit: + +- cap discounts at zero in `src/invoice_math/pricing.py` +- rename the rendered invoice label in `src/invoice_math/summary.py` + +Validation command: + +```bash +python -m unittest discover -s tests +``` +MD + +( + cd "$root" + git init -q + git config user.email "deepseek-fixture@example.invalid" + git config user.name "DeepSeek Fixture" + git add README.md src tests + git commit -q -m "Create invoice multi-file fixture" +) + +set +e +( + cd "$root" + "$python_bin" -m unittest discover -s tests >/tmp/deepseek-multifile-fixture-test.log 2>&1 +) +test_status=$? +set -e + +if [[ "$test_status" -eq 0 ]]; then + echo "expected initial fixture tests to fail, but they passed" >&2 + cat /tmp/deepseek-multifile-fixture-test.log >&2 + exit 1 +fi + +task='replace `return amount - discount` with `return max(amount - discount, 0.0)` in src/invoice_math/pricing.py and replace `Invoice total` with `Final total` in src/invoice_math/summary.py, validate with python -m unittest discover -s tests' + +cat < PathBuf { .unwrap_or_default() .as_millis() % 100_000; - std::env::temp_dir().join(format!("dsc-shell-fixture-{}-{suffix}", std::process::id())) + short_temp_root().join(format!("dsc-shell-fixture-{}-{suffix}", std::process::id())) +} + +fn short_temp_root() -> PathBuf { + let tmp = PathBuf::from("/tmp"); + if tmp.is_dir() { + return tmp; + } + std::env::temp_dir() } fn run_shell_fixture_smoke_checks(report: &mut ShellFixtureSmokeReport) { @@ -7180,11 +7188,15 @@ fn shell_supervisor_control_smoke( ) -> AppResult { let tty = cfg!(all(unix, target_os = "linux")); let wait_timeout = timeout_ms.min(5000); - let start_request = format!( - "{{\"method\":\"start\",\"arguments\":{{\"command\":\"echo 
deepseek-shell-supervisor-smoke\",\"tty\":{},\"tty_rows\":24,\"tty_cols\":80,\"timeout_ms\":{}}}}}\n", - if tty { "true" } else { "false" }, - wait_timeout - ); + let start_request = if tty { + format!( + "{{\"method\":\"start\",\"arguments\":{{\"command\":\"echo deepseek-shell-supervisor-smoke\",\"tty\":true,\"tty_rows\":24,\"tty_cols\":80,\"timeout_ms\":{wait_timeout}}}}}\n" + ) + } else { + format!( + "{{\"method\":\"start\",\"arguments\":{{\"command\":\"echo deepseek-shell-supervisor-smoke\",\"tty\":false,\"timeout_ms\":{wait_timeout}}}}}\n" + ) + }; let start_response = shell_supervisor_request_raw(socket, "start", &start_request)?; let task_id = shell_supervisor_response_string(&start_response, "task_id") .ok_or_else(|| app_error("shell supervisor start smoke response missing task_id"))?; diff --git a/src/tools/exec_shell.rs b/src/tools/exec_shell.rs index e8396ca..bfdb31a 100644 --- a/src/tools/exec_shell.rs +++ b/src/tools/exec_shell.rs @@ -12,7 +12,9 @@ use std::error::Error; #[cfg(unix)] use std::fs::File; use std::fs::{self, OpenOptions}; -use std::io::{Read, Write}; +#[cfg(any(all(unix, target_os = "linux"), windows))] +use std::io::Read; +use std::io::Write; use std::path::{Path, PathBuf}; use std::process::{Child, ChildStdin, Command, Stdio}; #[cfg(all(unix, target_os = "linux"))]