diff --git a/docs/demo/README.md b/docs/demo/README.md index 2621f8e..ccbbe83 100644 --- a/docs/demo/README.md +++ b/docs/demo/README.md @@ -90,6 +90,18 @@ Record with a local preview server for GIF/MP4 capture: DEEPSEEK_2048_KEY_FILE=/tmp/deepseek-2048.key docs/demo/record-2048-demo.sh --serve ``` +The 2048 recorder defaults to `DEEPSEEK_2048_MODEL=deepseek-v4-pro`, a longer +model stream timeout, a larger per-turn output cap for code-generating tool +calls, and one retry because launch captures are expensive to restart: +`DSCODE_MODEL_STREAM_TIMEOUT_SECS=240`, `DSCODE_MODEL_MAX_TOKENS=4096`, and +`DEEPSEEK_2048_ATTEMPTS=2`. +It also validates the generated app with `node --check app.js`, required DOM +ids, linked assets, byte counts, `git status --short`, and `git diff --stat`. +The model process runs with an isolated `HOME` and a temporary `demo-2048` skill +that exposes only `list_files` and `write_file`, so local/user skill +auto-selection cannot change the demo tool surface or spend steps on unrelated +tools. + The script prints the disposable demo repo and transcript path. Keep raw transcripts only after reviewing them for local paths and generated content quality. Use `--cleanup` only after recording any browser gameplay you need. diff --git a/docs/demo/record-2048-demo.sh b/docs/demo/record-2048-demo.sh index 7b01117..a516126 100755 --- a/docs/demo/record-2048-demo.sh +++ b/docs/demo/record-2048-demo.sh @@ -17,11 +17,22 @@ Environment: DEEPSEEK_2048_BIN DeepSeekCode binary to run. Defaults to target/debug/deepseek, then PATH deepseek, then builds target/debug/deepseek. + DEEPSEEK_2048_MODEL Model override for the demo run. Defaults to + deepseek-v4-pro. DEEPSEEK_2048_BUDGET Agent step budget. Defaults to 16. + DEEPSEEK_2048_ATTEMPTS Model-backed exec attempts. Defaults to 2. DEEPSEEK_2048_OUT Transcript path. Defaults to a timestamped file in docs/demo/. DEEPSEEK_2048_WORKDIR Parent directory for the disposable repo. + DEEPSEEK_2048_HOME Isolated HOME for the model-backed demo process. + Defaults to a timestamped directory next to the + disposable repo. DEEPSEEK_2048_PROMPT Override the coding task prompt. + DSCODE_MODEL_STREAM_TIMEOUT_SECS + Streaming model request timeout. Defaults to 240 + for this demo script. + DSCODE_MODEL_MAX_TOKENS Maximum model output tokens per tool-call turn. + Defaults to 4096 for this demo script. The transcript is source evidence for GIF/MP4 capture. Review generated media before committing it. Do not publish a run unless it used a real model call. @@ -72,10 +83,20 @@ script_dir=$(CDPATH= cd -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd) repo_root=$(CDPATH= cd -- "$script_dir/../.." && pwd) run_id=$(date +%Y%m%d-%H%M%S) demo_budget=${DEEPSEEK_2048_BUDGET:-16} +demo_attempts=${DEEPSEEK_2048_ATTEMPTS:-2} +demo_model=${DEEPSEEK_2048_MODEL:-${DEEPSEEK_MODEL:-deepseek-v4-pro}} +stream_timeout=${DSCODE_MODEL_STREAM_TIMEOUT_SECS:-240} +max_tokens=${DSCODE_MODEL_MAX_TOKENS:-4096} demo_out=${DEEPSEEK_2048_OUT:-"$repo_root/docs/demo/deepseek-code-2048-demo-$run_id.log"} work_parent=${DEEPSEEK_2048_WORKDIR:-"${TMPDIR:-/tmp}"} demo_repo="$work_parent/deepseek-code-2048-demo-$run_id" -demo_prompt=${DEEPSEEK_2048_PROMPT:-"Build a playable 2048 web game in this empty repository using plain HTML, CSS, and JavaScript. Create index.html, styles.css, and app.js. Requirements: a 4x4 board, keyboard arrow controls, tile merging, score tracking, random new tiles, win/game-over messaging, and a restart button. Keep the UI polished, lightweight, and concise. Use small, clear patches for each file. After writing files, run a validation command that verifies index.html, styles.css, and app.js exist and prints their byte sizes."} +demo_runtime_home=${DEEPSEEK_2048_HOME:-"$work_parent/deepseek-code-2048-home-$run_id"} +demo_prompt=${DEEPSEEK_2048_PROMPT:-"Build a minimal playable 2048 web game in exactly three files: app.js, index.html, and styles.css. The repo has only README.md. Use this tool order: list_files once, write_file app.js, write_file index.html, write_file styles.css, then final text. Do not call todo_write, checklist tools, read_file, run_shell, exec_shell, or git tools. Keep app.js compact: no comments, no animation, no localStorage, use short helper functions, target under 90 lines. Requirements: 4x4 board, arrow keys, merge equal tiles, score, random new tiles, win or game-over text, and a restart button. Use exactly these DOM ids: grid, score, message, restart. Do not use a different board id. Keep styling plain."} + +if [[ ! "$demo_attempts" =~ ^[1-9][0-9]*$ ]]; then + echo "DEEPSEEK_2048_ATTEMPTS must be a positive integer: $demo_attempts" >&2 + exit 2 +fi redact_demo_stream() { awk ' @@ -146,8 +167,14 @@ if [[ "$dry_run" -eq 1 ]]; then echo "DeepSeekCode 2048 demo dry run" echo "repo_root: $repo_root" echo "demo_repo: $demo_repo" + echo "runtime_home: $demo_runtime_home" echo "transcript: $demo_out" + echo "model: $demo_model" + echo "skill: demo-2048" echo "budget: $demo_budget" + echo "attempts: $demo_attempts" + echo "stream timeout seconds: $stream_timeout" + echo "max tokens: $max_tokens" echo "serve: $serve" echo "prompt: $demo_prompt" echo "status: dry-run only; no API call, repository creation, or transcript write" @@ -200,9 +227,29 @@ if [[ ! -x "$deepseek_bin" ]]; then fi mkdir -p "$demo_repo" +mkdir -p "$demo_runtime_home" +mkdir -p "$demo_runtime_home/skills" mkdir -p "$(dirname -- "$demo_out")" +cat > "$demo_runtime_home/skills/demo-2048.toml" <<'EOF' +name = "demo-2048" +description = "Build the 2048 launch demo with a narrow file-generation tool surface" +allowed_tools = ["list_files", "write_file"] +system_append = """ +Follow the user's requested tool order exactly. +Do not inspect files after writing them. +After writing styles.css, finish with a one-sentence summary. +""" +suggested_steps = [] + +[policy] +require_write_confirmation = false +require_shell_confirmation = false +shell_allowlist = [] +EOF + git -C "$demo_repo" init -q +printf '\n.dscode/\n' >> "$demo_repo/.git/info/exclude" git -C "$demo_repo" config user.email "demo@deepseekcode.local" git -C "$demo_repo" config user.name "DeepSeekCode Demo" cat > "$demo_repo/README.md" <<'EOF' @@ -215,45 +262,127 @@ EOF git -C "$demo_repo" add README.md git -C "$demo_repo" commit -q -m "Create empty 2048 demo repo" -run_session() { - cd "$demo_repo" - echo "DeepSeekCode 2048 model-backed demo" - echo "workspace: $demo_repo" - echo - echo "$ find . -maxdepth 2 -type f | sort" - find . -maxdepth 2 -type f | sort - echo - echo "$ DSCODE_AUTO_APPROVE_WRITES=1 DSCODE_AUTO_APPROVE_SHELL=1 $deepseek_bin exec --budget $demo_budget \"<2048 prompt>\"" - local exec_status=0 - DSCODE_AUTO_APPROVE_WRITES=1 \ - DSCODE_AUTO_APPROVE_SHELL=1 \ - "$deepseek_bin" exec --budget "$demo_budget" "$demo_prompt" || exec_status=$? - if [[ "$exec_status" -ne 0 ]]; then - echo "deepseek exec failed with status $exec_status" >&2 - return "$exec_status" - fi +reset_demo_attempt() { + rm -f -- index.html styles.css app.js + git reset -q --hard HEAD +} + +require_html_id() { + local id=$1 + grep -Eq "id=['\"]${id}['\"]" index.html +} + +validate_demo_output() { + local validation_status=0 echo echo "$ test -s index.html && test -s styles.css && test -s app.js" - local missing=0 for required_file in index.html styles.css app.js; do if [[ ! -s "$required_file" ]]; then echo "missing or empty required file: $required_file" >&2 - missing=1 + validation_status=1 fi done - if [[ "$missing" -ne 0 ]]; then - return 1 + if [[ "$validation_status" -ne 0 ]]; then + return "$validation_status" fi echo "required files present" + + echo + echo "$ node --check app.js" + if command -v node >/dev/null 2>&1; then + if node --check app.js; then + echo "app.js syntax ok" + else + validation_status=1 + fi + else + echo "node is required for demo validation but was not found" >&2 + validation_status=1 + fi + + echo + echo "$ grep expected DOM ids in index.html" + for id in grid score message restart; do + if require_html_id "$id"; then + echo "html id present: $id" + else + echo "missing expected html id: $id" >&2 + validation_status=1 + fi + done + + if grep -Eq "]+src=['\"]app.js['\"]" index.html; then + echo "script link present: app.js" + else + echo "missing script link to app.js" >&2 + validation_status=1 + fi + if grep -Eq "]+href=['\"]styles.css['\"]" index.html; then + echo "stylesheet link present: styles.css" + else + echo "missing stylesheet link to styles.css" >&2 + validation_status=1 + fi + echo echo "$ wc -c index.html styles.css app.js" wc -c index.html styles.css app.js echo - echo "$ git diff --stat" - git diff --stat + git add -N index.html styles.css app.js >/dev/null 2>&1 || true + echo "$ git status --short" + git status --short + echo + echo "$ git diff --stat -- index.html styles.css app.js" + git diff --stat -- index.html styles.css app.js echo echo "$ git diff -- index.html styles.css app.js | sed -n '1,220p'" git diff -- index.html styles.css app.js | sed -n '1,220p' + return "$validation_status" +} + +run_session() { + cd "$demo_repo" + echo "DeepSeekCode 2048 model-backed demo" + echo "workspace: $demo_repo" + echo + echo "$ find . -maxdepth 2 -type f | sort" + find . -maxdepth 2 -type f | sort + echo + echo "$ HOME=$demo_runtime_home DSCODE_SKILLS_DIR=$demo_runtime_home/skills DEEPSEEK_MODEL=$demo_model DSCODE_MODEL_STREAM_TIMEOUT_SECS=$stream_timeout DSCODE_MODEL_MAX_TOKENS=$max_tokens DSCODE_AUTO_APPROVE_WRITES=1 DSCODE_AUTO_APPROVE_SHELL=1 $deepseek_bin exec --skill demo-2048 --budget $demo_budget \"<2048 prompt>\"" + local exec_status=0 + local attempt=1 + while [[ "$attempt" -le "$demo_attempts" ]]; do + if [[ "$demo_attempts" -gt 1 ]]; then + echo "attempt: $attempt/$demo_attempts" + fi + exec_status=0 + DEEPSEEK_MODEL="$demo_model" \ + HOME="$demo_runtime_home" \ + DSCODE_SKILLS_DIR="$demo_runtime_home/skills" \ + DSCODE_MODEL_STREAM_TIMEOUT_SECS="$stream_timeout" \ + DSCODE_MODEL_MAX_TOKENS="$max_tokens" \ + DSCODE_AUTO_APPROVE_WRITES=1 \ + DSCODE_AUTO_APPROVE_SHELL=1 \ + "$deepseek_bin" exec --skill demo-2048 --budget "$demo_budget" "$demo_prompt" || exec_status=$? + if [[ "$exec_status" -eq 0 ]]; then + if validate_demo_output; then + break + fi + exec_status=1 + echo "demo validation attempt $attempt failed" >&2 + else + echo "deepseek exec attempt $attempt failed with status $exec_status" >&2 + fi + attempt=$((attempt + 1)) + if [[ "$attempt" -le "$demo_attempts" ]]; then + reset_demo_attempt + echo "retrying model-backed demo run in the same disposable repo" >&2 + fi + done + if [[ "$exec_status" -ne 0 ]]; then + echo "deepseek exec failed with status $exec_status" >&2 + return "$exec_status" + fi if [[ "$serve" -eq 1 ]]; then echo echo "$ python3 -m http.server 4173" @@ -283,7 +412,9 @@ fi if [[ "$cleanup" -eq 1 ]]; then rm -rf "$demo_repo" + rm -rf "$demo_runtime_home" echo "demo repo removed" + echo "runtime home removed" fi exit "$session_status" diff --git a/src/model/deepseek.rs b/src/model/deepseek.rs index c919421..2dbef77 100644 --- a/src/model/deepseek.rs +++ b/src/model/deepseek.rs @@ -24,6 +24,11 @@ use crate::util::json::{ use crate::util::process::StreamingProcess; use crate::util::sse::{read_frame, SseFrame}; +const DEFAULT_MODEL_STREAM_TIMEOUT_SECS: u64 = 180; +const MAX_MODEL_STREAM_TIMEOUT_SECS: u64 = 900; +const DEFAULT_MODEL_MAX_TOKENS: u64 = 4096; +const MAX_MODEL_COMPLETION_TOKENS: u64 = 32_768; + pub struct DeepSeekClient { pub config: ModelConfig, } @@ -239,13 +244,14 @@ impl DeepSeekClient { }; let tool_fields = openai_tool_fields(&input.available_tools, reasoning); let reasoning_fields = openai_reasoning_fields(&self.config.base_url, reasoning); + let max_tokens = model_max_tokens().to_string(); let body = format!( concat!( "{{", "\"model\":\"{}\",", "{}", "{}", - "\"max_tokens\":1024,", + "\"max_tokens\":{},", "\"stream\":true,", "\"stream_options\":{{\"include_usage\":true}},", "{}", @@ -258,17 +264,19 @@ impl DeepSeekClient { json_escape(&route.model), temperature_field, reasoning_fields, + max_tokens, tool_fields, json_escape(&system_prompt), user_message, ); let auth = format!("Authorization: Bearer {api_key}"); + let stream_timeout = model_stream_timeout_secs().to_string(); let args = [ "-sS", "-N", "--max-time", - "60", + stream_timeout.as_str(), "-X", "POST", endpoint.as_str(), @@ -321,11 +329,12 @@ impl DeepSeekClient { let reasoning = route.reasoning; let tool_fields = anthropic_tool_fields(&input.available_tools, reasoning); let reasoning_fields = reasoning.anthropic_fields(); + let max_tokens = model_max_tokens().to_string(); let body = format!( concat!( "{{", "\"model\":\"{}\",", - "\"max_tokens\":1024,", + "\"max_tokens\":{},", "\"stream\":true,", "{}", "{}", @@ -336,6 +345,7 @@ impl DeepSeekClient { "}}" ), json_escape(&route.model), + max_tokens, reasoning_fields, tool_fields, json_escape(&system_prompt), @@ -343,11 +353,12 @@ impl DeepSeekClient { ); let api_header = format!("x-api-key: {api_key}"); + let stream_timeout = model_stream_timeout_secs().to_string(); let args = [ "-sS", "-N", "--max-time", - "60", + stream_timeout.as_str(), "-X", "POST", endpoint.as_str(), @@ -1348,6 +1359,28 @@ Rules:\n\ ) } +fn model_stream_timeout_secs() -> u64 { + model_stream_timeout_secs_from(env::var("DSCODE_MODEL_STREAM_TIMEOUT_SECS").ok().as_deref()) +} + +fn model_stream_timeout_secs_from(raw: Option<&str>) -> u64 { + raw.and_then(|value| value.trim().parse::().ok()) + .filter(|value| *value > 0) + .map(|value| value.min(MAX_MODEL_STREAM_TIMEOUT_SECS)) + .unwrap_or(DEFAULT_MODEL_STREAM_TIMEOUT_SECS) +} + +fn model_max_tokens() -> u64 { + model_max_tokens_from(env::var("DSCODE_MODEL_MAX_TOKENS").ok().as_deref()) +} + +fn model_max_tokens_from(raw: Option<&str>) -> u64 { + raw.and_then(|value| value.trim().parse::().ok()) + .filter(|value| *value > 0) + .map(|value| value.min(MAX_MODEL_COMPLETION_TOKENS)) + .unwrap_or(DEFAULT_MODEL_MAX_TOKENS) +} + fn run_curl_json(args: &[&str], body: &str) -> AppResult { let mut process = crate::util::process::spawn_streaming_with_stdin("curl", args, body)?; let mut output = String::new(); @@ -9880,6 +9913,26 @@ diff --git a/src/cli/app.rs b/src/cli/app.rs\n"; assert_eq!(usage.prompt_cache_miss, 21); } + #[test] + fn model_stream_timeout_uses_agent_friendly_default_and_bounds_env() { + assert_eq!(super::model_stream_timeout_secs_from(None), 180); + assert_eq!(super::model_stream_timeout_secs_from(Some("")), 180); + assert_eq!(super::model_stream_timeout_secs_from(Some("abc")), 180); + assert_eq!(super::model_stream_timeout_secs_from(Some("0")), 180); + assert_eq!(super::model_stream_timeout_secs_from(Some("240")), 240); + assert_eq!(super::model_stream_timeout_secs_from(Some("9999")), 900); + } + + #[test] + fn model_max_tokens_uses_code_agent_default_and_bounds_env() { + assert_eq!(super::model_max_tokens_from(None), 4096); + assert_eq!(super::model_max_tokens_from(Some("")), 4096); + assert_eq!(super::model_max_tokens_from(Some("abc")), 4096); + assert_eq!(super::model_max_tokens_from(Some("0")), 4096); + assert_eq!(super::model_max_tokens_from(Some("8192")), 8192); + assert_eq!(super::model_max_tokens_from(Some("999999")), 32_768); + } + use crate::error::AppResult; use crate::ui::stream::{NoopStreamEvents, StreamEvents}; use crate::util::cancel::CancellationCheck;