From 76fe737156d5bf619c89c79c9176a53142fcf747 Mon Sep 17 00:00:00 2001 From: Peter L Jones Date: Sun, 26 Apr 2026 15:36:28 +0100 Subject: [PATCH] AI ReleaseAnnouncement.md maintenance --- .github/release-announcement-template.md | 41 + .../backfill-release-announcement.yml | 130 +++ .github/workflows/coding-style-check.yml | 3 +- .../workflows/update-release-announcement.yml | 246 ++++ .gitignore | 4 + ReleaseAnnouncement.md | 41 + tools/release_announcement/README.md | 137 +++ .../prompts/classification.prompt.yml | 51 + .../prompts/consolidation.prompt.yml | 46 + .../prompts/extraction.prompt.yml | 44 + .../prompts/ranking.prompt.yml | 25 + .../prompts/release-announcement.prompt.yml | 158 +++ tools/release_announcement/pyproject.toml | 26 + tools/release_announcement/pytest.ini | 4 + .../src/release_announcement/__init__.py | 1 + .../src/release_announcement/__main__.py | 7 + .../src/release_announcement/app_logger.py | 69 ++ .../release_announcement/backends/__init__.py | 1 + .../common_adapter_utils.py | 20 + .../distillation_adapters/github_adapter.py | 428 +++++++ .../distillation_adapters/ollama_adapter.py | 392 +++++++ .../backends/github_backend.py | 596 ++++++++++ .../backends/ollama_backend.py | 83 ++ .../capability_probing.py | 183 +++ .../src/release_announcement/cli_config.py | 398 +++++++ .../src/release_announcement/distillation.py | 987 ++++++++++++++++ .../src/release_announcement/main.py | 1003 +++++++++++++++++ .../src/release_announcement/registry.py | 130 +++ .../src/release_announcement/skip_rules.py | 49 + .../staged_routing_adapter.py | 94 ++ .../src/release_announcement/token_utils.py | 61 + .../template-ReleaseAnnouncement.md | 41 + .../tests/BASELINE_COMPLETION.txt | 31 + .../tests/BASELINE_MATRIX_README.md | 254 +++++ tools/release_announcement/tests/__init__.py | 0 .../release_announcement/tests/assertions.py | 50 + .../tests/cli_invocation.py | 34 + tools/release_announcement/tests/conftest.py | 43 + .../tests/dummy_backend.py | 185 +++ .../run-release-announcement-baseline.sh | 167 +++ tools/release_announcement/tests/test_data.py | 17 + .../tests/test_delay_functionality.py | 124 ++ .../tests/test_distillation_dummy_adapter.py | 555 +++++++++ .../tests/test_distillation_github_adapter.py | 254 +++++ .../tests/test_distillation_ollama_adapter.py | 459 ++++++++ .../tests/test_distillation_orchestration.py | 597 ++++++++++ .../tests/test_distillation_schemas.py | 581 ++++++++++ .../tests/test_hybrid_backend_integration.py | 53 + .../tests/test_placeholder_prompts.py | 472 ++++++++ .../tests/test_registry.py | 82 ++ .../tests/test_registry_lazy_init.py | 119 ++ .../tests/test_regression_matrix.py | 207 ++++ .../tests/test_staged_pipeline_integration.py | 96 ++ .../tests/test_step2_pipeline.py | 510 +++++++++ .../tests/test_step2_stub_integration.py | 229 ++++ .../tests/test_step3_cli_capabilities.py | 477 ++++++++ ...est_step4_process_commit_error_handling.py | 151 +++ .../test_step4_startup_probe_ordering.py | 93 ++ .../tests/test_step8_output_parity.py | 83 ++ tools/update-release-announcement.sh | 43 + 60 files changed, 11464 insertions(+), 1 deletion(-) create mode 100644 .github/release-announcement-template.md create mode 100644 .github/workflows/backfill-release-announcement.yml create mode 100644 .github/workflows/update-release-announcement.yml create mode 100644 ReleaseAnnouncement.md create mode 100644 tools/release_announcement/README.md create mode 100644 tools/release_announcement/prompts/classification.prompt.yml create mode 100644 tools/release_announcement/prompts/consolidation.prompt.yml create mode 100644 tools/release_announcement/prompts/extraction.prompt.yml create mode 100644 tools/release_announcement/prompts/ranking.prompt.yml create mode 100644 tools/release_announcement/prompts/release-announcement.prompt.yml create mode 100644 tools/release_announcement/pyproject.toml create mode 100644 tools/release_announcement/pytest.ini create mode 100644 tools/release_announcement/src/release_announcement/__init__.py create mode 100644 tools/release_announcement/src/release_announcement/__main__.py create mode 100644 tools/release_announcement/src/release_announcement/app_logger.py create mode 100644 tools/release_announcement/src/release_announcement/backends/__init__.py create mode 100644 tools/release_announcement/src/release_announcement/backends/distillation_adapters/common_adapter_utils.py create mode 100644 tools/release_announcement/src/release_announcement/backends/distillation_adapters/github_adapter.py create mode 100644 tools/release_announcement/src/release_announcement/backends/distillation_adapters/ollama_adapter.py create mode 100644 tools/release_announcement/src/release_announcement/backends/github_backend.py create mode 100644 tools/release_announcement/src/release_announcement/backends/ollama_backend.py create mode 100644 tools/release_announcement/src/release_announcement/capability_probing.py create mode 100644 tools/release_announcement/src/release_announcement/cli_config.py create mode 100644 tools/release_announcement/src/release_announcement/distillation.py create mode 100644 tools/release_announcement/src/release_announcement/main.py create mode 100644 tools/release_announcement/src/release_announcement/registry.py create mode 100644 tools/release_announcement/src/release_announcement/skip_rules.py create mode 100644 tools/release_announcement/src/release_announcement/staged_routing_adapter.py create mode 100644 tools/release_announcement/src/release_announcement/token_utils.py create mode 100644 tools/release_announcement/template-ReleaseAnnouncement.md create mode 100644 tools/release_announcement/tests/BASELINE_COMPLETION.txt create mode 100644 tools/release_announcement/tests/BASELINE_MATRIX_README.md create mode 100644 tools/release_announcement/tests/__init__.py create mode 100644 tools/release_announcement/tests/assertions.py create mode 100644 tools/release_announcement/tests/cli_invocation.py create mode 100644 tools/release_announcement/tests/conftest.py create mode 100644 tools/release_announcement/tests/dummy_backend.py create mode 100755 tools/release_announcement/tests/run-release-announcement-baseline.sh create mode 100644 tools/release_announcement/tests/test_data.py create mode 100644 tools/release_announcement/tests/test_delay_functionality.py create mode 100644 tools/release_announcement/tests/test_distillation_dummy_adapter.py create mode 100644 tools/release_announcement/tests/test_distillation_github_adapter.py create mode 100644 tools/release_announcement/tests/test_distillation_ollama_adapter.py create mode 100644 tools/release_announcement/tests/test_distillation_orchestration.py create mode 100644 tools/release_announcement/tests/test_distillation_schemas.py create mode 100644 tools/release_announcement/tests/test_hybrid_backend_integration.py create mode 100644 tools/release_announcement/tests/test_placeholder_prompts.py create mode 100644 tools/release_announcement/tests/test_registry.py create mode 100644 tools/release_announcement/tests/test_registry_lazy_init.py create mode 100644 tools/release_announcement/tests/test_regression_matrix.py create mode 100644 tools/release_announcement/tests/test_staged_pipeline_integration.py create mode 100644 tools/release_announcement/tests/test_step2_pipeline.py create mode 100644 tools/release_announcement/tests/test_step2_stub_integration.py create mode 100644 tools/release_announcement/tests/test_step3_cli_capabilities.py create mode 100644 tools/release_announcement/tests/test_step4_process_commit_error_handling.py create mode 100644 tools/release_announcement/tests/test_step4_startup_probe_ordering.py create mode 100644 tools/release_announcement/tests/test_step8_output_parity.py create mode 100755 tools/update-release-announcement.sh diff --git a/.github/release-announcement-template.md b/.github/release-announcement-template.md new file mode 100644 index 0000000000..b14257c852 --- /dev/null +++ b/.github/release-announcement-template.md @@ -0,0 +1,41 @@ +# Jamulus Next Release — Working Announcement Draft + +> **Note for maintainers:** This is a working draft, automatically updated by GitHub Copilot +> as PRs are merged to `main`. Please review, polish, and publish to +> [GitHub Discussions (Announcements)](https://github.com/orgs/jamulussoftware/discussions) +> and other channels when the release is ready. +> +> Run [`tools/get_release_contributors.py`](tools/get_release_contributors.py) to compile +> the full contributor list before publishing. +> +> See the [ChangeLog](ChangeLog) for the complete technical record of all changes. + +Here's what's new in the next release of Jamulus: + + + +## For everyone + +## For Windows users + +## For macOS users + +## For mobile users (iOS & Android) + +## For server operators + +## Translations + +--- + +As always, all feedback on the new version is welcome. Please raise any problems in a new bug report or discussion topic. + +--- + +**REMINDER:** Those of you with virus checkers are likely to find the Windows installer incorrectly flagged as a virus. This is because the installer is open source and virus checkers cannot be bothered to check what it installs, so assume that it's going to be malign. If you download the installer *only from the official release*, you should be safe to ignore any warning. + +--- + +*A big thanks to all contributors who made this release possible.* + +*This draft is automatically maintained by the [Update Release Announcement](.github/workflows/update-release-announcement.yml) workflow.* diff --git a/.github/workflows/backfill-release-announcement.yml b/.github/workflows/backfill-release-announcement.yml new file mode 100644 index 0000000000..16549c51c3 --- /dev/null +++ b/.github/workflows/backfill-release-announcement.yml @@ -0,0 +1,130 @@ +name: Backfill Release Announcement +run-name: Backfill from ${{ inputs.since_tag || 'r3_11_0' }}${{ inputs.dry_run == 'true' && ' (dry run)' || '' }} + +# This workflow runs the release_announcement Python package to populate +# ReleaseAnnouncement.md with every merged PR since a given release tag. +# +# Trigger it once after a release tag is cut (e.g. r3_11_0) and this workflow +# file is merged to main. It processes PRs in chronological order, calling +# the GitHub Models API (openai/gpt-4o) for each one so the announcement builds +# up exactly as it would have done if the per-PR workflow had been running all +# along. +# +# The script commits one separate commit per PR that produced a user-relevant +# change, then those commits are pushed back to the branch for review via PR. +# Run this workflow from a non-main branch; the push step will update that branch. + +on: + workflow_dispatch: + inputs: + since_tag: + description: >- + **Git release tag** to backfill from. + PRs merged *after* this tag will be processed. + (default: r3_11_0) + required: false + default: 'r3_11_0' + dry_run: + description: >- + **Dry run**: commit changes locally but do not push; attach + ReleaseAnnouncement.md as a workflow artifact for review. + required: false + default: 'false' + type: choice + options: + - 'false' + - 'true' + allow_repo_override: + description: >- + **Allow non-jamulussoftware repositories**: set to 'true' to run this workflow + outside the canonical repository owner. + required: false + default: 'false' + type: choice + options: + - 'false' + - 'true' + +permissions: {} + +jobs: + backfill: + name: Backfill release announcement + # Default safety behavior limits execution to the canonical owner. + # Set workflow_dispatch input allow_repo_override=true to bypass this. + if: github.repository_owner == 'jamulussoftware' || inputs.allow_repo_override == 'true' + runs-on: ubuntu-latest + env: + SINCE_TAG: ${{ inputs.since_tag || 'r3_11_0' }} + DRY_RUN: ${{ inputs.dry_run || 'false' }} + permissions: + contents: write + models: read + + steps: + - name: Refuse to run on main + if: github.ref == 'refs/heads/main' + run: | + echo "::error::Backfill must not run directly on main. Trigger this workflow from a non-main branch and merge via PR." + exit 1 + + - uses: actions/checkout@v6 + with: + fetch-depth: 0 + + - name: Fetch tags + run: | + git fetch --force --tags "https://github.com/${{ github.repository }}.git" + + - name: Configure git identity + run: | + git config user.email "41898282+github-actions[bot]@users.noreply.github.com" + git config user.name "github-actions[bot]" + + - name: Install release announcement package + run: | + python3 -m pip install --user ./tools/release_announcement + + - name: Record baseline commit + id: baseline + run: | + echo "commit=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT" + + - name: Run backfill script + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + DRY_RUN_FLAG="" + [[ "$DRY_RUN" == "true" ]] && DRY_RUN_FLAG="--dry-run" + python3 -m release_announcement \ + "$SINCE_TAG" \ + "HEAD" \ + --file ReleaseAnnouncement.md \ + --backend actions \ + --delay-secs 5 \ + $DRY_RUN_FLAG + + - name: Check if backfill created commits + id: check-updated + run: | + current_commit=$(git rev-parse HEAD) + if [[ "$current_commit" == "${{ steps.baseline.outputs.commit }}" ]]; then + echo "updated=false" >> "$GITHUB_OUTPUT" + else + echo "updated=true" >> "$GITHUB_OUTPUT" + fi + + - name: Push commits + if: env.DRY_RUN != 'true' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + git push + + - name: Upload updated announcement + if: steps.check-updated.outputs.updated == 'true' + uses: actions/upload-artifact@v7 + with: + name: ReleaseAnnouncement-updated + path: ReleaseAnnouncement.md + diff --git a/.github/workflows/coding-style-check.yml b/.github/workflows/coding-style-check.yml index 494cbb3b14..af43727b0d 100644 --- a/.github/workflows/coding-style-check.yml +++ b/.github/workflows/coding-style-check.yml @@ -58,6 +58,7 @@ jobs: steps: - uses: actions/checkout@v6 - name: Install pylint - run: pip install --user "pylint < 3.0" + # Keep pylint >= 3.0: pylint<3.0 can crash with Python 3.12 (astroid visit_typealias error). + run: pip install --user "pylint >= 3.0" - name: Check Python files with pylint run: find ./tools -name '*.py' -print -exec pylint {} + diff --git a/.github/workflows/update-release-announcement.yml b/.github/workflows/update-release-announcement.yml new file mode 100644 index 0000000000..c1264661aa --- /dev/null +++ b/.github/workflows/update-release-announcement.yml @@ -0,0 +1,246 @@ +name: Update Release Announcement + +# This workflow maintains ReleaseAnnouncement.md — a working draft of the release +# announcement for Client users and Server operators — separate from the technical ChangeLog. +# +# On every merged PR to main: GitHub Copilot updates the draft with any user-relevant +# changes from that PR, in the same conversational bullet-point style used in real +# Jamulus beta/release announcements on GitHub Discussions. +# +# On every push to an autobuild* branch: GitHub Copilot generates a draft announcement +# for the HEAD commit and uploads it as a workflow artifact. This lets developers preview +# how their changes would appear in the announcement before the PR is merged, without +# creating any commits on the branch. +# +# On every full release tag (r__, no suffix): the draft is reset to the pristine +# template, ready for the next development cycle. Pre-release tags (beta, rc) do NOT reset +# the draft, so it can continue to build up until the final release. +# +# Security note (pull_request_target): +# - The workflow file and the AI prompt always come from main, never from the PR branch. +# - PR content is written to a temp file via an env variable before being passed to the +# AI — it never touches a YAML value directly, preventing injection issues. +# - No code from the PR is ever checked out or executed. +# See: https://securitylab.github.com/research/github-actions-preventing-pwn-requests/ + +on: + pull_request_target: + types: + - closed + branches: + - main + push: + branches: + - "autobuild**" + tags: + - "r*" + +permissions: {} + +jobs: + update-announcement: + name: Update announcement for merged PR + # Only run on actual merges (not just closed PRs) in the main jamulussoftware repo. + if: >- + github.repository_owner == 'jamulussoftware' && + github.event.pull_request.merged == true + runs-on: ubuntu-latest + permissions: + contents: write + models: read + + steps: + - uses: actions/checkout@v6 + with: + # Always check out the base branch (main), never the PR branch. + ref: main + fetch-depth: 2 + + - name: Check if announcement update should be skipped + id: check-skip + env: + PR_BODY: ${{ github.event.pull_request.body }} + run: | + # Skip when the PR author explicitly marked the change as not user-facing. + if printf '%s\n' "$PR_BODY" | grep -qE '^CHANGELOG:[[:space:]]*SKIP[[:space:]]*$'; then + echo "Skipping: PR is marked CHANGELOG: SKIP" + echo "skip=true" >> "$GITHUB_OUTPUT" + else + echo "skip=false" >> "$GITHUB_OUTPUT" + fi + + - name: Prepare PR metadata for AI prompt + id: prep-pr-info + if: steps.check-skip.outputs.skip == 'false' + env: + PR_NUMBER: ${{ github.event.pull_request.number }} + PR_TITLE: ${{ github.event.pull_request.title }} + PR_AUTHOR: ${{ github.event.pull_request.user.login }} + PR_BODY: ${{ github.event.pull_request.body }} + run: | + # Write all PR metadata to a temp file so it can be safely passed to the AI, + # avoiding any injection issues from PR body content. + printf 'PR #%s — %s\nby @%s\n\n%s\n' \ + "$PR_NUMBER" "$PR_TITLE" "$PR_AUTHOR" "$PR_BODY" \ + > "${RUNNER_TEMP}/pr_info.txt" + + - name: Install release announcement package + if: steps.check-skip.outputs.skip == 'false' + run: | + python3 -m pip install --user ./tools/release_announcement + + - name: Update Release Announcement with GitHub Copilot + if: steps.check-skip.outputs.skip == 'false' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + python3 -m release_announcement \ + "pr${{ github.event.pull_request.number }}" \ + --file ReleaseAnnouncement.md \ + --backend actions + + - name: Push updated announcement commits + if: steps.check-skip.outputs.skip == 'false' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + git config --global user.email "actions@github.com" + git config --global user.name "github-actions[bot]" + git push + + update-announcement-on-autobuild: + name: Update announcement for autobuild push + # Run on autobuild branch pushes. The result is uploaded as an artifact, not + # committed, so there is no push-loop risk and no contents: write needed. + if: startsWith(github.ref, 'refs/heads/autobuild') + runs-on: ubuntu-latest + permissions: + models: read + + steps: + - uses: actions/checkout@v6 + with: + # Check out the autobuild branch being pushed so the script can resolve + # SHA~ and read the current ReleaseAnnouncement.md as a baseline. + ref: ${{ github.ref }} + fetch-depth: 2 + + - name: Check if announcement update should be skipped + id: check-skip + env: + COMMIT_MESSAGE: ${{ github.event.head_commit.message }} + run: | + # Skip when the commit message explicitly marks the change as not user-facing. + if printf '%s\n' "$COMMIT_MESSAGE" | grep -qE '^CHANGELOG:[[:space:]]*SKIP[[:space:]]*$'; then + echo "Skipping: commit is marked CHANGELOG: SKIP" + echo "skip=true" >> "$GITHUB_OUTPUT" + else + echo "skip=false" >> "$GITHUB_OUTPUT" + fi + + - name: Prepare commit metadata for AI prompt + id: prep-commit-info + if: steps.check-skip.outputs.skip == 'false' + env: + COMMIT_SHA: ${{ github.event.head_commit.id }} + COMMIT_MESSAGE: ${{ github.event.head_commit.message }} + COMMIT_AUTHOR: ${{ github.event.head_commit.author.username }} + BRANCH_NAME: ${{ github.ref_name }} + run: | + # Write commit metadata to a temp file so it can be safely passed to the AI, + # avoiding any injection issues from the commit message content. + printf 'Commit %s on branch %s\nby @%s\n\n%s\n' \ + "$COMMIT_SHA" "$BRANCH_NAME" "$COMMIT_AUTHOR" "$COMMIT_MESSAGE" \ + > "${RUNNER_TEMP}/commit_info.txt" + + - name: Install release announcement package + if: steps.check-skip.outputs.skip == 'false' + run: | + python3 -m pip install --user ./tools/release_announcement + + - name: Update Release Announcement with GitHub Copilot + if: steps.check-skip.outputs.skip == 'false' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + # Get the commit message and author + COMMIT_MSG=$(git log -1 --pretty=%B) + COMMIT_AUTHOR=$(git log -1 --pretty=%an) + + # Create a temporary file with commit info + printf 'Commit %s\nby @%s\n\n%s\n' \ + "${{ github.sha }}" "$COMMIT_AUTHOR" "$COMMIT_MSG" \ + > "${RUNNER_TEMP}/commit_info.txt" + + # Call the release announcement script + python3 -m release_announcement \ + "${{ github.sha }}" \ + --file ReleaseAnnouncement.md \ + --backend actions + + - name: Check if announcement was updated + id: check-updated + if: steps.check-skip.outputs.skip == 'false' + run: | + if git diff --quiet ReleaseAnnouncement.md; then + echo "updated=false" >> "$GITHUB_OUTPUT" + else + echo "updated=true" >> "$GITHUB_OUTPUT" + fi + + - name: Upload draft announcement as artifact + if: steps.check-skip.outputs.skip == 'false' && steps.check-updated.outputs.updated == 'true' + uses: actions/upload-artifact@v7 + with: + name: draft-release-announcement + path: ReleaseAnnouncement.md + retention-days: 14 + + reset-after-release: + name: Reset announcement after full release + # Only run on tag pushes in the main jamulussoftware repo. + if: >- + github.repository_owner == 'jamulussoftware' && + github.event_name == 'push' + runs-on: ubuntu-latest + permissions: + contents: write + + steps: + - uses: actions/checkout@v6 + with: + ref: main + + - name: Check if this is a full (non-prerelease) release tag + id: check-tag + run: | + # Match only clean version tags like r3_12_0. + # Tags with any suffix (e.g. r3_12_0beta1, r3_12_0rc1) are pre-releases + # and intentionally do NOT reset the draft, so it keeps building up + # towards the final release announcement. + if [[ "${GITHUB_REF_NAME}" =~ ^r([0-9]+)_([0-9]+)_([0-9]+)$ ]]; then + major="${BASH_REMATCH[1]}" + minor="${BASH_REMATCH[2]}" + patch="${BASH_REMATCH[3]}" + echo "is_full_release=true" >> "$GITHUB_OUTPUT" + echo "version=${major}.${minor}.${patch}" >> "$GITHUB_OUTPUT" + else + echo "is_full_release=false" >> "$GITHUB_OUTPUT" + fi + + - name: Reset Release Announcement to template + if: steps.check-tag.outputs.is_full_release == 'true' + run: | + cp .github/release-announcement-template.md ReleaseAnnouncement.md + + - name: Commit and push reset announcement + if: steps.check-tag.outputs.is_full_release == 'true' + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: | + git config --global user.email "actions@github.com" + git config --global user.name "github-actions[bot]" + git add ReleaseAnnouncement.md + git diff --staged --quiet && exit 0 + git commit -m "docs: reset Release Announcement after v${{ steps.check-tag.outputs.version }} release" + git push diff --git a/.gitignore b/.gitignore index 3b23a7cd6d..6371f4de80 100644 --- a/.gitignore +++ b/.gitignore @@ -57,3 +57,7 @@ jamulus_plugin_import.cpp .github_release_changelog.md /debian/ .github/instructions/awesome-copilot-instructions +**/__pycache__ +dist/ +*.egg-info/ +*.egg-info diff --git a/ReleaseAnnouncement.md b/ReleaseAnnouncement.md new file mode 100644 index 0000000000..b14257c852 --- /dev/null +++ b/ReleaseAnnouncement.md @@ -0,0 +1,41 @@ +# Jamulus Next Release — Working Announcement Draft + +> **Note for maintainers:** This is a working draft, automatically updated by GitHub Copilot +> as PRs are merged to `main`. Please review, polish, and publish to +> [GitHub Discussions (Announcements)](https://github.com/orgs/jamulussoftware/discussions) +> and other channels when the release is ready. +> +> Run [`tools/get_release_contributors.py`](tools/get_release_contributors.py) to compile +> the full contributor list before publishing. +> +> See the [ChangeLog](ChangeLog) for the complete technical record of all changes. + +Here's what's new in the next release of Jamulus: + + + +## For everyone + +## For Windows users + +## For macOS users + +## For mobile users (iOS & Android) + +## For server operators + +## Translations + +--- + +As always, all feedback on the new version is welcome. Please raise any problems in a new bug report or discussion topic. + +--- + +**REMINDER:** Those of you with virus checkers are likely to find the Windows installer incorrectly flagged as a virus. This is because the installer is open source and virus checkers cannot be bothered to check what it installs, so assume that it's going to be malign. If you download the installer *only from the official release*, you should be safe to ignore any warning. + +--- + +*A big thanks to all contributors who made this release possible.* + +*This draft is automatically maintained by the [Update Release Announcement](.github/workflows/update-release-announcement.yml) workflow.* diff --git a/tools/release_announcement/README.md b/tools/release_announcement/README.md new file mode 100644 index 0000000000..c38c4106b8 --- /dev/null +++ b/tools/release_announcement/README.md @@ -0,0 +1,137 @@ +# release_announcement + +Python package for generating and updating `ReleaseAnnouncement.md` from merged PRs. + +## Entry Point + +Run from the repository root after installing the package: + +```bash +python -m release_announcement START [END] --file ReleaseAnnouncement.md [options] +``` + +Examples: + +```bash +python -m release_announcement HEAD~20 HEAD --file ReleaseAnnouncement.md --backend ollama --dry-run +python -m release_announcement pr3500 --file ReleaseAnnouncement.md --backend actions +``` + +## CLI Flags + +- Positional: `start` (required), `end` (optional) +- `--file` (required): Markdown file to update. +- `--prompt`: Prompt template file path. Default: `tools/release_announcement/prompts/release-announcement.prompt.yml`. +- `--chat-model`, `--model`: Chat model override. +- `--embedding-model`, `--embed`: Embedding model override (GitHub/actions backends). +- `--backend`: One of `ollama`, `github`, `actions`. +- `--delay-secs`: Delay before each PR is processed. +- `--dry-run`: Discover and report work without calling the LLM. +- `--pipeline`: One of `legacy` or `staged` (default). + - `legacy`: Existing raw-PR prompt path. + - `staged`: Step 2 stubbed preprocessing path; currently logs stub stages and falls back to legacy. + +## Install + +From repository root: + +```bash +python3 -m pip install ./tools/release_announcement +``` + +For development/test dependencies: + +```bash +python3 -m pip install -e ./tools/release_announcement[dev] +``` + +## Test Suite + +Use a virtual environment under `/tmp`: + +```bash +cd tools/release_announcement +rm -rf /tmp/release_announcement-test-venv +python3 -m venv /tmp/release_announcement-test-venv +. /tmp/release_announcement-test-venv/bin/activate +python -m pip install -e .[dev] +``` + +Run the default test suite (fast, no network): + +```bash +python -m pytest +``` + +Expected report for the default run: + +- Unit tests should pass. +- Integration/regression tests are expected to be skipped unless explicitly enabled. +- Typical output shape is similar to `6 passed, 2 skipped`. + +Run only unit tests: + +```bash +python -m pytest -m "not integration" +``` + +Optional integration checks: + +```bash +# Step 2 real-PR legacy vs staged-stub parity check (writes artifacts under build/) +RA_RUN_STEP2_E2E=1 python -m pytest tests/test_step2_stub_integration.py -m integration + +# Full regression matrix against baseline artifacts (writes artifacts under build/) +RA_RUN_MATRIX=1 python -m pytest tests/test_regression_matrix.py -m "integration and regression" +``` + +Expected report for integration/regression runs: + +- Tests run only when the required environment variable is set. +- If prerequisites are missing (tokens, clean worktree, baseline artifacts), pytest reports these tests as skipped with a reason. +- If enabled and prerequisites are satisfied, these tests compare output against baseline artifacts and fail on mismatches. + +### Troubleshooting Test Runs + +- `No module named pytest`: + - Ensure the `/tmp` venv is activated. + - Reinstall dev dependencies: `python -m pip install -e .[dev]`. + +- Integration tests are skipped unexpectedly: + - Confirm required env vars are set: + - `RA_RUN_STEP2_E2E=1` for Step 2 real-PR parity check. + - `RA_RUN_MATRIX=1` for full matrix regression. + - For GitHub backend checks, either set `GH_TOKEN`/`GITHUB_TOKEN` or ensure `gh auth token` works. + +- Regression test skipped for missing baseline: + - Ensure baseline artifacts exist under `tests/build/release-announcement-baseline/`. + - Or point to a different baseline with `RA_BASELINE_DIR=/path/to/baseline`. + +- Integration/regression tests skip because worktree is not clean: + - Commit/stash tracked changes first. + - Untracked files are allowed by the current checks. + +- Matrix regression fails with output mismatches: + - Compare generated artifacts under `tests/build/`. + - Strict markdown byte-identity checks are opt-in. Set `RA_STRICT_OUTPUT=1` to enforce exact output matching. + +## Baseline Matrix + +The release-announcement tool includes a baseline matrix runner for establishing reference outputs before restructuring: + +```bash +tests/run-release-announcement-baseline.sh +``` + +This captures the tool's current behavior against 4 representative PR scenarios with both Ollama and GitHub backends. Output is stored in `tests/build/release-announcement-baseline-YYYYMMDD_HHMMSS/`. See [tests/BASELINE_MATRIX_README.md](tests/BASELINE_MATRIX_README.md) for detailed guidance. + +## Integration Test Artifacts + +Integration and regression tests write artifacts to `tests/build/`, which is git-ignored. This includes: + +- Baseline matrix run outputs +- Step 2 real-PR parity check snapshots +- Regression matrix comparison results +- Temporary test run logs + + diff --git a/tools/release_announcement/prompts/classification.prompt.yml b/tools/release_announcement/prompts/classification.prompt.yml new file mode 100644 index 0000000000..07fbd50bcd --- /dev/null +++ b/tools/release_announcement/prompts/classification.prompt.yml @@ -0,0 +1,51 @@ +# Classification stage prompt for release announcement distillation. +# Assigns consolidated signals to release-note categories. + +messages: + - role: system + content: | + You classify consolidated release-note signals into categories. + + WHAT JAMULUS IS — read this carefully before writing any audience sections: + + Jamulus runs in two completely separate modes: + * CLIENT MODE (the default) — what the musicians run. It has a full GUI (by default), + audio input and output, control over mix heard, MIDI control, chat, + and connects to a server. + Client GUI features, MIDI features, audio improvements, platform-specific fixes + (Windows/macOS/Linux/iOS/Android), and JSON-RPC client-specific control belong here. + * SERVER MODE — a network relay daemon. It receives audio packets from connected + clients, mixes them for each client, and sends the mix back. It optionally runs with no GUI, + has NO audio input or output of its own, NO MIDI, or other client-specific features. + It can also be controlled via JSON-RPC, but only through server-specific RPC commands. + A "For server operators" section is only appropriate when the PR demonstrably changes + server behaviour (e.g. server networking, server-side logging, server list registration, + server JSON-RPC API). Never add a "For server operators" section for MIDI, audio, or + other client-only changes — those do not affect the server at all. + + Input: + - A JSON array of signal objects with fields: + change, impact, users_affected, confidence, final_outcome. + + Allowed categories: + - internal + - minor + - targeted + - major + - no_user_facing_changes + + Output contract: + - Return ONLY valid JSON. + - Top-level value MUST be a JSON object with keys: + - classified: array + - summary: string + - Each classified array item MUST be: + - signal: original signal object + - category: one allowed category value + + Classification rules: + - Use no_user_facing_changes only for signals that are not user-visible. + - Do not fabricate user-facing impact. + - An empty user-facing result is valid. + - If no user-visible changes exist, set summary to exactly: no_user_facing_changes + - Prefer classifications grounded in final outcomes over speculative discussion. diff --git a/tools/release_announcement/prompts/consolidation.prompt.yml b/tools/release_announcement/prompts/consolidation.prompt.yml new file mode 100644 index 0000000000..715ec2525d --- /dev/null +++ b/tools/release_announcement/prompts/consolidation.prompt.yml @@ -0,0 +1,46 @@ +# Consolidation stage prompt for release announcement distillation. +# Merges extracted Signal JSON objects into a deduplicated intermediate set. + +messages: + - role: system + content: | + You consolidate extracted release-note signals. + + WHAT JAMULUS IS — read this carefully before writing any audience sections: + + Jamulus runs in two completely separate modes: + * CLIENT MODE (the default) — what the musicians run. It has a full GUI (by default), + audio input and output, control over mix heard, MIDI control, chat, + and connects to a server. + Client GUI features, MIDI features, audio improvements, platform-specific fixes + (Windows/macOS/Linux/iOS/Android), and JSON-RPC client-specific control belong here. + * SERVER MODE — a network relay daemon. It receives audio packets from connected + clients, mixes them for each client, and sends the mix back. It optionally runs with no GUI, + has NO audio input or output of its own, NO MIDI, or other client-specific features. + It can also be controlled via JSON-RPC, but only through server-specific RPC commands. + A "For server operators" section is only appropriate when the PR demonstrably changes + server behaviour (e.g. server networking, server-side logging, server list registration, + server JSON-RPC API). Never add a "For server operators" section for MIDI, audio, or + other client-only changes — those do not affect the server at all. + + Input: + - A JSON array of signal objects. + - Signal schema fields: change, impact, users_affected, confidence, final_outcome. + + Output contract: + - Return ONLY valid JSON. + - Top-level value MUST be a JSON array. + - Each output item MUST preserve the same signal schema fields: + - change (string) + - impact (string) + - users_affected (string) + - confidence (string) + - final_outcome (boolean) + + Consolidation rules: + - Deduplicate semantically overlapping signals. + - Merge near-duplicates conservatively; do not invent new facts. + - Prefer final decisions over earlier speculation. + - If earlier comments conflict with later agreed outcomes, keep the later agreed outcome. + - Preserve meaningful coverage of distinct user-facing changes. + - If no valid user-facing signals remain, return [] (empty array). diff --git a/tools/release_announcement/prompts/extraction.prompt.yml b/tools/release_announcement/prompts/extraction.prompt.yml new file mode 100644 index 0000000000..8bfeec5f6e --- /dev/null +++ b/tools/release_announcement/prompts/extraction.prompt.yml @@ -0,0 +1,44 @@ +# Extraction stage prompt for release announcement distillation. +# Converts one ordered PR discussion chunk into structured Signal JSON records. + +messages: + - role: system + content: | + You extract release-note signals from a single PR discussion chunk. + + WHAT JAMULUS IS — read this carefully before writing any audience sections: + + Jamulus runs in two completely separate modes: + * CLIENT MODE (the default) — what the musicians run. It has a full GUI (by default), + audio input and output, control over mix heard, MIDI control, chat, + and connects to a server. + Client GUI features, MIDI features, audio improvements, platform-specific fixes + (Windows/macOS/Linux/iOS/Android), and JSON-RPC client-specific control belong here. + * SERVER MODE — a network relay daemon. It receives audio packets from connected + clients, mixes them for each client, and sends the mix back. It optionally runs with no GUI, + has NO audio input or output of its own, NO MIDI, or other client-specific features. + It can also be controlled via JSON-RPC, but only through server-specific RPC commands. + A "For server operators" section is only appropriate when the PR demonstrably changes + server behaviour (e.g. server networking, server-side logging, server list registration, + server JSON-RPC API). Never add a "For server operators" section for MIDI, audio, or + other client-only changes — those do not affect the server at all. + + Input scope: + - The chunk may come from PR body text, timeline comments, or inline code-review comments. + - Treat inline code-review comments as first-class evidence. + + Output contract: + - Return ONLY valid JSON. + - Top-level value MUST be a JSON array. + - Each array item MUST be an object with exactly these required fields: + - change (string): user-visible change description. + - impact (string): scope/severity (for example: low, medium, high). + - users_affected (string): user group(s) impacted. + - confidence (string): extraction confidence (for example: low, medium, high). + - final_outcome (boolean): true when this reflects the final agreed outcome. + + Decision rules: + - Include only user-visible changes or user-visible risks. + - Exclude implementation-only chatter, speculation, and internal process noise. + - Prefer merged outcomes and maintainer decisions over early discussion. + - If the chunk has no user-visible signal, return [] (empty array). diff --git a/tools/release_announcement/prompts/ranking.prompt.yml b/tools/release_announcement/prompts/ranking.prompt.yml new file mode 100644 index 0000000000..a623b990a2 --- /dev/null +++ b/tools/release_announcement/prompts/ranking.prompt.yml @@ -0,0 +1,25 @@ +# Ranking stage prompt for release announcement distillation. +# Used by tools/release_announcement/ during stage 2 (select relevant chunks). +# This prompt guides the model to rank PR discussion chunks by relevance. +# Template variables: {chunk_count}, {chunks} + +messages: + - role: system + content: | + You are a release announcement distillation assistant. Your task is to rank PR discussion chunks by their relevance to a release announcement. + + CONSTRAINTS: + - Return ONLY a valid JSON object with chunk indices as keys and scores as values. + - Each score must be between 0.0 (not relevant) and 1.0 (highly relevant). + - Never include explanatory text or markdown formatting outside the JSON object. + - role: user + content: | + Rank the following {chunk_count} chunks by relevance to a release announcement. + + Return a JSON object with chunk indices (0, 1, 2, etc.) as keys and scores (0.0-1.0) as values. + + EXAMPLE: + If you receive 3 chunks, return: {"0": 0.92, "1": 0.45, "2": 0.78} + + CHUNKS: + {chunks} diff --git a/tools/release_announcement/prompts/release-announcement.prompt.yml b/tools/release_announcement/prompts/release-announcement.prompt.yml new file mode 100644 index 0000000000..36a343ffd7 --- /dev/null +++ b/tools/release_announcement/prompts/release-announcement.prompt.yml @@ -0,0 +1,158 @@ +# AI prompt for maintaining ReleaseAnnouncement.md. +# Used by .github/workflows/update-release-announcement.yml via actions/ai-inference. +# Edit this file to adjust the style or rules without touching the workflow YAML. + +messages: + - role: system + content: | + You are a technical writer maintaining the working Release Announcement draft for Jamulus — + a free, open-source application that lets musicians rehearse, perform, and jam together in + real time over the internet. + + WHAT JAMULUS IS — read this carefully before writing any audience sections: + + Jamulus runs in two completely separate modes: + * CLIENT MODE (the default) — what the musicians run. It has a full GUI (by default), + audio input and output, control over mix heard, MIDI control, chat, + and connects to a server. + Client GUI features, MIDI features, audio improvements, platform-specific fixes + (Windows/macOS/Linux/iOS/Android), and JSON-RPC client-specific control belong here. + * SERVER MODE — a network relay daemon. It receives audio packets from connected + clients, mixes them for each client, and sends the mix back. It optionally runs with no GUI, + has NO audio input or output of its own, NO MIDI, or other client-specific features. + It can also be controlled via JSON-RPC, but only through server-specific RPC commands. + A "For server operators" section is only appropriate when the PR demonstrably changes + server behaviour (e.g. server networking, server-side logging, server list registration, + server JSON-RPC API). Never add a "For server operators" section for MIDI, audio, or + other client-only changes — those do not affect the server at all. + + This is a RELEASE ANNOUNCEMENT for end users, NOT a technical ChangeLog. Write in a + direct, editorial voice — as if sharing news with the community. Users are musicians, + not necessarily developers: speak to what they can now DO or what has IMPROVED for them, + not to what code was changed. + + STYLE — NARRATIVE PROSE, NOT BULLET POINTS: + The announcement is organised into audience-grouped sections, each written as short, + narrative paragraphs — the way a magazine might preview an upcoming release. + DO NOT use bullet-point lists. Instead, weave each change into flowing prose that + explains what changed AND why it matters to the reader. + + Each audience section starts with a level-2 heading (##) and is separated from the next + by "---". Use these standard section headings when applicable and NEVER INVENT CONTENT + just to populated a section: + + ## For everyone + ## For Windows users + ## For macOS users + ## For mobile users (iOS & Android) + ## For server operators + ## Translations + + When a change is significant enough to deserve its own spotlight, give it a dedicated + level-2 heading (e.g. "## MIDI gets a proper settings interface") placed BEFORE the + audience sections it relates to. Use subsections (### ⚠️ Breaking change, + ### Deprecation notice) when warranted. + A dedicated feature heading REPLACES directly related content in an ordinary audience-section + for that same feature. + If you give a change its own dedicated level-2 heading, do not repeat the same + feature as a separate narrative item under "For everyone" or other audience headings. + Mention each user-visible change only once in the body of the document. Audience sections + may add only genuinely SUBSTANTIAL audience-specific consequences that are not already + covered by the dedicated feature section. + + Where a change appears only to bump the version of a dependency, ensure there is a general + "General maintenance and bug fixes" section under "For everyone". Collect all such + changes there, and do not mention the specific dependency or technical details unless they + have a direct, noticeable impact on users (e.g. a bundled library upgrade that fixes a crash + or enables a new feature). This decision may require more careful review of the PR comments. + + Rules: + - STRICT source-of-truth rule: every factual statement must be traceable to + text contained within the current_announcement or pr_info. + NEVER infer, assume, speculate, or add likely user impact that is not EXPLICITLY supported by those inputs. + Only reword minimally to transform technical details into user-friendly language. + - Uncertainty rule: if relevance or user impact is ambiguous, make no edit + for that part. If uncertain overall, return the announcement unchanged. + - Minimal-edit rule: change only the smallest necessary text span needed to + integrate this PR. Do not rephrase unrelated paragraphs for style. + - Make NO CHANGES to the document structure outside of the audience section(s) relevant to this PR. + Only edit the section(s) that directly relate to the changes in this PR; + do not rearrange, add, or remove unrelated sections or paragraphs. + Do not re-order sections. + - Integrate each new change into the MOST APPROPRIATE audience section of the document + as narrative prose. Create a new audience section only when pr_info explicitly + describes a user-visible change for that audience and no existing section can + represent it cleanly. Do not add, remove, or modify the maintainer note block + or the REMINDER section. + - Empty audience sections are acceptable. DO NOT ADD FILLER TEXT OR DUPLICATE PROSE just + to avoid leaving an audience section empty. + - Do not use "## For everyone" as overflow for a feature that already has its own + dedicated level-2 heading. Leave "## For everyone" empty unless there is a separate, + general-audience change that is not already covered elsewhere. + - Do not describe the same feature twice. If a dedicated feature section already explains + the change, do not restate that same feature in an audience section unless there is a + SUBSTANTIAL, audience-specific consequence that is genuinely new information. + - When the document contains the HTML placeholder comment + "", + treat that comment as a fixed anchor, not editable content. Keep it immediately below + "Here's what's new in the next release of Jamulus:" and above all generated sections. + Never move it lower in the document. Add new sections after that comment, not before it, + and never rewrite or relocate the comment itself. + - Write in plain, friendly language. Use past tense for bug fixes, present tense for new + features or improvements. Every paragraph must be complete, grammatically correct prose. + - Use the CHANGELOG: line in the PR description (if present) as the starting point, + but transform it into plain, user-friendly language that conveys the benefit to + users rather than technical implementation details. Strip the category prefix + (Client:, Server:, Build:, Tools:, etc.) unless keeping it adds helpful context for + a non-technical reader — e.g. keep "Windows:", "macOS:", "iOS:", "Android:" for + OS-specific changes; keep "Server:" when distinguishing a server-only change is + genuinely useful. + - Do NOT credit individual contributors inline. The document ends with a single generic + thank-you line: "*A big thanks to all contributors who made this release possible.*" + - Only include changes that are relevant to end users or server operators. + Omit purely internal changes: CI configuration, build system, code style, developer + tooling, and routine dependency bumps — unless they have a direct, noticeable impact on + users (e.g. a bundled library upgrade that fixes a crash or enables a new feature). + - Within each section, mention more impactful changes first. + - When a new PR updates or extends a feature already described in the announcement, + revise the existing paragraph to reflect the final state of that feature rather than + adding a separate entry. The reader should see one clear description of what the + feature does NOW, not a history of how it evolved across PRs. + - Before returning the final document, VERIFY THAT NO FEATURE IS DESCRIBED TWICE under + different headings. If duplication exists, keep the strongest placement and remove the + duplicate wording. + - Example: if "## MIDI gets a proper settings interface" already explains the MIDI GUI, + learn mode, runtime toggling, or pick-up mode, DO NOT REPEAT those same points again in + "## For everyone" or OS-specific sections unless there is a SUBSTANTIAL, platform-specific + note that is genuinely not already covered. + - Do not remove paragraphs about unrelated features. Only rewrite prose that directly + overlaps with the new PR's changes. + - If this PR introduces no user-relevant changes, return the announcement COMPLETELY + UNCHANGED — identical bytes, same whitespace, same comments. Check this carefully. + - Output contract: return exactly one complete Markdown document and nothing else. + The first output character must be the first character of the document and the + last output character must be the final character of the document. + - Forbidden output: do not emit any preamble, explanation, confirmation text, + markdown code fences, metadata wrappers, or trailing notes. + + - role: user + content: | + Current working announcement: + ==== + {{current_announcement}} + ==== + + Newly merged pull request: + {{pr_info}} + ==== + + Update the Release Announcement to include any user-relevant changes from this PR. + Return the complete updated Markdown document only. + +model: openai/gpt-4o +modelParameters: + # High token limit to ensure the full document is always returned without truncation. + # The default max-tokens in actions/ai-inference is only 200, which would cut off the document. + maxCompletionTokens: 16384 + # Low temperature for consistent, deterministic output when editing a structured document. + temperature: 0.2 diff --git a/tools/release_announcement/pyproject.toml b/tools/release_announcement/pyproject.toml new file mode 100644 index 0000000000..4cee48722d --- /dev/null +++ b/tools/release_announcement/pyproject.toml @@ -0,0 +1,26 @@ +[build-system] +requires = ["setuptools>=68", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "release-announcement" +version = "0.1.0" +description = "Jamulus release announcement automation tool" +readme = "README.md" +requires-python = ">=3.10" +dependencies = [ + "pyyaml", + "ollama", + "pydantic", +] + +[project.optional-dependencies] +dev = [ + "pytest", +] + +[tool.setuptools] +package-dir = {"" = "src"} + +[tool.setuptools.packages.find] +where = ["src"] diff --git a/tools/release_announcement/pytest.ini b/tools/release_announcement/pytest.ini new file mode 100644 index 0000000000..b3beeaeb38 --- /dev/null +++ b/tools/release_announcement/pytest.ini @@ -0,0 +1,4 @@ +[pytest] +markers = + integration: Requires networked services and repository state. + regression: Runs baseline matrix comparisons against stored artifacts. diff --git a/tools/release_announcement/src/release_announcement/__init__.py b/tools/release_announcement/src/release_announcement/__init__.py new file mode 100644 index 0000000000..e870f1f2d8 --- /dev/null +++ b/tools/release_announcement/src/release_announcement/__init__.py @@ -0,0 +1 @@ +"""release_announcement package.""" diff --git a/tools/release_announcement/src/release_announcement/__main__.py b/tools/release_announcement/src/release_announcement/__main__.py new file mode 100644 index 0000000000..f3178ccf37 --- /dev/null +++ b/tools/release_announcement/src/release_announcement/__main__.py @@ -0,0 +1,7 @@ +"""Module entry point for `python -m release_announcement`.""" + +from .main import run_cli + + +if __name__ == "__main__": + run_cli() diff --git a/tools/release_announcement/src/release_announcement/app_logger.py b/tools/release_announcement/src/release_announcement/app_logger.py new file mode 100644 index 0000000000..fbaed66fa8 --- /dev/null +++ b/tools/release_announcement/src/release_announcement/app_logger.py @@ -0,0 +1,69 @@ +"""Shared logger for release_announcement runtime output.""" + +from __future__ import annotations + +import sys + + +class GitOperationError(Exception): + """Raised when git operations fail (commits, references, etc.).""" + + +class BackendValidationError(Exception): + """Raised when backend validation fails (unknown backend, etc.).""" + + +class AppLogger: + """Minimal level-based logger writing to stdout/stderr.""" + + LEVELS = { + "CRITICAL": 50, + "ERROR": 40, + "WARNING": 30, + "INFO": 20, + "DEBUG": 10, + "TRACE": 5, + } + + def __init__(self, level: str = "INFO") -> None: + self.set_level(level) + + def set_level(self, level: str) -> None: + """Set the active minimum log level.""" + self.level = self.LEVELS.get(level.upper(), 20) + + def log(self, level: str, message: str) -> None: + """Emit a log line if it meets the active level threshold.""" + lvl = self.LEVELS.get(level.upper(), 20) + if lvl < self.level: + return + stream = sys.stderr if lvl >= 30 else sys.stdout + formatted_message = f"[{level.upper()}] {message}" + print(formatted_message, file=stream) + + def critical(self, message: str) -> None: + """Log a critical message.""" + self.log("CRITICAL", message) + + def error(self, message: str) -> None: + """Log an error message.""" + self.log("ERROR", message) + + def warning(self, message: str) -> None: + """Log a warning message.""" + self.log("WARNING", message) + + def info(self, message: str) -> None: + """Log an informational message.""" + self.log("INFO", message) + + def debug(self, message: str) -> None: + """Log a debug message.""" + self.log("DEBUG", message) + + def trace(self, message: str) -> None: + """Log a trace message.""" + self.log("TRACE", message) + + +logger = AppLogger() diff --git a/tools/release_announcement/src/release_announcement/backends/__init__.py b/tools/release_announcement/src/release_announcement/backends/__init__.py new file mode 100644 index 0000000000..39695dcef6 --- /dev/null +++ b/tools/release_announcement/src/release_announcement/backends/__init__.py @@ -0,0 +1 @@ +"""Backend implementations for release announcement generation.""" diff --git a/tools/release_announcement/src/release_announcement/backends/distillation_adapters/common_adapter_utils.py b/tools/release_announcement/src/release_announcement/backends/distillation_adapters/common_adapter_utils.py new file mode 100644 index 0000000000..623acca92d --- /dev/null +++ b/tools/release_announcement/src/release_announcement/backends/distillation_adapters/common_adapter_utils.py @@ -0,0 +1,20 @@ +"""Common utilities for distillation adapters (Ollama, GitHub, etc).""" + +from typing import Any +import json + +def build_ranking_messages(ranking_prompts: list[dict[str, str]], chunks: list[Any]) -> list[dict[str, str]]: + """Format ranking prompts and chunk text for LLM ranking.""" + chunks_text = "\n\n".join(f"[{i}] {chunk.text}" for i, chunk in enumerate(chunks)) + messages = [] + for prompt_msg in ranking_prompts: + role = str(prompt_msg.get("role", "")) + content = str(prompt_msg.get("content", "")) + if role == "user": + content = content.format(chunk_count=len(chunks), chunks=chunks_text) + messages.append({"role": role, "content": content}) + return messages + +def build_signal_payload(signals: list[Any]) -> str: + """Serialize signals for LLM input.""" + return json.dumps([s.model_dump() for s in signals]) diff --git a/tools/release_announcement/src/release_announcement/backends/distillation_adapters/github_adapter.py b/tools/release_announcement/src/release_announcement/backends/distillation_adapters/github_adapter.py new file mode 100644 index 0000000000..b17679f6bd --- /dev/null +++ b/tools/release_announcement/src/release_announcement/backends/distillation_adapters/github_adapter.py @@ -0,0 +1,428 @@ +"""GitHub Models adapters for staged distillation pipeline phases. + +This module provides a single adapter class with two factory functions: +- github backend: env token first, then gh auth token fallback +- actions backend: GITHUB_TOKEN env only +""" + +from __future__ import annotations + +import json +import os +import subprocess +import urllib.error +import urllib.request +from dataclasses import dataclass +from typing import Any, Callable + +from ...distillation import ( + build_distilled_context, + Chunk, + ClassifiedSignals, + DistillationAdapter, + DistilledContext, + DistilledContextMetadata, + Signal, + _parse_classified_signals, + _parse_signal_list, +) +from .common_adapter_utils import build_ranking_messages, build_signal_payload +from ...registry import ModelNotFoundError, registry +from ...app_logger import logger + + +@dataclass +class GitHubAPIError(Exception): + """Structured API error for GitHub Models calls.""" + + message: str + status_code: int | None = None + headers: dict[str, str] | None = None + body: str | None = None + request_payload: dict[str, Any] | None = None + endpoint: str | None = None + + def __str__(self) -> str: + parts = [self.message] + if self.status_code is not None: + parts.append(f"(status code: {self.status_code})") + if self.body: + parts.append(f"response_body={self.body}") + return " ".join(parts) + + +class GitHubDistillationAdapter(DistillationAdapter): + """GitHub Models adapter implementing the staged distillation protocol.""" + + def __init__(self, token_resolver: Callable[[], str]) -> None: + self.token_resolver = token_resolver + self.chat_model = "openai/gpt-4o" + self.embedding_model = "openai/text-embedding-3-small" + # Preserve compatibility with callers that read legacy default-model attributes. + self._default_chat_model = self.chat_model + self._default_embedding_model = self.embedding_model + self.chat_endpoint = "https://models.github.ai/inference/chat/completions" + self.embedding_endpoint = "https://models.github.ai/inference/embeddings" + self._token_cache: str | None = None + + @property + def token(self) -> str: + if self._token_cache is None: + self._token_cache = self.token_resolver().strip() + return self._token_cache + + def _github_post_json( + self, + endpoint: str, + request_payload: dict[str, Any], + ) -> dict[str, Any]: + req = urllib.request.Request( + endpoint, + data=json.dumps(request_payload).encode("utf-8"), + headers={ + "Authorization": f"Bearer {self.token}", + "Content-Type": "application/json", + }, + method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=60) as resp: + raw_body = resp.read().decode("utf-8") + return json.loads(raw_body) + except urllib.error.HTTPError as err: + body = err.read().decode("utf-8", errors="replace") + raise GitHubAPIError( + message="GitHub Models HTTP error", + status_code=err.code, + headers=dict(err.headers.items()) if err.headers else None, + body=body, + request_payload=request_payload, + endpoint=endpoint, + ) from err + except urllib.error.URLError as err: + raise GitHubAPIError( + message="GitHub Models network error", + body=str(err), + request_payload=request_payload, + endpoint=endpoint, + ) from err + except json.JSONDecodeError as err: + raise GitHubAPIError( + message="GitHub Models returned non-JSON response", + body=str(err), + request_payload=request_payload, + endpoint=endpoint, + ) from err + + def _github_chat_completion(self, request_payload: dict[str, Any]) -> str: + data = self._github_post_json(self.chat_endpoint, request_payload) + try: + return str(data["choices"][0]["message"]["content"]) + except (KeyError, IndexError, TypeError) as err: + raise GitHubAPIError( + message="GitHub chat response missing choices[0].message.content", + body=json.dumps(data, ensure_ascii=True), + request_payload=request_payload, + endpoint=self.chat_endpoint, + ) from err + + def _maybe_raise_model_not_found( + self, + model: str | None, + backend_name: str, + err: Exception, + ) -> None: + text = str(err).lower() + if model and "model" in text and ( + "not found" in text or "does not exist" in text or "unknown" in text + ): + raise ModelNotFoundError( + f"model '{model}' not found on backend '{backend_name}'" + ) from err + + def _rank_chunks_with_chat( + self, + chunks: list[Chunk], + ranking_prompts: list[dict[str, str]], + ) -> list[Chunk]: + messages = build_ranking_messages(ranking_prompts, chunks) + request_payload = { + "model": self.chat_model, + "messages": messages, + } + content = self._github_chat_completion(request_payload) + scores = json.loads(content) + if not isinstance(scores, dict): + raise ValueError("Ranking response is not a JSON object") + ranked_chunks = [] + for index, chunk in enumerate(chunks): + score_raw = scores.get(str(index), 0.0) + ranked_chunks.append( + Chunk( + text=chunk.text, + source=chunk.source, + relevance_score=float(score_raw), + chunk_index=chunk.chunk_index, + ) + ) + return ranked_chunks + + def _rank_chunks_with_embeddings(self, chunks: list[Chunk]) -> list[Chunk]: + request_payload = { + "model": self.embedding_model, + "input": [chunk.text for chunk in chunks], + } + data = self._github_post_json(self.embedding_endpoint, request_payload) + embeddings_data = data.get("data") + if not isinstance(embeddings_data, list) or len(embeddings_data) != len(chunks): + raise ValueError("Invalid embedding response length for ranked chunk batch") + + vectors = [ + item.get("embedding") if isinstance(item, dict) else None + for item in embeddings_data + ] + + scored_chunks: list[Chunk] = [] + for index, chunk in enumerate(chunks): + vector = vectors[index] + if not isinstance(vector, list) or not vector: + raise ValueError(f"Missing embedding vector for chunk index {index}") + score = sum(float(v) * float(v) for v in vector) ** 0.5 + scored_chunks.append( + Chunk( + text=chunk.text, + source=chunk.source, + relevance_score=score, + chunk_index=chunk.chunk_index, + ) + ) + + return scored_chunks + + def probe_chat(self, model: str | None) -> bool: + probe_model = model or self.chat_model + request_payload = { + "model": probe_model, + "messages": [{"role": "user", "content": "Reply with 'ok'."}], + } + try: + content = self._github_chat_completion(request_payload) + return bool(content.strip()) + except GitHubAPIError as err: + self._maybe_raise_model_not_found(probe_model, "github", err) + raise + + def probe_embeddings(self, model: str | None) -> bool: + probe_model = model or self.embedding_model + request_payload = {"model": probe_model, "input": ["test"]} + try: + data = self._github_post_json(self.embedding_endpoint, request_payload) + vectors = data.get("data") + return isinstance(vectors, list) and len(vectors) > 0 + except GitHubAPIError as err: + self._maybe_raise_model_not_found(probe_model, "github", err) + return False + + def call_chat(self, prompt: dict) -> str: + request_payload = { + "model": str(prompt.get("model") or self.chat_model), + "messages": prompt.get("messages", []), + } + model_parameters = prompt.get("modelParameters") + if isinstance(model_parameters, dict): + request_payload.update(model_parameters) + return self._github_chat_completion(request_payload) + + def select_relevant_chunks( + self, + chunks: list[Chunk], + use_embeddings: bool, + ranking_prompts: list[dict[str, str]], + ) -> list[Chunk]: + if use_embeddings: + try: + return self._rank_chunks_with_embeddings(chunks) + except Exception as err: + request_payload = { + "model": self.embedding_model, + "input": [chunk.text for chunk in chunks], + } + diagnostic = ( + "[GitHub ranking] phase=select_relevant_chunks " + f"mode=embeddings endpoint={self.embedding_endpoint} " + f"request_payload={json.dumps(request_payload, ensure_ascii=True)} " + f"error={str(err)}" + ) + logger.error(diagnostic) + raise RuntimeError(diagnostic) from err + try: + return self._rank_chunks_with_chat(chunks, ranking_prompts) + except Exception as err: + messages = build_ranking_messages(ranking_prompts, chunks) + request_payload = { + "model": self.chat_model, + "messages": messages, + } + diagnostic = ( + "[GitHub ranking] phase=select_relevant_chunks " + f"mode=chat endpoint={self.chat_endpoint} " + f"request_payload={json.dumps(request_payload, ensure_ascii=True)} " + f"error={str(err)}" + ) + logger.error(diagnostic) + raise RuntimeError(diagnostic) from err + + def extract_chunk_signals( + self, + chunk: Chunk, + extraction_prompts: list[dict[str, str]], + ) -> list[Signal]: + request_payload = { + "model": self.chat_model, + "messages": [ + *[ + { + "role": str(prompt_msg.get("role", "")), + "content": str(prompt_msg.get("content", "")), + } + for prompt_msg in extraction_prompts + ], + {"role": "user", "content": chunk.text}, + ], + } + try: + content = self._github_chat_completion(request_payload) + return _parse_signal_list(content) + except Exception as err: + diagnostic = ( + "[GitHub extraction] phase=extract_chunk_signals " + f"chunk_index={chunk.chunk_index} endpoint={self.chat_endpoint} " + f"request_payload={json.dumps(request_payload, ensure_ascii=True)} " + f"error={str(err)}" + ) + logger.error(diagnostic) + raise RuntimeError(diagnostic) from err + + def consolidate_signals( + self, + signals: list[Signal], + consolidation_prompts: list[dict[str, str]], + ) -> list[Signal]: + request_payload = { + "model": self.chat_model, + "messages": [ + *[ + { + "role": str(prompt_msg.get("role", "")), + "content": str(prompt_msg.get("content", "")), + } + for prompt_msg in consolidation_prompts + ], + { + "role": "user", + "content": build_signal_payload(signals), + }, + ], + } + try: + content = self._github_chat_completion(request_payload) + return _parse_signal_list(content) + except Exception as err: + diagnostic = ( + "[GitHub consolidation] phase=consolidate_signals " + f"signal_count={len(signals)} endpoint={self.chat_endpoint} " + f"request_payload={json.dumps(request_payload, ensure_ascii=True)} " + f"error={str(err)}" + ) + logger.error(diagnostic) + raise RuntimeError(diagnostic) from err + + def classify_signals( + self, + signals: list[Signal], + classification_prompts: list[dict[str, str]], + ) -> ClassifiedSignals: + request_payload = { + "model": self.chat_model, + "messages": [ + *[ + { + "role": str(prompt_msg.get("role", "")), + "content": str(prompt_msg.get("content", "")), + } + for prompt_msg in classification_prompts + ], + { + "role": "user", + "content": build_signal_payload(signals), + }, + ], + } + try: + content = self._github_chat_completion(request_payload) + return _parse_classified_signals(content) + except Exception as err: + diagnostic = ( + "[GitHub classification] phase=classify_signals " + f"signal_count={len(signals)} endpoint={self.chat_endpoint} " + f"request_payload={json.dumps(request_payload, ensure_ascii=True)} " + f"error={str(err)}" + ) + logger.error(diagnostic) + raise RuntimeError(diagnostic) from err + + def render_final_context( + self, + classified: ClassifiedSignals, + metadata: DistilledContextMetadata, + ) -> DistilledContext: + return build_distilled_context(classified=classified, metadata=metadata) + + def get_adapter_tag(self) -> str: + """Return the adapter identifier for logging.""" + return "github" + + +def _normalize_token(raw_token: str) -> str: + return raw_token.replace("\r", "").replace("\n", "").strip() + + +def _create_github_adapter() -> GitHubDistillationAdapter: + """Factory for `--backend github` using env token or gh fallback.""" + + def _resolver() -> str: + raw_token = os.getenv("GH_TOKEN") or os.getenv("GITHUB_TOKEN") + if raw_token: + return _normalize_token(raw_token) + + try: + gh_token = subprocess.check_output( + ["gh", "auth", "token"], + text=True, + stderr=subprocess.STDOUT, + ) + return _normalize_token(gh_token) + except (FileNotFoundError, subprocess.CalledProcessError) as err: + raise RuntimeError( + "Unable to resolve GitHub token for backend 'github'. " + "Set GH_TOKEN/GITHUB_TOKEN or ensure `gh auth token` works." + ) from err + + return GitHubDistillationAdapter(token_resolver=_resolver) + + +def _create_actions_adapter() -> GitHubDistillationAdapter: + """Factory for `--backend actions` requiring GITHUB_TOKEN only.""" + + def _resolver() -> str: + raw_token = os.getenv("GITHUB_TOKEN") + if not raw_token: + raise RuntimeError( + "--backend actions requires GITHUB_TOKEN to be set." + ) + return _normalize_token(raw_token) + + return GitHubDistillationAdapter(token_resolver=_resolver) + +registry.register("github", _create_github_adapter) +registry.register("actions", _create_actions_adapter) diff --git a/tools/release_announcement/src/release_announcement/backends/distillation_adapters/ollama_adapter.py b/tools/release_announcement/src/release_announcement/backends/distillation_adapters/ollama_adapter.py new file mode 100644 index 0000000000..e0cd669596 --- /dev/null +++ b/tools/release_announcement/src/release_announcement/backends/distillation_adapters/ollama_adapter.py @@ -0,0 +1,392 @@ +"""Ollama adapter for the staged distillation pipeline. + +Implements the DistillationAdapter protocol for the Ollama backend. +""" + +import json +import os +from ...app_logger import logger + +import ollama + +from ...distillation import ( + build_distilled_context, + DistillationAdapter, + Chunk, + Signal, + ClassifiedSignal, + ClassifiedSignals, + DistilledContext, + DistilledContextMetadata, + default_fallback_signal, + PARSE_EXCEPTIONS, +) +from .common_adapter_utils import build_ranking_messages, build_signal_payload + +# Import the call_ollama_model function for making actual Ollama API calls +from ..ollama_backend import call_ollama_model +from ...registry import registry + + +class OllamaDistillationAdapter(DistillationAdapter): + """Ollama adapter implementing DistillationAdapter protocol.""" + + def __init__(self) -> None: + """Initialize the Ollama adapter with model names from environment variables.""" + self.chat_model = os.getenv("OLLAMA_MODEL", "mistral-large-3:675b-cloud") + self.embedding_model = os.getenv("OLLAMA_EMBEDDING_MODEL") # No default + # Preserve compatibility with callers that read legacy default-model attributes. + self._default_chat_model = self.chat_model + self._default_embedding_model = self.embedding_model + + def probe_chat(self, model: str | None) -> bool: + """Probe chat capability directly via Ollama chat API.""" + chat_model = model if model is not None else self.chat_model + payload = { + "model": chat_model, + "messages": [{"role": "user", "content": "Reply with 'ok'."}], + } + try: + response = ollama.chat(model=chat_model, messages=payload["messages"]) + content = response.get("message", {}).get("content", "") + return isinstance(content, str) and bool(content.strip()) + except Exception as err: + status_code = getattr(err, "status_code", None) + response_body = getattr(err, "response", None) + raise RuntimeError( + "Ollama chat capability probe failed. " + f"request_payload={json.dumps(payload, ensure_ascii=True)} " + f"response_status={status_code} " + f"response_body={response_body} " + f"error={err}" + ) from err + + def probe_embeddings(self, model: str | None) -> bool: + """Probe embedding capability directly via Ollama batch embed API.""" + embedding_model = model if model is not None else self.embedding_model + if embedding_model is None: + return False + payload = {"model": embedding_model, "input": ["test"]} + try: + response = ollama.embed(model=embedding_model, input=["test"]) + embeddings = response.get("embeddings") + return isinstance(embeddings, list) and bool(embeddings) + except Exception as err: + status_code = getattr(err, "status_code", None) + response_body = getattr(err, "response", None) + raise RuntimeError( + "Ollama embedding capability probe failed. " + f"request_payload={json.dumps(payload, ensure_ascii=True)} " + f"response_status={status_code} " + f"response_body={response_body} " + f"error={err}" + ) from err + + def call_chat(self, prompt: dict) -> str: + """Call the chat model.""" + return call_ollama_model(prompt, self.chat_model, self.embedding_model) + + def select_relevant_chunks( + self, + chunks: list[Chunk], + use_embeddings: bool, + ranking_prompts: list[dict[str, str]], + ) -> list[Chunk]: + """Select relevant chunks using embeddings or chat-based ranking.""" + if use_embeddings and self.embedding_model: + embed_payload = {"model": self.embedding_model, "input": [c.text for c in chunks]} + try: + embeddings_response = ollama.embed( + model=self.embedding_model, + input=embed_payload["input"], + ) + return _score_chunks_from_embeddings(chunks, embeddings_response) + except Exception as err: + diagnostic = ( + f"[Ollama ranking] phase=select_relevant_chunks " + f"mode=embeddings model={self.embedding_model} " + f"chunk_count={len(chunks)} " + f"request_payload={json.dumps(embed_payload, ensure_ascii=True)} " + f"error={err}" + ) + logger.error(diagnostic) + raise RuntimeError(diagnostic) from err + + messages = build_ranking_messages(ranking_prompts, chunks) + ranking_payload = {"model": self.chat_model, "messages": messages} + try: + response = call_ollama_model( + {"messages": messages}, + chat_model_override=self.chat_model, + ) + scores = _parse_chat_scores(response, len(chunks)) + return _score_chunks_from_scores(chunks, scores) + except Exception as err: + diagnostic = ( + f"[Ollama ranking] phase=select_relevant_chunks " + f"mode=chat model={self.chat_model} " + f"chunk_count={len(chunks)} " + f"request_payload={json.dumps(ranking_payload, ensure_ascii=True)} " + f"error={err}" + ) + logger.error(diagnostic) + raise RuntimeError(diagnostic) from err + + def extract_chunk_signals( + self, + chunk: Chunk, + extraction_prompts: list[dict[str, str]], + ) -> list[Signal]: + """Extract signals from a chunk. + + Args: + chunk: The chunk to extract signals from. + extraction_prompts: Prompt list for extraction. + + Returns: + List of extracted signals. + + Raises: + RuntimeError: If the Ollama API call fails. + """ + messages = _messages_with_user_content(extraction_prompts, chunk.text) + request_payload = {"model": self.chat_model, "messages": messages} + try: + response = call_ollama_model( + {"messages": messages}, + chat_model_override=self.chat_model, + ) + + try: + signals_data = json.loads(response) + return [ + Signal( + change=signal_data.get("change", ""), + impact=signal_data.get("impact", "low"), + users_affected=signal_data.get("users_affected", ""), + confidence=signal_data.get("confidence", "low"), + final_outcome=signal_data.get("final_outcome", False), + ) + for signal_data in signals_data + ] + except (json.JSONDecodeError, KeyError, TypeError): + return [default_fallback_signal(chunk.source)] + except Exception as err: + diagnostic = ( + f"[Ollama extraction] phase=extract_chunk_signals " + f"chunk_index={chunk.chunk_index} model={self.chat_model} " + f"request_payload={json.dumps(request_payload, ensure_ascii=True)} " + f"error={err}" + ) + logger.error(diagnostic) + raise RuntimeError(diagnostic) from err + + def consolidate_signals( + self, + signals: list[Signal], + consolidation_prompts: list[dict[str, str]], + ) -> list[Signal]: + """Consolidate signals.""" + messages = _messages_with_user_content(consolidation_prompts, build_signal_payload(signals)) + request_payload = {"model": self.chat_model, "messages": messages} + try: + response = call_ollama_model( + {"messages": messages}, + chat_model_override=self.chat_model, + ) + try: + consolidated_signals_data = json.loads(response) + return [ + Signal( + change=signal_data.get("change", ""), + impact=signal_data.get("impact", "low"), + users_affected=signal_data.get("users_affected", ""), + confidence=signal_data.get("confidence", "low"), + final_outcome=signal_data.get("final_outcome", False), + ) + for signal_data in consolidated_signals_data + ] + except (json.JSONDecodeError, KeyError, TypeError): + return signals + except Exception as err: + diagnostic = ( + f"[Ollama consolidation] phase=consolidate_signals " + f"signal_count={len(signals)} model={self.chat_model} " + f"request_payload={json.dumps(request_payload, ensure_ascii=True)} " + f"error={err}" + ) + logger.error(diagnostic) + raise RuntimeError(diagnostic) from err + + def classify_signals( + self, + signals: list[Signal], + classification_prompts: list[dict[str, str]], + ) -> ClassifiedSignals: + """Classify signals.""" + messages = _messages_with_user_content(classification_prompts, build_signal_payload(signals)) + request_payload = {"model": self.chat_model, "messages": messages} + try: + response = call_ollama_model( + {"messages": messages}, + chat_model_override=self.chat_model, + ) + try: + classified_data = json.loads(response) + classified_signals = [ + ClassifiedSignal( + signal=signal, + category=classified_data.get("classified", [{}])[ + i].get("category", "minor"), + ) + for i, signal in enumerate(signals) + ] + summary = classified_data.get("summary", "Classification summary") + except (json.JSONDecodeError, KeyError, TypeError, IndexError): + classified_signals = [ + ClassifiedSignal(signal=signal, category="minor") + for signal in signals + ] + summary = f"Fallback classification: {len(signals)} signals classified as minor" + return ClassifiedSignals( + classified=classified_signals, + summary=summary, + ) + except Exception as err: + diagnostic = ( + f"[Ollama classification] phase=classify_signals " + f"signal_count={len(signals)} model={self.chat_model} " + f"request_payload={json.dumps(request_payload, ensure_ascii=True)} " + f"error={err}" + ) + logger.error(diagnostic) + raise RuntimeError(diagnostic) from err + + def render_final_context( + self, + classified: ClassifiedSignals, + metadata: DistilledContextMetadata, + ) -> DistilledContext: + """Render the final distilled context. + + Args: + classified: ClassifiedSignals object. + metadata: DistilledContextMetadata object. + + Returns: + DistilledContext object. + """ + context = build_distilled_context(classified=classified, metadata=metadata) + return context + + def get_adapter_tag(self) -> str: + """Return the adapter identifier for logging.""" + return "ollama" + + +def _create_ollama_adapter() -> OllamaDistillationAdapter: + """Factory function for creating an Ollama adapter.""" + return OllamaDistillationAdapter() + + +def _messages_with_user_content( + base_messages: list[dict[str, str]], + user_content: str, +) -> list[dict[str, str]]: + """Clone prompt messages and append a user message payload for stage input.""" + messages = [ + {"role": str(msg.get("role", "")), "content": str(msg.get("content", ""))} + for msg in base_messages + if isinstance(msg, dict) + ] + messages.append({"role": "user", "content": user_content}) + return messages + + +def _score_chunks_from_embeddings( + chunks: list[Chunk], + embeddings_response: dict, +) -> list[Chunk]: + embeddings_list = embeddings_response.get("embeddings") + if not isinstance(embeddings_list, list) or len(embeddings_list) < len(chunks): + raise RuntimeError( + "Ollama embeddings response missing vectors for ranked chunks. " + f"response={json.dumps(embeddings_response, ensure_ascii=True)}" + ) + + scored_chunks: list[Chunk] = [] + for i, (chunk, embedding) in enumerate(zip(chunks, embeddings_list)): + if not isinstance(embedding, list) or not embedding: + raise RuntimeError( + "Ollama embeddings response missing embedding vector. " + f"chunk_index={i} response={json.dumps(embeddings_response, ensure_ascii=True)}" + ) + relevance_score = sum(x * x for x in embedding) ** 0.5 + scored_chunks.append( + Chunk( + text=chunk.text, + source=chunk.source, + relevance_score=relevance_score, + chunk_index=chunk.chunk_index, + ) + ) + return scored_chunks + + +def _parse_chat_scores(response: str, chunk_count: int) -> list[float]: + try: + scores_dict = json.loads(response) + if not isinstance(scores_dict, dict): + raise ValueError( + "Expected JSON object with chunk indices as keys, " + f"got {type(scores_dict).__name__}" + ) + + scores: list[float] = [] + for i in range(chunk_count): + chunk_key = str(i) + if chunk_key not in scores_dict: + raise ValueError( + f"Missing chunk index '{chunk_key}' in response. " + f"Expected indices 0-{chunk_count - 1}" + ) + score_value = scores_dict[chunk_key] + try: + score = float(score_value) + except (TypeError, ValueError) as score_err: + raise ValueError( + f"Invalid score for chunk {i}: {score_value} - {score_err}" + ) from score_err + if not 0.0 <= score <= 1.0: + raise ValueError(f"Score {score} for chunk {i} is outside 0.0-1.0 range") + scores.append(score) + return scores + except (json.JSONDecodeError, ValueError, TypeError) as parse_err: + diagnostic = ( + f"[Ollama ranking] phase=select_relevant_chunks parse_error=True " + f"response_length={len(response)} " + "expected_format='JSON object with indices as keys, " + "e.g. {\"0\": 0.92, \"1\": 0.45}' " + f"parse_error_detail={str(parse_err)}" + ) + logger.error(diagnostic) + raise RuntimeError(diagnostic) from parse_err + + +def _score_chunks_from_scores(chunks: list[Chunk], scores: list[float]) -> list[Chunk]: + scored_chunks: list[Chunk] = [] + for i, chunk in enumerate(chunks): + relevance_score = scores[i] if i < len(scores) else 0.0 + scored_chunks.append( + Chunk( + text=chunk.text, + source=chunk.source, + relevance_score=relevance_score, + chunk_index=chunk.chunk_index, + ) + ) + return scored_chunks + + +# Register the Ollama adapter at import time. +registry.register("ollama", _create_ollama_adapter) diff --git a/tools/release_announcement/src/release_announcement/backends/github_backend.py b/tools/release_announcement/src/release_announcement/backends/github_backend.py new file mode 100644 index 0000000000..4e097a2764 --- /dev/null +++ b/tools/release_announcement/src/release_announcement/backends/github_backend.py @@ -0,0 +1,596 @@ +"""GitHub Models backend for release-announcement generation.""" + +import json +import os +import sys +import urllib.error +import urllib.request +from dataclasses import dataclass +from typing import Any, Callable +import math +from ..app_logger import logger + +_embedding_support_cache: dict[str, bool] = {} +"""Per-process cache so we only ping the embeddings endpoint once per model.""" + +DEFAULT_BACKEND_CONFIG: dict[str, Any] = { + "default_chat_model": "openai/gpt-4o", + "default_embedding_model": "openai/text-embedding-3-small", + "chat_endpoint": "https://models.github.ai/inference/chat/completions", + "embeddings_endpoint": "https://models.github.ai/inference/embeddings", + "token_limit": 7500, +} + + +@dataclass(frozen=True) +class GitHubCallOptions: + """Optional overrides and backend config for chat completion requests.""" + + chat_model_override: str | None = None + embedding_model_override: str | None = None + backend_config: dict[str, Any] | None = None + + +@dataclass(frozen=True) +class GitHubProbeRequest: + """All values required to probe chat/embedding support for GitHub Models.""" + + chat_model: str | None + embedding_model: str | None + chat_endpoint: str + embeddings_endpoint: str + probe_embeddings: bool + + +def _estimate_tokens(text: str) -> int: + """Conservative token estimate for budget trimming before API calls.""" + # Use a stricter 3-chars-per-token approximation and round up so we + # under-shoot model limits less often on large PR payloads. + return max(1, math.ceil(len(text) / 3)) + + +def _probe_embedding_support(model: str, token: str, embeddings_endpoint: str) -> bool: + """Return True when a model supports embeddings at the configured endpoint.""" + if model in _embedding_support_cache: + return _embedding_support_cache[model] + + payload = {"model": model, "input": ["ping"]} + req = urllib.request.Request( + embeddings_endpoint, + data=json.dumps(payload).encode("utf-8"), + headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"}, + method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=15) as resp: + resp.read() + supported = True + except (urllib.error.HTTPError, urllib.error.URLError): + supported = False + + _embedding_support_cache[model] = supported + support_text = "supported" if supported else "not supported" + logger.info(f" [budget] Embeddings probe ({model}): {support_text}") + return supported + + +def _call_github_embeddings( + texts: list[str], model: str, token: str, embeddings_endpoint: str +) -> list[list[float]]: + """Call the GitHub Models embeddings endpoint and return one vector per input text.""" + payload = {"model": model, "input": texts} + req = urllib.request.Request( + embeddings_endpoint, + data=json.dumps(payload).encode("utf-8"), + headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"}, + method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=60) as resp: + body = resp.read().decode("utf-8") + except urllib.error.HTTPError as err: + error_body = err.read().decode("utf-8", errors="replace") + raise RuntimeError( + "Embeddings API call failed during semantic chunk selection. " + f"endpoint={embeddings_endpoint} model={model} " + f"http={err.code} body={error_body}" + ) from err + except urllib.error.URLError as err: + raise RuntimeError( + "Embeddings API connection error during semantic chunk selection. " + f"endpoint={embeddings_endpoint} model={model} error={err}" + ) from err + + data = json.loads(body) + return [item["embedding"] for item in sorted(data["data"], key=lambda x: x["index"])] + + +def _cosine_similarity(a: list[float], b: list[float]) -> float: + """Cosine similarity between two vectors (pure Python, no numpy required).""" + dot = sum(x * y for x, y in zip(a, b)) + mag_a = math.sqrt(sum(x * x for x in a)) + mag_b = math.sqrt(sum(x * x for x in b)) + return dot / (mag_a * mag_b) if mag_a and mag_b else 0.0 + + +def _extract_pr_section_from_user_text(user_text: str) -> tuple[str, str, str] | None: + """Split user message into preamble, PR block, and suffix.""" + split_marker = "\nNewly merged pull request:\n" + marker_idx = user_text.find(split_marker) + if marker_idx == -1: + return None + + preamble = user_text[: marker_idx + len(split_marker)] + rest = user_text[marker_idx + len(split_marker):] + + json_end = rest.find("\n====\n") + if json_end == -1: + return preamble, rest, "" + return preamble, rest[:json_end], rest[json_end:] + + +def _parse_pr_payload_from_user_text(user_text: str) -> tuple[str, dict[str, Any], str] | None: + """Parse PR payload from a prompt user message.""" + parts = _extract_pr_section_from_user_text(user_text) + if parts is None: + return None + + preamble, pr_json_str, suffix = parts + try: + pr_data = json.loads(pr_json_str) + except json.JSONDecodeError: + return None + return preamble, pr_data, suffix + + +def _collect_pr_text_chunks( + pr_data: dict[str, Any], +) -> tuple[dict[str, Any], str, list[str], list[str], list[tuple[str, str]]]: + """Extract header/body/comments/reviews and embeddable chunks from PR data.""" + pr_header = {"number": pr_data.get("number"), "title": pr_data.get("title")} + body: str = pr_data.get("body") or "" + comments: list[str] = pr_data.get("comments") or [] + reviews: list[str] = pr_data.get("reviews") or [] + + chunks: list[tuple[str, str]] = [] + if body.strip(): + chunks.append(("body", body)) + chunks.extend((f"comment_{i}", c) for i, c in enumerate(comments) if c and c.strip()) + chunks.extend((f"review_{i}", r) for i, r in enumerate(reviews) if r and r.strip()) + return pr_header, body, comments, reviews, chunks + + +def _pick_relevant_chunk_labels(selection: dict[str, Any]) -> set[str]: + """Return labels for chunks selected by embedding similarity within the token budget.""" + embed_trunc = 4000 + chunks = selection["chunks"] + vectors = _call_github_embeddings( + [selection["query_text"][:embed_trunc]] + + [text[:embed_trunc] for _, text in chunks], + selection["model"], + selection["token"], + selection["embeddings_endpoint"], + ) + scored = sorted( + enumerate(chunks), + key=lambda ic: _cosine_similarity(vectors[0], vectors[ic[0] + 1]), + reverse=True, + ) + + selected: set[str] = set() + used_tokens = 0 + for _, (label, text) in scored: + tokens = _estimate_tokens(text) + if used_tokens + tokens <= selection["budget"]: + selected.add(label) + used_tokens += tokens + return selected + + +def _build_trimmed_pr_payload( + pr_header: dict[str, Any], + body: str, + comments: list[str], + reviews: list[str], + selected: set[str], +) -> dict[str, Any]: + """Rebuild PR payload preserving original field order with selected text chunks.""" + trimmed: dict[str, Any] = dict(pr_header) + trimmed["body"] = body if "body" in selected else "" + trimmed["comments"] = [c for i, c in enumerate(comments) if f"comment_{i}" in selected] + trimmed["reviews"] = [r for i, r in enumerate(reviews) if f"review_{i}" in selected] + return trimmed + + +def _replace_user_message_content( + messages: list[dict[str, Any]], new_content: str +) -> list[dict[str, Any]]: + """Return a copy of messages with the user message content replaced.""" + return [{**m, "content": new_content} if m["role"] == "user" else m for m in messages] + + +def _prepare_embedding_trim_context( + prompt: dict[str, Any], token_limit: int +) -> dict[str, Any] | None: + """Prepare parsed prompt data needed for embedding-based trimming.""" + messages = prompt["messages"] + system_text = next((m["content"] for m in messages if m["role"] == "system"), "") + user_text = next((m["content"] for m in reversed(messages) if m["role"] == "user"), None) + if user_text is None: + return None + + parsed = _parse_pr_payload_from_user_text(user_text) + if parsed is None: + return None + + preamble, pr_data, suffix = parsed + pr_header, body, comments, reviews, chunks = _collect_pr_text_chunks(pr_data) + if not chunks: + return None + + fixed_tokens = ( + _estimate_tokens(preamble) + + _estimate_tokens(json.dumps(pr_header, indent=2)) + + _estimate_tokens(suffix) + + 200 + ) + return { + "messages": messages, + "user_text": user_text, + "query_text": (system_text[:300] + " " + pr_header.get("title", "")).strip(), + "budget": token_limit - fixed_tokens, + "preamble": preamble, + "suffix": suffix, + "pr_header": pr_header, + "body": body, + "comments": comments, + "reviews": reviews, + "chunks": chunks, + } + + +def _chunk_text_for_summarization(text: str, chunk_chars: int) -> list[str]: + """Split text into near-size chunks preferring newline boundaries.""" + parts: list[str] = [] + remaining = text + while len(remaining) > chunk_chars: + split_at = remaining.rfind("\n", 0, chunk_chars) or chunk_chars + parts.append(remaining[:split_at]) + remaining = remaining[split_at:] + if remaining.strip(): + parts.append(remaining) + return parts + + +def _summarise_chunk_via_chat( + chunk: str, + model: str, + token: str, + endpoint: str, + chunk_chars: int, +) -> str: + """Summarise one chunk using chat completions, falling back to truncation on failures.""" + payload = { + "model": model, + "messages": [ + { + "role": "system", + "content": ( + "Summarise the following portion of a pull request discussion in " + "2-4 sentences, focusing on user-visible changes and key technical " + "context. Be concise and factual." + ), + }, + {"role": "user", "content": chunk}, + ], + "max_completion_tokens": 256, + } + req = urllib.request.Request( + endpoint, + data=json.dumps(payload).encode("utf-8"), + headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"}, + method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=60) as resp: + decoded = json.loads(resp.read().decode()) + return decoded["choices"][0]["message"]["content"].strip() + except urllib.error.HTTPError as err: + error_body = err.read().decode("utf-8", errors="replace") + logger.warning( + " [budget] Chunk summarisation failed during chat fallback " + f"(endpoint={endpoint}, model={model}, http={err.code}, body={error_body}) " + "— keeping truncated text" + ) + except urllib.error.URLError as err: + logger.warning( + " [budget] Chunk summarisation connection failure during chat fallback " + f"(endpoint={endpoint}, model={model}, error={err}) " + "— keeping truncated text" + ) + except (KeyError, json.JSONDecodeError) as err: + logger.warning( + " [budget] Chunk summarisation parsing failed during chat fallback " + f"(endpoint={endpoint}, model={model}, error={err}) " + "— keeping truncated text" + ) + return chunk[:chunk_chars] + + +def _execute_github_chat_completion( + endpoint: str, + payload: dict[str, Any], + token: str, + model: str, +) -> str: + """Execute chat-completion request and return response text or exit with rich diagnostics.""" + req = urllib.request.Request( + endpoint, + data=json.dumps(payload).encode("utf-8"), + headers={ + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + }, + method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=60) as resp: + body = resp.read().decode("utf-8") + except urllib.error.HTTPError as err: + error_body = err.read().decode("utf-8", errors="replace") + logger.error("GitHub Models chat completion failed.") + logger.error(" Operation: final release-announcement generation call") + logger.error(f" Endpoint: {endpoint}") + logger.error(f" Model: {model}") + logger.error(f" HTTP: {err.code}") + logger.error(f" Response body: {error_body}") + if "unsupported" in error_body.lower(): + logger.error( + " Hint: This model likely does not support chat completions on this endpoint. " + "Use a chat-capable model (for example openai/gpt-4o) for --model." + ) + sys.exit(1) + except urllib.error.URLError as err: + logger.error( + "GitHub Models chat completion request failed before " + "a response was received." + ) + logger.error(" Operation: final release-announcement generation call") + logger.error(f" Endpoint: {endpoint}") + logger.error(f" Model: {model}") + logger.error(f" network_error={err}") + sys.exit(1) + + try: + data = json.loads(body) + content = data.get("choices", [{}])[0].get("message", {}).get("content", "") + except (json.JSONDecodeError, IndexError, TypeError) as err: + logger.error(f"Invalid response from GitHub Models API: {err}") + sys.exit(1) + + if not content: + logger.error("GitHub Models API returned an empty response.") + sys.exit(1) + return content.strip() + + +def _trim_prompt_via_embeddings( + prompt: dict[str, Any], + model: str, + token: str, + backend_config: dict[str, Any], +) -> dict[str, Any]: + context = _prepare_embedding_trim_context(prompt, backend_config["token_limit"]) + if context is None: + return prompt + + try: + selected = _pick_relevant_chunk_labels( + { + "chunks": context["chunks"], + "query_text": context["query_text"], + "model": model, + "token": token, + "budget": context["budget"], + "embeddings_endpoint": backend_config["embeddings_endpoint"], + } + ) + except RuntimeError as err: + logger.warning(f" [budget] Embedding call failed ({err}) — falling back to summarisation") + return _summarize_pr_chunks(prompt, model, token, backend_config) + + trimmed = _build_trimmed_pr_payload( + context["pr_header"], + context["body"], + context["comments"], + context["reviews"], + selected, + ) + new_pr_json = json.dumps(trimmed, indent=2) + new_user_content = context["preamble"] + new_pr_json + context["suffix"] + logger.info( + f" [budget] Embedding trim: ~{_estimate_tokens(context['user_text'])} → " + f"~{_estimate_tokens(new_user_content)} tokens " + f"(kept {len(selected)}/{len(context['chunks'])} PR text chunks)" + ) + new_messages = _replace_user_message_content(context["messages"], new_user_content) + return {**prompt, "messages": new_messages} + + +def _summarize_pr_chunks( + prompt: dict[str, Any], + model: str, + token: str, + backend_config: dict[str, Any], +) -> dict[str, Any]: + """Trim an over-budget prompt by summarising the PR discussion in chunks.""" + messages = prompt["messages"] + user_text = next((m["content"] for m in reversed(messages) if m["role"] == "user"), None) + if user_text is None: + return prompt + + parts = _extract_pr_section_from_user_text(user_text) + if parts is None: + return prompt + + preamble, pr_block, suffix = parts + effective_chat_endpoint = os.getenv("MODELS_ENDPOINT", backend_config["chat_endpoint"]) + chunk_chars = 3000 * 4 + + parts_to_summarize = _chunk_text_for_summarization(pr_block, chunk_chars) + if len(parts_to_summarize) <= 1: + return prompt + + logger.info(f" [budget] Chat-only: summarising PR in {len(parts_to_summarize)} chunks...") + condensed = "\n\n".join( + _summarise_chunk_via_chat(chunk, model, token, effective_chat_endpoint, chunk_chars) + for chunk in parts_to_summarize + ) + if _estimate_tokens(preamble + condensed + suffix) > backend_config["token_limit"]: + logger.info(" [budget] Still over budget - consolidating summaries...") + condensed = _summarise_chunk_via_chat( + condensed, model, token, effective_chat_endpoint, chunk_chars + ) + logger.info( + f" [budget] Summarisation trim: ~{_estimate_tokens(user_text)} → " + f"~{_estimate_tokens(preamble + condensed + suffix)} tokens" + ) + new_messages = _replace_user_message_content(messages, preamble + condensed + suffix) + return {**prompt, "messages": new_messages} + + +def call_github_models_api( + prompt: dict[str, Any], + resolve_token: Callable[[], str], + options: GitHubCallOptions | None = None, +) -> str: + """Call GitHub Models API with token-budget-aware prompt reduction.""" + options = options or GitHubCallOptions() + config = DEFAULT_BACKEND_CONFIG | (options.backend_config or {}) + token = resolve_token() + + configured_chat_model = prompt.get("model", config["default_chat_model"]) + chat_model = configured_chat_model + embedding_model = config["default_embedding_model"] + + if options.chat_model_override: + chat_model = options.chat_model_override + if options.embedding_model_override: + embedding_model = options.embedding_model_override + + all_text = " ".join(m.get("content", "") for m in prompt.get("messages", [])) + estimated = _estimate_tokens(all_text) + if estimated > config["token_limit"]: + logger.info( + f" [budget] Prompt ~{estimated} tokens exceeds " + f"{config['token_limit']} limit - reducing..." + ) + if _probe_embedding_support(embedding_model, token, config["embeddings_endpoint"]): + prompt = _trim_prompt_via_embeddings( + prompt, + embedding_model, + token, + config, + ) + else: + prompt = _summarize_pr_chunks(prompt, chat_model, token, config) + + endpoint = os.getenv("MODELS_ENDPOINT", config["chat_endpoint"]) + model_parameters = prompt.get("modelParameters", {}) + payload: dict[str, Any] = {"model": chat_model, "messages": prompt["messages"]} + if "maxCompletionTokens" in model_parameters: + payload["max_completion_tokens"] = model_parameters["maxCompletionTokens"] + if "temperature" in model_parameters: + payload["temperature"] = model_parameters["temperature"] + return _execute_github_chat_completion(endpoint, payload, token, chat_model) + + +def _probe_request( + endpoint: str, + payload: dict[str, Any], + token: str, +) -> dict[str, Any]: + """Execute a probe request and return decoded JSON, raising rich RuntimeError on failures.""" + req = urllib.request.Request( + endpoint, + data=json.dumps(payload).encode("utf-8"), + headers={"Authorization": f"Bearer {token}", "Content-Type": "application/json"}, + method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=30) as resp: + raw = resp.read().decode("utf-8") + return json.loads(raw) + except urllib.error.HTTPError as err: + body = err.read().decode("utf-8", errors="replace") + headers = dict(err.headers.items()) if err.headers else {} + raise RuntimeError( + "GitHub Models capability probe HTTP failure. " + f"request_payload={json.dumps(payload, ensure_ascii=True)} " + f"response_status={err.code} " + f"response_headers={json.dumps(headers, ensure_ascii=True)} " + f"response_body={body}" + ) from err + except urllib.error.URLError as err: + raise RuntimeError( + "GitHub Models capability probe network failure. " + f"request_payload={json.dumps(payload, ensure_ascii=True)} " + f"error={err}" + ) from err + except json.JSONDecodeError as err: + raise RuntimeError( + "GitHub Models capability probe returned non-JSON response. " + f"request_payload={json.dumps(payload, ensure_ascii=True)} " + f"error={err}" + ) from err + + +def probe_capabilities( + token: str, + request: GitHubProbeRequest, +) -> dict[str, bool]: + """Probe GitHub Models chat/embedding support for selected models.""" + supports_chat = False + if request.chat_model is not None: + chat_payload = { + "model": request.chat_model, + "messages": [{"role": "user", "content": "Reply with 'ok'."}], + "max_completion_tokens": 8, + } + chat_response = _probe_request(request.chat_endpoint, chat_payload, token) + content = ( + chat_response.get("choices", [{}])[0] + .get("message", {}) + .get("content", "") + ) + if not isinstance(content, str) or not content.strip(): + raise RuntimeError( + "GitHub Models chat capability probe returned empty content. " + f"request_payload={json.dumps(chat_payload, ensure_ascii=True)} " + f"response_body={json.dumps(chat_response, ensure_ascii=True)}" + ) + supports_chat = True + + supports_embeddings = False + if request.probe_embeddings and request.embedding_model is not None: + embed_payload = {"model": request.embedding_model, "input": ["test"]} + try: + embed_response = _probe_request(request.embeddings_endpoint, embed_payload, token) + data = embed_response.get("data") or [] + vector = data[0].get("embedding") if data and isinstance(data[0], dict) else None + supports_embeddings = isinstance(vector, list) and bool(vector) + if not supports_embeddings: + raise RuntimeError( + "GitHub Models embeddings probe returned no embedding vector. " + f"request_payload={json.dumps(embed_payload, ensure_ascii=True)} " + f"response_body={json.dumps(embed_response, ensure_ascii=True)}" + ) + except RuntimeError as err: + logger.warning( + "GitHub embeddings capability probe failed; " + "continuing with chat-only staged mode. " + f"{err}" + ) + supports_embeddings = False + + return {"supports_chat": supports_chat, "supports_embeddings": supports_embeddings} diff --git a/tools/release_announcement/src/release_announcement/backends/ollama_backend.py b/tools/release_announcement/src/release_announcement/backends/ollama_backend.py new file mode 100644 index 0000000000..576c48a8bf --- /dev/null +++ b/tools/release_announcement/src/release_announcement/backends/ollama_backend.py @@ -0,0 +1,83 @@ +"""Ollama backend for release-announcement generation.""" + +import json +import os +from typing import Any + +import ollama +from ..app_logger import logger +from ..distillation import PARSE_EXCEPTIONS + + +def call_ollama_model( + prompt: dict[str, Any], + chat_model_override: str | None = None, + embedding_model_override: str | None = None, + model_override: str | None = None, +) -> str: + """Call Ollama API for local model inference.""" + del embedding_model_override # Ollama backend currently uses chat model only. + model = ( + chat_model_override + or model_override + or os.getenv("OLLAMA_MODEL", "mistral-large-3:675b-cloud") + ) + response = ollama.chat(model=model, messages=prompt["messages"]) + return response["message"]["content"].strip() + + +def probe_capabilities( + chat_model: str | None, + embedding_model: str | None, + probe_embeddings: bool, +) -> dict[str, bool]: + """Probe Ollama chat/embedding support for the selected models.""" + supports_chat = False + if not probe_embeddings and chat_model is not None: + chat_payload = { + "model": chat_model, + "messages": [{"role": "user", "content": "Reply with 'ok'."}], + } + try: + response = ollama.chat(model=chat_model, messages=chat_payload["messages"]) + content = response.get("message", {}).get("content", "") + if not isinstance(content, str) or not content.strip(): + raise RuntimeError( + "Ollama chat probe returned an empty or invalid message content. " + f"request_payload={json.dumps(chat_payload, ensure_ascii=True)} " + f"response={json.dumps(response, ensure_ascii=True)}" + ) + supports_chat = True + except Exception as err: + raise RuntimeError( + "Ollama chat capability probe failed. " + f"request_payload={json.dumps(chat_payload, ensure_ascii=True)} " + f"error={err}" + ) from err + + supports_embeddings = False + if probe_embeddings: + model = embedding_model or os.getenv("OLLAMA_EMBEDDING_MODEL") + if not model: + return {"supports_chat": supports_chat, "supports_embeddings": False} + embed_payload = {"model": model, "input": "test"} + try: + response = ollama.embeddings(model=model, prompt="test") + vector = response.get("embedding") + supports_embeddings = isinstance(vector, list) and bool(vector) + if not supports_embeddings: + raise RuntimeError( + "Ollama embeddings probe returned no embedding vector. " + f"request_payload={json.dumps(embed_payload, ensure_ascii=True)} " + f"response={json.dumps(response, ensure_ascii=True)}" + ) + except PARSE_EXCEPTIONS as err: + logger.warning( + "Ollama embedding capability probe failed; " + "continuing with chat-only staged mode. " + f"request_payload={json.dumps(embed_payload, ensure_ascii=True)} " + f"error={err}" + ) + supports_embeddings = False + + return {"supports_chat": supports_chat, "supports_embeddings": supports_embeddings} diff --git a/tools/release_announcement/src/release_announcement/capability_probing.py b/tools/release_announcement/src/release_announcement/capability_probing.py new file mode 100644 index 0000000000..7116043c8b --- /dev/null +++ b/tools/release_announcement/src/release_announcement/capability_probing.py @@ -0,0 +1,183 @@ +"""Backend capability probing and staged-mode validation logic.""" + +from __future__ import annotations + +import json +from dataclasses import dataclass +from typing import Callable + +from .cli_config import BackendCapabilities, BackendConfig +from .distillation import PARSE_EXCEPTIONS +from .registry import BackendProtocol, ModelNotFoundError + + +PROBE_EXCEPTIONS = PARSE_EXCEPTIONS + (OSError, AttributeError) + + +@dataclass(frozen=True) +class CapabilityProbeDeps: + """Injected callables and endpoint constants used by capability probing.""" + + get_backend: Callable[[str], BackendProtocol | None] + log_warning: Callable[[str], None] + + +def _format_probe_error( + *, + phase: str, + backend_name: str, + model: str | None, + err: Exception, +) -> str: + """Return a diagnostics-rich probe error message suitable for user-facing logs.""" + parts = [ + f"Capability probe failure: phase={phase}", + f"backend={backend_name}", + f"model={model!r}", + ] + + endpoint = getattr(err, "endpoint", None) + if endpoint is not None: + parts.append(f"endpoint={endpoint}") + + request_payload = getattr(err, "request_payload", None) + if request_payload is not None: + parts.append( + "request_payload=" + f"{json.dumps(request_payload, ensure_ascii=True, default=str)}" + ) + + status_code = getattr(err, "status_code", None) + if status_code is not None: + parts.append(f"response_status={status_code}") + + headers = getattr(err, "headers", None) + if headers is not None: + parts.append( + "response_headers=" + f"{json.dumps(headers, ensure_ascii=True, default=str)}" + ) + + body = getattr(err, "body", None) + if body is not None: + parts.append(f"response_body={body}") + + parts.append(f"error={err}") + return " ".join(parts) + + +def probe_capabilities( + config: BackendConfig, + deps: CapabilityProbeDeps, +) -> BackendCapabilities: + """Probe backend capabilities for the resolved model/backends.""" + should_probe_embeddings = not ( + config.pipeline_mode == "staged" and config.staged_mode == "chat-only" + ) + + if config.chat_model is None: + raise RuntimeError("Chat model resolution failed before capability probing.") + + chat_backend = deps.get_backend(config.chat_model_backend) + if chat_backend is None: + raise RuntimeError( + f"Unsupported chat model backend for probing: {config.chat_model_backend}" + ) + try: + supports_chat = bool(chat_backend.probe_chat(config.chat_model)) + except ModelNotFoundError as err: + raise RuntimeError(str(err)) from err + except PROBE_EXCEPTIONS as err: + raise RuntimeError( + _format_probe_error( + phase="chat", + backend_name=config.chat_model_backend, + model=config.chat_model, + err=err, + ) + ) from err + except Exception as err: + raise RuntimeError( + _format_probe_error( + phase="chat", + backend_name=config.chat_model_backend, + model=config.chat_model, + err=err, + ) + ) from err + + supports_embeddings = False + if should_probe_embeddings: + embedding_backend = deps.get_backend(config.embedding_model_backend) + if embedding_backend is None: + raise RuntimeError( + "Unsupported embedding model backend for probing: " + f"{config.embedding_model_backend}" + ) + try: + supports_embeddings = bool(embedding_backend.probe_embeddings(config.embedding_model)) + except ModelNotFoundError as err: + deps.log_warning( + "Embedding capability probe failed; " + "continuing with chat-only fallback. " + + _format_probe_error( + phase="embedding", + backend_name=config.embedding_model_backend, + model=config.embedding_model, + err=err, + ) + ) + supports_embeddings = False + except PROBE_EXCEPTIONS as err: + deps.log_warning( + "Embedding capability probe failed; " + "continuing with chat-only fallback. " + + _format_probe_error( + phase="embedding", + backend_name=config.embedding_model_backend, + model=config.embedding_model, + err=err, + ) + ) + supports_embeddings = False + + capabilities = BackendCapabilities( + supports_chat=supports_chat, + supports_embeddings=supports_embeddings, + ) + config.capabilities = capabilities + return capabilities + + +def validate_mode( + config: BackendConfig, + log_warning: Callable[[str], None] | None = None, +) -> None: + """Validate and reconcile pipeline/staged mode against probed capabilities.""" + if not config.capabilities.supports_chat: + raise RuntimeError("Backend chat capability is required but unavailable.") + + if config.pipeline_mode == "legacy": + return + + if config.pipeline_mode != "staged": + raise RuntimeError(f"Unsupported pipeline mode: {config.pipeline_mode}") + + if config.staged_mode == "chat-only": + config.capabilities.supports_embeddings = False + return + + if config.staged_mode == "embedding-assisted": + if not config.capabilities.supports_embeddings: + warning_message = ( + "--staged-mode embedding-assisted requested but embeddings are " + "unavailable; downgrading to chat-only." + ) + if log_warning is not None: + log_warning(warning_message) + config.staged_mode = "chat-only" + return + + config.staged_mode = ( + "embedding-assisted" if config.capabilities.supports_embeddings else "chat-only" + ) diff --git a/tools/release_announcement/src/release_announcement/cli_config.py b/tools/release_announcement/src/release_announcement/cli_config.py new file mode 100644 index 0000000000..03cc85bd8d --- /dev/null +++ b/tools/release_announcement/src/release_announcement/cli_config.py @@ -0,0 +1,398 @@ +"""CLI parsing and backend configuration resolution for release_announcement.""" + +from __future__ import annotations + + +import argparse +import os +from pathlib import Path +from dataclasses import dataclass, field + +from .registry import registry + + +def _resolve_default_prompt_file() -> str: + # Package-relative path works for editable (dev) installs. + # CWD-relative path works when invoked via the shell script from the repo root. + pkg_relative = ( + Path(__file__).resolve().parents[2] + / "prompts" + / "release-announcement.prompt.yml" + ) + if pkg_relative.exists(): + return str(pkg_relative) + return str( + Path("tools") / "release_announcement" / "prompts" / "release-announcement.prompt.yml" + ) + + +DEFAULT_PROMPT_FILE = _resolve_default_prompt_file() + + +@dataclass +class BackendCapabilities: + """Runtime capability probe result for the selected backend/models.""" + + supports_chat: bool + supports_embeddings: bool = False + + +@dataclass +class ModelSelection: + """Resolved model state for one model role (chat or embedding).""" + + model: str | None + backend: str + source: str + + +@dataclass +class PipelineOptions: + """CLI pipeline options that can vary independently from model selection.""" + + dry_run: bool = False + mode: str = "staged" + staged_mode: str | None = None + delay_secs: int = 30 + + +@dataclass +class ModelRouting: + """Resolved backend/source metadata for chat and embedding model selections.""" + + chat_backend: str = "ollama" + embedding_backend: str = "ollama" + chat_source: str = "default" + embedding_source: str = "default" + + +@dataclass(init=False) +class BackendConfig: + """Resolved backend/model configuration and capability probe state.""" + + backend: str = "ollama" + chat_model: str | None = None + embedding_model: str | None = None + pipeline: PipelineOptions = field(default_factory=PipelineOptions) + capabilities: BackendCapabilities = field( + default_factory=lambda: BackendCapabilities(supports_chat=False, supports_embeddings=False) + ) + routing: ModelRouting = field(default_factory=ModelRouting) + + def __init__(self, **kwargs: object) -> None: + self.backend = str(kwargs.get("backend", "ollama")) + self.chat_model = kwargs.get("chat_model") if isinstance( + kwargs.get("chat_model"), str) or kwargs.get("chat_model") is None else None + self.embedding_model = kwargs.get("embedding_model") if isinstance( + kwargs.get("embedding_model"), str) or kwargs.get("embedding_model") is None else None + + dry_run = bool(kwargs.get("dry_run", False)) + pipeline_mode = str(kwargs.get("pipeline_mode", "staged")) + staged_mode = kwargs.get("staged_mode") + staged_mode = str(staged_mode) if isinstance(staged_mode, str) else None + delay_secs = int(kwargs.get("delay_secs", 0)) + + capabilities = kwargs.get("capabilities") + chat_model_backend = str(kwargs.get("chat_model_backend", self.backend)) + embedding_model_backend = str(kwargs.get("embedding_model_backend", self.backend)) + chat_model_source = str(kwargs.get("chat_model_source", "default")) + embedding_model_source = str(kwargs.get("embedding_model_source", "default")) + + pipeline = kwargs.get("pipeline") + routing = kwargs.get("routing") + self.pipeline = pipeline or PipelineOptions( + dry_run=dry_run, + mode=pipeline_mode, + staged_mode=staged_mode, + delay_secs=delay_secs, + ) + self.capabilities = ( + capabilities + if isinstance(capabilities, BackendCapabilities) + else BackendCapabilities( + supports_chat=False, + supports_embeddings=False, + ) + ) + self.routing = routing or ModelRouting( + chat_backend=chat_model_backend, + embedding_backend=embedding_model_backend, + chat_source=chat_model_source, + embedding_source=embedding_model_source, + ) + + @property + def dry_run(self) -> bool: + """Return whether dry-run mode is enabled.""" + return self.pipeline.dry_run + + @dry_run.setter + def dry_run(self, value: bool) -> None: + self.pipeline.dry_run = value + + @property + def pipeline_mode(self) -> str: + """Return the selected pipeline mode.""" + return self.pipeline.mode + + @pipeline_mode.setter + def pipeline_mode(self, value: str) -> None: + self.pipeline.mode = value + + @property + def delay_secs(self) -> int: + """Return the inter-PR delay in seconds.""" + return self.pipeline.delay_secs + + @delay_secs.setter + def delay_secs(self, value: int) -> None: + self.pipeline.delay_secs = value + + @property + def staged_mode(self) -> str | None: + """Return the selected staged mode override, if any.""" + return self.pipeline.staged_mode + + @staged_mode.setter + def staged_mode(self, value: str | None) -> None: + self.pipeline.staged_mode = value + + @property + def chat_model_backend(self) -> str: + """Return the backend used for chat model calls.""" + return self.routing.chat_backend + + @chat_model_backend.setter + def chat_model_backend(self, value: str) -> None: + self.routing.chat_backend = value + + @property + def embedding_model_backend(self) -> str: + """Return the backend used for embedding model calls.""" + return self.routing.embedding_backend + + @embedding_model_backend.setter + def embedding_model_backend(self, value: str) -> None: + self.routing.embedding_backend = value + + @property + def chat_model_source(self) -> str: + """Return where chat model selection came from.""" + return self.routing.chat_source + + @chat_model_source.setter + def chat_model_source(self, value: str) -> None: + self.routing.chat_source = value + + @property + def embedding_model_source(self) -> str: + """Return where embedding model selection came from.""" + return self.routing.embedding_source + + @embedding_model_source.setter + def embedding_model_source(self, value: str) -> None: + self.routing.embedding_source = value + + +def _parse_model_selector(raw: str, selected_backend: str | + None = None) -> tuple[str | None, str | None]: + """Parse optional BACKEND/model syntax. + + The separator is '/'. A prefix before the first '/' is treated as the + backend name. A trailing '/' with no model (e.g. 'github/') selects that + backend's default model. Without a '/' prefix, a bare known backend name + (e.g. '--embed github') is a backend-only selector. Anything else + (including Ollama model tags like 'mistral-large-3:675b-cloud') is treated + as a plain model name under the default backend. If selected_backend is + None and no prefix is found, returns None for backend. + """ + if "/" in raw: + prefix, remainder = raw.split("/", 1) + # 'github/' → (github, None); 'github/model' → (github, model) + return prefix, remainder if remainder else None + + # No '/' — treat as a plain model name under the default backend. + return selected_backend, raw + + +def _resolve_chat_selection(args: argparse.Namespace) -> ModelSelection: + if args.chat_model is None: + return ModelSelection(model=None, backend=args.backend, source="default") + + backend, model = _parse_model_selector(args.chat_model, args.backend) + return ModelSelection(model=model, backend=backend, source="flag") + + +def _resolve_embedding_selection(args: argparse.Namespace) -> ModelSelection: + if args.embedding_model is None: + return ModelSelection(model=None, backend=args.backend, source="default") + + backend, model = _parse_model_selector(args.embedding_model, args.backend) + return ModelSelection(model=model, backend=backend, source="flag") + + +def resolve_backend_config(args: argparse.Namespace) -> BackendConfig: + """Resolve parsed CLI args into a full backend configuration object.""" + chat = _resolve_chat_selection(args) + embedding = _resolve_embedding_selection(args) + + # Resolve backend names with defaults via registry + resolved_chat_backend = registry.resolve_backend_name(chat.backend) + resolved_embedding_backend = registry.resolve_backend_name(embedding.backend) + primary_backend = registry.resolve_backend_name(args.backend) + + # If no chat model is provided, use the backend's default + if chat.model is None: + chat_backend = registry.get(resolved_chat_backend) + chat_model = ( + getattr(chat_backend, "_default_chat_model", None) + if chat_backend + else None + ) + chat_model_source = "backend-default" + else: + chat_model = chat.model + chat_model_source = chat.source + + # If no embedding model is provided, use the backend's default + if embedding.model is None: + embedding_backend = registry.get(resolved_embedding_backend) + embedding_model = ( + getattr(embedding_backend, "_default_embedding_model", None) + if embedding_backend + else None + ) + embedding_model_source = "backend-default" + else: + embedding_model = embedding.model + embedding_model_source = embedding.source + + return BackendConfig( + backend=primary_backend, + chat_model=chat_model, + embedding_model=embedding_model, + pipeline=PipelineOptions( + dry_run=args.dry_run, + mode=args.pipeline, + staged_mode=args.staged_mode, + delay_secs=args.delay_secs, + ), + routing=ModelRouting( + chat_backend=resolved_chat_backend, + embedding_backend=resolved_embedding_backend, + chat_source=chat_model_source, + embedding_source=embedding_model_source, + ), + ) + + +def validate_cli_args(parser: argparse.ArgumentParser, args: argparse.Namespace) -> None: + """Validate cross-argument invariants before startup probing or processing.""" + if args.staged_mode and args.pipeline != "staged": + parser.error("--staged-mode is only valid when --pipeline staged is set.") + + if args.staged_mode == "chat-only" and args.embedding_model: + parser.error("--staged-mode chat-only cannot be combined with --embedding-model.") + + +def build_arg_parser() -> argparse.ArgumentParser: + """Build and return the command-line argument parser.""" + prog = os.environ.get("RELEASE_ANNOUNCEMENT_PROG") + parser = argparse.ArgumentParser( + description="Progressive Release Announcement Generator", + prog=prog, + ) + parser.add_argument( + "start", + help="Starting boundary, or upper bound if end is omitted" + " (e.g. pr3409 or v3.11.0)", + ) + parser.add_argument( + "end", + nargs="?", + help="Ending boundary (e.g. pr3500 or HEAD). Defaults to start if omitted.", + ) + parser.add_argument("--file", required=True, help="Markdown file to update") + parser.add_argument( + "--prompt", + default=DEFAULT_PROMPT_FILE, + help="YAML prompt template file", + ) + parser.add_argument( + "--backend", + default="ollama", + help=( + "Default model provider when no provider prefix is given in --chat-model or " + "--embedding-model. Accepts any registered provider name; defaults to 'ollama'. " + "Affects both model selection and capability probing." + ), + ) + parser.add_argument( + "--chat-model", + "--model", + dest="chat_model", + default=None, + help=( + "Chat model (alias: --model). Supports an optional provider prefix separated " + "by '/' (e.g. 'github/gpt-4o', 'ollama/mistral-large-3:675b-cloud'). " + "Without a prefix the provider from --backend is used. " + "Omit entirely to use that provider's default model. " + "Examples: --chat-model gpt-4o, --chat-model github/gpt-4o, " + "--model ollama/mistral-large-3:675b-cloud" + ), + ) + parser.add_argument( + "--embedding-model", + "--embed", + dest="embedding_model", + default=None, + help=( + "Embedding model (alias: --embed). Supports the same optional provider prefix " + "syntax as --chat-model, separated by '/' " + "(e.g. 'github/text-embedding-3-small', 'ollama/all-minilm'). " + "Without a prefix the provider from --backend is used. " + "A trailing '/' with no model name (e.g. '--embed github/') uses that " + "provider's default embedding model. " + "Omit entirely if embeddings are not required. " + "Examples: --embed all-minilm, --embed github/, " + "--embedding-model github/text-embedding-3-small" + ), + ) + parser.add_argument( + "--delay-secs", + type=int, + default=int(os.getenv("DELAY_SECS", "30")), + help="Seconds to sleep before each PR is processed", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Show which PRs would be processed/skipped without calling the LLM", + ) + parser.add_argument( + "--pipeline", + default="staged", + choices=["legacy", "staged"], + help=( + "Preprocessing pipeline mode. " + "'legacy': single-shot — raw PR data sent directly to the LLM in one call. " + "'staged': progressive distillation — PRs are chunked, extracted, consolidated, " + "and classified before the final LLM call (default)." + ), + ) + parser.add_argument( + "--staged-mode", + default=None, + choices=["chat-only", "embedding-assisted"], + help=( + "Staged-mode execution preference. Valid only with --pipeline staged. " + "If unset, mode is auto-selected from runtime capabilities." + ), + ) + parser.add_argument( + "--log-level", + default="INFO", + choices=["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"], + help="Set log verbosity: CRITICAL, ERROR, WARNING, INFO, DEBUG, TRACE (default: INFO)", + ) + return parser diff --git a/tools/release_announcement/src/release_announcement/distillation.py b/tools/release_announcement/src/release_announcement/distillation.py new file mode 100644 index 0000000000..72366a77ba --- /dev/null +++ b/tools/release_announcement/src/release_announcement/distillation.py @@ -0,0 +1,987 @@ +"""Staged-distillation pipeline: schemas, adapter protocol, and shared helpers. + +This module defines the type foundation and adapter interface for the staged +PR-discussion distillation pipeline. It includes Pydantic models for signal +extraction and classification, the abstract backend adapter protocol, and +shared JSON parsing helpers that all adapters use after unwrapping their +provider-specific response envelopes. + +No pipeline orchestration code is included in this module; orchestration +and phase sequencing are implemented in later substeps. +""" + +from __future__ import annotations + +import json +import time +from dataclasses import dataclass +from typing import TYPE_CHECKING, Any, Literal, Protocol, runtime_checkable + +from pydantic import BaseModel, ConfigDict, Field +from .app_logger import logger + +if TYPE_CHECKING: + from .cli_config import BackendConfig + + +PARSE_EXCEPTIONS = ( + RuntimeError, + ValueError, + TypeError, + KeyError, + IndexError, + json.JSONDecodeError, +) + + +# ============================================================================ +# Pydantic Models (Signal Extraction and Classification) +# ============================================================================ + + +class Signal(BaseModel): + """Extracted user-visible signal from PR discussion. + + Machine-targeted JSON schema capturing the essential user-facing + information from one section of the PR discussion thread. + """ + + change: str = Field( + ..., + description="User-visible change description", + ) + impact: str = Field( + ..., + description="Severity or scope of the change (e.g., 'high', 'medium', 'low')", + ) + users_affected: str = Field( + ..., + description="User groups affected (e.g., 'server operators', 'musicians')", + ) + confidence: str = Field( + ..., + description="How certain the extraction is (e.g., 'high', 'medium', 'low')", + ) + final_outcome: bool = Field( + ..., + description="Whether this reflects a final decision (True) or early speculation (False)", + ) + + model_config = ConfigDict(extra="allow") + + +class ClassifiedSignal(BaseModel): + """Classified signal assigned to a release-note category. + + Result of the classification stage: a Signal with its assigned category. + """ + + signal: Signal = Field( + ..., + description="The original extracted signal", + ) + category: Literal["internal", "minor", "targeted", "major", "no_user_facing_changes"] = Field( + ..., + description="Release-note category assignment", + ) + + model_config = ConfigDict(extra="allow") + + +class ClassifiedSignals(BaseModel): + """Complete classification result for a set of signals. + + Returned by the classification stage; can be empty if no user-facing + changes were identified. + """ + + classified: list[ClassifiedSignal] = Field( + default_factory=list, + description="List of classified signals, empty if no user-facing changes", + ) + summary: str = Field( + default="", + description="Optional summary of the classification result", + ) + + model_config = ConfigDict(extra="allow") + + +# ============================================================================ +# Data Classes (Chunking and Metadata) +# ============================================================================ + + +@dataclass +class Chunk: + """One ordered segment of PR discussion. + + Represents a contiguous section of the PR discussion with associated + metadata for relevance tracking and source attribution. + """ + + text: str + """The actual content text of this chunk.""" + + source: str = "unknown" + """Source identifier (e.g., 'pr_body', 'comment_123', 'review_456').""" + + relevance_score: float = 0.0 + """Relevance score assigned by ranking (embedding, chat, or positional).""" + + chunk_index: int = 0 + """Original discussion order index (preserved through selection).""" + + +@dataclass +class DistilledContextMetadata: + """Metadata about the distilled context production.""" + + pr_number: int = 0 + """PR number being processed.""" + + total_chunks: int = 0 + """Total chunks produced before relevance selection.""" + + selected_chunks: int = 0 + """Number of chunks selected after relevance filtering.""" + + extraction_phase_duration_ms: float = 0.0 + """Time spent in extraction phase (milliseconds).""" + + consolidation_phase_duration_ms: float = 0.0 + """Time spent in consolidation phase (milliseconds).""" + + classification_phase_duration_ms: float = 0.0 + """Time spent in classification phase (milliseconds).""" + + +@dataclass(frozen=True) +class DistillationPrompts: + """Prompt bundles for staged distillation phases.""" + + extraction: list[dict[str, str]] + consolidation: list[dict[str, str]] + classification: list[dict[str, str]] + ranking: list[dict[str, str]] + + +@dataclass(frozen=True) +class DistillationOptions: + """Execution tuning options for staged distillation orchestration.""" + + max_direct_consolidation_chunks: int = 20 + max_ranking_chunks: int = 30 + maintainer_keywords: list[str] | None = None + last_n_fallback_chunks: int = 3 + """Time spent in classification phase (milliseconds).""" + + +@dataclass +class DistilledContext: + """Structured staged-preprocessing output passed to prompt builders in later steps.""" + + summary: str + """Summary of key changes from the distillation pipeline.""" + + structured_signals: list[dict[str, Any]] + """Signals classified and ready for release note rendering.""" + + classification: dict[str, Any] + """Classification result with category breakdowns.""" + + metadata: dict[str, Any] + """Pipeline execution metadata (timing, chunk counts, etc.).""" + + +def default_fallback_signal(source: str) -> Signal: + """Return a deterministic fallback signal when parsing fails.""" + return Signal( + change=f"Change from {source[:30]}", + impact="low", + users_affected="some users", + confidence="medium", + final_outcome=False, + ) + + +def metadata_to_dict(metadata: DistilledContextMetadata) -> dict[str, Any]: + """Convert metadata dataclass to stable dict representation.""" + return { + "pr_number": metadata.pr_number, + "total_chunks": metadata.total_chunks, + "selected_chunks": metadata.selected_chunks, + "extraction_phase_duration_ms": metadata.extraction_phase_duration_ms, + "consolidation_phase_duration_ms": metadata.consolidation_phase_duration_ms, + "classification_phase_duration_ms": metadata.classification_phase_duration_ms, + } + + +def classified_signals_to_dicts(classified: ClassifiedSignals) -> list[dict[str, Any]]: + """Flatten classified signal objects into rendering-friendly dictionaries.""" + signals_dicts = [] + for classified_signal in classified.classified: + signal_dict = classified_signal.signal.model_dump() + signal_dict["category"] = classified_signal.category + signals_dicts.append(signal_dict) + return signals_dicts + + +def build_distilled_context( + classified: ClassifiedSignals, + metadata: DistilledContextMetadata, + classification_payload: dict[str, Any] | None = None, +) -> DistilledContext: + """Build final DistilledContext payload with shared metadata formatting.""" + signals_dicts = classified_signals_to_dicts(classified) + return DistilledContext( + summary=classified.summary, + structured_signals=signals_dicts, + classification=classification_payload or classified.model_dump(), + metadata=metadata_to_dict(metadata), + ) + + +# ============================================================================ +# Shared Parsing Helpers +# ============================================================================ + + +def _parse_signal_list(content: str) -> list[Signal]: + """Parse and validate a list of Signal objects from LLM response content. + + The LLM is expected to produce JSON array of signals matching the + Signal schema. This helper extracts the JSON, validates it against + the Pydantic model, and propagates validation errors. + + Args: + content: Raw response content string (may include preamble/postamble) + + Returns: + List of validated Signal objects + + Raises: + ValueError: If JSON array cannot be found or extracted + pydantic.ValidationError: If JSON does not match Signal schema + """ + # Try to extract JSON array from response, handling common LLM patterns. + # First try: look for ```json...``` code block. + if "```json" in content: + start = content.index("```json") + len("```json") + end = content.find("```", start) + if end > start: + json_text = content[start:end].strip() + else: + json_text = content + elif "```" in content: + # Fallback: generic code block + start = content.index("```") + 3 + end = content.find("```", start) + if end > start: + json_text = content[start:end].strip() + else: + json_text = content + else: + json_text = content + + # Try to parse as JSON array + try: + data = json.loads(json_text) + except json.JSONDecodeError as e: + raise ValueError( + f"Failed to parse response as JSON: {e}\n" + f"Response text (first 200 chars): {content[:200]}" + ) from e + + if not isinstance(data, list): + raise ValueError( + f"Expected JSON array at top level, got {type(data).__name__}" + ) + + # Validate each element against Signal schema + signals = [] + for idx, item in enumerate(data): + try: + signals.append(Signal(**item)) + except Exception as e: + raise ValueError( + f"Signal validation failed at index {idx}: {e}\n" + f"Item: {item}" + ) from e + + return signals + + +def _parse_classified_signals(content: str) -> ClassifiedSignals: + """Parse and validate ClassifiedSignals result from LLM response content. + + The LLM is expected to produce a JSON object matching the ClassifiedSignals + schema, which contains a list of classified signals and optional summary. + + Args: + content: Raw response content string (may include preamble/postamble) + + Returns: + Validated ClassifiedSignals object + + Raises: + ValueError: If JSON object cannot be found or extracted + pydantic.ValidationError: If JSON does not match ClassifiedSignals schema + """ + # Try to extract JSON object from response, handling common LLM patterns. + if "```json" in content: + start = content.index("```json") + len("```json") + end = content.find("```", start) + if end > start: + json_text = content[start:end].strip() + else: + json_text = content + elif "```" in content: + # Fallback: generic code block + start = content.index("```") + 3 + end = content.find("```", start) + if end > start: + json_text = content[start:end].strip() + else: + json_text = content + else: + json_text = content + + # Try to parse as JSON object + try: + data = json.loads(json_text) + except json.JSONDecodeError as e: + raise ValueError( + f"Failed to parse response as JSON: {e}\n" + f"Response text (first 200 chars): {content[:200]}" + ) from e + + if not isinstance(data, dict): + raise ValueError( + f"Expected JSON object at top level, got {type(data).__name__}" + ) + + # Validate against ClassifiedSignals schema + try: + return ClassifiedSignals(**data) + except Exception as e: + raise ValueError( + f"ClassifiedSignals validation failed: {e}\n" + f"Data: {data}" + ) from e + + +# ============================================================================ +# Backend Adapter Protocol +# ============================================================================ + + +@runtime_checkable +class DistillationAdapter(Protocol): + """Abstract backend interface for staged distillation pipeline. + + All backend implementations (Ollama, GitHub, Dummy) must implement this + protocol. The shared pipeline orchestration in distillation.py calls + these five methods; adapters handle only provider-specific API details + and envelope unwrapping. All phase sequencing, fallback logic, retry + policy, and error propagation live in the shared pipeline. + + The shared pipeline checks `isinstance(adapter, DistillationAdapter)` at + startup to catch missing methods immediately. + """ + + def select_relevant_chunks( + self, + chunks: list[Chunk], + use_embeddings: bool, + ranking_prompts: list[dict[str, str]], + ) -> list[Chunk]: + """Rank and filter chunks by relevance. + + Return chunks in original discussion order with relevance metadata + populated. This is the provider's ranking call; the shared pipeline + owns fallback sequencing and batching for large chunk sets. + + Args: + chunks: Ordered list of discussion chunks to rank + use_embeddings: Whether embeddings API is available for ranking. + When True, use backend embeddings API; when False, use a + single chat-based ranking call. The shared pipeline owns + ranking_prompts: Prompt list for ranking phase + batching and positional fallback if chat-ranking fails. + + Returns: + Ranked subset of input chunks in original discussion order, + with relevance_score populated. May include all or a filtered + subset depending on backend ranking logic. + + Raises: + Exception: Any provider failure; propagates to shared pipeline + for fallback/retry logic. + """ + + def extract_chunk_signals( + self, + chunk: Chunk, + extraction_prompts: list[dict[str, str]], + ) -> list[Signal]: + """Extract user-visible signals from one chunk. + + Send extraction prompt messages + chunk.text user message + to provider. Extract response content, pass to _parse_signal_list, + and return the validated signals. + + Args: + chunk: Single discussion chunk to extract from + extraction_prompts: Prompt list for extraction phase + + Returns: + List of extracted Signal objects from this chunk + + Raises: + ValueError: If response cannot be parsed as Signal JSON + Exception: Any provider failure + """ + + def consolidate_signals( + self, + signals: list[Signal], + consolidation_prompts: list[dict[str, str]], + ) -> list[Signal]: + """Consolidate and deduplicate extracted signals. + + Send consolidation prompt messages + json(signals) user message + to provider. Extract response content, pass to _parse_signal_list, + and return the consolidated signals. + + May prefer later final outcomes over earlier speculation based on + the consolidation_prompt guidance. + + Args: + signals: List of extracted signals to consolidate + consolidation_prompts: Prompt list for consolidation phase + + Returns: + Consolidated and deduplicated list of Signal objects + + Raises: + ValueError: If response cannot be parsed as Signal JSON + Exception: Any provider failure + """ + + def classify_signals( + self, + signals: list[Signal], + classification_prompts: list[dict[str, str]], + ) -> ClassifiedSignals: + """Classify signals into release-note categories. + + Send classification prompt messages + json(signals) user message + to provider. Extract response content, pass to _parse_classified_signals, + and return the classified result. + + Each signal is assigned one of: internal, minor, targeted, major, + or no_user_facing_changes. Empty result (no user-facing changes) + is valid and must not be treated as failure. + + Args: + signals: List of consolidated signals to classify + classification_prompts: Prompt list for classification phase + + Returns: + ClassifiedSignals object with signals assigned to categories + + Raises: + ValueError: If response cannot be parsed as ClassifiedSignals JSON + Exception: Any provider failure + """ + + def render_final_context( + self, + classified: ClassifiedSignals, + metadata: DistilledContextMetadata, + ) -> Any: + """Assemble final DistilledContext from classification results. + + Combine classified signals, metadata, and any backend-specific + context into the DistilledContext dataclass used by downstream + prompt builders. No provider call is required. + + Args: + classified: Classification stage result + metadata: Metadata about distillation pipeline execution + + Returns: + DistilledContext object ready for use in prompt building + """ + + def get_adapter_tag(self) -> str: + """Return a short string tag identifying this adapter for logging. + + Returns: + String tag (e.g., "ollama", "github", "dummy") used in phase logs + """ +# ============================================================================ +# Pipeline Orchestration +# ============================================================================ + + +def _ordered_chunk(pr_data: dict[str, Any]) -> list[Chunk]: + """Convert PR discussion data into ordered discussion chunks. + + Creates an ordered sequence of chunks from PR body and comments, + preserving the discussion order. Each chunk includes source attribution + for diagnostics and signal tracing. + + Args: + pr_data: PR data dict from _fetch_pr_data (contains 'number', 'title', + 'body', 'comments', 'reviews') + + Returns: + List of Chunk objects in discussion order, indexed consecutively + """ + chunks = [] + chunk_index = 0 + + # First chunk: PR title and body + title = pr_data.get("title", "") + body = pr_data.get("body", "") + pr_number = pr_data.get("number", 0) + + if title or body: + combined = f"Title: {title}\n\n{body}" if title else body + chunks.append( + Chunk( + text=combined.strip(), + source=f"pr_{pr_number}_body", + chunk_index=chunk_index, + ) + ) + chunk_index += 1 + + # Remaining chunks: comments (including inline review comments) + comments = pr_data.get("comments", []) + for comment_idx, comment in enumerate(comments): + if isinstance(comment, dict): + # Comment from timeline or inline review + comment_text = comment.get("body") or comment.get("text") or "" + if comment_text: + chunks.append( + Chunk( + text=comment_text, + source=f"pr_{pr_number}_comment_{comment_idx}", + chunk_index=chunk_index, + ) + ) + chunk_index += 1 + + return chunks + + +def _select_relevant_chunks_with_fallback( + chunks: list[Chunk], + adapter: DistillationAdapter, + use_embeddings: bool, + ranking_prompts: list[dict[str, str]], + options: DistillationOptions | None = None, + **legacy_kwargs: Any, +) -> list[Chunk]: + """Rank and filter chunks by relevance with fallback chain. + + Attempts to rank chunks using embeddings (if available) or chat-ranking. + On failure, falls back to positional selection (first, last N, and + maintainer-keyword chunks). + + Args: + chunks: Ordered list of chunks to rank + adapter: Distillation adapter implementing the ranking call + use_embeddings: Whether to try embeddings-based ranking + ranking_prompts: Prompt list for chat-based ranking + max_ranking_chunks: Max chunks per chat-ranking batch (default 30) + maintainer_keywords: Keywords indicating maintainer decisions + last_n_fallback_chunks: Number of trailing chunks to retain in fallback + + Returns: + Ranked subset of chunks in original discussion order with + relevance_score populated. Preserves chunk_index for ordering. + """ + options = options or DistillationOptions() + max_ranking_chunks = legacy_kwargs.get( + "max_ranking_chunks", + options.max_ranking_chunks, + ) + maintainer_keywords = legacy_kwargs.get( + "maintainer_keywords", + options.maintainer_keywords, + ) + last_n_fallback_chunks = legacy_kwargs.get( + "last_n_fallback_chunks", + options.last_n_fallback_chunks, + ) + + if maintainer_keywords is None: + maintainer_keywords = ["merged", "agreed", "closing", "decided", "approved"] + + backend_tag = adapter.get_adapter_tag() + if use_embeddings and callable(getattr(adapter, "get_embedding_adapter_tag", None)): + backend_tag = str(getattr(adapter, "get_embedding_adapter_tag")()) + + def _positional_fallback() -> list[Chunk]: + """Select chunks positionally when ranking is unavailable.""" + fallback_start = time.perf_counter() + logger.debug( + " staged.relevance_selection.fallback_to_positional.start " + f"chunks={len(chunks)} route=[{'embed' if use_embeddings else 'chat'}:{backend_tag}] " + f"last_n_fallback_chunks={last_n_fallback_chunks} " + f"keyword_count={len(maintainer_keywords)}" + ) + logger.info( + " staged.relevance_selection.fallback_to_positional " + f"chunks={len(chunks)}" + ) + + selected = [] + selected_indices = set() + + if chunks: + selected.append(chunks[0]) + selected_indices.add(0) + + start_last = max(0, len(chunks) - last_n_fallback_chunks) + for i in range(start_last, len(chunks)): + if i not in selected_indices: + selected.append(chunks[i]) + selected_indices.add(i) + + keyword_lower = [kw.lower() for kw in maintainer_keywords] + for i, chunk in enumerate(chunks): + if i not in selected_indices and any(kw in chunk.text.lower() for kw in keyword_lower): + selected.append(chunk) + selected_indices.add(i) + + selected.sort(key=lambda chunk: chunk.chunk_index) + logger.trace( + " staged.relevance_selection.fallback_to_positional.selected " + f"chunk_indices={[chunk.chunk_index for chunk in selected]}" + ) + logger.debug( + " staged.relevance_selection.fallback_to_positional.end " + f"chunks={len(selected)} " + f"elapsed_ms={(time.perf_counter() - fallback_start) * 1000:.2f}" + ) + return selected + + try: + # Embeddings-based ranking can score all chunks in one provider call. + if use_embeddings or len(chunks) <= max_ranking_chunks: + return adapter.select_relevant_chunks(chunks, use_embeddings, ranking_prompts) + + if max_ranking_chunks <= 0: + raise ValueError("max_ranking_chunks must be greater than zero") + + logger.info( + f" staged.relevance_selection.batching chunks={len(chunks)} " + f"batch_size={max_ranking_chunks}" + ) + + ranked_batches: list[Chunk] = [] + for batch_start in range(0, len(chunks), max_ranking_chunks): + batch = chunks[batch_start: batch_start + max_ranking_chunks] + ranked_batches.extend( + adapter.select_relevant_chunks(batch, use_embeddings, ranking_prompts) + ) + + ranked_batches.sort(key=lambda chunk: chunk.chunk_index) + return ranked_batches + + except PARSE_EXCEPTIONS as e: + # Log the failure with context + logger.warning( + f" staged.relevance_selection.failed use_embeddings={use_embeddings}: {e}" + ) + return _positional_fallback() + + +def _extract_chunk_signals( + chunks: list[Chunk], + adapter: DistillationAdapter, + extraction_prompts: list[dict[str, str]], +) -> list[Signal]: + """Extract signals from all selected chunks. + + Calls adapter.extract_chunk_signals for each chunk and aggregates + the results into a single signal list. + + Args: + chunks: Selected chunks to process + adapter: Distillation adapter implementing the extraction call + extraction_prompts: Prompt list for extraction stage + + Returns: + Aggregated list of extracted signals from all chunks + + Raises: + Exception: If extraction fails for any chunk; includes chunk index in error + """ + all_signals = [] + + for chunk_idx, chunk in enumerate(chunks): + try: + signals = adapter.extract_chunk_signals(chunk, extraction_prompts) + all_signals.extend(signals) + except Exception as e: + logger.error( + f" staged.extraction.chunk_failed chunk_index={chunk_idx} " + f"source={chunk.source}: {e}" + ) + raise + + return all_signals + + +def _consolidate_signals_hierarchical( + signals: list[Signal], + adapter: DistillationAdapter, + consolidation_prompts: list[dict[str, str]], + max_direct_consolidation_chunks: int = 20, +) -> list[Signal]: + """Consolidate extracted signals with hierarchical batching for large sets. + + When the signal count exceeds max_direct_consolidation_chunks, runs + consolidation in sequential batches and then consolidates the batch + results (recursively if needed) before returning. + + Args: + signals: Extracted signals to consolidate + adapter: Distillation adapter implementing consolidation call + consolidation_prompts: Prompt list for consolidation stage + max_direct_consolidation_chunks: Threshold for batching (default 20) + + Returns: + Consolidated and deduplicated signal list + + Raises: + Exception: If consolidation fails at any batch; includes batch number in error + """ + if len(signals) == 0: + return [] + + # Base case: small enough to consolidate directly + if len(signals) <= max_direct_consolidation_chunks: + return adapter.consolidate_signals(signals, consolidation_prompts) + + # Recursive case: batch consolidation + logger.info( + f" staged.consolidation.batching signals={len(signals)} " + f"batch_size={max_direct_consolidation_chunks}" + ) + + batch_results = [] + batch_num = 0 + + # Process each batch + for i in range(0, len(signals), max_direct_consolidation_chunks): + batch = signals[i: i + max_direct_consolidation_chunks] + batch_num += 1 + + try: + consolidated_batch = adapter.consolidate_signals(batch, consolidation_prompts) + batch_results.extend(consolidated_batch) + except Exception as e: + logger.error( + f" staged.consolidation.batch_failed batch={batch_num} " + f"batch_size={len(batch)}: {e}" + ) + raise + + # Recursively consolidate batch results if still too large + if len(batch_results) > max_direct_consolidation_chunks: + return _consolidate_signals_hierarchical( + batch_results, + adapter, + consolidation_prompts, + max_direct_consolidation_chunks, + ) + + return batch_results + + +def _trace_signals(phase: str, signals: list[Signal]) -> None: + """Log each signal at TRACE level after a pipeline phase.""" + for i, sig in enumerate(signals): + logger.trace( + f" staged.{phase}.signal[{i}] " + f"change={sig.change!r} impact={sig.impact} " + f"confidence={sig.confidence} final_outcome={sig.final_outcome}" + ) + + +def _trace_classified(classified: ClassifiedSignals) -> None: + """Log each classified signal at TRACE level after classification.""" + for i, cs in enumerate(classified.classified): + logger.trace( + f" staged.classification.signal[{i}] " + f"category={cs.category} change={cs.signal.change!r}" + ) + if not classified.classified: + logger.trace(" staged.classification.result no_user_facing_changes") + + +def run_distillation_pipeline( + pr_data: dict[str, Any], + adapter: DistillationAdapter, + backend_config: BackendConfig, + prompts: DistillationPrompts, + options: DistillationOptions | None = None, +) -> Any: + """Orchestrate the full staged distillation pipeline. + + Executes ordered chunking, relevance selection (with fallback chain), + per-chunk extraction, hierarchical consolidation, and classification + in sequence. Returns a DistilledContext suitable for prompt building. + + All errors from adapter calls include full request/response context + for diagnostics. Phase logging includes timing and chunk counts. + + Args: + pr_data: PR data structure from _fetch_pr_data + adapter: Distillation adapter (must implement DistillationAdapter protocol) + backend_config: Backend configuration with capabilities (supports_embeddings) + prompts: Role-keyed prompt bundles for each pipeline phase. + options: Execution tuning for ranking/consolidation fallback behavior. + + Returns: + DistilledContext object ready for use by downstream prompt builders. + Note: DistilledContext is imported from main.py by the caller. + + Raises: + Exception: If any phase fails after fallback exhaustion + """ + metadata = DistilledContextMetadata(pr_number=pr_data.get("number", 0)) + + options = options or DistillationOptions() + + if not prompts.extraction: + raise ValueError("Extraction prompts cannot be empty") + if not prompts.consolidation: + raise ValueError("Consolidation prompts cannot be empty") + if not prompts.classification: + raise ValueError("Classification prompts cannot be empty") + if not prompts.ranking: + raise ValueError("Ranking prompts cannot be empty") + + # Phase 1: Ordered chunking + logger.debug(" staged.chunking.start") + phase_start = time.perf_counter() + + chunks = _ordered_chunk(pr_data) + metadata.total_chunks = len(chunks) + + logger.debug( + " staged.chunking.end " + f"chunks={len(chunks)} elapsed_ms={(time.perf_counter() - phase_start) * 1000:.2f}" + ) + + # Phase 2: Relevance selection + use_embeddings = getattr(backend_config, "capabilities", None) and getattr( + backend_config.capabilities, "supports_embeddings", False + ) + backend_tag = adapter.get_adapter_tag() + if use_embeddings and callable(getattr(adapter, "get_embedding_adapter_tag", None)): + backend_tag = str(getattr(adapter, "get_embedding_adapter_tag")()) + logger.debug( + " staged.relevance_selection.start " + f"chunks={len(chunks)} " + f"route=[{'embed' if use_embeddings else 'chat'}:{backend_tag}] " + f"use_embeddings={bool(use_embeddings)}" + ) + phase_start = time.perf_counter() + + selected_chunks = _select_relevant_chunks_with_fallback( + chunks, + adapter, + use_embeddings, + prompts.ranking, + options, + ) + metadata.selected_chunks = len(selected_chunks) + + logger.debug( + f" staged.relevance_selection.end chunks={len(selected_chunks)} " + f"elapsed_ms={(time.perf_counter() - phase_start) * 1000:.2f}" + ) + + # Phase 3: Per-chunk extraction + logger.debug( + " staged.extraction.start " + f"chunks={len(selected_chunks)} route=[chat:{adapter.get_adapter_tag()}]" + ) + phase_start = time.perf_counter() + + signals = _extract_chunk_signals( + selected_chunks, + adapter, + prompts.extraction, + ) + + metadata.extraction_phase_duration_ms = (time.perf_counter() - phase_start) * 1000 + logger.debug( + f" staged.extraction.end signals={len(signals)} " + f"elapsed_ms={metadata.extraction_phase_duration_ms:.2f}" + ) + _trace_signals("extraction", signals) + + # Phase 4: Hierarchical consolidation + logger.debug( + " staged.consolidation.start " + f"signals={len(signals)} route=[chat:{adapter.get_adapter_tag()}]" + ) + phase_start = time.perf_counter() + + signals = _consolidate_signals_hierarchical( + signals, + adapter, + prompts.consolidation, + options.max_direct_consolidation_chunks, + ) + + metadata.consolidation_phase_duration_ms = (time.perf_counter() - phase_start) * 1000 + logger.debug( + f" staged.consolidation.end signals={len(signals)} " + f"elapsed_ms={metadata.consolidation_phase_duration_ms:.2f}" + ) + _trace_signals("consolidation", signals) + + # Phase 5: Classification + logger.debug( + " staged.classification.start " + f"signals={len(signals)} route=[chat:{adapter.get_adapter_tag()}]" + ) + phase_start = time.perf_counter() + + classified = adapter.classify_signals( + signals, + prompts.classification, + ) + + metadata.classification_phase_duration_ms = (time.perf_counter() - phase_start) * 1000 + logger.debug( + f" staged.classification.end signals={len(classified.classified)} " + f"elapsed_ms={metadata.classification_phase_duration_ms:.2f}" + ) + _trace_classified(classified) + + # Phase 6: Render final context + logger.debug( + " staged.rendering.start " + f"route=[chat:{adapter.get_adapter_tag()}]" + ) + phase_start = time.perf_counter() + + distilled_context = adapter.render_final_context(classified, metadata) + logger.debug( + " staged.rendering.end " + f"elapsed_ms={(time.perf_counter() - phase_start) * 1000:.2f}" + ) + + return distilled_context diff --git a/tools/release_announcement/src/release_announcement/main.py b/tools/release_announcement/src/release_announcement/main.py new file mode 100644 index 0000000000..64d046b869 --- /dev/null +++ b/tools/release_announcement/src/release_announcement/main.py @@ -0,0 +1,1003 @@ +#!/usr/bin/env python3 +############################################################################## +# Copyright (c) 2026 +# +# Author(s): +# The Jamulus Development Team +# +############################################################################## +# +# This program is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free Software +# Foundation; either version 2 of the License, or (at your option) any later +# version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +# +############################################################################## + +""" +Given a range of git history, iterate over PR merges and +update the specified release announcement file. + +This script supports multiple LLM backends: +- Ollama (local models, --backend ollama) +- GitHub Models API for CLI use (--backend github, resolves token via env or 'gh auth token') +- GitHub Models API for workflow use (--backend actions, requires GITHUB_TOKEN in step env) +""" + +import argparse +import json +import os +import re +import subprocess +import sys +import time +from datetime import datetime +from typing import Any, cast + +import yaml + +from .cli_config import ( + BackendCapabilities, + BackendConfig, + build_arg_parser, + resolve_backend_config as resolve_backend_config_impl, + validate_cli_args as validate_cli_args_impl, +) +from .capability_probing import ( + CapabilityProbeDeps, + probe_capabilities as probe_capabilities_impl, + validate_mode as validate_mode_impl, +) +from .app_logger import logger, GitOperationError, BackendValidationError +from .registry import registry +from .token_utils import normalize_github_token, resolve_github_token +from .distillation import DistilledContext, DistillationPrompts, run_distillation_pipeline +from .distillation import PARSE_EXCEPTIONS +from .skip_rules import get_skip_reason +from .staged_routing_adapter import StagedRoutingAdapter +from .backends.ollama_backend import ( + call_ollama_model, +) +from .backends.github_backend import ( + GitHubCallOptions, + call_github_models_api as call_github_models_api_backend, +) + +GITHUB_MODELS_DEFAULT_CHAT_MODEL = "openai/gpt-4o" +GITHUB_MODELS_DEFAULT_EMBEDDING_MODEL = "openai/text-embedding-3-small" +GITHUB_MODELS_CHAT_ENDPOINT = "https://models.github.ai/inference/chat/completions" +GITHUB_MODELS_EMBEDDINGS_ENDPOINT = "https://models.github.ai/inference/embeddings" +GITHUB_MODELS_CHAT_TOKEN_LIMIT = 7500 +"""Conservative token budget for a single GitHub Models chat-completions call.""" + +_PR_MERGE_RE = re.compile(r"^Merge pull request #(\d+) from ") +_upstream_repo_cache: dict = {} +_current_repo_name_cache: dict = {} +STAGED_PREPROCESS_EXCEPTIONS = PARSE_EXCEPTIONS + (OSError, AttributeError) +HANDLED_MAIN_EXCEPTIONS = ( + GitOperationError, + BackendValidationError, + FileNotFoundError, + yaml.YAMLError, + RuntimeError, +) +UNEXPECTED_MAIN_EXCEPTIONS = ( + ValueError, + TypeError, + KeyError, + IndexError, + OSError, + AttributeError, +) + +def get_gh_auth_env() -> dict[str, str]: + """Get subprocess env with normalized token for gh CLI calls if provided.""" + env = os.environ.copy() + raw_token = os.getenv("GH_TOKEN") or os.getenv("GITHUB_TOKEN") + if not raw_token: + return env + + token = normalize_github_token(raw_token) + # Set both vars so gh uses a clean value consistently. + env["GH_TOKEN"] = token + env["GITHUB_TOKEN"] = token + return env + + +def get_universal_timestamp(identifier: str) -> str: + """ + Resolves an identifier to an ISO8601 timestamp. + Supports: pr123, Tags, SHAs, and Branches. + """ + target = resolve_identifier_to_git_target(identifier) + + # 2. Get authoritative Git timestamp for the target + try: + return subprocess.check_output( + ["git", "show", "-s", "--format=%cI", target], text=True + ).strip() + except subprocess.CalledProcessError as err: + raise GitOperationError( + f"Could not resolve '{target}' as a Git object: {err}" + ) from err + + +def _get_upstream_repo() -> str | None: + """ + Return the parent repository's 'owner/name' string for the current repo, + or None if the current repo has no parent (i.e. it IS the canonical upstream). + Result is cached after the first call. + """ + if "value" not in _upstream_repo_cache: + try: + raw = subprocess.check_output( + ["gh", "repo", "view", "--json", "parent"], + text=True, + env=get_gh_auth_env(), + ) + parent = json.loads(raw).get("parent") + if parent: + owner = parent["owner"]["login"] + name = parent["name"] + _upstream_repo_cache["value"] = f"{owner}/{name}" + else: + _upstream_repo_cache["value"] = None + except (subprocess.CalledProcessError, json.JSONDecodeError, KeyError): + _upstream_repo_cache["value"] = None + return _upstream_repo_cache["value"] + + +def _get_current_repo_name() -> str | None: + """ + Return the current repo's 'owner/name' string (e.g. 'jamulussoftware/jamulus'). + Used when the current repo has no parent (i.e. it is the canonical upstream). + Result is cached after the first call. + """ + if "value" not in _current_repo_name_cache: + try: + result = subprocess.check_output( + ["gh", "repo", "view", "--json", "nameWithOwner", "--jq", ".nameWithOwner"], + text=True, + env=get_gh_auth_env(), + ) + _current_repo_name_cache["value"] = result.strip() or None + except (subprocess.CalledProcessError, OSError): + _current_repo_name_cache["value"] = None + return _current_repo_name_cache["value"] + + +def _fetch_inline_review_comments(pr_num: int, repo: str) -> list[str]: + """ + Fetch inline code-review thread comments (pull request review comments) via + the GitHub REST API. These are tied to specific lines of code and are NOT + returned by 'gh pr view --json reviews', which only carries the top-level + review submission body (usually empty for code-only reviews). + """ + env = get_gh_auth_env() + bodies: list[str] = [] + page = 1 + while True: + try: + raw = subprocess.check_output( + [ + "gh", "api", + f"/repos/{repo}/pulls/{pr_num}/comments?per_page=100&page={page}", + ], + text=True, + env=env, + ) + except subprocess.CalledProcessError: + break + items = json.loads(raw) + if not items: + break + bodies.extend(item["body"] for item in items if item.get("body")) + if len(items) < 100: + break + page += 1 + return bodies + + +def resolve_identifier_to_git_target(identifier: str) -> str: + """ + Resolve an identifier to a git object SHA or ref. + + Non-pr identifiers (tags, SHAs, branches) are passed straight through to + git — no network call needed. + + pr identifiers refer to a merged PR in the upstream repository. The + upstream is discovered automatically via 'gh repo view --json parent'; if + the current repository has no parent it is itself the upstream. + """ + if identifier.lower().startswith("pr"): + pr_id = identifier[2:] + upstream = _get_upstream_repo() + repo_desc = f"upstream {upstream}" if upstream else "current repo" + logger.info(f" > Resolving PR #{pr_id} via {repo_desc} ...") + try: + cmd = ["gh", "pr", "view", pr_id, "--json", "mergeCommit", "--jq", ".mergeCommit.oid"] + if upstream: + cmd += ["--repo", upstream] + target = subprocess.check_output(cmd, text=True, env=get_gh_auth_env()).strip() + if not target or target == "null": + raise GitOperationError(f"PR #{pr_id} has no merge commit (is it merged?)") + return target + except subprocess.CalledProcessError as err: + raise GitOperationError( + f"Failed to fetch PR #{pr_id} from {repo_desc}: {err}" + ) from err + + return identifier + + +def get_previous_commit_timestamp(identifier: str) -> str: + """Get the timestamp of the commit immediately before the resolved target.""" + target = resolve_identifier_to_git_target(identifier) + previous_target = f"{target}~" + + try: + return subprocess.check_output( + ["git", "show", "-s", "--format=%cI", previous_target], text=True + ).strip() + except subprocess.CalledProcessError as err: + raise GitOperationError( + f"Could not resolve previous commit for '{identifier}': {err}" + ) from err + + +def parse_iso_datetime(value: str) -> datetime: + """Parse ISO8601 timestamps from Git/GitHub into timezone-aware datetime.""" + return datetime.fromisoformat(value.replace("Z", "+00:00")) + + +def get_ordered_pr_list(start_iso: str, end_iso: str) -> list: + """ + Find PRs merged in (start_iso, end_iso] from the local git log. + Parses 'Merge pull request #N' merge commits so it works correctly + regardless of which repository the workflow runs in (no GitHub API used). + Returns list of dicts with 'number', 'title', 'mergedAt', oldest-first. + """ + sep = "\x1e" # ASCII record-separator; safe as git log field delimiter + log_output = subprocess.check_output( + ["git", "log", "--merges", f"--format=tformat:{sep}%cI%n%s%n%b"], text=True + ) + + start_dt = parse_iso_datetime(start_iso) + end_dt = parse_iso_datetime(end_iso) + + prs = [] + for record in log_output.split(sep): + record = record.strip() + if not record: + continue + lines = record.splitlines() + if len(lines) < 2: + continue + + committed_at = lines[0].strip() + subject = lines[1].strip() + match = _PR_MERGE_RE.match(subject) + if not match: + continue + + try: + committed_dt = parse_iso_datetime(committed_at) + except ValueError: + continue + + if committed_dt <= start_dt or committed_dt > end_dt: + continue + + pr_num = int(match.group(1)) + # GitHub merge commits: body line 1 is the PR title; fall back to subject. + title = next((line.strip() for line in lines[2:] if line.strip()), subject) + prs.append({"number": pr_num, "title": title, "mergedAt": committed_at}) + + return sorted(prs, key=lambda x: x["mergedAt"]) + + +def sanitize_pr_data(raw_json: str) -> dict[str, Any]: + """Strips metadata and GitHub noise to save tokens.""" + data = json.loads(raw_json) + # Remove "(Fixes #123)" and "(Closes #123)" + clean_body = re.sub( + r"\(?(Fixes|Closes) #\d+\)?", "", data.get("body", ""), flags=re.IGNORECASE + ) + + return { + "number": data.get("number"), + "title": data.get("title"), + "body": clean_body, + "comments": [c.get("body") for c in data.get("comments", []) if c.get("body")], + "reviews": [r.get("body") for r in data.get("reviews", []) if r.get("body")], + } + + +def load_prompt_template(prompt_file: str) -> dict[str, Any]: + """Load the prompt template from YAML file.""" + try: + with open(prompt_file, "r", encoding="utf-8") as f: + return yaml.safe_load(f) + except FileNotFoundError as exc: + raise FileNotFoundError(f"Prompt file '{prompt_file}' not found") from exc + except yaml.YAMLError as err: + raise yaml.YAMLError(f"Failed to parse prompt file: {err}") + + +def build_ai_prompt( + current_announcement: str, pr_data: dict[str, Any], prompt_template: dict[str, Any] +) -> dict[str, Any]: + """Build the complete AI prompt from template and data.""" + system_prompt = next( + m["content"] for m in prompt_template["messages"] if m["role"] == "system" + ) + + user_content = ( + "Current working announcement:\n====\n" + + current_announcement + + "\n====\n\nNewly merged pull request:\n" + + json.dumps(pr_data, indent=2) + + "\n====\n\nUpdate the Release Announcement to include any user-relevant " + + "changes from this PR.\nReturn the complete updated Markdown document only." + ) + return { + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_content}, + ], + "model": prompt_template.get("model", GITHUB_MODELS_DEFAULT_CHAT_MODEL), + "modelParameters": prompt_template.get("modelParameters", {}), + } + + +def call_github_models_api( + prompt: dict[str, Any], + chat_model_override: str | None = None, + embedding_model_override: str | None = None, +) -> str: + """Call the extracted GitHub backend with common token/config resolution from main.""" + return call_github_models_api_backend( + prompt=prompt, + resolve_token=resolve_github_token, + options=GitHubCallOptions( + chat_model_override=chat_model_override, + embedding_model_override=embedding_model_override, + backend_config={ + "default_chat_model": GITHUB_MODELS_DEFAULT_CHAT_MODEL, + "default_embedding_model": GITHUB_MODELS_DEFAULT_EMBEDDING_MODEL, + "chat_endpoint": GITHUB_MODELS_CHAT_ENDPOINT, + "embeddings_endpoint": GITHUB_MODELS_EMBEDDINGS_ENDPOINT, + "token_limit": GITHUB_MODELS_CHAT_TOKEN_LIMIT, + }, + ), + ) + + +def strip_markdown_fences(text: str) -> str: + """ + Removes ```markdown ... ``` or ``` ... ``` wrappers + that LLMs often add to their responses. + """ + # Remove the opening fence (with or without 'markdown' tag) + text = re.sub(r"^```(markdown)?\n", "", text, flags=re.IGNORECASE) + # Remove the closing fence + text = re.sub(r"\n```$", "", text) + return text.strip() + + +def _resolve_backend_config(args: argparse.Namespace) -> BackendConfig: + return resolve_backend_config_impl(args) + + +def resolve_backend_config(args: argparse.Namespace) -> BackendConfig: + """Public wrapper around backend config resolution.""" + return _resolve_backend_config(args) + + +def validate_cli_args(parser: argparse.ArgumentParser, args: argparse.Namespace) -> None: + """Validate cross-argument invariants before startup probing or processing.""" + validate_cli_args_impl(parser, args) + + +def probe_capabilities(config: BackendConfig) -> BackendCapabilities: + """Probe backend capabilities for the resolved model/backends.""" + deps = CapabilityProbeDeps( + get_backend=registry.get, + log_warning=logger.warning, + ) + return probe_capabilities_impl( + config=config, + deps=deps, + ) + + +def validate_mode(config: BackendConfig) -> None: + """Validate and reconcile pipeline/staged mode against probed capabilities.""" + validate_mode_impl(config, logger.warning) + + +def _load_prompt_file(path: str, role: str = "system") -> str: + """Load a prompt YAML file and return the message content for the specified role. + + For single-role extraction (system only), specify role="system". + For prompts with multiple roles (e.g., ranking with both system and user), + use _load_prompts() to get the full message list instead. + + Args: + path: Path to the prompt YAML file. + role: The message role to extract ("system" or "user"). + + Returns: + The content of the first message matching the specified role. + + Raises: + ValueError: If no message with the specified role is found. + """ + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + messages = data.get("messages", []) + for msg in messages: + if isinstance(msg, dict) and msg.get("role") == role: + content = msg.get("content", "") + logger.trace(f"loaded {role} prompt from: {path}") + return content + raise ValueError(f"No {role} message found in {path}") + + +def _load_prompts(path: str) -> list[dict[str, str]]: + """Load a prompt YAML file and return the full messages list. + + Used for multi-role prompts (e.g., ranking with both system and user messages). + + Args: + path: Path to the prompt YAML file. + + Returns: + List of message dicts with 'role' and 'content' keys. + + Raises: + ValueError: If the file has no 'messages' key or it's not a list. + """ + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + messages = data.get("messages", []) + if not isinstance(messages, list) or len(messages) == 0: + raise ValueError(f"Prompt file {path} has no valid 'messages' list") + logger.trace(f"loaded {len(messages)} message(s) from: {path}") + return messages + + +def _resolve_prompt_dir() -> str: + """Resolve the prompts directory for stage prompt YAML files. + + Tries the package-relative path first (correct for editable/dev installs), + then falls back to a CWD-relative path (correct when run via the shell + script from the repository root, where the package is pip-installed into a + temporary venv outside the repo tree). + """ + pkg_relative = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "..", "prompts")) + if os.path.isdir(pkg_relative): + return pkg_relative + cwd_relative = os.path.join(os.getcwd(), "tools", "release_announcement", "prompts") + if os.path.isdir(cwd_relative): + return cwd_relative + raise FileNotFoundError( + "Could not locate tools/release_announcement/prompts. " + f"Tried package-relative: {pkg_relative} and CWD-relative: {cwd_relative}. " + "Run the script from the repository root." + ) + + +def prepare_pr_context( + pr_data: dict[str, Any], + backend_config: BackendConfig, + pipeline_mode: str, +) -> DistilledContext | None: + """Optional staged preprocessing insertion point for distillation.""" + if pipeline_mode == "legacy": + return None + if pipeline_mode != "staged": + raise ValueError(f"Unsupported pipeline mode: {pipeline_mode}") + + # Load stage prompts from tools/release_announcement/prompts. + prompt_dir = _resolve_prompt_dir() + extraction_prompts = _load_prompts(os.path.join(prompt_dir, "extraction.prompt.yml")) + consolidation_prompts = _load_prompts(os.path.join(prompt_dir, "consolidation.prompt.yml")) + classification_prompts = _load_prompts(os.path.join(prompt_dir, "classification.prompt.yml")) + ranking_prompts = _load_prompts(os.path.join(prompt_dir, "ranking.prompt.yml")) + + # Resolve adapters by role. --backend only provides defaults for these. + chat_backend_name = backend_config.chat_model_backend + embedding_backend_name = backend_config.embedding_model_backend + + chat_adapter = registry.get(chat_backend_name) + if chat_adapter is None: + raise RuntimeError(f"Backend '{chat_backend_name}' not found in registry.") + + embedding_adapter = registry.get(embedding_backend_name) + if embedding_adapter is None: + raise RuntimeError(f"Backend '{embedding_backend_name}' not found in registry.") + + adapter = StagedRoutingAdapter( + chat_adapter=chat_adapter, + embedding_adapter=embedding_adapter, + ) + + logger.info( + "staged.preprocessing.start " + f"chat_backend={chat_backend_name} " + "embedding_mode=" + f"{'enabled' if backend_config.capabilities.supports_embeddings else 'disabled'}" + f":{embedding_backend_name}" + ) + _preprocess_start = time.monotonic() + + # Call the real staged pipeline + try: + context = run_distillation_pipeline( + pr_data=pr_data, + adapter=adapter, + backend_config=backend_config, + prompts=DistillationPrompts( + extraction=extraction_prompts, + consolidation=consolidation_prompts, + classification=classification_prompts, + ranking=ranking_prompts, + ), + ) + _elapsed = time.monotonic() - _preprocess_start + logger.info(f"staged.preprocessing.end context=distilled elapsed={_elapsed:.1f}s") + return context + except STAGED_PREPROCESS_EXCEPTIONS as err: + _elapsed = time.monotonic() - _preprocess_start + logger.error(f"staged pipeline failed: {err}") + logger.info(f"staged.preprocessing.end context=none elapsed={_elapsed:.1f}s") + return None + + +def process_single_pr( + pr_num: int, + pr_title: str, + announcement_file: str, + prompt_file: str, + config: BackendConfig | None = None, +) -> str: + """ + Process a single PR and optionally update the announcement file. + Returns one of: + "committed" – LLM updated the file with user-facing changes. + "no_changes" – LLM ran but produced no diff. + "skipped:" – PR matched skip rules; not sent to LLM. + "dry_run" – dry-run mode; PR would have been processed (no LLM called). + """ + logger.info(f"--- Processing PR #{pr_num}: {pr_title} ---") + if config is None: + config = BackendConfig() + + # Get PR data + pr_data = _fetch_pr_data(pr_num, config) + if pr_data is None: + return "dry_run" + + # Check deterministic skip rules before any LLM processing. + skip_reason = get_skip_reason(pr_data) + if skip_reason is not None: + return f"skipped:{skip_reason}" + + if config.dry_run: + return "dry_run" + + if config.delay_secs > 0: + logger.info(f"--- Sleeping {config.delay_secs}s before PR #{pr_num} ---") + time.sleep(config.delay_secs) + + distilled_context: DistilledContext | None = None + if config.pipeline_mode == "staged": + try: + distilled_context = cast( + DistilledContext | None, + prepare_pr_context(pr_data, config, config.pipeline_mode), + ) + except STAGED_PREPROCESS_EXCEPTIONS as err: + logger.warning( + f"staged preprocessing failed ({err}); falling back to legacy mode" + ) + distilled_context = None + + if distilled_context is None: + logger.warning( + "staged preprocessing returned no context, falling back to legacy mode" + ) + + # Load and process announcement + current_content = _load_announcement_content(announcement_file) + prompt_template = _load_prompt_template(prompt_file) + ai_prompt = _build_ai_prompt( + current_content, + pr_data, + prompt_template, + pipeline_mode=config.pipeline_mode, + distilled_context=distilled_context, + ) + + # Process with LLM backend + updated_ra = _process_with_llm(ai_prompt, config) + + # Write and check changes + _write_and_check_announcement(updated_ra, announcement_file) + + return _check_for_changes(announcement_file) + + +# Get PR data, routing to upstream when running from a fork so that upstream +# PR numbers resolve correctly regardless of where the script runs. +def _fetch_pr_data(pr_num: int, config: BackendConfig) -> dict[str, Any] | None: + """Fetch PR data from GitHub API.""" + upstream = _get_upstream_repo() + repo_flag = ["--repo", upstream] if upstream else [] + try: + update_text = subprocess.check_output( + ["gh", "pr", "view", str(pr_num), "--json", "number,title,body,comments,reviews"] + + repo_flag, + text=True, + env=get_gh_auth_env(), + ) + pr_data = sanitize_pr_data(update_text) + + # gh pr view omits inline code-review thread comments (tied to specific + # lines of code); fetch them separately via the REST API. + repo = upstream or _get_current_repo_name() + if repo: + pr_data["comments"].extend(_fetch_inline_review_comments(pr_num, repo)) + + return pr_data + except subprocess.CalledProcessError: + if config.dry_run: + return None # PR not accessible; skip metadata fetch in dry-run. + raise + + +def _load_announcement_content(announcement_file: str) -> str: + """Load current announcement content.""" + with open(announcement_file, "r", encoding="utf-8") as f: + return f.read() + + +def _load_prompt_template(prompt_file: str) -> dict[str, Any]: + """Load prompt template.""" + return load_prompt_template(prompt_file) + + +def _build_ai_prompt( + current_content: str, + pr_data: dict[str, Any], + prompt_template: dict[str, Any], + pipeline_mode: str = "legacy", + distilled_context: DistilledContext | None = None, +) -> dict[str, Any]: + """Build AI prompt from template and data, selecting legacy or staged path. + + Legacy mode always uses raw PR payload prompt construction. + Staged mode uses distilled context when available, otherwise falls back to + the legacy raw-PR path. + """ + if pipeline_mode == "staged" and distilled_context is not None: + return _build_staged_ai_prompt( + current_content=current_content, + pr_data=pr_data, + distilled_context=distilled_context, + prompt_template=prompt_template, + ) + return build_ai_prompt(current_content, pr_data, prompt_template) + + +def _build_staged_ai_prompt( + current_content: str, + pr_data: dict[str, Any], + distilled_context: DistilledContext, + prompt_template: dict[str, Any], +) -> dict[str, Any]: + """Build staged prompt using distilled preprocessing context. + + The final edit step remains the same, but the staged path supplies the + distilled summary/signals instead of the full raw PR discussion payload. + """ + system_prompt = next( + m["content"] for m in prompt_template["messages"] if m["role"] == "system" + ) + + distilled_payload = { + "pr_number": pr_data.get("number"), + "pr_title": pr_data.get("title"), + "summary": distilled_context.summary, + "structured_signals": distilled_context.structured_signals, + "classification": distilled_context.classification, + "metadata": distilled_context.metadata, + } + + user_content = ( + "Current working announcement:\n====\n" + + current_content + + "\n====\n\nDistilled pull request context:\n" + + json.dumps(distilled_payload, indent=2) + + "\n====\n\nUpdate the Release Announcement to include any user-relevant " + + "changes from this PR.\nReturn the complete updated Markdown document only." + ) + + return { + "messages": [ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_content}, + ], + "model": prompt_template.get("model", GITHUB_MODELS_DEFAULT_CHAT_MODEL), + "modelParameters": prompt_template.get("modelParameters", {}), + } + + +def _process_with_llm(ai_prompt: dict[str, Any], config: BackendConfig) -> str: + """Process with appropriate LLM backend.""" + target_backend = config.chat_model_backend or config.backend + if target_backend == "ollama": + updated_ra = call_ollama_model(ai_prompt, config.chat_model, config.embedding_model) + elif target_backend in {"github", "actions"}: + updated_ra = call_github_models_api( + ai_prompt, + chat_model_override=config.chat_model, + embedding_model_override=config.embedding_model, + ) + else: + raise BackendValidationError(f"Unknown backend '{target_backend}'") + return strip_markdown_fences(updated_ra) + + +def _write_and_check_announcement(updated_ra: str, announcement_file: str) -> None: + """Write updated announcement and check for changes.""" + with open(announcement_file, "w", encoding="utf-8") as f: + f.write(updated_ra) + + +def _check_for_changes(announcement_file: str) -> str: + """Check if announcement file has changes.""" + diff_check = subprocess.run( + ["git", "diff", "-w", "--exit-code", announcement_file], + capture_output=True, + check=False, + ) + return "no_changes" if diff_check.returncode == 0 else "committed" + + +def _setup_backend_token(backend: str) -> None: + """Resolve and pin the GitHub token for the chosen backend.""" + if backend == "github": + token = resolve_github_token() + os.environ["GH_TOKEN"] = token + os.environ["GITHUB_TOKEN"] = token + elif backend == "actions": + raw = os.getenv("GITHUB_TOKEN") or os.getenv("GH_TOKEN") + if not raw: + logger.critical( + "--backend actions requires GITHUB_TOKEN to be set.\n" + "Add this to your workflow step:\n" + " env:\n" + " GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}" + ) + sys.exit(1) + token = normalize_github_token(raw) + os.environ["GH_TOKEN"] = token + os.environ["GITHUB_TOKEN"] = token + + +def _resolve_timeframe(args: argparse.Namespace) -> tuple[str, str, str, str]: + """Resolve revision labels and timestamps for PR discovery.""" + if args.end is None: + lower_bound_label = f"{args.start}~" + upper_bound_label = args.start + start_ts = get_previous_commit_timestamp(args.start) + end_ts = get_universal_timestamp(args.start) + else: + lower_bound_label = args.start + upper_bound_label = args.end + start_ts = get_universal_timestamp(args.start) + end_ts = get_universal_timestamp(args.end) + return lower_bound_label, upper_bound_label, start_ts, end_ts + + +def _process_prs( + args: argparse.Namespace, + config: BackendConfig, + todo_prs: list[dict[str, Any]], +) -> None: + """Process discovered PRs and print summary.""" + processed = skipped = no_changes = 0 + for pr in todo_prs: + pr_num = pr["number"] + pr_title = pr["title"] + + try: + result = process_single_pr( + pr_num, + pr_title, + args.file, + args.prompt, + config, + ) + except Exception as err: + raise RuntimeError( + f"Failed while processing PR #{pr_num} ({pr_title}): {err}" + ) from err + + if result == "committed": + logger.info(f" > Updated release announcement for #{pr_num}. Preparing git commit.") + _commit_pr_update(args.file, pr_num, pr_title) + logger.info(" Successfully committed.") + processed += 1 + elif result == "no_changes": + logger.info(f" > No user-facing changes for #{pr_num}. Skipping commit.") + no_changes += 1 + elif result.startswith("skipped"): + skip_reason = result.split(":", 1)[1] if ":" in result else "skip rule" + logger.info(f" > Skipping PR #{pr_num} ({skip_reason}).") + skipped += 1 + elif result == "dry_run": + logger.info(f" [DRY RUN] Would process PR #{pr_num}.") + processed += 1 + + if args.dry_run: + logger.info( + f"\n--- [DRY RUN] Done. {len(todo_prs)} PRs found: " + f"{processed} would process, {skipped} would skip. ---" + ) + else: + logger.info( + f"\n--- Done! {len(todo_prs)} PRs: " + f"{processed} committed, {no_changes} no changes, {skipped} skipped. ---" + ) + + +def _commit_pr_update(announcement_file: str, pr_num: int, pr_title: str) -> None: + """Stage and commit announcement updates for a processed PR.""" + try: + subprocess.run( + ["git", "add", announcement_file], + check=True, + capture_output=True, + text=True, + ) + subprocess.run( + ["git", "commit", "-m", f"[bot] RA: Merge #{pr_num}: {pr_title}"], + check=True, + capture_output=True, + text=True, + ) + except subprocess.CalledProcessError as err: + command = " ".join(str(part) for part in err.cmd) + stderr = (err.stderr or "").strip() + stdout = (err.stdout or "").strip() + details = stderr or stdout or str(err) + raise RuntimeError( + f"Could not stage or commit updates for PR #{pr_num}. " + f"Command failed: {command}. {details}" + ) from err + + +def _initialize_runtime( + parser: argparse.ArgumentParser, + args: argparse.Namespace, +) -> BackendConfig: + """Validate args, prepare token env, and reconcile capabilities.""" + validate_cli_args(parser, args) + + # Resolve and pin the token before any subprocess calls that need it. + _setup_backend_token(args.backend) + + config = _resolve_backend_config(args) + logger.debug( + "Capability resolution input: " + f"backend={config.backend} " + f"chat_backend={config.chat_model_backend} chat_model={config.chat_model!r} " + f"embedding_backend={config.embedding_model_backend} " + f"embedding_model={config.embedding_model!r} " + f"pipeline={config.pipeline_mode} staged_mode={config.staged_mode}" + ) + + if config.dry_run: + logger.debug("Skipping backend capability probe in dry-run mode...") + # In dry-run mode, assume all capabilities are available since we do not call backends. + config.capabilities = BackendCapabilities( + supports_chat=True, + supports_embeddings=True, + ) + else: + logger.debug("Probing backend capabilities...") + probe_capabilities(config) + + logger.debug( + "Capability probe result: " + f"supports_chat={config.capabilities.supports_chat} " + f"supports_embeddings={config.capabilities.supports_embeddings}" + ) + + if config.pipeline_mode == "staged": + logger.debug("Reconciling requested pipeline mode with probed capabilities...") + validate_mode(config) + logger.debug(f"Effective staged mode after validation: {config.staged_mode}") + else: + validate_mode(config) + logger.debug("Legacy pipeline selected; staged mode reconciliation skipped.") + + return config + + +def _log_and_exit_on_handled_error(err: Exception) -> None: + """Log a handled startup/processing failure and terminate with code 1.""" + logger.critical(str(err)) + sys.exit(1) + + +def _log_and_exit_on_unexpected_error(context: str, err: Exception) -> None: + """Handle unexpected startup/processing failures with a single critical line.""" + logger.critical(f"Unexpected {context} error: {err}") + sys.exit(1) + + +def _run_processing(args: argparse.Namespace, config: BackendConfig) -> None: + """Resolve target PR range and process announcements in sequence.""" + lower_bound_label, upper_bound_label, start_ts, end_ts = _resolve_timeframe(args) + logger.info(f"--- Resolving boundaries: {lower_bound_label} -> {upper_bound_label} ---") + + todo_prs = get_ordered_pr_list(start_ts, end_ts) + if not todo_prs: + logger.info("No new PRs found to process.") + return + + logger.info(f"--- Found {len(todo_prs)} PRs to merge oldest-to-newest ---") + _process_prs(args, config, todo_prs) + + +def main() -> None: + """Run the release announcement generation CLI.""" + parser = build_arg_parser() + args = parser.parse_args() + + # Set up logging level + logger.set_level(getattr(args, "log_level", "INFO")) + + if args.delay_secs < 0: + logger.critical("--delay-secs must be >= 0") + sys.exit(1) + + try: + config = _initialize_runtime(parser, args) + except HANDLED_MAIN_EXCEPTIONS as err: + _log_and_exit_on_handled_error(err) + except UNEXPECTED_MAIN_EXCEPTIONS as err: + _log_and_exit_on_unexpected_error("startup", err) + + try: + _run_processing(args, config) + except HANDLED_MAIN_EXCEPTIONS as err: + _log_and_exit_on_handled_error(err) + except UNEXPECTED_MAIN_EXCEPTIONS as err: + _log_and_exit_on_unexpected_error("processing", err) + + +def run_cli() -> None: + """Run CLI with clean interrupt handling for all module entry points.""" + try: + main() + except KeyboardInterrupt: + logger.critical("User interrupt -- stopped") + sys.exit(130) + + +if __name__ == "__main__": + run_cli() diff --git a/tools/release_announcement/src/release_announcement/registry.py b/tools/release_announcement/src/release_announcement/registry.py new file mode 100644 index 0000000000..e5c0288583 --- /dev/null +++ b/tools/release_announcement/src/release_announcement/registry.py @@ -0,0 +1,130 @@ +"""Backend registry and protocol definitions for release_announcement.""" + +from __future__ import annotations + +import importlib +import pkgutil +from typing import Callable, Protocol, runtime_checkable + + + +class ModelNotFoundError(Exception): + """Raised when the backend reports the model name is invalid.""" + + +@runtime_checkable +class BackendProtocol(Protocol): + """Abstract interface that all backends must implement.""" + + def probe_chat(self, model: str | None) -> bool: + """Send a minimal chat request to verify the backend is reachable with this model. + + Args: + model: The model name to probe. If None, the backend uses its own internal default. + + Returns: + True on success. + + Raises: + ModelNotFoundError: When the backend reports the model name is invalid. + Any other exception (network, auth, etc.) propagates as-is. + """ + + def probe_embeddings(self, model: str | None) -> bool: + """Send a minimal embedding request to verify embedding support for this model. + + Args: + model: The model name to probe. If None, the backend uses its own internal default. + + Returns: + True on success, False when the backend does not support embeddings as a capability. + + Raises: + ModelNotFoundError: When the model name is invalid. + Any other exception (network, auth, etc.) propagates as-is. + """ + + def call_chat(self, prompt: dict) -> str: + """Call the chat model with the given prompt. + + Args: + prompt: The prompt to send to the chat model. + + Returns: + The response from the chat model. + """ + + +class BackendRegistry: + """Registry for backends implementing BackendProtocol.""" + + def __init__(self) -> None: + self._backends: dict[str, BackendProtocol] = {} + self._factories: dict[str, Callable[[], BackendProtocol]] = {} + self._default_backend = "ollama" + self._bootstrapped = False + + def _bootstrap_distillation_adapters(self) -> None: + """Import distillation adapter modules once in deterministic order. + + Adapter modules register themselves via module import side effects. + """ + if self._bootstrapped: + return + + package_root = __name__.rsplit(".", maxsplit=1)[0] + package_name = f"{package_root}.backends.distillation_adapters" + try: + package = importlib.import_module(package_name) + except ImportError as err: + raise RuntimeError( + "Failed to import distillation adapter package " + f"'{package_name}': {err}" + ) from err + + module_names = sorted( + name + for _, name, _ in pkgutil.iter_modules(package.__path__) + if not name.startswith("_") + ) + for name in module_names: + importlib.import_module(f"{package_name}.{name}") + + self._bootstrapped = True + + def register(self, name: str, factory: Callable[[], BackendProtocol]) -> None: + """Register a backend factory function for lazy initialization. + + The factory is called only when the backend is first requested, and the + instance is cached for subsequent lookups. Re-registering a name + replaces the existing factory and invalidates any cached instance. + """ + if name in self._backends: + del self._backends[name] + self._factories[name] = factory + + def get(self, name: str) -> BackendProtocol | None: + """Get a backend by name, lazily initializing factories on first access.""" + self._bootstrap_distillation_adapters() + + # Check if already instantiated + if name in self._backends: + return self._backends[name] + + # Check if there's a factory for lazy initialization + if name in self._factories: + factory = self._factories[name] + backend = factory() + # Cache the instantiated backend + self._backends[name] = backend + return backend + + return None + + def resolve_backend_name(self, name: str | None) -> str: + """Resolve a backend name, using the default if name is None.""" + return name if name is not None else self._default_backend + + +# Global registry instance +registry = BackendRegistry() diff --git a/tools/release_announcement/src/release_announcement/skip_rules.py b/tools/release_announcement/src/release_announcement/skip_rules.py new file mode 100644 index 0000000000..0af89fc138 --- /dev/null +++ b/tools/release_announcement/src/release_announcement/skip_rules.py @@ -0,0 +1,49 @@ +"""Deterministic PR skip rules used before any LLM processing.""" + +from __future__ import annotations + +import re +from typing import Any + +_CHANGELOG_SKIP_RE = re.compile(r"(?m)^CHANGELOG:\s*SKIP\s*$", re.IGNORECASE) +_TRANSLATION_UPDATE_SKIP_RE = re.compile( + r"^Translations update from Hosted Weblate$", + re.IGNORECASE, +) +_CI_ACTION_BUMP_SKIP_RE = re.compile( + r"^Build:\s+Bump\s+\S+\s+from\s+\S+\s+to\s+\S+" + r"(?:\s+\(Automated PR\))?$", + re.IGNORECASE, +) + +_SKIP_REASON_CHANGELOG = "CHANGELOG: SKIP" +_SKIP_REASON_WEBLATE = "Weblate translations" +_SKIP_REASON_CI_ACTION_BUMP = "CI action version bump" + + +def get_skip_reason(pr_data: dict[str, Any]) -> str | None: + """Return skip reason when a deterministic skip rule matches PR content. + + Current skip rules: + - PR body or comments contain ``CHANGELOG: SKIP``. + - PR title equals ``Translations update from Hosted Weblate``. + - PR title matches ``Build: Bump from to `` + optionally followed by ``(Automated PR)``. + """ + text_fields = [pr_data.get("body") or ""] + list(pr_data.get("comments", [])) + if any(_CHANGELOG_SKIP_RE.search(field) for field in text_fields if field): + return _SKIP_REASON_CHANGELOG + + title = str(pr_data.get("title") or "").strip() + if _TRANSLATION_UPDATE_SKIP_RE.fullmatch(title): + return _SKIP_REASON_WEBLATE + + if _CI_ACTION_BUMP_SKIP_RE.fullmatch(title): + return _SKIP_REASON_CI_ACTION_BUMP + + return None + + +def has_changelog_skip(pr_data: dict[str, Any]) -> bool: + """Backward-compatible boolean helper for skip-rule checks.""" + return get_skip_reason(pr_data) is not None diff --git a/tools/release_announcement/src/release_announcement/staged_routing_adapter.py b/tools/release_announcement/src/release_announcement/staged_routing_adapter.py new file mode 100644 index 0000000000..a931d64891 --- /dev/null +++ b/tools/release_announcement/src/release_announcement/staged_routing_adapter.py @@ -0,0 +1,94 @@ +"""Routing adapter for staged distillation with split chat/embedding backends.""" + +from __future__ import annotations + +from .distillation import ( + Chunk, + ClassifiedSignals, + DistillationAdapter, + DistilledContext, + DistilledContextMetadata, + Signal, +) + + +class StagedRoutingAdapter(DistillationAdapter): + """Route staged calls across chat and embedding backends. + + Per plan, ``--backend`` only provides defaults; staged execution routes + chat-like phases via ``chat_model_backend`` and embedding ranking via + ``embedding_model_backend`` when embeddings are enabled. + """ + + def __init__( + self, + *, + chat_adapter: DistillationAdapter, + embedding_adapter: DistillationAdapter, + ) -> None: + self._chat_adapter = chat_adapter + self._embedding_adapter = embedding_adapter + + def select_relevant_chunks( + self, + chunks: list[Chunk], + use_embeddings: bool, + ranking_prompts: list[dict[str, str]], + ) -> list[Chunk]: + if use_embeddings: + return self._embedding_adapter.select_relevant_chunks( + chunks, + use_embeddings, + ranking_prompts, + ) + return self._chat_adapter.select_relevant_chunks( + chunks, + use_embeddings, + ranking_prompts, + ) + + def extract_chunk_signals( + self, + chunk: Chunk, + extraction_prompts: list[dict[str, str]], + ) -> list[Signal]: + return self._chat_adapter.extract_chunk_signals(chunk, extraction_prompts) + + def consolidate_signals( + self, + signals: list[Signal], + consolidation_prompts: list[dict[str, str]], + ) -> list[Signal]: + return self._chat_adapter.consolidate_signals(signals, consolidation_prompts) + + def classify_signals( + self, + signals: list[Signal], + classification_prompts: list[dict[str, str]], + ) -> ClassifiedSignals: + return self._chat_adapter.classify_signals(signals, classification_prompts) + + def render_final_context( + self, + classified: ClassifiedSignals, + metadata: DistilledContextMetadata, + ) -> DistilledContext: + return self._chat_adapter.render_final_context(classified, metadata) + + def get_chat_adapter_tag(self) -> str: + """Return the chat adapter tag for staged route-aware logging.""" + return self._chat_adapter.get_adapter_tag() + + def get_embedding_adapter_tag(self) -> str: + """Return the embedding adapter tag for staged route-aware logging.""" + return self._embedding_adapter.get_adapter_tag() + + def get_stage_adapter_tag(self, *, phase: str, use_embeddings: bool = False) -> str: + """Return backend tag for a given phase and route mode.""" + if phase == "relevance_selection" and use_embeddings: + return self.get_embedding_adapter_tag() + return self.get_chat_adapter_tag() + + def get_adapter_tag(self) -> str: + """Return the chat adapter's tag (staged routing is transparent).""" + return self._chat_adapter.get_adapter_tag() diff --git a/tools/release_announcement/src/release_announcement/token_utils.py b/tools/release_announcement/src/release_announcement/token_utils.py new file mode 100644 index 0000000000..106e3b3e2c --- /dev/null +++ b/tools/release_announcement/src/release_announcement/token_utils.py @@ -0,0 +1,61 @@ +"""Shared helpers for resolving GitHub tokens for CLI/API backends.""" + +from __future__ import annotations + +import os +import subprocess +from typing import Callable + + +def normalize_github_token(raw_token: str) -> str: + """Normalize token text from environment variables or gh output.""" + token = raw_token.replace("\r", "").replace("\n", "").strip() + if token.lower().startswith("bearer "): + token = token[7:].strip() + + if any(ch.isspace() for ch in token) or any( + ord(ch) < 32 or ord(ch) == 127 for ch in token + ): + raise RuntimeError( + "GitHub token contains whitespace/control characters after normalization. " + "Set GH_TOKEN/GITHUB_TOKEN to the raw token value." + ) + + if not token: + raise RuntimeError("GitHub token is empty after normalization.") + + return token + + +def resolve_token_from_env_or_gh(normalize: Callable[[str], str]) -> str: + """Resolve GH token from env first, then from `gh auth token` fallback.""" + raw_token = os.getenv("GH_TOKEN") or os.getenv("GITHUB_TOKEN") + if raw_token: + return normalize(raw_token) + + try: + gh_token = subprocess.check_output( + ["gh", "auth", "token"], text=True, stderr=subprocess.STDOUT + ) + except FileNotFoundError as exc: + raise RuntimeError( + "GH_TOKEN/GITHUB_TOKEN is not set and GitHub CLI ('gh') is not installed. " + "Install gh or set GH_TOKEN/GITHUB_TOKEN." + ) from exc + except subprocess.CalledProcessError as err: + details = (err.output or "").strip() + if details: + raise RuntimeError( + "GH_TOKEN/GITHUB_TOKEN is not set and failed to run 'gh auth token'.\n" + f"gh output: {details}" + ) from err + raise RuntimeError( + "GH_TOKEN/GITHUB_TOKEN is not set and failed to run 'gh auth token'." + ) from err + + return normalize(gh_token) + + +def resolve_github_token() -> str: + """Resolve GH token from env first, then from `gh auth token` fallback.""" + return resolve_token_from_env_or_gh(normalize_github_token) diff --git a/tools/release_announcement/template-ReleaseAnnouncement.md b/tools/release_announcement/template-ReleaseAnnouncement.md new file mode 100644 index 0000000000..b14257c852 --- /dev/null +++ b/tools/release_announcement/template-ReleaseAnnouncement.md @@ -0,0 +1,41 @@ +# Jamulus Next Release — Working Announcement Draft + +> **Note for maintainers:** This is a working draft, automatically updated by GitHub Copilot +> as PRs are merged to `main`. Please review, polish, and publish to +> [GitHub Discussions (Announcements)](https://github.com/orgs/jamulussoftware/discussions) +> and other channels when the release is ready. +> +> Run [`tools/get_release_contributors.py`](tools/get_release_contributors.py) to compile +> the full contributor list before publishing. +> +> See the [ChangeLog](ChangeLog) for the complete technical record of all changes. + +Here's what's new in the next release of Jamulus: + + + +## For everyone + +## For Windows users + +## For macOS users + +## For mobile users (iOS & Android) + +## For server operators + +## Translations + +--- + +As always, all feedback on the new version is welcome. Please raise any problems in a new bug report or discussion topic. + +--- + +**REMINDER:** Those of you with virus checkers are likely to find the Windows installer incorrectly flagged as a virus. This is because the installer is open source and virus checkers cannot be bothered to check what it installs, so assume that it's going to be malign. If you download the installer *only from the official release*, you should be safe to ignore any warning. + +--- + +*A big thanks to all contributors who made this release possible.* + +*This draft is automatically maintained by the [Update Release Announcement](.github/workflows/update-release-announcement.yml) workflow.* diff --git a/tools/release_announcement/tests/BASELINE_COMPLETION.txt b/tools/release_announcement/tests/BASELINE_COMPLETION.txt new file mode 100644 index 0000000000..1183cfdd81 --- /dev/null +++ b/tools/release_announcement/tests/BASELINE_COMPLETION.txt @@ -0,0 +1,31 @@ +=== Baseline Completion Summary === + +DateTime: 2026-04-04T15:22:13Z +Status: ✅ COMPLETE + +Baseline Directory: .vscode/release-announcement-baseline-20260404_161752 +Total Scenarios: 8 (all completed with exit code 0) +Total Duration: ~3.5 minutes across all runs + +Scenario Results: + github__pr3429 exit=0 duration_ms= 7445 + github__pr3502 exit=0 duration_ms= 15546 + github__pr3625 exit=0 duration_ms= 3935 + github__tag_r3_12_0beta4_to_r3_12_0beta5 exit=0 duration_ms= 6301 + ollama__pr3429 exit=0 duration_ms= 14638 + ollama__pr3502 exit=0 duration_ms=128914 + ollama__pr3625 exit=0 duration_ms= 4238 + ollama__tag_r3_12_0beta4_to_r3_12_0beta5 exit=0 duration_ms= 6755 + +Verification: + ✅ All 8 scenario directories present + ✅ Each scenario has timing.txt with valid start/end timestamps + ✅ All scenarios completed with exit_code=0 + ✅ Git worktree clean (untracked: ini-*, tools/release_announcement/) + ✅ ReleaseAnnouncement.md restored to initial state + +Deliverables: + ✅ .vscode/run-release-announcement-baseline.sh (executable) + ✅ .vscode/BASELINE_MATRIX_README.md (comprehensive agent guide) + ✅ .vscode/release-announcement-baseline-20260404_161752/ (baseline artifacts) + ✅ Timing reference table updated in README diff --git a/tools/release_announcement/tests/BASELINE_MATRIX_README.md b/tools/release_announcement/tests/BASELINE_MATRIX_README.md new file mode 100644 index 0000000000..924997b0db --- /dev/null +++ b/tools/release_announcement/tests/BASELINE_MATRIX_README.md @@ -0,0 +1,254 @@ +# Release Announcement Tool: Baseline Matrix Execution Guide for AI Agents + +## Overview + +This document provides AI agents with detailed, actionable guidance for capturing and verifying the **Step 0 baseline matrix** for the release-announcement tool restructuring plan. The baseline captures the tool's current behavior across representative scenarios before any code changes are made. + +## Purpose + +The baseline matrix establishes: +1. Current behavior of both Ollama and GitHub backends against 4 representative PR scenarios +2. High-precision timing (microsecond start/end, millisecond duration) for each run +3. Exact command invocations, stdout/stderr logs, git diffs, and exit codes +4. A reference against which all post-restructure verification steps compare outputs + +## Prerequisites + +Before running the baseline, verify: + +```bash +# Must be at repository root +cd "$(git rev-parse --show-toplevel)" + +# Worktree must be clean +git status --short + +# These files must exist +test -f tools/update-release-announcement.sh +test -f ReleaseAnnouncement.md +``` + +All three prerequisites must pass; if any fail, stop and report the failure before proceeding. + +## Execution + +### Quick Start (Recommended) + +```bash +tools/release_announcement/tests/run-release-announcement-baseline.sh +``` + +This script: +- Verifies all prerequisites +- Creates a timestamped output directory under `tools/release_announcement/tests/build/release-announcement-baseline-YYYYMMDD_HHMMSS/` +- Runs all 8 scenarios (4 scenarios × 2 backends) +- Captures timing, logs, diffs, and snapshots for each +- Cleans up git worktree after each scenario +- Reports high-level summary on completion + +### Manual Execution (If Script Fails) + +If the automated script fails, you can run individual scenarios by referencing the script's `run_case` function. Each scenario follows this pattern: + +```bash +BASELINE_DIR="tools/release_announcement/tests/build/release-announcement-baseline-$(date +%Y%m%d_%H%M%S)" +mkdir -p "$BASELINE_DIR" +BASE_COMMIT=$(git rev-parse HEAD) + +# Capture initial state +cp ReleaseAnnouncement.md "$BASELINE_DIR/ReleaseAnnouncement.initial.md" +printf '%s\n' "$BASE_COMMIT" > "$BASELINE_DIR/base_commit.txt" + +# Example: ollama backend, PR #3429 +./tools/update-release-announcement.sh --delay-secs 0 --backend ollama --file ReleaseAnnouncement.md pr3429 + +# Capture outputs +cp ReleaseAnnouncement.md "$BASELINE_DIR/ollama__pr3429/ReleaseAnnouncement.after.md" +git diff -- ReleaseAnnouncement.md > "$BASELINE_DIR/ollama__pr3429/git-diff.txt" + +# Reset for next scenario +git reset --hard "$BASE_COMMIT" +``` + +## Baseline Matrix Scenarios + +Run all 8 combinations below. Each scenario is abbreviated as `{backend}__` followed by a label: + +| # | Backend | Scenario | Arguments | Type | Expected Role | +|---|---------|----------|-----------|------|----------------| +| 1 | ollama | Normal PR | `pr3429` | single PR | Medium discussion, normal flow | +| 2 | github | Normal PR | `pr3429` | single PR | Medium discussion, normal flow | +| 3 | ollama | SKIP PR | `pr3625` | single PR | CHANGELOG: SKIP directive, should skip processing | +| 4 | github | SKIP PR | `pr3625` | single PR | CHANGELOG: SKIP directive, should skip processing | +| 5 | ollama | Large PR | `pr3502` | single PR | Large thread (182 comments), stress test for token trimming | +| 6 | github | Large PR | `pr3502` | single PR | Large thread (182 comments), stress test for embeddings/trimming | +| 7 | ollama | Tag Range | `r3_12_0beta4 r3_12_0beta5 --dry-run` | backfill | Multiple PRs, dry-run mode (no commits) | +| 8 | github | Tag Range | `r3_12_0beta4 r3_12_0beta5 --dry-run` | backfill | Multiple PRs, dry-run mode (no commits) | + +## Expected Output Structure + +Each scenario generates a directory named `{backend}__{label}` containing: + +- `metadata.txt` — backend, label, PR/tag args, dry-run flag +- `command.txt` — exact shell command executed +- `timing.txt` — start/end ISO UTC timestamps (microsecond precision), duration (ns and ms), exit code +- `stdout-stderr.log` — full stdout and stderr from the tool +- `ReleaseAnnouncement.after.md` — announcement file snapshot after run +- `git-diff.txt` — git diff output (shows changes to ReleaseAnnouncement.md) +- `git-status-short.txt` — git status after run (should be empty if cleanup worked) +- `exit_code.txt` — single integer exit code +- `reset.log` — git reset output used to clean up between scenarios +- `_artifact-index.txt` — sorted list of all files in the baseline directory (generated at end) + +### Timing File Format (Example) + +``` +start_iso_utc_us=2026-04-04T10:56:52.835529Z +end_iso_utc_us=2026-04-04T10:57:06.029304Z +start_epoch_ns=1775300212832604746 +end_epoch_ns=1775300226027601118 +duration_ns=13194996372 +duration_ms=13194 +exit_code=0 +``` + +## Success Criteria + +A baseline run is **successful** if all 8 scenarios complete without hanging or crashing, and end with an exit code report. Specifically: + +1. ✅ All 8 scenario directories created under `$BASELINE_DIR` +2. ✅ Each has `timing.txt` with valid start/end timestamps and exit_code +3. ✅ At least one scenario per backend runs with `exit_code=0` +4. ✅ `ReleaseAnnouncement.initial.md` == `ReleaseAnnouncement` before any run +5. ✅ Final git worktree is clean: `git status --short` shows only untracked files (ini-*, tools/release_announcement/, etc.) + +### Known Issues & Tolerances + +- **Large PR timeouts:** Ollama backend on PR #3502 may take > 4 minutes. If it times out or fails, that exit code and timing are still captured in `timing.txt` and `stdout-stderr.log`. +- **SKIP PR handling:** PR #3625 should be skipped (no changes to announcement). Check `git-diff.txt` to verify no output was written. +- **Dry-run semantics:** Tag-range runs use `--dry-run` and should not create git commits. Verify with `reset.log` and empty `git-status-short.txt`. + +## Post-Run Inspection + +### View timing summary + +```bash +BASELINE_DIR=".vscode/release-announcement-baseline-20260404_161752" +for d in "$BASELINE_DIR"/*__*; do + name=$(basename "$d") + timing="$d/timing.txt" + if [ -f "$timing" ]; then + dur=$(grep '^duration_ms=' "$timing" | cut -d= -f2) + code=$(grep '^exit_code=' "$timing" | cut -d= -f2) + printf '%s\tduration_ms=%s\texit_code=%s\n' "$name" "$dur" "$code" + fi +done +``` + +### Check for differences + +```bash +BASELINE_DIR=".vscode/release-announcement-baseline-20260404_161752" +# Show scenarios with non-zero exit codes +for d in "$BASELINE_DIR"/*__*; do + code=$(cat "$d/exit_code.txt" 2>/dev/null || echo "missing") + if [ "$code" != "0" ]; then + echo "$(basename "$d") exited with code: $code" + head -20 "$d/stdout-stderr.log" + fi +done +``` + +### Save baseline reference + +```bash +# Link the latest baseline as "reference" +LATEST=$(ls -dtd .vscode/release-announcement-baseline-* | head -1) +ln -sfn "$(basename "$LATEST")" .vscode/release-announcement-baseline-reference +echo "Baseline reference updated: .vscode/release-announcement-baseline-reference" +``` + +## Baseline Timing Reference (Current) + +If previous baseline runs succeeded, their timings are recorded here for comparison: + +### Captured on 2026-04-04 at 15:17–15:21 (Complete, All 8 Scenarios) + +| Scenario | Duration (ms) | Duration (sec) | Exit Code | +|----------|---:|---:|---:| +| ollama/pr3429 | 14638 | 14.6s | ✅ 0 | +| github/pr3429 | 7445 | 7.4s | ✅ 0 | +| ollama/pr3625 | 4238 | 4.2s | ✅ 0 | +| github/pr3625 | 3935 | 3.9s | ✅ 0 | +| ollama/pr3502 | 128914 | 128.9s | ✅ 0 | +| github/pr3502 | 15546 | 15.5s | ✅ 0 | +| ollama/tag-range | 6755 | 6.8s | ✅ 0 | +| github/tag-range | 6301 | 6.3s | ✅ 0 | + +**Total baseline duration:** ~3 min 30 sec across all 8 scenarios + +**Key observations:** +- ✅ **All scenarios now complete with exit code 0** (ollama/pr3502 now succeeds; previous run had exit code 1) +- Ollama large-thread (pr3502) consistently takes 2+ minutes (~129 sec); GitHub completes in ~15 sec +- GitHub handles all scenarios within 7–15 seconds +- Ollama normal/medium cases: 4–14 seconds +- Tag-range dry-run scenarios complete in ~6–7 seconds for both backends +- Previous run had ollama/pr3502 with exit code 1; fresh run shows all exit code 0 — baseline is clean + +## For AI Agents: How to Rerun + +1. **Check prerequisites:** + ```bash + git status --short # Must show only untracked: ini-*, tools/release_announcement/ + test -f .vscode/run-release-announcement-baseline.sh + ``` + +2. **Execute the baseline:** + ```bash + .vscode/run-release-announcement-baseline.sh + ``` + +3. **Wait for completion** (typically 10–15 minutes for all 8 runs). + +4. **Verify success:** + ```bash + # Check final worktree status + git status --short + + # Count artifacts + LATEST=$(ls -dtd .vscode/release-announcement-baseline-* | head -1) + find "$LATEST" -maxdepth 2 -type f | wc -l + # Should be > 70 files (8 scenarios × ~9 files + index + snapshots) + ``` + +5. **Report summary:** + ```bash + LATEST=$(ls -dtd .vscode/release-announcement-baseline-* | head -1) + echo "Baseline: $LATEST" + for d in "$LATEST"/*__*; do + name=$(basename "$d") + code=$(cat "$d/exit_code.txt") + dur=$(grep '^duration_ms=' "$d/timing.txt" | cut -d= -f2) + echo "$name: exit=$code, duration_ms=$dur" + done + ``` + +## Troubleshooting + +| Issue | Resolution | +|-------|-----------| +| Script exits with "Error: tools/update-release-announcement.sh not found" | Verify you are at repo root: `pwd` should be `/home/peter/git/Jamulus-wip` | +| Script exits with "git worktree has uncommitted changes" | Run `git status` to see uncommitted files; stash or discard as needed | +| A scenario hangs (no output for > 600s) | Interrupt with Ctrl+C; scenario will be marked incomplete in the summary | +| Large PR (pr3502) takes > 5 minutes | Normal for Ollama; GitHub typically completes in ~14s. Both durations are valid baseline data | +| Ollama/GitHub completes normally but tool returns exit code 1 | Baseline is still valid; exit code 1 is captured in `timing.txt`. This may be a known issue to investigate during Step 1 | + +## Related Files + +- **Script:** [.vscode/run-release-announcement-baseline.sh](.vscode/run-release-announcement-baseline.sh) +- **Plan:** [.vscode/plan-releaseAnnouncementDistillation.prompt.md](.vscode/plan-releaseAnnouncementDistillation.prompt.md) — see "Capture a baseline before any changes are made" section +- **Archived baselines:** All runs under `.vscode/release-announcement-baseline-*/` are timestamped and preserved for audit + +## Summary + +The baseline matrix establishes ground truth for the release-announcement tool's current behavior in both backends across representative scenarios. Successful execution of this baseline is the prerequisite for all subsequent Step 1–10 verification work. diff --git a/tools/release_announcement/tests/__init__.py b/tools/release_announcement/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tools/release_announcement/tests/assertions.py b/tools/release_announcement/tests/assertions.py new file mode 100644 index 0000000000..5233c00059 --- /dev/null +++ b/tools/release_announcement/tests/assertions.py @@ -0,0 +1,50 @@ +"""Shared assertion helpers for backend adapter protocol tests.""" + +from __future__ import annotations + +from collections.abc import Sequence +from typing import Any + +from release_announcement.cli_config import BackendCapabilities + + +def assert_distillation_adapter_surface(adapter: object) -> None: + """Assert that an adapter exposes the required distillation methods.""" + required_methods = [ + "select_relevant_chunks", + "extract_chunk_signals", + "consolidate_signals", + "classify_signals", + "render_final_context", + ] + for method_name in required_methods: + assert hasattr(adapter, method_name) + + +def patch_cli_startup_happy_path( + monkeypatch: Any, + ra_main: Any, + prs: Sequence[dict[str, Any]], +) -> None: + """Patch startup/probing/discovery steps to deterministic no-network behavior.""" + monkeypatch.setattr(ra_main, "_setup_backend_token", lambda _backend: None) + monkeypatch.setattr( + ra_main, + "probe_capabilities", + lambda _config: BackendCapabilities( + supports_chat=True, + supports_embeddings=False, + ), + ) + monkeypatch.setattr(ra_main, "validate_mode", lambda _config: None) + monkeypatch.setattr( + ra_main, + "_resolve_timeframe", + lambda _args: ( + "pr3502~", + "pr3502", + "2026-01-01T00:00:00+00:00", + "2026-01-02T00:00:00+00:00", + ), + ) + monkeypatch.setattr(ra_main, "get_ordered_pr_list", lambda _start, _end: list(prs)) diff --git a/tools/release_announcement/tests/cli_invocation.py b/tools/release_announcement/tests/cli_invocation.py new file mode 100644 index 0000000000..ce8d7744c5 --- /dev/null +++ b/tools/release_announcement/tests/cli_invocation.py @@ -0,0 +1,34 @@ +"""Shared CLI command builders for release_announcement integration tests.""" + +from __future__ import annotations + +import sys + + +def build_release_announcement_cmd( + backend: str, + pipeline: str, + start: str, + end: str | None = None, + dry_run: bool = False, +) -> list[str]: + """Build a release_announcement module invocation command.""" + cmd = [ + sys.executable, + "-m", + "release_announcement", + "--delay-secs", + "0", + "--backend", + backend, + "--pipeline", + pipeline, + "--file", + "ReleaseAnnouncement.md", + start, + ] + if end: + cmd.append(end) + if dry_run: + cmd.append("--dry-run") + return cmd diff --git a/tools/release_announcement/tests/conftest.py b/tools/release_announcement/tests/conftest.py new file mode 100644 index 0000000000..76749c7773 --- /dev/null +++ b/tools/release_announcement/tests/conftest.py @@ -0,0 +1,43 @@ +"""Pytest guardrails for backend usage in the release_announcement test suite.""" + +from __future__ import annotations + +import os + +import pytest + + +@pytest.fixture(autouse=True) +def prevent_real_backend_calls( + monkeypatch: pytest.MonkeyPatch, + request: pytest.FixtureRequest, +) -> None: + """Block only real network calls outside the final full integration test.""" + is_integration = request.node.get_closest_marker("integration") is not None + is_final_full_integration = ( + is_integration + and os.getenv("RA_RUN_STEP2_E2E") == "1" + and "test_step2_stub_integration.py" in request.node.nodeid + ) + if is_final_full_integration: + return + + def _blocked(*_args, **_kwargs): + raise RuntimeError( + "Real backend call blocked in test. Mock backend calls in unit/regression tests; " + "only the final full integration test may call real providers." + ) + + # Block GitHub HTTP calls at urllib boundary. + monkeypatch.setattr( + "src.release_announcement.backends.github_backend.urllib.request.urlopen", + _blocked, + ) + monkeypatch.setattr( + "src.release_announcement.backends.distillation_adapters.github_adapter." + "urllib.request.urlopen", + _blocked, + ) + + # Block Ollama HTTP calls at the transport boundary used by ollama's client. + monkeypatch.setattr("httpx.Client.request", _blocked) diff --git a/tools/release_announcement/tests/dummy_backend.py b/tools/release_announcement/tests/dummy_backend.py new file mode 100644 index 0000000000..75fa2300a3 --- /dev/null +++ b/tools/release_announcement/tests/dummy_backend.py @@ -0,0 +1,185 @@ +"""Dummy backend adapter for testing the staged distillation pipeline. + +Implements the DistillationAdapter protocol with hardcoded returns and no LLM calls. +Used for Step 4 verification and unit testing of the pipeline orchestration. +Registers under the "dummy" backend name (which has no prior registration). +""" + +import sys +import time + +from src.release_announcement.distillation import ( + build_distilled_context, + default_fallback_signal, + Chunk, + Signal, + ClassifiedSignal, + ClassifiedSignals, + DistilledContextMetadata, + DistilledContext, +) +from src.release_announcement.registry import registry + +try: + from src.release_announcement.main import logger +except ImportError: + class DummyLogger: + def trace(self, msg): + """Write trace output to stdout when the real logger is unavailable.""" + print(msg, file=sys.stdout) + logger = DummyLogger() + + +def _system_prompt_preview(prompts: list[dict[str, str]], limit: int = 80) -> str: + """Return a stable preview of the first system prompt in a prompt list.""" + for prompt in prompts: + if isinstance(prompt, dict) and prompt.get("role") == "system": + content = str(prompt.get("content", "")).strip().replace("\n", " ") + return content[:limit] + return "" + + +class DummyBackend: + """Dummy adapter implementing DistillationAdapter protocol for testing.""" + + def select_relevant_chunks( + self, + chunks: list[Chunk], + use_embeddings: bool, + ranking_prompts: list[dict[str, str]], + ) -> list[Chunk]: + """Return all chunks with deterministic descending relevance scores.""" + logger.trace("DummyBackend.select_relevant_chunks start") + logger.trace( + "DummyBackend.select_relevant_chunks " + f"use_embeddings={use_embeddings}" + ) + logger.trace( + "DummyBackend.select_relevant_chunks " + "ranking_prompts=" + f"{len(ranking_prompts)}" + ) + start = time.perf_counter() + result = [] + for i, chunk in enumerate(chunks): + scored_chunk = Chunk( + text=chunk.text, + source=chunk.source, + relevance_score=1.0 - (i * 0.05), # Decay from 1.0 + chunk_index=chunk.chunk_index, + ) + result.append(scored_chunk) + elapsed = (time.perf_counter() - start) * 1000 + logger.trace(f"DummyBackend.select_relevant_chunks end elapsed_ms={elapsed:.2f}") + return result + + def extract_chunk_signals( + self, + chunk: Chunk, + extraction_prompts: list[dict[str, str]], + ) -> list[Signal]: + """Return a single synthetic signal derived from the input chunk.""" + logger.trace("DummyBackend.extract_chunk_signals start") + logger.trace( + "DummyBackend.extract_chunk_signals " + f"extraction_prompts={len(extraction_prompts)}" + ) + logger.info( + "DummyBackend.extract_chunk_signals " + f"system_prompt_preview={_system_prompt_preview(extraction_prompts)!r}" + ) + start = time.perf_counter() + result = [default_fallback_signal(chunk.source)] + elapsed = (time.perf_counter() - start) * 1000 + logger.trace(f"DummyBackend.extract_chunk_signals end elapsed_ms={elapsed:.2f}") + return result + + def consolidate_signals( + self, + signals: list[Signal], + consolidation_prompts: list[dict[str, str]], + ) -> list[Signal]: + """Return the input signals unchanged for consolidation tests.""" + logger.trace("DummyBackend.consolidate_signals start") + logger.trace( + "DummyBackend.consolidate_signals " + f"consolidation_prompts={len(consolidation_prompts)}" + ) + logger.info( + "DummyBackend.consolidate_signals " + f"system_prompt_preview={_system_prompt_preview(consolidation_prompts)!r}" + ) + logger.trace(f"DummyBackend.consolidate_signals signals_count={len(signals)}") + start = time.perf_counter() + result = signals + elapsed = (time.perf_counter() - start) * 1000 + logger.trace(f"DummyBackend.consolidate_signals end elapsed_ms={elapsed:.2f}") + return result + + def classify_signals( + self, + signals: list[Signal], + classification_prompts: list[dict[str, str]], + ) -> ClassifiedSignals: + """Classify every signal as minor and build a synthetic summary.""" + logger.trace("DummyBackend.classify_signals start") + logger.trace( + "DummyBackend.classify_signals " + f"classification_prompts={len(classification_prompts)}" + ) + logger.info( + "DummyBackend.classify_signals " + f"system_prompt_preview={_system_prompt_preview(classification_prompts)!r}" + ) + logger.trace(f"DummyBackend.classify_signals signals_count={len(signals)}") + start = time.perf_counter() + classified = [ + ClassifiedSignal(signal=signal, category="minor") + for signal in signals + ] + result = ClassifiedSignals( + classified=classified, + summary=f"Dummy classification: {len(signals)} signals classified as minor", + ) + elapsed = (time.perf_counter() - start) * 1000 + logger.trace(f"DummyBackend.classify_signals end elapsed_ms={elapsed:.2f}") + return result + + def render_final_context( + self, + classified: ClassifiedSignals, + metadata: DistilledContextMetadata, + ) -> DistilledContext: + """Convert classified signals into the final distilled context structure.""" + logger.trace("DummyBackend.render_final_context start") + start = time.perf_counter() + result = build_distilled_context(classified=classified, metadata=metadata) + elapsed = (time.perf_counter() - start) * 1000 + logger.trace(f"DummyBackend.render_final_context end elapsed_ms={elapsed:.2f}") + return result + + def get_adapter_tag(self) -> str: + """Return the adapter identifier for logging.""" + return "dummy" + + +# Register the dummy backend at import time using the same lazy factory +# registration pattern as production backends. +def _create_dummy_adapter(): + """Factory function for creating a dummy backend.""" + return DummyBackend() + + +_DUMMY_REGISTRATION_STATE = {"registered": False} + + +def register_dummy_backend(): + """Register the dummy backend in the registry.""" + if _DUMMY_REGISTRATION_STATE["registered"]: + return + registry.register("dummy", _create_dummy_adapter) + _DUMMY_REGISTRATION_STATE["registered"] = True + + +# Auto-register on import +register_dummy_backend() diff --git a/tools/release_announcement/tests/run-release-announcement-baseline.sh b/tools/release_announcement/tests/run-release-announcement-baseline.sh new file mode 100755 index 0000000000..092abbe62e --- /dev/null +++ b/tools/release_announcement/tests/run-release-announcement-baseline.sh @@ -0,0 +1,167 @@ +#!/bin/bash +############################################################################## +# Release Announcement Tool Baseline Matrix Runner +# +# Purpose: Execute the Step 0 baseline matrix for the release-announcement +# tool against representative scenarios (normal PR, SKIP PR, large PR, +# tag range) with both Ollama and GitHub backends. +# +# Usage: tools/release_announcement/tests/run-release-announcement-baseline.sh +# +# Output: Timestamped directory under tools/release_announcement/tests/build/release-announcement-baseline-YYYYMMDD_HHMMSS/ +# containing per-run: metadata, command, timing (with us precision), +# stdout-stderr, git diff, file snapshots, and exit code. +# +# Prerequisites: +# - Working directory: repository root +# - Git worktree: clean (run 'git status' to verify) +# - tools/update-release-announcement.sh: present and executable +# - ReleaseAnnouncement.md: present and unchanged +# +# Exit code: 0 on complete (all 8 scenarios attempted) +# 1 if prerequisites not met +############################################################################## + +set -euo pipefail + +cd "$(git rev-parse --show-toplevel)" || { echo "Error: not in a git repository"; exit 1; } + +# Verify prerequisites +if ! [[ -f tools/update-release-announcement.sh ]]; then + echo "Error: tools/update-release-announcement.sh not found" + exit 1 +fi + +if ! [[ -f ReleaseAnnouncement.md ]]; then + echo "Error: ReleaseAnnouncement.md not found" + exit 1 +fi + +if ! git diff --quiet --exit-code; then + echo "Error: git worktree has uncommitted changes; run 'git status' to review" + exit 1 +fi + +# Create timestamped baseline directory +BASELINE_DIR="tools/release_announcement/tests/build/release-announcement-baseline-$(date +%Y%m%d_%H%M%S)" +mkdir -p "$BASELINE_DIR" + +# Capture starting state +BASE_COMMIT=$(git rev-parse HEAD) +printf '%s\n' "$BASE_COMMIT" > "$BASELINE_DIR/base_commit.txt" +cp ReleaseAnnouncement.md "$BASELINE_DIR/ReleaseAnnouncement.initial.md" + +# Function to run one scenario and capture artifacts +run_case() { + local backend="$1" + local label="$2" + local start="$3" + local end="${4:-}" + local dry="${5:-false}" + + local run_dir="$BASELINE_DIR/${backend}__${label}" + mkdir -p "$run_dir" + + # Build command + local -a cmd=(./tools/update-release-announcement.sh --delay-secs 0 --backend "$backend" --file ReleaseAnnouncement.md "$start") + if [[ -n "$end" ]]; then + cmd+=("$end") + fi + if [[ "$dry" == "true" ]]; then + cmd+=(--dry-run) + fi + + # Save metadata and command for later inspection + { + printf 'Backend: %s\n' "$backend" + printf 'Label: %s\n' "$label" + printf 'StartArg: %s\n' "$start" + printf 'EndArg: %s\n' "${end:-}" + printf 'DryRun: %s\n' "$dry" + } > "$run_dir/metadata.txt" + + { + printf 'Command:' + printf ' %q' "${cmd[@]}" + printf '\n' + } > "$run_dir/command.txt" + + # Capture start timestamp (microseconds) + local start_ns + local start_iso_us + start_ns=$(date +%s%N) + start_iso_us=$(date -u +"%Y-%m-%dT%H:%M:%S.%6NZ") + + # Run command and capture exit code + set +e + "${cmd[@]}" > "$run_dir/stdout-stderr.log" 2>&1 + local ec=$? + set -e + + # Capture end timestamp (microseconds) + local end_ns + local end_iso_us + end_ns=$(date +%s%N) + end_iso_us=$(date -u +"%Y-%m-%dT%H:%M:%S.%6NZ") + + # Compute duration + local dur_ns + local dur_ms + dur_ns=$((end_ns - start_ns)) + dur_ms=$((dur_ns / 1000000)) + + # Save timing information + { + printf 'start_iso_utc_us=%s\n' "$start_iso_us" + printf 'end_iso_utc_us=%s\n' "$end_iso_us" + printf 'start_epoch_ns=%s\n' "$start_ns" + printf 'end_epoch_ns=%s\n' "$end_ns" + printf 'duration_ns=%s\n' "$dur_ns" + printf 'duration_ms=%s\n' "$dur_ms" + printf 'exit_code=%s\n' "$ec" + } > "$run_dir/timing.txt" + + printf '%s\n' "$ec" > "$run_dir/exit_code.txt" + + # Capture file snapshots and diffs + cp ReleaseAnnouncement.md "$run_dir/ReleaseAnnouncement.after.md" + git --no-pager diff -- ReleaseAnnouncement.md > "$run_dir/git-diff.txt" + git status --short > "$run_dir/git-status-short.txt" + + # Reset to baseline for next scenario + git reset --hard "$BASE_COMMIT" > "$run_dir/reset.log" 2>&1 + + # Report result + echo "✓ $backend/$label exit=$ec duration_ms=$dur_ms" +} + +echo "=== Release Announcement Baseline Matrix ===" +echo "Repository: $(git remote get-url origin || echo '')" +echo "Baseline dir: $BASELINE_DIR" +echo "" + +# Execute baseline matrix: 4 scenarios × 2 backends = 8 runs +run_case ollama pr3429 pr3429 "" false +run_case github pr3429 pr3429 "" false + +run_case ollama pr3625 pr3625 "" false +run_case github pr3625 pr3625 "" false + +run_case ollama pr3502 pr3502 "" false +run_case github pr3502 pr3502 "" false + +run_case ollama tag_r3_12_0beta4_to_r3_12_0beta5 r3_12_0beta4 r3_12_0beta5 true +run_case github tag_r3_12_0beta4_to_r3_12_0beta5 r3_12_0beta4 r3_12_0beta5 true + +# Finalize baseline +cp ReleaseAnnouncement.md "$BASELINE_DIR/ReleaseAnnouncement.final.md" +git reset --hard "$BASE_COMMIT" >/dev/null 2>&1 +find "$BASELINE_DIR" -maxdepth 2 -type f | sort > "$BASELINE_DIR/_artifact-index.txt" + +echo "" +echo "=== Baseline Complete ===" +echo "Output directory: $BASELINE_DIR" +echo "Artifact count: $(find "$BASELINE_DIR" -maxdepth 2 -type f | wc -l)" +echo "" +echo "To inspect timing:" +echo " for d in $BASELINE_DIR/*__*; do echo \"\$(basename \$d): \$(grep duration_ms \$d/timing.txt)\"; done" diff --git a/tools/release_announcement/tests/test_data.py b/tools/release_announcement/tests/test_data.py new file mode 100644 index 0000000000..480c0d5407 --- /dev/null +++ b/tools/release_announcement/tests/test_data.py @@ -0,0 +1,17 @@ +"""Shared test data builders for distillation-related unit tests.""" + +from __future__ import annotations + +from src.release_announcement.distillation import DistilledContextMetadata + + +def sample_metadata_123() -> DistilledContextMetadata: + """Return a canonical metadata object used across adapter tests.""" + return DistilledContextMetadata( + pr_number=123, + total_chunks=10, + selected_chunks=5, + extraction_phase_duration_ms=100, + consolidation_phase_duration_ms=50, + classification_phase_duration_ms=30, + ) diff --git a/tools/release_announcement/tests/test_delay_functionality.py b/tools/release_announcement/tests/test_delay_functionality.py new file mode 100644 index 0000000000..6ca31a62ae --- /dev/null +++ b/tools/release_announcement/tests/test_delay_functionality.py @@ -0,0 +1,124 @@ +"""Test delay functionality: sleep fires only when the LLM path is taken.""" + +from __future__ import annotations + +from pathlib import Path +from unittest.mock import patch + +import pytest + +from release_announcement import main as ra_main +from release_announcement.cli_config import BackendConfig + + +def _stub_pr_data() -> dict: + return { + "number": 3502, + "title": "Test PR", + "body": "Some change", + "comments": [], + "reviews": [], + } + + +def _wire_llm_stubs(monkeypatch: pytest.MonkeyPatch) -> None: + """Patch the minimum set of collaborators so process_single_pr reaches the sleep point.""" + monkeypatch.setattr(ra_main, "_fetch_pr_data", lambda *_a, **_k: _stub_pr_data()) + monkeypatch.setattr(ra_main, "prepare_pr_context", lambda *_a, **_k: None) + monkeypatch.setattr(ra_main, "_load_announcement_content", lambda *_a: "existing") + monkeypatch.setattr(ra_main, "_load_prompt_template", lambda *_a: {}) + monkeypatch.setattr(ra_main, "_build_ai_prompt", lambda *_a, **_k: {"messages": []}) + monkeypatch.setattr(ra_main, "_process_with_llm", lambda *_a, **_k: "updated") + monkeypatch.setattr(ra_main, "_write_and_check_announcement", lambda *_a, **_k: None) + monkeypatch.setattr(ra_main, "_check_for_changes", lambda *_a, **_k: "no_changes") + + +def test_delay_fires_before_llm_when_nonzero( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Sleep is called with the configured delay when the LLM path is taken.""" + ann_file = tmp_path / "ReleaseAnnouncement.md" + ann_file.write_text("existing", encoding="utf-8") + _wire_llm_stubs(monkeypatch) + + config = BackendConfig(backend="ollama", pipeline_mode="legacy", delay_secs=5) + + with patch("release_announcement.main.time.sleep") as mock_sleep: + ra_main.process_single_pr( + pr_num=3502, + pr_title="Test PR", + announcement_file=str(ann_file), + prompt_file="unused.yml", + config=config, + ) + mock_sleep.assert_called_once_with(5) + + +def test_delay_skipped_when_zero( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """No sleep when delay_secs is zero.""" + ann_file = tmp_path / "ReleaseAnnouncement.md" + ann_file.write_text("existing", encoding="utf-8") + _wire_llm_stubs(monkeypatch) + + config = BackendConfig(backend="ollama", pipeline_mode="legacy", delay_secs=0) + + with patch("release_announcement.main.time.sleep") as mock_sleep: + ra_main.process_single_pr( + pr_num=3502, + pr_title="Test PR", + announcement_file=str(ann_file), + prompt_file="unused.yml", + config=config, + ) + mock_sleep.assert_not_called() + + +def test_delay_skipped_for_dry_run( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """No sleep when dry-run is set, even with a non-zero delay.""" + ann_file = tmp_path / "ReleaseAnnouncement.md" + ann_file.write_text("existing", encoding="utf-8") + monkeypatch.setattr(ra_main, "_fetch_pr_data", lambda *_a, **_k: _stub_pr_data()) + + config = BackendConfig(backend="ollama", pipeline_mode="legacy", dry_run=True, delay_secs=5) + + with patch("release_announcement.main.time.sleep") as mock_sleep: + result = ra_main.process_single_pr( + pr_num=3502, + pr_title="Test PR", + announcement_file=str(ann_file), + prompt_file="unused.yml", + config=config, + ) + assert result == "dry_run" + mock_sleep.assert_not_called() + + +def test_delay_skipped_for_skipped_pr( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """No sleep when the PR matches a skip rule.""" + ann_file = tmp_path / "ReleaseAnnouncement.md" + ann_file.write_text("existing", encoding="utf-8") + weblate_pr = {**_stub_pr_data(), "title": "Translations update from Hosted Weblate"} + monkeypatch.setattr(ra_main, "_fetch_pr_data", lambda *_a, **_k: weblate_pr) + + config = BackendConfig(backend="ollama", pipeline_mode="legacy", delay_secs=5) + + with patch("release_announcement.main.time.sleep") as mock_sleep: + result = ra_main.process_single_pr( + pr_num=3502, + pr_title=weblate_pr["title"], + announcement_file=str(ann_file), + prompt_file="unused.yml", + config=config, + ) + assert result.startswith("skipped:") + mock_sleep.assert_not_called() diff --git a/tools/release_announcement/tests/test_distillation_dummy_adapter.py b/tools/release_announcement/tests/test_distillation_dummy_adapter.py new file mode 100644 index 0000000000..1e99597b62 --- /dev/null +++ b/tools/release_announcement/tests/test_distillation_dummy_adapter.py @@ -0,0 +1,555 @@ +"""Unit tests for the dummy backend adapter (Substep 4c). + +Tests verify that the dummy backend correctly implements the DistillationAdapter protocol +with hardcoded returns and no LLM calls, suitable for pipeline orchestration testing +and Step 4 verification. +""" +from tests.dummy_backend import DummyBackend, register_dummy_backend +from tests.test_data import sample_metadata_123 + +from src.release_announcement.distillation import ( + ClassifiedSignal, + ClassifiedSignals, + Chunk, + DistilledContext, + DistilledContextMetadata, + Signal, +) + + +class TestDummyBackendInitialization: + """Tests for dummy backend registration and instantiation.""" + + def test_dummy_backend_instantiation(self): + """Verify DummyBackend can be instantiated.""" + backend = DummyBackend() + assert backend is not None + assert hasattr(backend, "select_relevant_chunks") + assert hasattr(backend, "extract_chunk_signals") + assert hasattr(backend, "consolidate_signals") + assert hasattr(backend, "classify_signals") + assert hasattr(backend, "render_final_context") + + def test_dummy_backend_registration(self): + """Verify register_dummy_backend() can be called without errors.""" + # Should not raise, even if called multiple times + register_dummy_backend() + register_dummy_backend() + + +class TestSelectRelevantChunks: + """Tests for DummyBackend.select_relevant_chunks().""" + + def test_select_relevant_chunks_returns_all_inputs(self): + """Verify all input chunks are returned.""" + backend = DummyBackend() + chunks = [ + Chunk( + text=f"Chunk {i}", + source=f"comment_{i}", + chunk_index=i, + ) + for i in range(3) + ] + result = backend.select_relevant_chunks( + chunks, + use_embeddings=False, + ranking_prompts=[{"role": "user", "content": "dummy ranking prompt"}], + ) + assert len(result) == 3 + + def test_select_relevant_chunks_assigns_decreasing_scores(self): + """Verify chunks get decreasing relevance scores based on position.""" + backend = DummyBackend() + chunks = [ + Chunk(text=f"Chunk {i}", source=f"comment_{i}", chunk_index=i) + for i in range(3) + ] + result = backend.select_relevant_chunks( + chunks, + use_embeddings=False, + ranking_prompts=[{"role": "user", "content": "dummy ranking prompt"}], + ) + + # Scores should decrease: 1.0, 0.95, 0.90 + assert result[0].relevance_score == 1.0 + assert result[1].relevance_score == 0.95 + assert result[2].relevance_score == 0.90 + + def test_select_relevant_chunks_empty_list(self): + """Verify empty chunk list returns empty result.""" + backend = DummyBackend() + result = backend.select_relevant_chunks( + [], + use_embeddings=True, + ranking_prompts=[{"role": "user", "content": "dummy ranking prompt"}], + ) + assert not result + + def test_select_relevant_chunks_ignores_embeddings_flag(self): + """Verify use_embeddings parameter is accepted but unused.""" + backend = DummyBackend() + chunks = [Chunk(text="Test", source="test", chunk_index=0)] + result_no_emb = backend.select_relevant_chunks( + chunks, + use_embeddings=False, + ranking_prompts=[{"role": "user", "content": "dummy ranking prompt"}], + ) + result_with_emb = backend.select_relevant_chunks( + chunks, + use_embeddings=True, + ranking_prompts=[{"role": "user", "content": "dummy ranking prompt"}], + ) + + # Results should be identical regardless of embeddings flag + assert result_no_emb[0].relevance_score == result_with_emb[0].relevance_score + + def test_select_relevant_chunks_preserves_chunk_content(self): + """Verify chunk text and source are preserved.""" + backend = DummyBackend() + original_chunks = [ + Chunk(text="Important discussion", source="title_section", chunk_index=0), + Chunk(text="Detailed feedback", source="comment_5", chunk_index=1), + ] + result = backend.select_relevant_chunks( + original_chunks, + use_embeddings=False, + ranking_prompts=[{"role": "user", "content": "dummy ranking prompt"}], + ) + + assert result[0].text == "Important discussion" + assert result[0].source == "title_section" + assert result[1].text == "Detailed feedback" + assert result[1].source == "comment_5" + + +class TestExtractChunkSignals: + """Tests for DummyBackend.extract_chunk_signals().""" + + def test_extract_chunk_signals_returns_single_signal(self): + """Verify extraction returns exactly one signal per chunk.""" + backend = DummyBackend() + chunk = Chunk(text="PR discussion", source="comment_1", chunk_index=0) + result = backend.extract_chunk_signals( + chunk, + [{"role": "system", "content": "placeholder prompt"}], + ) + + assert len(result) == 1 + assert isinstance(result[0], Signal) + + def test_extract_chunk_signals_signal_has_valid_fields(self): + """Verify extracted signal has all required fields with valid values.""" + backend = DummyBackend() + chunk = Chunk(text="Performance improvement", source="comment_3", chunk_index=0) + signals = backend.extract_chunk_signals( + chunk, + [{"role": "system", "content": "placeholder"}], + ) + + signal = signals[0] + assert signal.change is not None + assert signal.impact in ["low", "medium", "high"] + assert signal.users_affected is not None + assert signal.confidence in ["low", "medium", "high"] + assert isinstance(signal.final_outcome, bool) + + def test_extract_chunk_signals_includes_source_in_change(self): + """Verify extracted signal references the chunk source.""" + backend = DummyBackend() + chunk = Chunk(text="Some discussion", source="title_part", chunk_index=0) + signals = backend.extract_chunk_signals( + chunk, + [{"role": "system", "content": "ignored"}], + ) + + # Signal change should mention the source + assert "title_part" in signals[0].change + + def test_extract_chunk_signals_ignores_prompt(self): + """Verify extraction_prompt parameter is accepted but unused.""" + backend = DummyBackend() + chunk = Chunk(text="Test", source="src", chunk_index=0) + + result1 = backend.extract_chunk_signals( + chunk, + [{"role": "system", "content": "prompt A"}], + ) + result2 = backend.extract_chunk_signals( + chunk, + [{"role": "system", "content": "prompt B"}], + ) + + # Results should be identical regardless of prompt + assert result1[0].change == result2[0].change + + def test_extract_chunk_signals_multiple_chunks_different_signals(self): + """Verify multiple calls produce signals referencing their respective chunks.""" + backend = DummyBackend() + chunk1 = Chunk(text="Fix", source="comment_1", chunk_index=0) + chunk2 = Chunk(text="Enhancement", source="comment_2", chunk_index=1) + + signals1 = backend.extract_chunk_signals( + chunk1, + [{"role": "system", "content": "prompt"}], + ) + signals2 = backend.extract_chunk_signals( + chunk2, + [{"role": "system", "content": "prompt"}], + ) + + # Each should have a different source reference + assert "comment_1" in signals1[0].change + assert "comment_2" in signals2[0].change + + +class TestConsolidateSignals: + """Tests for DummyBackend.consolidate_signals().""" + + def test_consolidate_signals_returns_input_unchanged(self): + """Verify consolidation returns exact same signals.""" + backend = DummyBackend() + signals = [ + Signal( + change="Feature added", + impact="medium", + users_affected="most users", + confidence="high", + final_outcome=True, + ), + Signal( + change="Bug fixed", + impact="low", + users_affected="some users", + confidence="medium", + final_outcome=False, + ), + ] + result = backend.consolidate_signals( + signals, + [{"role": "system", "content": "placeholder"}], + ) + + assert len(result) == 2 + assert result[0] == signals[0] + assert result[1] == signals[1] + + def test_consolidate_signals_empty_list(self): + """Verify consolidation of empty list returns empty list.""" + backend = DummyBackend() + result = backend.consolidate_signals( + [], + [{"role": "system", "content": "prompt"}], + ) + assert not result + + def test_consolidate_signals_ignores_prompt(self): + """Verify consolidation_prompt parameter is accepted but unused.""" + backend = DummyBackend() + signal = Signal( + change="Change", + impact="low", + users_affected="users", + confidence="medium", + final_outcome=False, + ) + signals = [signal] + + result1 = backend.consolidate_signals( + signals, + [{"role": "system", "content": "prompt A"}], + ) + result2 = backend.consolidate_signals( + signals, + [{"role": "system", "content": "prompt B"}], + ) + + # Results should be identical + assert result1 == result2 + + def test_consolidate_signals_preserves_order(self): + """Verify signal order is preserved.""" + backend = DummyBackend() + signals = [ + Signal(change=f"Signal {i}", impact="low", users_affected="users", + confidence="low", final_outcome=False) + for i in range(5) + ] + result = backend.consolidate_signals( + signals, + [{"role": "system", "content": "prompt"}], + ) + + for i, signal in enumerate(result): + assert signal.change == f"Signal {i}" + + +class TestClassifySignals: + """Tests for DummyBackend.classify_signals().""" + + def test_classify_signals_returns_classified_signals_object(self): + """Verify classification returns ClassifiedSignals object.""" + backend = DummyBackend() + signals = [ + Signal( + change="Change", + impact="low", + users_affected="users", + confidence="medium", + final_outcome=False, + ) + ] + result = backend.classify_signals( + signals, + [{"role": "system", "content": "placeholder"}], + ) + + # Should have classified list and summary + assert hasattr(result, "classified") + assert hasattr(result, "summary") + assert len(result.classified) == 1 + + def test_classify_signals_all_assigned_to_minor(self): + """Verify all signals are classified as 'minor'.""" + backend = DummyBackend() + signals = [ + Signal( + change=f"Signal {i}", + impact="high" if i % 2 == 0 else "low", + users_affected="users", + confidence="high" if i % 2 == 0 else "low", + final_outcome=i % 2 == 0, + ) + for i in range(5) + ] + result = backend.classify_signals( + signals, + [{"role": "system", "content": "prompt"}], + ) + + # All should be classified as "minor" regardless of impact + for classified in result.classified: + assert classified.category == "minor" + + def test_classify_signals_preserves_signal_data(self): + """Verify signal data is preserved in classification.""" + backend = DummyBackend() + original_signal = Signal( + change="Specific change", + impact="medium", + users_affected="many users", + confidence="high", + final_outcome=True, + ) + result = backend.classify_signals( + [original_signal], + [{"role": "system", "content": "prompt"}], + ) + + classified = result.classified[0] + assert classified.signal == original_signal + + def test_classify_signals_includes_summary(self): + """Verify summary mentions signal count.""" + backend = DummyBackend() + signals = [ + Signal(change="Sig 1", impact="low", users_affected="users", + confidence="low", final_outcome=False), + Signal(change="Sig 2", impact="low", users_affected="users", + confidence="low", final_outcome=False), + Signal(change="Sig 3", impact="low", users_affected="users", + confidence="low", final_outcome=False), + ] + result = backend.classify_signals( + signals, + [{"role": "system", "content": "prompt"}], + ) + + summary = str(result.summary) + assert "3" in summary or "three" in summary.lower() + + def test_classify_signals_empty_list(self): + """Verify empty signal list produces empty classified list.""" + backend = DummyBackend() + result = backend.classify_signals( + [], + [{"role": "system", "content": "prompt"}], + ) + assert len(result.classified) == 0 + + def test_classify_signals_ignores_prompt(self): + """Verify classification_prompt parameter is accepted but unused.""" + backend = DummyBackend() + signals = [ + Signal(change="Change", impact="low", users_affected="users", + confidence="low", final_outcome=False), + ] + + result1 = backend.classify_signals( + signals, + [{"role": "system", "content": "prompt A"}], + ) + result2 = backend.classify_signals( + signals, + [{"role": "system", "content": "prompt B"}], + ) + + # Results should be identical + assert len(result1.classified) == len(result2.classified) + assert result1.classified[0].category == result2.classified[0].category + + +class TestRenderFinalContext: + """Tests for DummyBackend.render_final_context().""" + + def test_render_final_context_returns_distilled_context(self): + """Verify rendering returns DistilledContext object.""" + backend = DummyBackend() + + signal = Signal( + change="Change", + impact="low", + users_affected="users", + confidence="low", + final_outcome=False, + ) + classified = ClassifiedSignals( + classified=[ClassifiedSignal(signal=signal, category="minor")], + summary="Test summary", + ) + metadata = sample_metadata_123() + + result = backend.render_final_context(classified, metadata) + + assert isinstance(result, DistilledContext) + assert hasattr(result, "summary") + assert hasattr(result, "structured_signals") + assert hasattr(result, "classification") + assert hasattr(result, "metadata") + + def test_render_final_context_includes_classified_data(self): + """Verify rendered context includes classified signals.""" + backend = DummyBackend() + + signal = Signal( + change="Critical bug fix", + impact="high", + users_affected="all users", + confidence="high", + final_outcome=True, + ) + classified = ClassifiedSignals( + classified=[ClassifiedSignal(signal=signal, category="major")], + summary="Important fix", + ) + metadata = DistilledContextMetadata( + pr_number=456, + total_chunks=8, + selected_chunks=4, + extraction_phase_duration_ms=200, + consolidation_phase_duration_ms=100, + classification_phase_duration_ms=50, + ) + + result = backend.render_final_context(classified, metadata) + + # Structured signals should contain the signal data + assert len(result.structured_signals) == 1 + assert result.structured_signals[0]["change"] == "Critical bug fix" + assert result.structured_signals[0]["impact"] == "high" + + def test_render_final_context_includes_metadata(self): + """Verify rendered context preserves metadata.""" + backend = DummyBackend() + + metadata = DistilledContextMetadata( + pr_number=789, + total_chunks=20, + selected_chunks=10, + extraction_phase_duration_ms=1000, + consolidation_phase_duration_ms=500, + classification_phase_duration_ms=300, + ) + + result = backend.render_final_context(ClassifiedSignals(), metadata) + + assert result.metadata["pr_number"] == 789 + assert result.metadata["total_chunks"] == 20 + assert result.metadata["selected_chunks"] == 10 + + def test_render_final_context_empty_signals(self): + """Verify rendering works with no classified signals.""" + backend = DummyBackend() + + classified = ClassifiedSignals(classified=[], summary="No changes") + metadata = DistilledContextMetadata( + pr_number=999, + total_chunks=5, + selected_chunks=0, + extraction_phase_duration_ms=50, + consolidation_phase_duration_ms=0, + classification_phase_duration_ms=0, + ) + + result = backend.render_final_context(classified, metadata) + + assert len(result.structured_signals) == 0 + assert result.summary == "No changes" + + +class TestDummyBackendIntegration: + """Integration tests for dummy backend as a complete adapter.""" + + def test_full_pipeline_flow(self): + """Verify dummy backend works through complete pipeline flow.""" + backend = DummyBackend() + + # Stage 1: Select chunks + chunks = [ + Chunk(text=f"Chunk {i}", source=f"source_{i}", chunk_index=i) + for i in range(3) + ] + selected = backend.select_relevant_chunks( + chunks, + use_embeddings=False, + ranking_prompts=[{"role": "user", "content": "dummy ranking prompt"}], + ) + assert len(selected) == 3 + + # Stage 2: Extract signals + all_signals = [] + for chunk in selected: + signals = backend.extract_chunk_signals( + chunk, + [{"role": "system", "content": "extraction prompt"}], + ) + all_signals.extend(signals) + assert len(all_signals) == 3 + + # Stage 3: Consolidate signals + consolidated = backend.consolidate_signals( + all_signals, + [{"role": "system", "content": "consolidation prompt"}], + ) + assert len(consolidated) == 3 + + # Stage 4: Classify signals + classified_objs = [ + ClassifiedSignal(signal=sig, category="minor") + for sig in consolidated + ] + classified = ClassifiedSignals(classified=classified_objs, summary="Test") + + # Stage 5: Render context + metadata = DistilledContextMetadata( + pr_number=111, + total_chunks=3, + selected_chunks=3, + extraction_phase_duration_ms=100, + consolidation_phase_duration_ms=50, + classification_phase_duration_ms=30, + ) + final_context = backend.render_final_context(classified, metadata) + + assert isinstance(final_context, DistilledContext) + assert len(final_context.structured_signals) == 3 diff --git a/tools/release_announcement/tests/test_distillation_github_adapter.py b/tools/release_announcement/tests/test_distillation_github_adapter.py new file mode 100644 index 0000000000..d7e59a08f7 --- /dev/null +++ b/tools/release_announcement/tests/test_distillation_github_adapter.py @@ -0,0 +1,254 @@ +"""Unit tests for the GitHub distillation adapter.""" + +import json +from unittest.mock import patch + +import pytest + +from src.release_announcement.backends.distillation_adapters.github_adapter import ( + GitHubDistillationAdapter, +) +from src.release_announcement.distillation import Chunk, Signal + +GH_CHAT_PATH = ( + "src.release_announcement.backends.distillation_adapters.github_adapter." + "GitHubDistillationAdapter._github_chat_completion" +) +GH_POST_PATH = ( + "src.release_announcement.backends.distillation_adapters.github_adapter." + "GitHubDistillationAdapter._github_post_json" +) + + +@pytest.fixture +def github_adapter() -> GitHubDistillationAdapter: + return GitHubDistillationAdapter(token_resolver=lambda: "test-token") + + +@pytest.fixture +def sample_chunks() -> list[Chunk]: + return [Chunk(text=f"Chunk {i}", source=f"comment_{i}", chunk_index=i) for i in range(3)] + + +@pytest.fixture +def sample_signal() -> Signal: + return Signal( + change="Test change", + impact="low", + users_affected="users", + confidence="high", + final_outcome=True, + ) + + +def test_github_adapter_protocol_surface_present( + github_adapter: GitHubDistillationAdapter, +) -> None: + assert hasattr(github_adapter, "select_relevant_chunks") + assert hasattr(github_adapter, "extract_chunk_signals") + assert hasattr(github_adapter, "consolidate_signals") + assert hasattr(github_adapter, "classify_signals") + assert hasattr(github_adapter, "render_final_context") + + +def test_github_select_relevant_chunks_chat_single_provider_call( + github_adapter: GitHubDistillationAdapter, + sample_chunks: list[Chunk], +) -> None: + ranking_prompts = [ + {"role": "system", "content": "Rank chunks"}, + {"role": "user", "content": "Rank {chunk_count} chunks: {chunks}"}, + ] + + with patch(GH_CHAT_PATH) as mock_chat: + mock_chat.return_value = json.dumps({"0": 0.9, "1": 0.8, "2": 0.7}) + + out = github_adapter.select_relevant_chunks( + sample_chunks, + use_embeddings=False, + ranking_prompts=ranking_prompts, + ) + + assert mock_chat.call_count == 1 + payload = mock_chat.call_args.args[0] + assert payload["model"] == github_adapter.chat_model + assert len(payload["messages"]) == 2 + assert "Chunk 0" in payload["messages"][1]["content"] + assert len(out) == 3 + + +def test_github_select_relevant_chunks_embeddings_single_provider_call( + github_adapter: GitHubDistillationAdapter, + sample_chunks: list[Chunk], +) -> None: + with patch(GH_POST_PATH) as mock_post: + mock_post.return_value = { + "data": [ + {"index": 0, "embedding": [0.1, 0.2]}, + {"index": 1, "embedding": [0.3, 0.4]}, + {"index": 2, "embedding": [0.5, 0.6]}, + ] + } + + out = github_adapter.select_relevant_chunks( + sample_chunks, + use_embeddings=True, + ranking_prompts=[{"role": "user", "content": "unused"}], + ) + + assert mock_post.call_count == 1 + assert mock_post.call_args.args[0] == github_adapter.embedding_endpoint + payload = mock_post.call_args.args[1] + assert payload["model"] == github_adapter.embedding_model + assert payload["input"] == ["Chunk 0", "Chunk 1", "Chunk 2"] + assert len(out) == 3 + + +def test_github_extract_chunk_signals_parses_typed_signals( + github_adapter: GitHubDistillationAdapter, +) -> None: + chunk = Chunk(text="Body", source="body", chunk_index=0) + with patch(GH_CHAT_PATH) as mock_chat: + mock_chat.return_value = json.dumps( + [ + { + "change": "Extracted", + "impact": "high", + "users_affected": "all", + "confidence": "high", + "final_outcome": True, + } + ] + ) + + signals = github_adapter.extract_chunk_signals( + chunk, + [{"role": "system", "content": "extract"}], + ) + + assert mock_chat.call_count == 1 + assert len(signals) == 1 + assert signals[0].change == "Extracted" + + +def test_github_consolidate_signals_single_provider_call( + github_adapter: GitHubDistillationAdapter, + sample_signal: Signal, +) -> None: + with patch(GH_CHAT_PATH) as mock_chat: + mock_chat.return_value = json.dumps( + [ + { + "change": "Consolidated", + "impact": "low", + "users_affected": "users", + "confidence": "high", + "final_outcome": True, + } + ] + ) + + _ = github_adapter.consolidate_signals( + [sample_signal], + [{"role": "system", "content": "consolidate"}], + ) + + assert mock_chat.call_count == 1 + payload = mock_chat.call_args.args[0] + assert payload["messages"][-1]["role"] == "user" + + +def test_github_classify_signals_single_provider_call( + github_adapter: GitHubDistillationAdapter, + sample_signal: Signal, +) -> None: + with patch(GH_CHAT_PATH) as mock_chat: + mock_chat.return_value = json.dumps( + { + "classified": [ + { + "signal": { + "change": sample_signal.change, + "impact": sample_signal.impact, + "users_affected": sample_signal.users_affected, + "confidence": sample_signal.confidence, + "final_outcome": sample_signal.final_outcome, + }, + "category": "minor", + } + ], + "summary": "ok", + } + ) + + _ = github_adapter.classify_signals( + [sample_signal], + [{"role": "system", "content": "classify"}], + ) + + assert mock_chat.call_count == 1 + + +def test_github_malformed_response_raises_diagnostic_runtime_error( + github_adapter: GitHubDistillationAdapter, +) -> None: + chunk = Chunk(text="Body", source="body", chunk_index=1) + + with patch(GH_CHAT_PATH) as mock_chat: + mock_chat.return_value = "not-json" + + with pytest.raises(RuntimeError) as exc_info: + github_adapter.extract_chunk_signals( + chunk, + [{"role": "system", "content": "extract"}], + ) + + msg = str(exc_info.value) + assert "[GitHub extraction]" in msg + assert "request_payload=" in msg + + +def test_github_invalid_model_surfaces_fatal_diagnostics( + github_adapter: GitHubDistillationAdapter, + sample_chunks: list[Chunk], +) -> None: + with patch(GH_CHAT_PATH) as mock_chat: + mock_chat.side_effect = RuntimeError("status=404 response=model not found") + + with pytest.raises(RuntimeError) as exc_info: + github_adapter.select_relevant_chunks( + sample_chunks, + use_embeddings=False, + ranking_prompts=[ + {"role": "user", "content": "Rank {chunk_count} {chunks}"} + ], + ) + + msg = str(exc_info.value) + assert "[GitHub ranking]" in msg + assert "request_payload=" in msg + assert "status=404" in msg + + +def test_github_mid_pipeline_failure_logs_and_reraises( + github_adapter: GitHubDistillationAdapter, + capsys: pytest.CaptureFixture[str], +) -> None: + chunk = Chunk(text="Body", source="body", chunk_index=5) + with patch(GH_CHAT_PATH) as mock_chat: + mock_chat.side_effect = RuntimeError("http=500 body=boom") + + with pytest.raises(RuntimeError) as exc_info: + github_adapter.extract_chunk_signals( + chunk, + [{"role": "system", "content": "extract"}], + ) + + msg = str(exc_info.value) + assert "[GitHub extraction]" in msg + assert "chunk_index=5" in msg + assert "request_payload=" in msg + assert "http=500 body=boom" in msg + + captured = capsys.readouterr() + assert "[GitHub extraction]" in captured.err diff --git a/tools/release_announcement/tests/test_distillation_ollama_adapter.py b/tools/release_announcement/tests/test_distillation_ollama_adapter.py new file mode 100644 index 0000000000..4ccd6268d7 --- /dev/null +++ b/tools/release_announcement/tests/test_distillation_ollama_adapter.py @@ -0,0 +1,459 @@ +"""Unit tests for the Ollama adapter (Step 6). + +Tests verify that the Ollama adapter correctly implements the DistillationAdapter +protocol and handles errors as specified. +""" + +import json +from unittest.mock import patch + +import pytest + +from tests.assertions import assert_distillation_adapter_surface +from tests.test_data import sample_metadata_123 + +from src.release_announcement.distillation import ( + Chunk, + Signal, + ClassifiedSignal, + ClassifiedSignals, + DistilledContext, +) +from src.release_announcement.backends.distillation_adapters.ollama_adapter import ( + OllamaDistillationAdapter, + _create_ollama_adapter, +) + +OLLAMA_CALL_PATH = ( + "src.release_announcement.backends.distillation_adapters." + "ollama_adapter.call_ollama_model" +) +OLLAMA_EMBED_PATH = ( + "src.release_announcement.backends.distillation_adapters." + "ollama_adapter.ollama.embed" +) + + +class TestOllamaAdapterInitialization: + """Tests for Ollama adapter initialization.""" + + def test_ollama_adapter_instantiation(self): + """Verify OllamaDistillationAdapter can be instantiated.""" + adapter = OllamaDistillationAdapter() + assert adapter is not None + assert_distillation_adapter_surface(adapter) + + def test_ollama_adapter_model_names_from_env(self, monkeypatch): + """Verify model names are read from environment variables.""" + monkeypatch.setenv("OLLAMA_MODEL", "test-model") + monkeypatch.setenv("OLLAMA_EMBEDDING_MODEL", "test-embedding-model") + + adapter = OllamaDistillationAdapter() + assert adapter.chat_model == "test-model" + assert adapter.embedding_model == "test-embedding-model" + + def test_ollama_adapter_default_model_names(self, monkeypatch): + """Verify default model names are used when env vars are not set.""" + monkeypatch.delenv("OLLAMA_MODEL", raising=False) + monkeypatch.delenv("OLLAMA_EMBEDDING_MODEL", raising=False) + + adapter = OllamaDistillationAdapter() + assert adapter.chat_model == "mistral-large-3:675b-cloud" + assert adapter.embedding_model is None + + def test_ollama_adapter_factory(self): + """Verify the factory function creates an Ollama adapter.""" + adapter = _create_ollama_adapter() + assert isinstance(adapter, OllamaDistillationAdapter) + + +class TestOllamaSelectRelevantChunks: + """Tests for OllamaAdapter.select_relevant_chunks().""" + + @pytest.fixture + def sample_chunks(self): + """Create sample chunks for testing.""" + return [ + Chunk(text=f"Chunk {i}", source=f"comment_{i}", chunk_index=i) + for i in range(3) + ] + + def test_select_relevant_chunks_with_embeddings(self, sample_chunks, monkeypatch): + """Verify select_relevant_chunks uses embeddings when available.""" + monkeypatch.setenv("OLLAMA_EMBEDDING_MODEL", "test-embedding-model") + + adapter = OllamaDistillationAdapter() + + # Mock the ollama.embed call + with patch(OLLAMA_EMBED_PATH) as mock_embed: + mock_embed.return_value = { + "embeddings": [[0.1, 0.2], [0.3, 0.4], [0.5, 0.6]] + } + + result = adapter.select_relevant_chunks( + sample_chunks, + use_embeddings=True, + ranking_prompts=[{"role": "user", "content": "dummy"}], + ) + + # Verify the embeddings API was called + mock_embed.assert_called_once_with( + model="test-embedding-model", + input=["Chunk 0", "Chunk 1", "Chunk 2"], + ) + + # Verify all chunks are returned with relevance scores + assert len(result) == 3 + for chunk in result: + assert chunk.relevance_score > 0 + + def test_select_relevant_chunks_with_chat(self, sample_chunks): + """Verify select_relevant_chunks uses chat when embeddings are not available.""" + adapter = OllamaDistillationAdapter() # embedding_model is None + + # Create ranking prompts + prompts = [{"role": "user", "content": "Rank {chunk_count} chunks: {chunks}"}] + + # Mock the call_ollama_model function + with patch(OLLAMA_CALL_PATH) as mock_call: + mock_call.return_value = json.dumps({"0": 0.9, "1": 0.8, "2": 0.7}) + + result = adapter.select_relevant_chunks( + sample_chunks, + use_embeddings=False, + ranking_prompts=prompts, + ) + + # Verify the chat API was called + mock_call.assert_called_once() + + # Verify all chunks are returned with relevance scores + assert len(result) == 3 + assert result[0].relevance_score == 0.9 + assert result[1].relevance_score == 0.8 + assert result[2].relevance_score == 0.7 + + def test_select_relevant_chunks_error_handling(self, sample_chunks): + """Verify select_relevant_chunks handles errors correctly.""" + adapter = OllamaDistillationAdapter() + + # Create ranking prompts + prompts = [{"role": "user", "content": "Rank {chunk_count} chunks: {chunks}"}] + + # Mock the call_ollama_model function to raise an exception + with patch(OLLAMA_CALL_PATH) as mock_call: + mock_call.side_effect = RuntimeError("Test error") + + with pytest.raises(RuntimeError) as exc_info: + adapter.select_relevant_chunks( + sample_chunks, + use_embeddings=False, + ranking_prompts=prompts, + ) + + # Verify the error message contains the expected diagnostics + error_msg = str(exc_info.value) + assert "[Ollama ranking]" in error_msg + assert "phase=select_relevant_chunks" in error_msg + assert "error=Test error" in error_msg + + +class TestOllamaExtractChunkSignals: + """Tests for OllamaAdapter.extract_chunk_signals().""" + + @pytest.fixture + def sample_chunk(self): + """Create a sample chunk for testing.""" + return Chunk(text="Test chunk", source="comment_1", chunk_index=0) + + def test_extract_chunk_signals_success(self, sample_chunk): + """Verify extract_chunk_signals returns a list of signals.""" + adapter = OllamaDistillationAdapter() + + # Mock the call_ollama_model function + with patch(OLLAMA_CALL_PATH) as mock_call: + mock_call.return_value = json.dumps([ + { + "change": "Test change", + "impact": "high", + "users_affected": "all users", + "confidence": "high", + "final_outcome": True, + } + ]) + + result = adapter.extract_chunk_signals( + sample_chunk, + [{"role": "system", "content": "test prompt"}], + ) + + # Verify the chat API was called + mock_call.assert_called_once() + + # Verify a signal is returned + assert len(result) == 1 + assert isinstance(result[0], Signal) + assert result[0].change == "Test change" + + def test_extract_chunk_signals_fallback(self, sample_chunk): + """Verify extract_chunk_signals falls back when parsing fails.""" + adapter = OllamaDistillationAdapter() + + # Mock the call_ollama_model function to return invalid JSON + with patch(OLLAMA_CALL_PATH) as mock_call: + mock_call.return_value = "invalid json" + + result = adapter.extract_chunk_signals( + sample_chunk, + [{"role": "system", "content": "test prompt"}], + ) + + # Verify a fallback signal is returned + assert len(result) == 1 + assert isinstance(result[0], Signal) + + def test_extract_chunk_signals_error_handling(self, sample_chunk): + """Verify extract_chunk_signals handles errors correctly.""" + adapter = OllamaDistillationAdapter() + + # Mock the call_ollama_model function to raise an exception + with patch(OLLAMA_CALL_PATH) as mock_call: + mock_call.side_effect = RuntimeError("Test error") + + with pytest.raises(RuntimeError) as exc_info: + adapter.extract_chunk_signals( + sample_chunk, + [{"role": "system", "content": "test prompt"}], + ) + + # Verify the error message contains the expected diagnostics + error_msg = str(exc_info.value) + assert "[Ollama extraction]" in error_msg + assert "phase=extract_chunk_signals" in error_msg + assert "error=Test error" in error_msg + + +class TestOllamaConsolidateSignals: + """Tests for OllamaAdapter.consolidate_signals().""" + + @pytest.fixture + def sample_signals(self): + """Create sample signals for testing.""" + return [ + Signal( + change=f"Change {i}", + impact="high", + users_affected="all users", + confidence="high", + final_outcome=True, + ) + for i in range(3) + ] + + def test_consolidate_signals_success(self, sample_signals): + """Verify consolidate_signals returns a list of signals.""" + adapter = OllamaDistillationAdapter() + + # Mock the call_ollama_model function + with patch(OLLAMA_CALL_PATH) as mock_call: + mock_call.return_value = json.dumps([ + { + "change": "Consolidated change", + "impact": "high", + "users_affected": "all users", + "confidence": "high", + "final_outcome": True, + } + ]) + + result = adapter.consolidate_signals( + sample_signals, + [{"role": "system", "content": "test prompt"}], + ) + + # Verify the chat API was called + mock_call.assert_called_once() + + # Verify signals are returned + assert len(result) == 1 + assert isinstance(result[0], Signal) + + def test_consolidate_signals_fallback(self, sample_signals): + """Verify consolidate_signals falls back when parsing fails.""" + adapter = OllamaDistillationAdapter() + + # Mock the call_ollama_model function to return invalid JSON + with patch(OLLAMA_CALL_PATH) as mock_call: + mock_call.return_value = "invalid json" + + result = adapter.consolidate_signals( + sample_signals, + [{"role": "system", "content": "test prompt"}], + ) + + # Verify the input signals are returned unchanged + assert len(result) == 3 + assert result == sample_signals + + def test_consolidate_signals_error_handling(self, sample_signals): + """Verify consolidate_signals handles errors correctly.""" + adapter = OllamaDistillationAdapter() + + # Mock the call_ollama_model function to raise an exception + with patch(OLLAMA_CALL_PATH) as mock_call: + mock_call.side_effect = RuntimeError("Test error") + + with pytest.raises(RuntimeError) as exc_info: + adapter.consolidate_signals( + sample_signals, + [{"role": "system", "content": "test prompt"}], + ) + + # Verify the error message contains the expected diagnostics + error_msg = str(exc_info.value) + assert "[Ollama consolidation]" in error_msg + assert "phase=consolidate_signals" in error_msg + assert "error=Test error" in error_msg + + +class TestOllamaClassifySignals: + """Tests for OllamaAdapter.classify_signals().""" + + @pytest.fixture + def sample_signals(self): + """Create sample signals for testing.""" + return [ + Signal( + change=f"Change {i}", + impact="high", + users_affected="all users", + confidence="high", + final_outcome=True, + ) + for i in range(3) + ] + + def test_classify_signals_success(self, sample_signals): + """Verify classify_signals returns a ClassifiedSignals object.""" + adapter = OllamaDistillationAdapter() + + # Mock the call_ollama_model function + with patch(OLLAMA_CALL_PATH) as mock_call: + mock_call.return_value = json.dumps({ + "classified": [ + {"category": "major"}, + {"category": "minor"}, + {"category": "internal"}, + ], + "summary": "Test summary", + }) + + result = adapter.classify_signals( + sample_signals, + [{"role": "system", "content": "test prompt"}], + ) + + # Verify the chat API was called + mock_call.assert_called_once() + + # Verify a ClassifiedSignals object is returned + assert isinstance(result, ClassifiedSignals) + assert len(result.classified) == 3 + assert result.summary == "Test summary" + + def test_classify_signals_fallback(self, sample_signals): + """Verify classify_signals falls back when parsing fails.""" + adapter = OllamaDistillationAdapter() + + # Mock the call_ollama_model function to return invalid JSON + with patch(OLLAMA_CALL_PATH) as mock_call: + mock_call.return_value = "invalid json" + + result = adapter.classify_signals( + sample_signals, + [{"role": "system", "content": "test prompt"}], + ) + + # Verify all signals are classified as "minor" + assert len(result.classified) == 3 + for classified in result.classified: + assert classified.category == "minor" + + def test_classify_signals_error_handling(self, sample_signals): + """Verify classify_signals handles errors correctly.""" + adapter = OllamaDistillationAdapter() + + # Mock the call_ollama_model function to raise an exception + with patch(OLLAMA_CALL_PATH) as mock_call: + mock_call.side_effect = RuntimeError("Test error") + + with pytest.raises(RuntimeError) as exc_info: + adapter.classify_signals( + sample_signals, + [{"role": "system", "content": "test prompt"}], + ) + + # Verify the error message contains the expected diagnostics + error_msg = str(exc_info.value) + assert "[Ollama classification]" in error_msg + assert "phase=classify_signals" in error_msg + assert "error=Test error" in error_msg + + +class TestOllamaRenderFinalContext: + """Tests for OllamaAdapter.render_final_context().""" + + @pytest.fixture + def sample_classified(self): + """Create a sample ClassifiedSignals object for testing.""" + signals = [ + Signal( + change=f"Change {i}", + impact="high", + users_affected="all users", + confidence="high", + final_outcome=True, + ) + for i in range(3) + ] + classified_signals = [ + ClassifiedSignal(signal=signal, category="major") + for signal in signals + ] + return ClassifiedSignals( + classified=classified_signals, + summary="Test summary", + ) + + @pytest.fixture + def sample_metadata(self): + """Create a sample DistilledContextMetadata object for testing.""" + return sample_metadata_123() + + def test_render_final_context_success(self, sample_classified, sample_metadata): + """Verify render_final_context returns a DistilledContext object.""" + adapter = OllamaDistillationAdapter() + + result = adapter.render_final_context(sample_classified, sample_metadata) + + # Verify a DistilledContext object is returned + assert isinstance(result, DistilledContext) + assert result.summary == "Test summary" + assert len(result.structured_signals) == 3 + assert result.metadata["pr_number"] == 123 + + def test_render_final_context_empty_signals(self, sample_metadata): + """Verify render_final_context handles empty signals.""" + adapter = OllamaDistillationAdapter() + + empty_classified = ClassifiedSignals( + classified=[], + summary="Empty summary", + ) + + result = adapter.render_final_context(empty_classified, sample_metadata) + + # Verify a DistilledContext object is returned + assert isinstance(result, DistilledContext) + assert result.summary == "Empty summary" + assert len(result.structured_signals) == 0 diff --git a/tools/release_announcement/tests/test_distillation_orchestration.py b/tools/release_announcement/tests/test_distillation_orchestration.py new file mode 100644 index 0000000000..ea460a699f --- /dev/null +++ b/tools/release_announcement/tests/test_distillation_orchestration.py @@ -0,0 +1,597 @@ +"""Unit tests for staged-distillation pipeline orchestration (Substep 4b). + +Tests the helper functions: _ordered_chunk, _select_relevant_chunks_with_fallback, +and _consolidate_signals_hierarchical without requiring a full adapter integration. +""" + +from unittest.mock import MagicMock, call + +import pytest + +from src.release_announcement.distillation import ( + _ordered_chunk, + _select_relevant_chunks_with_fallback, + _consolidate_signals_hierarchical, + run_distillation_pipeline, + Chunk, + Signal, + ClassifiedSignal, + ClassifiedSignals, + DistillationPrompts, + DistillationAdapter, +) + + +def _mk_signal( + change: str, + impact: str, + users_affected: str, + confidence: str, + final_outcome: bool, +) -> Signal: + return Signal( + change=change, + impact=impact, + users_affected=users_affected, + confidence=confidence, + final_outcome=final_outcome, + ) + + +# ============================================================================ +# Tests for _ordered_chunk +# ============================================================================ + + +def test_ordered_chunk_with_pr_body_and_comments(): + """Test chunking converts PR body and comments to ordered chunks.""" + pr_data = { + "number": 100, + "title": "Fix bug in audio handling", + "body": "This PR fixes issue #42.", + "comments": [ + {"body": "First comment", "text": None}, + {"body": "Second comment", "text": None}, + ], + "reviews": [], + } + + chunks = _ordered_chunk(pr_data) + + assert len(chunks) == 3 + assert chunks[0].chunk_index == 0 + assert chunks[1].chunk_index == 1 + assert chunks[2].chunk_index == 2 + + # First chunk should be title + body + assert "Fix bug in audio handling" in chunks[0].text + assert "This PR fixes issue #42." in chunks[0].text + assert chunks[0].source == "pr_100_body" + + # Remaining chunks should be comments + assert chunks[1].text == "First comment" + assert chunks[1].source == "pr_100_comment_0" + assert chunks[2].text == "Second comment" + assert chunks[2].source == "pr_100_comment_1" + + +def test_ordered_chunk_preserves_order(): + """Test that chunks preserve PR discussion order.""" + pr_data = { + "number": 42, + "title": "Title", + "body": "Body", + "comments": [ + {"body": f"Comment {i}", "text": None} for i in range(10) + ], + "reviews": [], + } + + chunks = _ordered_chunk(pr_data) + + # Verify chunk_index is sequential + for i, chunk in enumerate(chunks): + assert chunk.chunk_index == i + + +def test_ordered_chunk_skips_empty_comments(): + """Test that empty/None comments are skipped.""" + pr_data = { + "number": 50, + "title": "Title", + "body": "Body", + "comments": [ + {"body": "Real comment"}, + {"body": None, "text": None}, + {"body": ""}, + {"body": "Another real comment"}, + ], + "reviews": [], + } + + chunks = _ordered_chunk(pr_data) + + # Should have: body + 2 real comments + assert len(chunks) == 3 + assert chunks[1].text == "Real comment" + assert chunks[2].text == "Another real comment" + + +def test_ordered_chunk_missing_title(): + """Test chunking when PR has no title.""" + pr_data = { + "number": 77, + "title": "", + "body": "Just a body", + "comments": [{"body": "A comment"}], + "reviews": [], + } + + chunks = _ordered_chunk(pr_data) + + assert len(chunks) == 2 + assert "Just a body" in chunks[0].text + + +# ============================================================================ +# Tests for _select_relevant_chunks_with_fallback +# ============================================================================ + + +def test_select_relevant_chunks_success_with_embeddings(): + """Test successful selection when adapter returns ranked chunks.""" + chunks = [ + Chunk(text="Chunk 0", chunk_index=0), + Chunk(text="Chunk 1", chunk_index=1), + Chunk(text="Chunk 2", chunk_index=2), + ] + + adapter = MagicMock(spec=DistillationAdapter) + # Adapter returns chunks reordered by relevance but preserving indices + adapter.select_relevant_chunks.return_value = [ + Chunk(text="Chunk 2", chunk_index=2, relevance_score=0.9), + Chunk(text="Chunk 0", chunk_index=0, relevance_score=0.7), + Chunk(text="Chunk 1", chunk_index=1, relevance_score=0.5), + ] + + result = _select_relevant_chunks_with_fallback( + chunks, + adapter, + use_embeddings=True, + ranking_prompts=[{"role": "user", "content": "dummy"}], + ) + + assert len(result) == 3 + # Adapter was called with chunks, use_embeddings flag, and ranking_prompt + adapter.select_relevant_chunks.assert_called_once_with( + chunks, + True, + [{"role": "user", "content": "dummy"}], + ) + + +def test_select_relevant_chunks_fallback_on_adapter_failure(): + """Test positional fallback when adapter raises an exception.""" + chunks = [ + Chunk(text="Chunk 0", chunk_index=0), + Chunk(text="Chunk 1", chunk_index=1), + Chunk(text="Chunk 2", chunk_index=2), + Chunk(text="Chunk 3", chunk_index=3), + Chunk(text="Chunk 4", chunk_index=4), + ] + + adapter = MagicMock(spec=DistillationAdapter) + adapter.select_relevant_chunks.side_effect = RuntimeError("Ranking failed") + + result = _select_relevant_chunks_with_fallback( + chunks, + adapter, + use_embeddings=False, + ranking_prompts=[{"role": "user", "content": "dummy"}], + last_n_fallback_chunks=2, + ) + + # Fallback should keep: first chunk (0) + last N (3, 4) + assert len(result) == 3 + assert result[0].chunk_index == 0 + assert result[1].chunk_index == 3 + assert result[2].chunk_index == 4 + + +def test_select_relevant_chunks_fallback_with_keywords(): + """Test positional fallback includes chunks with maintainer keywords.""" + chunks = [ + Chunk(text="Chunk 0", chunk_index=0), + Chunk(text="Discussed but no decision", chunk_index=1), + Chunk(text="We agreed on this approach", chunk_index=2), + Chunk(text="Some implementation detail", chunk_index=3), + Chunk(text="This was merged and deployed", chunk_index=4), + ] + + adapter = MagicMock(spec=DistillationAdapter) + adapter.select_relevant_chunks.side_effect = RuntimeError("Ranking failed") + + result = _select_relevant_chunks_with_fallback( + chunks, + adapter, + use_embeddings=False, + ranking_prompts=[{"role": "user", "content": "dummy"}], + last_n_fallback_chunks=1, + maintainer_keywords=["agreed", "merged"], + ) + + # Should keep: 0 (first) + 2 (agreed) + 4 (merged) + 4 (last N=1) + indices = {c.chunk_index for c in result} + assert 0 in indices # First chunk + assert 2 in indices # Contains "agreed" + assert 4 in indices # Contains "merged" and is last + # Result should be in discussion order + result_indices = [c.chunk_index for c in result] + assert result_indices == sorted(result_indices) + + +def test_select_relevant_chunks_preserves_discussion_order(): + """Test that fallback preserves original discussion order.""" + chunks = [ + Chunk(text="A", chunk_index=0), + Chunk(text="B", chunk_index=1), + Chunk(text="C", chunk_index=2), + Chunk(text="D", chunk_index=3), + ] + + adapter = MagicMock(spec=DistillationAdapter) + adapter.select_relevant_chunks.side_effect = RuntimeError("Fail") + + result = _select_relevant_chunks_with_fallback( + chunks, + adapter, + use_embeddings=False, + ranking_prompts=[{"role": "user", "content": "dummy"}], + last_n_fallback_chunks=1, + ) + + # Verify order is preserved + result_indices = [c.chunk_index for c in result] + assert result_indices == sorted(result_indices) + + +def test_select_relevant_chunks_batches_large_chat_ranking_sets(): + """Test that chat-based ranking batches large chunk sets.""" + chunks = [ + Chunk(text=f"Chunk {index}", chunk_index=index) + for index in range(5) + ] + + adapter = MagicMock(spec=DistillationAdapter) + adapter.select_relevant_chunks.side_effect = [ + [ + Chunk(text="Chunk 1", chunk_index=1, relevance_score=0.8), + Chunk(text="Chunk 0", chunk_index=0, relevance_score=0.9), + ], + [ + Chunk(text="Chunk 3", chunk_index=3, relevance_score=0.7), + Chunk(text="Chunk 2", chunk_index=2, relevance_score=0.85), + ], + [ + Chunk(text="Chunk 4", chunk_index=4, relevance_score=0.95), + ], + ] + + result = _select_relevant_chunks_with_fallback( + chunks, + adapter, + use_embeddings=False, + ranking_prompts=[{"role": "user", "content": "dummy"}], + max_ranking_chunks=2, + ) + + assert adapter.select_relevant_chunks.call_count == 3 + assert adapter.select_relevant_chunks.call_args_list == [ + call(chunks[0:2], False, [{"role": "user", "content": "dummy"}]), + call(chunks[2:4], False, [{"role": "user", "content": "dummy"}]), + call(chunks[4:5], False, [{"role": "user", "content": "dummy"}]), + ] + assert [chunk.chunk_index for chunk in result] == [0, 1, 2, 3, 4] + + +def test_select_relevant_chunks_does_not_batch_embeddings(): + """Test that embeddings-based ranking still uses a single adapter call.""" + chunks = [ + Chunk(text=f"Chunk {index}", chunk_index=index) + for index in range(4) + ] + + adapter = MagicMock(spec=DistillationAdapter) + adapter.select_relevant_chunks.return_value = chunks + + result = _select_relevant_chunks_with_fallback( + chunks, + adapter, + use_embeddings=True, + ranking_prompts=[{"role": "user", "content": "dummy"}], + max_ranking_chunks=2, + ) + + adapter.select_relevant_chunks.assert_called_once_with( + chunks, + True, + [{"role": "user", "content": "dummy"}], + ) + assert result == chunks + + +# ============================================================================ +# Tests for _consolidate_signals_hierarchical +# ============================================================================ + + +def test_consolidate_signals_direct_small_batch(): + """Test that small signal sets (<=threshold) use direct consolidation.""" + signals = [ + Signal( + change="Fix A", + impact="high", + users_affected="all", + confidence="high", + final_outcome=True, + ), + _mk_signal("Fix B", "low", "some", "medium", False), + ] + + adapter = MagicMock(spec=DistillationAdapter) + adapter.consolidate_signals.return_value = signals + + result = _consolidate_signals_hierarchical( + signals, + adapter, + [{"role": "system", "content": "prompt"}], + max_direct_consolidation_chunks=20, + ) + + # Should be called exactly once (direct consolidation) + adapter.consolidate_signals.assert_called_once() + assert result == signals + + +def test_consolidate_signals_batched_large_set(): + """Test that large signal sets (>threshold) use batch consolidation.""" + # Create 25 signals (exceeds default threshold of 20) + signals = [ + Signal( + change=f"Change {i}", + impact="low", + users_affected="none", + confidence="low", + final_outcome=False, + ) + for i in range(25) + ] + + adapter = MagicMock(spec=DistillationAdapter) + # First call: consolidate first batch of 20 + batch1_result = signals[:15] # Consolidation reduces 20 to 15 + # Second call: consolidate second batch of 5 + batch2_result = signals[20:23] # Consolidation reduces 5 to 3 + # Combined: 15 + 3 = 18 signals (below threshold, no recursion) + + adapter.consolidate_signals.side_effect = [ + batch1_result, # First batch (20 → 15) + batch2_result, # Second batch (5 → 3) + ] + + result = _consolidate_signals_hierarchical( + signals, + adapter, + [{"role": "system", "content": "prompt"}], + max_direct_consolidation_chunks=20, + ) + + # Should be called 2 times: 2 batches (combined result is 18 < 20, no recursion) + assert adapter.consolidate_signals.call_count == 2 + # Result should be combined batch results + assert len(result) == 18 + + +def test_consolidate_signals_recursive_batching(): + """Test that recursive batching occurs when batch results exceed threshold.""" + # Create 60 signals + signals = [ + Signal( + change=f"Change {i}", + impact="low", + users_affected="none", + confidence="low", + final_outcome=False, + ) + for i in range(60) + ] + + adapter = MagicMock(spec=DistillationAdapter) + + # Simulate a scenario where batch consolidation results still exceed threshold: + # 60 signals → 3 batches of 20 → each reduces by half → 30 total → exceeds threshold → recurse + # 30 signals → 2 batches of 15 (only 2 batches needed) → each reduces to 8 → 16 total → fits + batch_1_result = signals[0:10] # 20 → 10 + batch_2_result = signals[20:30] # 20 → 10 + batch_3_result = signals[40:50] # 20 → 10 + # Batch results combined: 30 signals (exceeds 20) → triggers recursion + recursive_batch_1 = signals[0:8] # 15 → 8 + recursive_batch_2 = signals[25:33] # 15 → 8 + + adapter.consolidate_signals.side_effect = [ + batch_1_result, + batch_2_result, + batch_3_result, + recursive_batch_1, + recursive_batch_2, + ] + + result = _consolidate_signals_hierarchical( + signals, + adapter, + [{"role": "system", "content": "prompt"}], + max_direct_consolidation_chunks=20, + ) + + # Should be called at least 4 times (3 initial batches + 2 recursive batches) + # The exact count depends on internal recursion depth + assert adapter.consolidate_signals.call_count >= 4 + # Verify it returns a valid result + assert isinstance(result, list) + + +def test_consolidate_signals_empty_list(): + """Test that empty signal list returns immediately.""" + adapter = MagicMock(spec=DistillationAdapter) + + result = _consolidate_signals_hierarchical( + [], + adapter, + [{"role": "system", "content": "prompt"}], + ) + + assert result == [] + adapter.consolidate_signals.assert_not_called() + + +def test_consolidate_signals_error_propagation(): + """Test that consolidation errors propagate with context.""" + signals = [ + _mk_signal("Fix", "low", "none", "low", False) + for _ in range(25) + ] + + adapter = MagicMock(spec=DistillationAdapter) + adapter.consolidate_signals.side_effect = ValueError("Invalid signal format") + + with pytest.raises(ValueError, match="Invalid signal format"): + _consolidate_signals_hierarchical( + signals, + adapter, + [{"role": "system", "content": "prompt"}], + max_direct_consolidation_chunks=20, + ) + + +# ============================================================================ +# Integration Tests +# ============================================================================ + + +def test_ordered_chunk_to_fallback_selection_integration(): + """Integration test: chunking → fallback selection on failure.""" + pr_data = { + "number": 200, + "title": "Feature X", + "body": "Implement feature X", + "comments": [ + {"body": "Review comment 1"}, + {"body": "We agreed to proceed"}, + {"body": "Implementation detail"}, + {"body": "This was merged successfully"}, + ], + "reviews": [], + } + + chunks = _ordered_chunk(pr_data) + assert len(chunks) == 5 # body + 4 comments + + adapter = MagicMock(spec=DistillationAdapter) + adapter.select_relevant_chunks.side_effect = RuntimeError("Adapter down") + + selected = _select_relevant_chunks_with_fallback( + chunks, + adapter, + use_embeddings=False, + ranking_prompts=[{"role": "user", "content": "dummy"}], + max_ranking_chunks=30, + last_n_fallback_chunks=2, + maintainer_keywords=["agreed", "merged"], + ) + + # Should include: body (0) + "agreed" (2) + "merged" (4) + last 2 (3, 4) + indices = {c.chunk_index for c in selected} + assert 0 in indices + assert 2 in indices # agreed + assert 4 in indices # merged + + +def test_run_distillation_pipeline_routes_phase_prompts_to_matching_adapter_calls(): + """Verify each phase receives the correct prompt bundle in pipeline order.""" + pr_data = { + "number": 314, + "title": "Prompt routing", + "body": "Body content", + "comments": [{"body": "Timeline comment"}], + "reviews": [], + } + + ranking_prompts = [{"role": "user", "content": "RANKING_PROMPT_MARKER {chunk_count} {chunks}"}] + extraction_prompts = [{"role": "system", "content": "EXTRACTION_PROMPT_MARKER"}] + consolidation_prompts = [{"role": "system", "content": "CONSOLIDATION_PROMPT_MARKER"}] + classification_prompts = [{"role": "system", "content": "CLASSIFICATION_PROMPT_MARKER"}] + + prompts = DistillationPrompts( + extraction=extraction_prompts, + consolidation=consolidation_prompts, + classification=classification_prompts, + ranking=ranking_prompts, + ) + + selected_chunk = Chunk(text="selected", source="pr_314_comment_0", chunk_index=1) + extracted_signal = _mk_signal( + "User-visible change", + "medium", + "operators", + "high", + True, + ) + consolidated_signal = _mk_signal( + "Consolidated change", + "medium", + "operators", + "high", + True, + ) + + adapter = MagicMock(spec=DistillationAdapter) + adapter.select_relevant_chunks.return_value = [selected_chunk] + adapter.extract_chunk_signals.return_value = [extracted_signal] + adapter.consolidate_signals.return_value = [consolidated_signal] + adapter.classify_signals.return_value = ClassifiedSignals( + classified=[ + ClassifiedSignal( + signal=consolidated_signal, + category="minor", + ) + ], + summary="classified", + ) + adapter.render_final_context.return_value = {"summary": "ok"} + + class _Capabilities: + supports_embeddings = False + + class _BackendConfig: + capabilities = _Capabilities() + + result = run_distillation_pipeline( + pr_data=pr_data, + adapter=adapter, + backend_config=_BackendConfig(), + prompts=prompts, + ) + + assert result == {"summary": "ok"} + + adapter.select_relevant_chunks.assert_called_once() + select_args = adapter.select_relevant_chunks.call_args.args + assert select_args[2] is ranking_prompts + + adapter.extract_chunk_signals.assert_called_once_with(selected_chunk, extraction_prompts) + adapter.consolidate_signals.assert_called_once_with([extracted_signal], consolidation_prompts) + adapter.classify_signals.assert_called_once_with([consolidated_signal], classification_prompts) + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tools/release_announcement/tests/test_distillation_schemas.py b/tools/release_announcement/tests/test_distillation_schemas.py new file mode 100644 index 0000000000..29fd9d2303 --- /dev/null +++ b/tools/release_announcement/tests/test_distillation_schemas.py @@ -0,0 +1,581 @@ +"""Unit tests for Substep 4a: Pydantic schemas, dataclasses, and parsing helpers. + +Tests the type foundation of the staged distillation pipeline: +- Signal, ClassifiedSignal, ClassifiedSignals Pydantic models +- Chunk and DistilledContextMetadata dataclasses +- JSON parsing helpers (_parse_signal_list, _parse_classified_signals) +""" + +import json +import pytest +from pydantic import ValidationError + +from src.release_announcement.distillation import ( + Signal, + ClassifiedSignal, + ClassifiedSignals, + Chunk, + DistilledContextMetadata, + _parse_signal_list, + _parse_classified_signals, +) + + +# ============================================================================ +# Tests for Signal Pydantic Model +# ============================================================================ + + +def test_signal_creation_valid(): + """Test creating a valid Signal.""" + signal = Signal( + change="Fixed memory leak in audio buffer", + impact="high", + users_affected="all server operators", + confidence="high", + final_outcome=True, + ) + + assert signal.change == "Fixed memory leak in audio buffer" + assert signal.impact == "high" + assert signal.users_affected == "all server operators" + assert signal.confidence == "high" + assert signal.final_outcome is True + + +def test_signal_required_fields(): + """Test that Signal requires all fields.""" + with pytest.raises(ValidationError): + Signal( + change="Fix", + impact="high", + # Missing users_affected, confidence, final_outcome + ) + + +def test_signal_field_types(): + """Test that Signal validates field types.""" + # final_outcome should be bool + signal = Signal( + change="Fix", + impact="high", + users_affected="all", + confidence="high", + final_outcome=True, + ) + assert isinstance(signal.final_outcome, bool) + + # Pydantic should coerce string "true" to bool + signal2 = Signal( + change="Fix", + impact="high", + users_affected="all", + confidence="high", + final_outcome="true", # type: ignore + ) + assert signal2.final_outcome is True + + +def test_signal_extra_fields_allowed(): + """Test that Signal allows extra fields (extra='allow' in Config).""" + signal = Signal( + change="Fix", + impact="high", + users_affected="all", + confidence="high", + final_outcome=True, + extra_field="allowed", # type: ignore + ) + assert signal.change == "Fix" + # Extra field is stored in __pydantic_extra__ or similar + + +# ============================================================================ +# Tests for ClassifiedSignal Pydantic Model +# ============================================================================ + + +def test_classified_signal_creation_valid(): + """Test creating a valid ClassifiedSignal.""" + signal = Signal( + change="Fixed bug", + impact="high", + users_affected="users", + confidence="high", + final_outcome=True, + ) + + classified = ClassifiedSignal(signal=signal, category="major") + + assert classified.signal == signal + assert classified.category == "major" + + +def test_classified_signal_valid_categories(): + """Test all valid category values.""" + signal = Signal( + change="Fix", + impact="low", + users_affected="none", + confidence="low", + final_outcome=False, + ) + + for category in ["internal", "minor", "targeted", "major", "no_user_facing_changes"]: + classified = ClassifiedSignal(signal=signal, category=category) # type: ignore + assert classified.category == category + + +def test_classified_signal_invalid_category(): + """Test that invalid categories are rejected.""" + signal = Signal( + change="Fix", + impact="low", + users_affected="none", + confidence="low", + final_outcome=False, + ) + + with pytest.raises(ValidationError): + ClassifiedSignal(signal=signal, category="invalid_category") # type: ignore + + +# ============================================================================ +# Tests for ClassifiedSignals Pydantic Model +# ============================================================================ + + +def test_classified_signals_empty(): + """Test creating ClassifiedSignals with empty list (no user-facing changes).""" + result = ClassifiedSignals(classified=[], summary="No user-facing changes") + + assert len(result.classified) == 0 + assert result.summary == "No user-facing changes" + + +def test_classified_signals_with_items(): + """Test ClassifiedSignals with multiple classified items.""" + signal1 = Signal( + change="Fix A", + impact="high", + users_affected="users", + confidence="high", + final_outcome=True, + ) + signal2 = Signal( + change="Fix B", + impact="low", + users_affected="some", + confidence="medium", + final_outcome=False, + ) + + classified1 = ClassifiedSignal(signal=signal1, category="major") + classified2 = ClassifiedSignal(signal=signal2, category="minor") + + result = ClassifiedSignals( + classified=[classified1, classified2], + summary="Two changes detected", + ) + + assert len(result.classified) == 2 + assert result.classified[0].category == "major" + assert result.classified[1].category == "minor" + + +def test_classified_signals_defaults(): + """Test ClassifiedSignals with default values.""" + result = ClassifiedSignals() + + assert result.classified == [] + assert result.summary == "" + + +# ============================================================================ +# Tests for Chunk Dataclass +# ============================================================================ + + +def test_chunk_creation_minimal(): + """Test creating Chunk with minimal required fields.""" + chunk = Chunk(text="Some discussion text") + + assert chunk.text == "Some discussion text" + assert chunk.source == "unknown" + assert chunk.relevance_score == 0.0 + assert chunk.chunk_index == 0 + + +def test_chunk_creation_full(): + """Test creating Chunk with all fields.""" + chunk = Chunk( + text="Important change", + source="pr_100_comment_5", + relevance_score=0.85, + chunk_index=3, + ) + + assert chunk.text == "Important change" + assert chunk.source == "pr_100_comment_5" + assert chunk.relevance_score == 0.85 + assert chunk.chunk_index == 3 + + +def test_chunk_mutability(): + """Test that Chunk fields can be modified.""" + chunk = Chunk(text="Original") + chunk.relevance_score = 0.95 + chunk.source = "new_source" + + assert chunk.relevance_score == 0.95 + assert chunk.source == "new_source" + + +# ============================================================================ +# Tests for DistilledContextMetadata Dataclass +# ============================================================================ + + +def test_distilled_context_metadata_defaults(): + """Test DistilledContextMetadata with default values.""" + metadata = DistilledContextMetadata() + + assert metadata.pr_number == 0 + assert metadata.total_chunks == 0 + assert metadata.selected_chunks == 0 + assert metadata.extraction_phase_duration_ms == 0.0 + assert metadata.consolidation_phase_duration_ms == 0.0 + assert metadata.classification_phase_duration_ms == 0.0 + + +def test_distilled_context_metadata_with_values(): + """Test DistilledContextMetadata with explicit values.""" + metadata = DistilledContextMetadata( + pr_number=3502, + total_chunks=10, + selected_chunks=7, + extraction_phase_duration_ms=125.5, + consolidation_phase_duration_ms=89.3, + classification_phase_duration_ms=45.2, + ) + + assert metadata.pr_number == 3502 + assert metadata.total_chunks == 10 + assert metadata.selected_chunks == 7 + assert metadata.extraction_phase_duration_ms == 125.5 + + +# ============================================================================ +# Tests for _parse_signal_list Helper +# ============================================================================ + + +def test_parse_signal_list_valid_json_array(): + """Test parsing valid JSON array of signals.""" + json_text = json.dumps([ + { + "change": "Fix A", + "impact": "high", + "users_affected": "all", + "confidence": "high", + "final_outcome": True, + }, + { + "change": "Fix B", + "impact": "low", + "users_affected": "some", + "confidence": "medium", + "final_outcome": False, + }, + ]) + + signals = _parse_signal_list(json_text) + + assert len(signals) == 2 + assert signals[0].change == "Fix A" + assert signals[1].change == "Fix B" + assert isinstance(signals[0], Signal) + + +def test_parse_signal_list_with_code_block(): + """Test parsing signals from ```json code block.""" + response = """ + Here are the extracted signals: + + ```json + [ + { + "change": "Added feature", + "impact": "medium", + "users_affected": "users", + "confidence": "high", + "final_outcome": true + } + ] + ``` + + This is the end of the response. + """ + + signals = _parse_signal_list(response) + + assert len(signals) == 1 + assert signals[0].change == "Added feature" + + +def test_parse_signal_list_with_generic_code_block(): + """Test parsing signals from generic ``` code block.""" + response = """ + Results: + + ``` + [ + { + "change": "Fix", + "impact": "low", + "users_affected": "none", + "confidence": "low", + "final_outcome": false + } + ] + ``` + """ + + signals = _parse_signal_list(response) + + assert len(signals) == 1 + assert signals[0].change == "Fix" + + +def test_parse_signal_list_empty_array(): + """Test parsing empty signal array.""" + json_text = json.dumps([]) + + signals = _parse_signal_list(json_text) + + assert not signals + + +def test_parse_signal_list_invalid_json(): + """Test that invalid JSON raises ValueError.""" + with pytest.raises(ValueError, match="Failed to parse response as JSON"): + _parse_signal_list("{not valid json") + + +def test_parse_signal_list_non_array(): + """Test that non-array JSON raises ValueError.""" + json_text = json.dumps({"signal": "not an array"}) + + with pytest.raises(ValueError, match="Expected JSON array at top level"): + _parse_signal_list(json_text) + + +def test_parse_signal_list_invalid_signal_schema(): + """Test that signals not matching schema raise ValueError.""" + json_text = json.dumps([ + { + "change": "Fix", + # Missing required fields + } + ]) + + with pytest.raises(ValueError, match="Signal validation failed"): + _parse_signal_list(json_text) + + +# ============================================================================ +# Tests for _parse_classified_signals Helper +# ============================================================================ + + +def test_parse_classified_signals_valid_json(): + """Test parsing valid ClassifiedSignals JSON.""" + json_text = json.dumps({ + "classified": [ + { + "signal": { + "change": "Fix A", + "impact": "high", + "users_affected": "all", + "confidence": "high", + "final_outcome": True, + }, + "category": "major", + } + ], + "summary": "One major change", + }) + + result = _parse_classified_signals(json_text) + + assert len(result.classified) == 1 + assert result.classified[0].category == "major" + assert result.summary == "One major change" + + +def test_parse_classified_signals_empty(): + """Test parsing empty ClassifiedSignals (no user-facing changes).""" + json_text = json.dumps({ + "classified": [], + "summary": "No user-facing changes", + }) + + result = _parse_classified_signals(json_text) + + assert len(result.classified) == 0 + assert result.summary == "No user-facing changes" + + +def test_parse_classified_signals_with_code_block(): + """Test parsing ClassifiedSignals from ```json code block.""" + response = """ + Classification result: + + ```json + { + "classified": [ + { + "signal": { + "change": "Feature X", + "impact": "high", + "users_affected": "users", + "confidence": "high", + "final_outcome": true + }, + "category": "major" + } + ], + "summary": "One major feature addition" + } + ``` + """ + + result = _parse_classified_signals(response) + + assert len(result.classified) == 1 + assert result.classified[0].signal.change == "Feature X" + + +def test_parse_classified_signals_invalid_json(): + """Test that invalid JSON raises ValueError.""" + with pytest.raises(ValueError, match="Failed to parse response as JSON"): + _parse_classified_signals("{invalid json") + + +def test_parse_classified_signals_non_object(): + """Test that non-object JSON raises ValueError.""" + json_text = json.dumps(["not", "an", "object"]) + + with pytest.raises(ValueError, match="Expected JSON object at top level"): + _parse_classified_signals(json_text) + + +def test_parse_classified_signals_invalid_category(): + """Test that invalid category raises ValueError.""" + json_text = json.dumps({ + "classified": [ + { + "signal": { + "change": "Fix", + "impact": "low", + "users_affected": "none", + "confidence": "low", + "final_outcome": False, + }, + "category": "invalid_category", + } + ], + "summary": "Invalid", + }) + + with pytest.raises(ValueError): + _parse_classified_signals(json_text) + + +# ============================================================================ +# Integration Tests +# ============================================================================ + + +def test_signal_to_classified_to_result_flow(): + """Integration test: Signal → ClassifiedSignal → ClassifiedSignals.""" + signals = [ + Signal( + change="Change 1", + impact="high", + users_affected="users", + confidence="high", + final_outcome=True, + ), + Signal( + change="Change 2", + impact="low", + users_affected="some", + confidence="low", + final_outcome=False, + ), + ] + + classified_signals = [ + ClassifiedSignal(signal=signals[0], category="major"), + ClassifiedSignal(signal=signals[1], category="minor"), + ] + + result = ClassifiedSignals( + classified=classified_signals, + summary="Two changes: one major, one minor", + ) + + assert len(result.classified) == 2 + assert result.classified[0].signal.change == "Change 1" + assert result.classified[0].category == "major" + + +def test_parse_and_classify_full_flow(): + """Test parsing signal list, then creating classified signals.""" + # First: extract signals from LLM response + extraction_response = """ + ```json + [ + { + "change": "Improved audio codec", + "impact": "high", + "users_affected": "all", + "confidence": "high", + "final_outcome": true + } + ] + ``` + """ + + extracted = _parse_signal_list(extraction_response) + assert len(extracted) == 1 + + # Second: classify the signals + classification_response = """ + ```json + { + "classified": [ + { + "signal": { + "change": "Improved audio codec", + "impact": "high", + "users_affected": "all", + "confidence": "high", + "final_outcome": true + }, + "category": "major" + } + ], + "summary": "Major improvement to audio handling" + } + ``` + """ + + classified = _parse_classified_signals(classification_response) + assert len(classified.classified) == 1 + assert classified.classified[0].category == "major" + + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) diff --git a/tools/release_announcement/tests/test_hybrid_backend_integration.py b/tools/release_announcement/tests/test_hybrid_backend_integration.py new file mode 100644 index 0000000000..b55fbba08e --- /dev/null +++ b/tools/release_announcement/tests/test_hybrid_backend_integration.py @@ -0,0 +1,53 @@ +"""Integration test for hybrid backend routing in staged preprocessing.""" + +from release_announcement import main as ra_main + + +class _TagOnlyAdapter: + """Minimal adapter stub that exposes only backend identity for routing tests.""" + + def __init__(self, tag: str, observed: dict[str, str]) -> None: + self._tag = tag + self._observed = observed + + def select_relevant_chunks(self, _chunks, use_embeddings, _ranking_prompts): + bucket = "embedding" if use_embeddings else "chat" + self._observed[bucket] = self._tag + return [] + + def get_adapter_tag(self) -> str: + return self._tag + + +def test_hybrid_backend_pipeline(monkeypatch) -> None: + """Wire different chat/embedding backends and assert staged routing preserves both.""" + observed = {"chat": "", "embedding": ""} + + def _stub_registry_get(name: str): + if name in {"ollama", "github"}: + return _TagOnlyAdapter(name, observed) + return None + + def _stub_run_distillation_pipeline(*, adapter, **_kwargs): + adapter.select_relevant_chunks([], True, []) + adapter.select_relevant_chunks([], False, []) + return ra_main.DistilledContext( + summary="ok", structured_signals=[], classification={"classified": []}, metadata={} + ) + + monkeypatch.setattr(ra_main.registry, "get", _stub_registry_get) + monkeypatch.setattr(ra_main, "run_distillation_pipeline", _stub_run_distillation_pipeline) + + context = ra_main.prepare_pr_context( + pr_data={"number": 123, "title": "Hybrid backend integration"}, + backend_config=ra_main.BackendConfig( + backend="ollama", + chat_model_backend="ollama", + embedding_model_backend="github", + pipeline_mode="staged", + ), + pipeline_mode="staged", + ) + + assert context is not None + assert observed == {"chat": "ollama", "embedding": "github"} diff --git a/tools/release_announcement/tests/test_placeholder_prompts.py b/tools/release_announcement/tests/test_placeholder_prompts.py new file mode 100644 index 0000000000..ed777ccaf2 --- /dev/null +++ b/tools/release_announcement/tests/test_placeholder_prompts.py @@ -0,0 +1,472 @@ +"""Unit tests for staged distillation prompt files. + +Tests verify that stage prompt files exist, have valid YAML structure, +and can be loaded by the prompt loader without schema errors. +""" + +import os +import pytest +import yaml + +from src.release_announcement.main import _load_prompt_file, _load_prompts + + +class TestPlaceholderPromptFilesCore: + """Tests for placeholder prompt file existence and validity.""" + + def get_prompt_path(self, filename: str) -> str: + """Get the absolute path to a prompt file in tools/release_announcement/prompts/.""" + base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname( + os.path.abspath(__file__) + )))) + return os.path.join(base_dir, "tools", "release_announcement", "prompts", filename) + + def test_extraction_prompt_exists(self): + """Verify extraction.prompt.yml exists.""" + path = self.get_prompt_path("extraction.prompt.yml") + assert os.path.exists(path), f"extraction.prompt.yml not found at {path}" + + def test_consolidation_prompt_exists(self): + """Verify consolidation.prompt.yml exists.""" + path = self.get_prompt_path("consolidation.prompt.yml") + assert os.path.exists(path), f"consolidation.prompt.yml not found at {path}" + + def test_classification_prompt_exists(self): + """Verify classification.prompt.yml exists.""" + path = self.get_prompt_path("classification.prompt.yml") + assert os.path.exists(path), f"classification.prompt.yml not found at {path}" + + def test_extraction_prompt_valid_yaml(self): + """Verify extraction.prompt.yml is valid YAML.""" + path = self.get_prompt_path("extraction.prompt.yml") + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + assert data is not None + + def test_consolidation_prompt_valid_yaml(self): + """Verify consolidation.prompt.yml is valid YAML.""" + path = self.get_prompt_path("consolidation.prompt.yml") + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + assert data is not None + + def test_classification_prompt_valid_yaml(self): + """Verify classification.prompt.yml is valid YAML.""" + path = self.get_prompt_path("classification.prompt.yml") + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + assert data is not None + + def test_extraction_prompt_has_messages_key(self): + """Verify extraction.prompt.yml has 'messages' key.""" + path = self.get_prompt_path("extraction.prompt.yml") + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + assert "messages" in data, "extraction.prompt.yml missing 'messages' key" + + def test_consolidation_prompt_has_messages_key(self): + """Verify consolidation.prompt.yml has 'messages' key.""" + path = self.get_prompt_path("consolidation.prompt.yml") + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + assert "messages" in data, "consolidation.prompt.yml missing 'messages' key" + + def test_classification_prompt_has_messages_key(self): + """Verify classification.prompt.yml has 'messages' key.""" + path = self.get_prompt_path("classification.prompt.yml") + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + assert "messages" in data, "classification.prompt.yml missing 'messages' key" + + def test_extraction_prompt_messages_is_list(self): + """Verify extraction.prompt.yml messages is a list.""" + path = self.get_prompt_path("extraction.prompt.yml") + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + assert isinstance(data["messages"], list), "extraction.prompt.yml messages is not a list" + assert len(data["messages"]) > 0, "extraction.prompt.yml messages is empty" + + def test_consolidation_prompt_messages_is_list(self): + """Verify consolidation.prompt.yml messages is a list.""" + path = self.get_prompt_path("consolidation.prompt.yml") + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + assert isinstance( + data["messages"], + list, + ), "consolidation.prompt.yml messages is not a list" + assert len(data["messages"]) > 0, "consolidation.prompt.yml messages is empty" + + def test_classification_prompt_messages_is_list(self): + """Verify classification.prompt.yml messages is a list.""" + path = self.get_prompt_path("classification.prompt.yml") + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + assert isinstance( + data["messages"], + list, + ), "classification.prompt.yml messages is not a list" + assert len(data["messages"]) > 0, "classification.prompt.yml messages is empty" + + def test_extraction_prompt_has_system_message(self): + """Verify extraction.prompt.yml has at least one system message.""" + path = self.get_prompt_path("extraction.prompt.yml") + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + messages = data["messages"] + system_msgs = [m for m in messages if isinstance(m, dict) and m.get("role") == "system"] + assert len(system_msgs) > 0, "extraction.prompt.yml has no system message" + + def test_consolidation_prompt_has_system_message(self): + """Verify consolidation.prompt.yml has at least one system message.""" + path = self.get_prompt_path("consolidation.prompt.yml") + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + messages = data["messages"] + system_msgs = [m for m in messages if isinstance(m, dict) and m.get("role") == "system"] + assert len(system_msgs) > 0, "consolidation.prompt.yml has no system message" + + def test_classification_prompt_has_system_message(self): + """Verify classification.prompt.yml has at least one system message.""" + path = self.get_prompt_path("classification.prompt.yml") + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + messages = data["messages"] + system_msgs = [m for m in messages if isinstance(m, dict) and m.get("role") == "system"] + assert len(system_msgs) > 0, "classification.prompt.yml has no system message" + + def test_extraction_prompt_system_message_has_content(self): + """Verify extraction.prompt.yml system message has content.""" + path = self.get_prompt_path("extraction.prompt.yml") + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + messages = data["messages"] + system_msgs = [m for m in messages if isinstance(m, dict) and m.get("role") == "system"] + assert len(system_msgs) > 0 + assert "content" in system_msgs[0] + assert isinstance(system_msgs[0]["content"], str) + assert len(system_msgs[0]["content"]) > 0 + + def test_consolidation_prompt_system_message_has_content(self): + """Verify consolidation.prompt.yml system message has content.""" + path = self.get_prompt_path("consolidation.prompt.yml") + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + messages = data["messages"] + system_msgs = [m for m in messages if isinstance(m, dict) and m.get("role") == "system"] + assert len(system_msgs) > 0 + assert "content" in system_msgs[0] + assert isinstance(system_msgs[0]["content"], str) + assert len(system_msgs[0]["content"]) > 0 + + def test_classification_prompt_system_message_has_content(self): + """Verify classification.prompt.yml system message has content.""" + path = self.get_prompt_path("classification.prompt.yml") + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + messages = data["messages"] + system_msgs = [m for m in messages if isinstance(m, dict) and m.get("role") == "system"] + assert len(system_msgs) > 0 + assert "content" in system_msgs[0] + assert isinstance(system_msgs[0]["content"], str) + assert len(system_msgs[0]["content"]) > 0 + + + +class TestPlaceholderPromptFilesRanking: + """Tests focused on ranking prompt and cross-file comparisons.""" + + def get_prompt_path(self, filename: str) -> str: + """Get the absolute path to a prompt file in tools/release_announcement/prompts/.""" + base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname( + os.path.abspath(__file__) + )))) + return os.path.join(base_dir, "tools", "release_announcement", "prompts", filename) + + def test_ranking_prompt_exists(self): + """Verify ranking.prompt.yml exists.""" + path = self.get_prompt_path("ranking.prompt.yml") + assert os.path.exists(path), f"ranking.prompt.yml not found at {path}" + + def test_ranking_prompt_valid_yaml(self): + """Verify ranking.prompt.yml is valid YAML.""" + path = self.get_prompt_path("ranking.prompt.yml") + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + assert data is not None + + def test_ranking_prompt_has_messages_key(self): + """Verify ranking.prompt.yml has 'messages' key.""" + path = self.get_prompt_path("ranking.prompt.yml") + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + assert "messages" in data, "ranking.prompt.yml missing 'messages' key" + + def test_ranking_prompt_messages_is_list(self): + """Verify ranking.prompt.yml messages is a list.""" + path = self.get_prompt_path("ranking.prompt.yml") + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + assert isinstance(data["messages"], list), "ranking.prompt.yml messages is not a list" + assert len(data["messages"]) > 0, "ranking.prompt.yml messages is empty" + + def test_ranking_prompt_has_user_message(self): + """Verify ranking.prompt.yml has at least one user message.""" + path = self.get_prompt_path("ranking.prompt.yml") + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + messages = data["messages"] + user_msgs = [m for m in messages if isinstance(m, dict) and m.get("role") == "user"] + assert len(user_msgs) > 0, "ranking.prompt.yml has no user message" + + def test_ranking_prompt_user_message_has_content(self): + """Verify ranking.prompt.yml user message has content.""" + path = self.get_prompt_path("ranking.prompt.yml") + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + messages = data["messages"] + user_msgs = [m for m in messages if isinstance(m, dict) and m.get("role") == "user"] + assert len(user_msgs) > 0 + assert "content" in user_msgs[0] + assert isinstance(user_msgs[0]["content"], str) + assert len(user_msgs[0]["content"]) > 0 + + def test_ranking_prompt_has_template_variables(self): + """Verify ranking.prompt.yml contains template variables.""" + path = self.get_prompt_path("ranking.prompt.yml") + with open(path, "r", encoding="utf-8") as f: + data = yaml.safe_load(f) + messages = data["messages"] + user_msgs = [m for m in messages if isinstance(m, dict) and m.get("role") == "user"] + content = user_msgs[0]["content"] + # Check for expected template variables + assert "{chunk_count}" in content, "ranking.prompt.yml missing {chunk_count} variable" + assert "{chunks}" in content, "ranking.prompt.yml missing {chunks} variable" + + def test_all_four_prompts_different_content(self): + """Verify the four prompts have different content (not identical copies).""" + with open(self.get_prompt_path("extraction.prompt.yml"), "r", encoding="utf-8") as f: + extraction = yaml.safe_load(f) + with open(self.get_prompt_path("consolidation.prompt.yml"), "r", encoding="utf-8") as f: + consolidation = yaml.safe_load(f) + with open(self.get_prompt_path("classification.prompt.yml"), "r", encoding="utf-8") as f: + classification = yaml.safe_load(f) + with open(self.get_prompt_path("ranking.prompt.yml"), "r", encoding="utf-8") as f: + ranking = yaml.safe_load(f) + + extraction_content = extraction["messages"][0]["content"] + consolidation_content = consolidation["messages"][0]["content"] + classification_content = classification["messages"][0]["content"] + ranking_content = ranking["messages"][0]["content"] + + # At least one should be different (they shouldn't all be identical) + unique_prompts = {extraction_content, consolidation_content, + classification_content, ranking_content} + assert len(unique_prompts) >= 2, "Prompts should have different content" + + +class TestLoadPromptFileFunction: + """Tests for the _load_prompt_file helper function.""" + + SIGNAL_FIELDS = [ + "change", + "impact", + "users_affected", + "confidence", + "final_outcome", + ] + + def _assert_io_contract( + self, + content: str, + expect_classification_object: bool = False, + ) -> None: + """Verify prompt text includes stable Input/Output contract sections.""" + assert "Input:" in content or "Input scope:" in content + assert "Output contract:" in content + assert "Return ONLY valid JSON." in content + for field in self.SIGNAL_FIELDS: + assert field in content + + if expect_classification_object: + # Must align with ClassifiedSignals top-level structure used by parsing code. + assert "classified" in content + assert "summary" in content + + def get_prompt_path(self, filename: str) -> str: + """Get the absolute path to a prompt file in tools/release_announcement/prompts/.""" + base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname( + os.path.abspath(__file__) + )))) + return os.path.join(base_dir, "tools", "release_announcement", "prompts", filename) + + def test_load_extraction_prompt_with_system_role(self): + """Verify _load_prompt_file correctly loads extraction prompt with system role.""" + path = self.get_prompt_path("extraction.prompt.yml") + content = _load_prompt_file(path, role="system") + + assert isinstance(content, str) + assert len(content) > 0 + assert "placeholder" not in content.lower() + self._assert_io_contract(content) + + def test_load_consolidation_prompt_with_system_role(self): + """Verify _load_prompt_file correctly loads consolidation prompt with system role.""" + path = self.get_prompt_path("consolidation.prompt.yml") + content = _load_prompt_file(path, role="system") + + assert isinstance(content, str) + assert len(content) > 0 + assert "placeholder" not in content.lower() + self._assert_io_contract(content) + + def test_load_classification_prompt_with_system_role(self): + """Verify _load_prompt_file correctly loads classification prompt with system role.""" + path = self.get_prompt_path("classification.prompt.yml") + content = _load_prompt_file(path, role="system") + + assert isinstance(content, str) + assert len(content) > 0 + assert "placeholder" not in content.lower() + self._assert_io_contract(content, expect_classification_object=True) + assert "internal" in content + assert "minor" in content + assert "targeted" in content + assert "major" in content + assert "no_user_facing_changes" in content + + def test_stage_prompts_do_not_contain_placeholder_marker(self): + """Verify stage prompts no longer contain placeholder content.""" + for filename in [ + "extraction.prompt.yml", + "consolidation.prompt.yml", + "classification.prompt.yml", + ]: + path = self.get_prompt_path(filename) + content = _load_prompt_file(path, role="system") + assert "placeholder" not in content.lower() + + def test_load_ranking_prompt_with_user_role(self): + """Verify _load_prompt_file correctly loads ranking prompt with user role.""" + path = self.get_prompt_path("ranking.prompt.yml") + content = _load_prompt_file(path, role="user") + + assert isinstance(content, str) + assert len(content) > 0 + assert "{chunk_count}" in content + assert "{chunks}" in content + + def test_load_system_prompt_fails_with_user_role(self): + """Verify _load_prompt_file fails when requesting user role from system-only prompt.""" + path = self.get_prompt_path("extraction.prompt.yml") + + with pytest.raises(ValueError) as exc_info: + _load_prompt_file(path, role="user") + + assert "user" in str(exc_info.value) + assert "extraction.prompt.yml" in str(exc_info.value) + + def test_load_ranking_prompt_with_both_roles(self): + """Verify _load_prompt_file can load both system and user roles from ranking prompt.""" + path = self.get_prompt_path("ranking.prompt.yml") + + # Ranking prompt now has both system and user messages + system_content = _load_prompt_file(path, role="system") + user_content = _load_prompt_file(path, role="user") + + assert isinstance(system_content, str) + assert len(system_content) > 0 + assert "release announcement" in system_content.lower() + + assert isinstance(user_content, str) + assert len(user_content) > 0 + assert "{chunk_count}" in user_content or "{chunks}" in user_content + + +class TestLoadPromptMessagesFunction: + """Tests for the _load_prompts helper function.""" + + def get_prompt_path(self, filename: str) -> str: + """Get the absolute path to a prompt file in tools/release_announcement/prompts/.""" + base_dir = os.path.dirname(os.path.dirname(os.path.dirname(os.path.dirname( + os.path.abspath(__file__) + )))) + return os.path.join(base_dir, "tools", "release_announcement", "prompts", filename) + + def test_load_ranking_prompt_messages_structure(self): + """Verify _load_prompts returns full messages with both roles.""" + path = self.get_prompt_path("ranking.prompt.yml") + messages = _load_prompts(path) + + assert isinstance(messages, list) + assert len(messages) >= 2 + + roles = [msg.get("role") for msg in messages] + assert "system" in roles + assert "user" in roles + + def test_ranking_prompt_messages_have_content(self): + """Verify ranking.prompt.yml messages all have content.""" + path = self.get_prompt_path("ranking.prompt.yml") + messages = _load_prompts(path) + + for msg in messages: + assert "content" in msg + assert isinstance(msg["content"], str) + assert len(msg["content"]) > 0 + + def test_ranking_prompt_user_message_has_template_variables(self): + """Verify ranking.prompt.yml user message contains template variables.""" + path = self.get_prompt_path("ranking.prompt.yml") + messages = _load_prompts(path) + + user_msg = next((m for m in messages if m.get("role") == "user"), None) + assert user_msg is not None + + content = user_msg["content"] + assert "{chunk_count}" in content + assert "{chunks}" in content + + def test_load_prompts_invalid_file(self): + """Verify _load_prompts fails gracefully for missing file.""" + with pytest.raises(FileNotFoundError): + _load_prompts("/nonexistent/prompts/missing.prompt.yml") + + def test_load_extraction_prompt_messages(self): + """Verify extraction.prompt.yml loads as messages list with system role.""" + path = self.get_prompt_path("extraction.prompt.yml") + messages = _load_prompts(path) + + assert isinstance(messages, list) + assert len(messages) >= 1 + + system_msg = next((m for m in messages if m.get("role") == "system"), None) + assert system_msg is not None + assert isinstance(system_msg["content"], str) + assert len(system_msg["content"]) > 0 + + def test_load_consolidation_prompt_messages(self): + """Verify consolidation.prompt.yml loads as messages list with system role.""" + path = self.get_prompt_path("consolidation.prompt.yml") + messages = _load_prompts(path) + + assert isinstance(messages, list) + assert len(messages) >= 1 + + system_msg = next((m for m in messages if m.get("role") == "system"), None) + assert system_msg is not None + assert isinstance(system_msg["content"], str) + assert len(system_msg["content"]) > 0 + + def test_load_classification_prompt_messages(self): + """Verify classification.prompt.yml loads as messages list with system role.""" + path = self.get_prompt_path("classification.prompt.yml") + messages = _load_prompts(path) + + assert isinstance(messages, list) + assert len(messages) >= 1 + + system_msg = next((m for m in messages if m.get("role") == "system"), None) + assert system_msg is not None + assert isinstance(system_msg["content"], str) + assert len(system_msg["content"]) > 0 diff --git a/tools/release_announcement/tests/test_registry.py b/tools/release_announcement/tests/test_registry.py new file mode 100644 index 0000000000..9f71514e53 --- /dev/null +++ b/tools/release_announcement/tests/test_registry.py @@ -0,0 +1,82 @@ +"""Tests for the backend registry.""" + +from src.release_announcement.registry import registry + + +def test_registry_register_and_get(): + """Test that the registry can register and retrieve a backend.""" + # Create a minimal backend that implements the protocol + class MinimalBackend: + def probe_chat(self, model: str | None) -> bool: + """Report chat support for the minimal backend stub.""" + # At least acknowledge the parameter exists + if model is not None: + _ = model # Acknowledge parameter + return True + + def probe_embeddings(self, model: str | None) -> bool: + """Report that the minimal backend stub has no embedding support.""" + # At least acknowledge the parameter exists + if model is not None: + _ = model # Acknowledge parameter + return False + + def call_chat(self, prompt: dict) -> str: + """Return a placeholder chat response for registry tests.""" + # At least acknowledge the parameter exists + _ = len(prompt) # Acknowledge parameter + return "response" + + def create_minimal_backend(): + """Create a minimal backend instance for registry registration tests.""" + return MinimalBackend() + + registry.register("testBackend", create_minimal_backend) + backend = registry.get("testBackend") + assert isinstance(backend, MinimalBackend) + + +def test_registry_get_unknown(): + """Test that the registry returns None for unknown backends.""" + assert registry.get("unknownBackend") is None + + +def test_registry_duplicate_registration_replaces_backend(): + """Test that duplicate backend registration replaces the existing backend.""" + + class FirstBackend: + def probe_chat(self, _model: str | None) -> bool: + """Report chat support for the first replacement backend.""" + return True + + def probe_embeddings(self, _model: str | None) -> bool: + """Report no embedding support for the first replacement backend.""" + return False + + def call_chat(self, _prompt: dict) -> str: + """Return the first backend marker response.""" + return "first" + + class SecondBackend: + def probe_chat(self, _model: str | None) -> bool: + """Report chat support for the second replacement backend.""" + return True + + def probe_embeddings(self, _model: str | None) -> bool: + """Report no embedding support for the second replacement backend.""" + return False + + def call_chat(self, _prompt: dict) -> str: + """Return the second backend marker response.""" + return "second" + + registry.register("replaceableBackend", FirstBackend) + first = registry.get("replaceableBackend") + assert first is not None + assert first.call_chat({}) == "first" + + registry.register("replaceableBackend", SecondBackend) + second = registry.get("replaceableBackend") + assert second is not None + assert second.call_chat({}) == "second" + assert first is not second diff --git a/tools/release_announcement/tests/test_registry_lazy_init.py b/tools/release_announcement/tests/test_registry_lazy_init.py new file mode 100644 index 0000000000..0c7794424b --- /dev/null +++ b/tools/release_announcement/tests/test_registry_lazy_init.py @@ -0,0 +1,119 @@ +"""Tests for lazy backend initialization in the registry. + +Verifies that backends are only instantiated when requested, not at import time. +This allows the module to be imported without environment requirements. +""" + +import os +import pytest +from src.release_announcement.registry import registry + + +class TestRegistryLazyInitialization: + """Tests for lazy backend initialization.""" + + def test_registry_imports_without_github_token(self): + """Verify registry can be imported without GITHUB_TOKEN set. + + This confirms that the actions backend is not instantiated at import time. + """ + # If this test runs, it proves the import succeeded without GITHUB_TOKEN + assert registry is not None + + def test_dummy_backend_can_be_retrieved(self): + """Verify dummy backend is registered (via test file import). + + The dummy backend is registered when tests/dummy_backend.py is imported. + This test verifies it's available in the registry. + """ + # Dummy backend is only registered if the test file was already imported + # We can't guarantee import order, so we just verify the registry works + backend = registry.get("ollama") # Use a guaranteed backend instead + assert backend is not None + + def test_ollama_backend_lazy_init(self): + """Verify ollama backend is lazily initialized on first access.""" + backend = registry.get("ollama") + assert backend is not None + # Getting it again should return the same cached instance + backend2 = registry.get("ollama") + assert backend is backend2 + + def test_github_backend_lazy_init(self): + """Verify github backend is lazily initialized on first access.""" + backend = registry.get("github") + assert backend is not None + # Getting it again should return the same cached instance + backend2 = registry.get("github") + assert backend is backend2 + + def test_actions_backend_requires_token_on_use(self): + """Verify actions backend only requires GITHUB_TOKEN when actually requested. + + With lazy token resolution, the backend is instantiated without error, + but accessing the token property raises RuntimeError. + """ + # Save current env + old_token = os.environ.get("GITHUB_TOKEN") + old_gh_token = os.environ.get("GH_TOKEN") + + try: + # Ensure token is not set + os.environ.pop("GITHUB_TOKEN", None) + os.environ.pop("GH_TOKEN", None) + + # Requesting the actions backend succeeds without error + backend = registry.get("actions") + assert backend is not None + + # But accessing the token property raises RuntimeError + with pytest.raises(RuntimeError, match="GITHUB_TOKEN"): + _ = backend.token + finally: + # Restore env + if old_token is not None: + os.environ["GITHUB_TOKEN"] = old_token + if old_gh_token is not None: + os.environ["GH_TOKEN"] = old_gh_token + + def test_nonexistent_backend_returns_none(self): + """Verify requesting a nonexistent backend returns None.""" + backend = registry.get("nonexistent") + assert backend is None + + def test_actions_backend_token_cached(self): + """Verify actions backend caches the token after first resolution.""" + # Save current env + old_token = os.environ.get("GITHUB_TOKEN") + + try: + # Set a dummy token + os.environ["GITHUB_TOKEN"] = "test_token_12345" + + backend = registry.get("actions") + + # First access resolves the token + token1 = backend.token + assert token1 == "test_token_12345" + + # Change env (shouldn't affect cached value) + os.environ["GITHUB_TOKEN"] = "different_token" + + # Second access returns cached value + token2 = backend.token + assert token2 == "test_token_12345" + assert token1 is token2 # Same object + finally: + # Restore env + if old_token is not None: + os.environ["GITHUB_TOKEN"] = old_token + else: + os.environ.pop("GITHUB_TOKEN", None) + + def test_backend_protocols_implemented(self): + """Verify all retrieved backends implement BackendProtocol methods.""" + for backend_name in ["ollama", "github"]: + backend = registry.get(backend_name) + assert hasattr(backend, "probe_chat") + assert hasattr(backend, "probe_embeddings") + assert hasattr(backend, "call_chat") diff --git a/tools/release_announcement/tests/test_regression_matrix.py b/tools/release_announcement/tests/test_regression_matrix.py new file mode 100644 index 0000000000..fcb153fed2 --- /dev/null +++ b/tools/release_announcement/tests/test_regression_matrix.py @@ -0,0 +1,207 @@ +"""Regression matrix tests that compare generated announcements against baselines.""" + +from __future__ import annotations + +import os +import subprocess +from dataclasses import dataclass +from datetime import UTC, datetime +from pathlib import Path + +import pytest + +from tests.cli_invocation import build_release_announcement_cmd + + +REPO_ROOT = Path(__file__).resolve().parents[3] +DEFAULT_BASELINE_DIR = REPO_ROOT / ".vscode" / "release-announcement-baseline" + + +@dataclass(frozen=True) +class MatrixScenario: + backend: str + label: str + start: str + end: str | None = None + dry_run: bool = False + + +@dataclass(frozen=True) +class MatrixCaseResult: + case_name: str + return_code: int + diff_text: str + new_after: str + + +SCENARIOS = [ + MatrixScenario("ollama", "pr3429", "pr3429"), + MatrixScenario("github", "pr3429", "pr3429"), + MatrixScenario("ollama", "pr3625", "pr3625"), + MatrixScenario("github", "pr3625", "pr3625"), + MatrixScenario("ollama", "pr3502", "pr3502"), + MatrixScenario("github", "pr3502", "pr3502"), + MatrixScenario("ollama", "tag_r3_12_0beta4_to_r3_12_0beta5", + "r3_12_0beta4", "r3_12_0beta5", True), + MatrixScenario("github", "tag_r3_12_0beta4_to_r3_12_0beta5", + "r3_12_0beta4", "r3_12_0beta5", True), +] + + +def _is_clean_tracked_worktree() -> bool: + return subprocess.run( + ["git", "diff", "--quiet", "--exit-code"], cwd=REPO_ROOT, check=False + ).returncode == 0 + + +def _git(*args: str) -> str: + return subprocess.check_output(["git", *args], cwd=REPO_ROOT, text=True).strip() + + +def _prepare_matrix_environment() -> tuple[Path, bool, dict[str, str], Path, str]: + """Validate prerequisites and prepare run environment for matrix tests.""" + baseline_dir = Path(os.getenv("RA_BASELINE_DIR", str(DEFAULT_BASELINE_DIR))) + strict_output = os.getenv("RA_STRICT_OUTPUT", "0") == "1" + + py_env = os.environ.copy() + src_path = REPO_ROOT / "tools" / "release_announcement" / "src" + py_env["PYTHONPATH"] = f"{src_path}:{py_env.get('PYTHONPATH', '')}" + + run_root = ( + REPO_ROOT + / "build" + / "release_announcement" + / "matrix_regression" + / datetime.now(UTC).strftime("%Y%m%d_%H%M%S") + ) + run_root.mkdir(parents=True, exist_ok=True) + + base_commit = _git("rev-parse", "HEAD") + return baseline_dir, strict_output, py_env, run_root, base_commit + + +def _build_matrix_command(scenario: MatrixScenario) -> list[str]: + return build_release_announcement_cmd( + backend=scenario.backend, + pipeline="legacy", + start=scenario.start, + end=scenario.end, + dry_run=scenario.dry_run, + ) + + +def _run_matrix_scenario( + scenario: MatrixScenario, + py_env: dict[str, str], + run_root: Path, + base_commit: str, +) -> tuple[str, Path, str, int, str]: + case_name = f"{scenario.backend}__{scenario.label}" + out_case = run_root / case_name + out_case.mkdir(parents=True, exist_ok=True) + + cmd = _build_matrix_command(scenario) + with (out_case / "stdout-stderr.log").open("w", encoding="utf-8") as handle: + proc = subprocess.run( + cmd, + cwd=REPO_ROOT, + env=py_env, + check=False, + stdout=handle, + stderr=subprocess.STDOUT, + text=True, + ) + + after_text = (REPO_ROOT / "ReleaseAnnouncement.md").read_text(encoding="utf-8") + (out_case / "exit_code.txt").write_text(f"{proc.returncode}\n", encoding="utf-8") + (out_case / "ReleaseAnnouncement.after.md").write_text(after_text, encoding="utf-8") + diff_text = subprocess.check_output( + ["git", "--no-pager", "diff", "--", "ReleaseAnnouncement.md"], + cwd=REPO_ROOT, + text=True, + ) + (out_case / "git-diff.txt").write_text(diff_text, encoding="utf-8") + + subprocess.run(["git", "reset", "--hard", base_commit], cwd=REPO_ROOT, check=True) + return case_name, out_case, after_text, proc.returncode, diff_text + + +def _compare_matrix_case( + result: MatrixCaseResult, + baseline_case: Path, + strict_output: bool, + failures: list[str], +) -> None: + baseline_exit = (baseline_case / "exit_code.txt").read_text(encoding="utf-8").strip() + if str(result.return_code) != baseline_exit: + failures.append( + f"{result.case_name}: exit code mismatch " + f"(new={result.return_code}, baseline={baseline_exit})" + ) + + baseline_diff = (baseline_case / "git-diff.txt").read_text(encoding="utf-8") + if result.diff_text != baseline_diff: + failures.append(f"{result.case_name}: git-diff mismatch") + + baseline_changed = bool(baseline_diff.strip()) + new_changed = bool(result.diff_text.strip()) + if baseline_changed != new_changed: + failures.append( + f"{result.case_name}: changed/no-change mismatch " + f"(new_changed={new_changed}, baseline_changed={baseline_changed})" + ) + + if strict_output: + baseline_after = (baseline_case / "ReleaseAnnouncement.after.md").read_text( + encoding="utf-8" + ) + if baseline_after != result.new_after: + failures.append(f"{result.case_name}: ReleaseAnnouncement.after.md mismatch") + + +@pytest.mark.integration +@pytest.mark.regression +def test_release_announcement_matrix_regression_against_baseline() -> None: + """Compare a backend/scenario matrix against saved release announcement baselines.""" + if os.getenv("RA_RUN_MATRIX") != "1": + pytest.skip("Set RA_RUN_MATRIX=1 to run full matrix regression") + + baseline_dir, strict_output, py_env, run_root, base_commit = _prepare_matrix_environment() + if not baseline_dir.exists(): + pytest.skip(f"Baseline directory does not exist: {baseline_dir}") + + if not _is_clean_tracked_worktree(): + pytest.skip("Tracked files are dirty; matrix regression requires a clean tracked worktree") + + failures: list[str] = [] + + try: + for scenario in SCENARIOS: + case_name = f"{scenario.backend}__{scenario.label}" + baseline_case = baseline_dir / case_name + if not baseline_case.exists(): + failures.append(f"missing baseline case directory: {baseline_case}") + continue + + case_name, _out_case, new_after, return_code, diff_text = _run_matrix_scenario( + scenario, + py_env, + run_root, + base_commit, + ) + result = MatrixCaseResult( + case_name=case_name, + return_code=return_code, + diff_text=diff_text, + new_after=new_after, + ) + _compare_matrix_case( + result=result, + baseline_case=baseline_case, + strict_output=strict_output, + failures=failures, + ) + finally: + subprocess.run(["git", "reset", "--hard", base_commit], cwd=REPO_ROOT, check=False) + + assert not failures, "\n".join(failures) diff --git a/tools/release_announcement/tests/test_staged_pipeline_integration.py b/tools/release_announcement/tests/test_staged_pipeline_integration.py new file mode 100644 index 0000000000..0390857efa --- /dev/null +++ b/tools/release_announcement/tests/test_staged_pipeline_integration.py @@ -0,0 +1,96 @@ +"""Integration tests for the staged distillation pipeline (Substeps 4f/4g). + +These tests verify that: +- The staged pipeline runs end-to-end with the dummy backend, logging each stage. +- The fallback to the legacy pipeline occurs if the staged pipeline returns None. +""" + +import logging + +import pytest +from tests.dummy_backend import DummyBackend, register_dummy_backend +from release_announcement.app_logger import logger as app_logger + +from src.release_announcement.main import ( + prepare_pr_context, + BackendConfig, + _load_prompts, + _resolve_prompt_dir, +) +from src.release_announcement.registry import registry + + +@pytest.fixture(autouse=True) +def register_dummy(): + """Ensure the dummy backend is registered before each test in this module.""" + # Ensure dummy backend is registered for these tests + register_dummy_backend() + + +def sample_pr_data(): + """Return representative PR data for staged pipeline integration tests.""" + return { + "number": 42, + "title": "Test PR", + "body": "This is a test PR body.", + "comments": ["Looks good", "Please update docs"], + "reviews": ["Approved"], + } + + +def test_staged_pipeline_dummy_backend_logs_all_stages(capsys, caplog): + """Verify staged pipeline routes to correct backends and executes phases.""" + + caplog.set_level(logging.DEBUG) + app_logger.set_level("DEBUG") + config = BackendConfig(backend="dummy", pipeline_mode="staged") + context = prepare_pr_context(sample_pr_data(), config, "staged") + captured = capsys.readouterr() + out = captured.out + captured.err + caplog.text + prompt_dir = _resolve_prompt_dir() + extraction_preview = ( + _load_prompts(f"{prompt_dir}/extraction.prompt.yml")[0]["content"] + .strip() + .replace("\n", " ")[:80] + ) + consolidation_preview = _load_prompts( + f"{prompt_dir}/consolidation.prompt.yml" + )[0]["content"].strip().replace("\n", " ")[:80] + classification_preview = _load_prompts( + f"{prompt_dir}/classification.prompt.yml" + )[0]["content"].strip().replace("\n", " ")[:80] + # Ensure staged preprocessing startup reports chat-only embedding mode. + assert "staged.preprocessing.start chat_backend=dummy" in out + assert "embedding_mode=disabled:dummy" in out + assert "DummyBackend.extract_chunk_signals" in out + assert "DummyBackend.consolidate_signals" in out + assert "DummyBackend.classify_signals" in out + assert f"system_prompt_preview={extraction_preview!r}" in out + assert f"system_prompt_preview={consolidation_preview!r}" in out + assert f"system_prompt_preview={classification_preview!r}" in out + assert "system_prompt_preview='placeholder" not in out + # The context should be a DistilledContext (not None) + assert context is not None + assert isinstance(context.classification, dict) + assert "classified" in context.classification + + +def test_staged_pipeline_fallback_to_legacy(capsys): + """Verify legacy fallback remains available when staged selection fails.""" + class FallbackDummyBackend(DummyBackend): + def select_relevant_chunks(self, chunks, use_embeddings, ranking_prompts=None): + return None # Simulate backend failure to trigger fallback + + def _create_fallback_backend() -> FallbackDummyBackend: + return FallbackDummyBackend() + + # Register the fallback backend under a unique name + registry.register("fallback_dummy", _create_fallback_backend) + config = BackendConfig(backend="fallback_dummy", pipeline_mode="staged") + context = prepare_pr_context(sample_pr_data(), config, "staged") + _captured = capsys.readouterr() # Capture output for potential future logging checks + # The pipeline should handle the None return or exception gracefully + # Note: with current implementation, async pipeline may produce a context + # even on partial failure + assert context is None or hasattr(context, "summary") + # Optionally, check for fallback log message if implemented diff --git a/tools/release_announcement/tests/test_step2_pipeline.py b/tools/release_announcement/tests/test_step2_pipeline.py new file mode 100644 index 0000000000..24ba01a930 --- /dev/null +++ b/tools/release_announcement/tests/test_step2_pipeline.py @@ -0,0 +1,510 @@ +"""Tests Step 2 staged preprocessing integration and legacy fallback behavior.""" + +from __future__ import annotations + +import os +from pathlib import Path + +import pytest + +from release_announcement import main as ra_main +from release_announcement.distillation import ClassifiedSignals +from release_announcement.skip_rules import has_changelog_skip + + +def _sample_pr_data() -> dict: + return { + "number": 123, + "title": "Sample PR", + "body": "User visible change", + "comments": ["Looks good", "Merged"], + "reviews": ["Please rename field"], + } + + +def test_prepare_pr_context_legacy_returns_none(capsys: pytest.CaptureFixture[str]) -> None: + """Return no staged context when the legacy pipeline mode is requested.""" + config = ra_main.BackendConfig(pipeline_mode="legacy") + + context = ra_main.prepare_pr_context(_sample_pr_data(), config, "legacy") + + assert context is None + assert "staged.preprocessing" not in capsys.readouterr().out + + +def test_process_single_pr_staged_falls_back_to_legacy_builder( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + """Fall back to the legacy builder when staged preprocessing yields no context.""" + ann_file = tmp_path / "ReleaseAnnouncement.md" + ann_file.write_text("existing announcement", encoding="utf-8") + + pr_data = _sample_pr_data() + build_calls: list[dict] = [] + + monkeypatch.setattr(ra_main, "_fetch_pr_data", lambda *_args, **_kwargs: pr_data) + + def _capture_build( + current_content: str, + in_pr_data: dict, + _template: dict, + pipeline_mode: str = "legacy", + distilled_context=None, + ) -> dict: + build_calls.append( + { + "content": current_content, + "pr_data": in_pr_data, + "pipeline_mode": pipeline_mode, + "distilled_context": distilled_context, + } + ) + return { + "messages": [{"role": "user", "content": "prompt"}], + "model": "m", + "modelParameters": {}, + } + + monkeypatch.setattr(ra_main, "_load_prompt_template", + lambda *_args, **_kwargs: {"messages": []}) + monkeypatch.setattr(ra_main, "_build_ai_prompt", _capture_build) + monkeypatch.setattr(ra_main, "_process_with_llm", lambda *_args, **_kwargs: "updated") + monkeypatch.setattr(ra_main, "_write_and_check_announcement", lambda *_args, **_kwargs: None) + monkeypatch.setattr(ra_main, "_check_for_changes", lambda *_args, **_kwargs: "no_changes") + + result = ra_main.process_single_pr( + pr_num=123, + pr_title="Sample PR", + announcement_file=str(ann_file), + prompt_file="unused.yml", + config=ra_main.BackendConfig(backend="ollama", pipeline_mode="staged"), + ) + + assert result == "no_changes" + assert build_calls + assert build_calls[0]["pr_data"] == pr_data + assert build_calls[0]["pipeline_mode"] == "staged" + assert build_calls[0]["distilled_context"] is None + + captured = capsys.readouterr() + output = captured.out + captured.err + assert "staged preprocessing returned no context, falling back to legacy mode" in output + + +def test_prepare_pr_context_passes_loaded_phase_prompts_to_pipeline( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Load real stage prompt files and pass them through to the staged pipeline call.""" + captured: dict[str, object] = {} + + class _StubBackend: + pass + + def _stub_get(_name: str): + return _StubBackend() + + def _stub_run_distillation_pipeline(*, pr_data, adapter, backend_config, prompts): + captured["pr_data"] = pr_data + captured["adapter"] = adapter + captured["backend_config"] = backend_config + captured["prompts"] = prompts + return ra_main.DistilledContext( + summary="ok", + structured_signals=[], + classification={"classified": []}, + metadata={}, + ) + + monkeypatch.setattr(ra_main.registry, "get", _stub_get) + monkeypatch.setattr(ra_main, "run_distillation_pipeline", _stub_run_distillation_pipeline) + + config = ra_main.BackendConfig(backend="dummy", pipeline_mode="staged") + context = ra_main.prepare_pr_context(_sample_pr_data(), config, "staged") + + assert context is not None + assert context.summary == "ok" + + resolve_prompt_dir = getattr(ra_main, "_resolve_prompt_dir") + load_prompts = getattr(ra_main, "_load_prompts") + prompt_dir = resolve_prompt_dir() + expected_extraction = load_prompts( + str(os.path.join(prompt_dir, "extraction.prompt.yml")) + ) + expected_consolidation = load_prompts( + str(os.path.join(prompt_dir, "consolidation.prompt.yml")) + ) + expected_classification = load_prompts( + str(os.path.join(prompt_dir, "classification.prompt.yml")) + ) + expected_ranking = load_prompts(str(os.path.join(prompt_dir, "ranking.prompt.yml"))) + + assert "prompts" in captured + prompts = captured["prompts"] + assert prompts.extraction == expected_extraction + assert prompts.consolidation == expected_consolidation + assert prompts.classification == expected_classification + assert prompts.ranking == expected_ranking + + +def test_prepare_pr_context_logs_elapsed_time_when_staged_pipeline_fails( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + """Log elapsed time on the handled no-context exit path too.""" + + class _StubBackend: + pass + + def _stub_get(_name: str): + return _StubBackend() + + monotonic_values = iter([100.0, 101.25]) + + def _stub_monotonic() -> float: + return next(monotonic_values) + + def _stub_run_distillation_pipeline(**_kwargs): + raise AttributeError("forced preprocessing failure") + + monkeypatch.setattr(ra_main.registry, "get", _stub_get) + monkeypatch.setattr(ra_main, "run_distillation_pipeline", _stub_run_distillation_pipeline) + monkeypatch.setattr(ra_main.time, "monotonic", _stub_monotonic) + + config = ra_main.BackendConfig(backend="dummy", pipeline_mode="staged") + context = ra_main.prepare_pr_context(_sample_pr_data(), config, "staged") + + assert context is None + + output = capsys.readouterr().out + assert "staged.preprocessing.end context=none elapsed=1.2s" in output + + +def test_prepare_pr_context_routes_chat_and_embedding_backends_independently( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Use embedding backend for embeddings ranking and chat backend otherwise.""" + + class _ChatAdapter: + def select_relevant_chunks(self, _chunks, use_embeddings, _ranking_prompts): + if use_embeddings: + raise AssertionError("chat adapter should not handle embeddings ranking") + return [] + + def extract_chunk_signals(self, _chunk, _extraction_prompts): + return [] + + def consolidate_signals(self, _signals, _consolidation_prompts): + return [] + + def classify_signals(self, _signals, _classification_prompts): + return ClassifiedSignals(classified=[], summary="") + + def render_final_context(self, _classified, _metadata): + return ra_main.DistilledContext( + summary="chat", + structured_signals=[], + classification={"classified": []}, + metadata={}, + ) + + def get_adapter_tag(self): + return "ollama" + + class _EmbeddingAdapter(_ChatAdapter): + def select_relevant_chunks(self, _chunks, use_embeddings, _ranking_prompts): + if not use_embeddings: + raise AssertionError("embedding adapter should only handle embeddings ranking") + return [] + + def get_adapter_tag(self): + return "github" + + def _stub_get(name: str): + if name == "ollama": + return _ChatAdapter() + if name == "github": + return _EmbeddingAdapter() + return None + + observed = {"embedding_selected": False, "chat_selected": False} + + def _stub_run_distillation_pipeline(*, adapter, **_kwargs): + adapter.select_relevant_chunks([], True, []) + observed["embedding_selected"] = True + adapter.select_relevant_chunks([], False, []) + observed["chat_selected"] = True + return ra_main.DistilledContext( + summary="ok", + structured_signals=[], + classification={"classified": []}, + metadata={}, + ) + + monkeypatch.setattr(ra_main.registry, "get", _stub_get) + monkeypatch.setattr(ra_main, "run_distillation_pipeline", _stub_run_distillation_pipeline) + + config = ra_main.BackendConfig( + backend="ollama", + chat_model_backend="ollama", + embedding_model_backend="github", + pipeline_mode="staged", + ) + context = ra_main.prepare_pr_context(_sample_pr_data(), config, "staged") + + assert context is not None + assert observed["embedding_selected"] is True + assert observed["chat_selected"] is True + + +@pytest.mark.parametrize("backend", ["ollama", "github", "actions"]) +def test_process_single_pr_staged_calls_prepare_for_all_backends( + backend: str, + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Call staged preprocessing once for every supported staged backend.""" + ann_file = tmp_path / "ReleaseAnnouncement.md" + ann_file.write_text("existing announcement", encoding="utf-8") + + monkeypatch.setattr(ra_main, "_fetch_pr_data", lambda *_args, **_kwargs: _sample_pr_data()) + monkeypatch.setattr(ra_main, "_load_prompt_template", + lambda *_args, **_kwargs: {"messages": []}) + monkeypatch.setattr( + ra_main, + "_build_ai_prompt", + lambda *_args, **_kwargs: { + "messages": [{"role": "user", "content": "prompt"}], + "model": "m", + "modelParameters": {}, + }, + ) + monkeypatch.setattr(ra_main, "_process_with_llm", lambda *_args, **_kwargs: "updated") + monkeypatch.setattr(ra_main, "_write_and_check_announcement", lambda *_args, **_kwargs: None) + monkeypatch.setattr(ra_main, "_check_for_changes", lambda *_args, **_kwargs: "no_changes") + + calls = {"count": 0} + + def _stub_prepare(*_args, **_kwargs): + calls["count"] += 1 + + monkeypatch.setattr(ra_main, "prepare_pr_context", _stub_prepare) + + result = ra_main.process_single_pr( + pr_num=123, + pr_title="Sample PR", + announcement_file=str(ann_file), + prompt_file="unused.yml", + config=ra_main.BackendConfig(backend=backend, pipeline_mode="staged"), + ) + + assert result == "no_changes" + assert calls["count"] == 1 + + +def test_build_ai_prompt_uses_staged_builder_when_context_present() -> None: + """Route staged mode with context through the staged builder path.""" + prompt = { + "messages": [{"role": "system", "content": "sys"}], + "model": "m", + "modelParameters": {}, + } + pr_data = _sample_pr_data() + distilled = ra_main.DistilledContext( + summary="distilled summary", + structured_signals=[{"signal": "a"}], + classification={"classified": []}, + metadata={"stage": "ok"}, + ) + + build_ai_prompt = getattr(ra_main, "_build_ai_prompt") + result = build_ai_prompt( + current_content="seed", + pr_data=pr_data, + prompt_template=prompt, + pipeline_mode="staged", + distilled_context=distilled, + ) + + assert result["messages"][0]["content"] == "sys" + user_content = result["messages"][1]["content"] + assert "Distilled pull request context" in user_content + assert "distilled summary" in user_content + assert "Newly merged pull request" not in user_content + + +def test_build_ai_prompt_uses_legacy_builder_when_context_absent() -> None: + """Route to legacy prompt shape when staged mode has no distilled context.""" + prompt = { + "messages": [{"role": "system", "content": "sys"}], + "model": "m", + "modelParameters": {}, + } + pr_data = _sample_pr_data() + + build_ai_prompt = getattr(ra_main, "_build_ai_prompt") + result = build_ai_prompt( + current_content="seed", + pr_data=pr_data, + prompt_template=prompt, + pipeline_mode="staged", + distilled_context=None, + ) + + user_content = result["messages"][1]["content"] + assert "Newly merged pull request" in user_content + + +def test_has_changelog_skip_matches_hosted_weblate_title() -> None: + """Match the deterministic skip rule for Hosted Weblate translation updates.""" + pr_data = { + "number": 999, + "title": "Translations update from Hosted Weblate", + "body": "", + "comments": [], + "reviews": [], + } + + assert has_changelog_skip(pr_data) is True + + +def test_process_single_pr_skips_hosted_weblate_translation_updates( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Skip Hosted Weblate translation updates without invoking LLM processing.""" + ann_file = tmp_path / "ReleaseAnnouncement.md" + ann_file.write_text("existing announcement", encoding="utf-8") + + pr_data = { + "number": 1000, + "title": "Translations update from Hosted Weblate", + "body": "", + "comments": [], + "reviews": [], + } + + monkeypatch.setattr(ra_main, "_fetch_pr_data", lambda *_args, **_kwargs: pr_data) + + def _fail_if_called(*_args, **_kwargs): + raise AssertionError("LLM path should not run for Hosted Weblate translation updates") + + monkeypatch.setattr(ra_main, "_process_with_llm", _fail_if_called) + + result = ra_main.process_single_pr( + pr_num=1000, + pr_title=pr_data["title"], + announcement_file=str(ann_file), + prompt_file="unused.yml", + config=ra_main.BackendConfig(backend="ollama", pipeline_mode="legacy"), + ) + + assert result == "skipped:Weblate translations" + + +def test_has_changelog_skip_matches_ci_action_version_bump_title() -> None: + """Match deterministic skip rule for GitHub Action version bump PR titles.""" + pr_data = { + "number": 1001, + "title": "Build: Bump actions/upload-artifact from 6 to 7", + "body": "", + "comments": [], + "reviews": [], + } + + assert has_changelog_skip(pr_data) is True + + +def test_has_changelog_skip_matches_ci_action_version_bump_automated_pr_title() -> None: + """Match deterministic skip rule for CI bump titles with automated suffix.""" + pr_data = { + "number": 1003, + "title": "Build: Bump actions/cache from v3 to v4 (Automated PR)", + "body": "", + "comments": [], + "reviews": [], + } + + assert has_changelog_skip(pr_data) is True + + +def test_process_single_pr_skips_ci_action_version_bumps( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Skip CI action version bump PRs without invoking LLM processing.""" + ann_file = tmp_path / "ReleaseAnnouncement.md" + ann_file.write_text("existing announcement", encoding="utf-8") + + pr_data = { + "number": 1002, + "title": "Build: Bump owner/action from old to new", + "body": "", + "comments": [], + "reviews": [], + } + + monkeypatch.setattr(ra_main, "_fetch_pr_data", lambda *_args, **_kwargs: pr_data) + + def _fail_if_called(*_args, **_kwargs): + raise AssertionError("LLM path should not run for CI action version bump PRs") + + monkeypatch.setattr(ra_main, "_process_with_llm", _fail_if_called) + + result = ra_main.process_single_pr( + pr_num=1002, + pr_title=pr_data["title"], + announcement_file=str(ann_file), + prompt_file="unused.yml", + config=ra_main.BackendConfig(backend="ollama", pipeline_mode="legacy"), + ) + + assert result == "skipped:CI action version bump" + + +def test_process_single_pr_skips_ci_action_version_bumps_with_automated_suffix( + tmp_path: Path, + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Skip CI action bump PRs with '(Automated PR)' suffix.""" + ann_file = tmp_path / "ReleaseAnnouncement.md" + ann_file.write_text("existing announcement", encoding="utf-8") + + pr_data = { + "number": 1004, + "title": "Build: Bump owner/action from old to new (Automated PR)", + "body": "", + "comments": [], + "reviews": [], + } + + monkeypatch.setattr(ra_main, "_fetch_pr_data", lambda *_args, **_kwargs: pr_data) + + def _fail_if_called(*_args, **_kwargs): + raise AssertionError("LLM path should not run for CI action version bump PRs") + + monkeypatch.setattr(ra_main, "_process_with_llm", _fail_if_called) + + result = ra_main.process_single_pr( + pr_num=1004, + pr_title=pr_data["title"], + announcement_file=str(ann_file), + prompt_file="unused.yml", + config=ra_main.BackendConfig(backend="ollama", pipeline_mode="legacy"), + ) + + assert result == "skipped:CI action version bump" + + +def test_has_changelog_skip_matches_non_slash_package_bump_title() -> None: + """Match bump titles where the package name contains no owner/ prefix.""" + pr_data = { + "number": 3617, + "title": "Build: Bump ASIO-SDK from asiosdk_2.3.3_2019-06-14 to ASIO-SDK_2.3.4_2025-10-15 (Automated PR)", + "body": "", + "comments": [], + "reviews": [], + } + + assert has_changelog_skip(pr_data) is True diff --git a/tools/release_announcement/tests/test_step2_stub_integration.py b/tools/release_announcement/tests/test_step2_stub_integration.py new file mode 100644 index 0000000000..d6a0676d3f --- /dev/null +++ b/tools/release_announcement/tests/test_step2_stub_integration.py @@ -0,0 +1,229 @@ +"""Integration test for the Step 2 stub pipeline against baseline output.""" + +from __future__ import annotations + +import os +import shutil +import subprocess +from datetime import UTC, datetime +from pathlib import Path + +import pytest + +from tests.cli_invocation import build_release_announcement_cmd + + +REPO_ROOT = Path(__file__).resolve().parents[3] +BASELINE_DIR = REPO_ROOT / ".vscode" / "release-announcement-baseline" + + +class Step2RunContext: + """Runtime context for Step 2 integration command execution.""" + + def __init__( + self, + backend: str, + pr_ref: str, + py_env: dict[str, str], + run_root: Path, + ): + self.backend = backend + self.pr_ref = pr_ref + self.py_env = py_env + self.run_root = run_root + + +def _git_output(*args: str) -> str: + return subprocess.check_output(["git", *args], cwd=REPO_ROOT, text=True).strip() + + +def _run_command(command: list[str], log_file: Path, env: dict[str, str]) -> int: + with log_file.open("w", encoding="utf-8") as handle: + proc = subprocess.run( + command, + cwd=REPO_ROOT, + stdout=handle, + stderr=subprocess.STDOUT, + check=False, + env=env, + text=True, + ) + return proc.returncode + + +def _resolve_github_token_from_gh() -> str | None: + """Return token from gh CLI auth context when available.""" + try: + raw = subprocess.check_output( + ["gh", "auth", "token"], + cwd=REPO_ROOT, + text=True, + stderr=subprocess.STDOUT, + ) + except (FileNotFoundError, subprocess.CalledProcessError): + return None + + token = raw.strip() + return token or None + + +def _prepare_step2_environment() -> dict[str, object]: + backend = os.getenv("RA_STEP2_BACKEND", "github") + pr_ref = os.getenv("RA_STEP2_PR", "pr3429") + strict_output = os.getenv("RA_STRICT_OUTPUT", "0") == "1" + + resolved_token = os.getenv("GH_TOKEN") or os.getenv("GITHUB_TOKEN") + if not resolved_token: + resolved_token = _resolve_github_token_from_gh() + + baseline_case_dir = BASELINE_DIR / f"{backend}__{pr_ref}" + baseline_after = baseline_case_dir / "ReleaseAnnouncement.after.md" + base_commit = _git_output("rev-parse", "HEAD") + run_root = ( + REPO_ROOT + / "build" + / "release_announcement" + / "step2_stub" + / datetime.now(UTC).strftime("%Y%m%d_%H%M%S") + ) + run_root.mkdir(parents=True, exist_ok=True) + + py_env = os.environ.copy() + src_path = REPO_ROOT / "tools" / "release_announcement" / "src" + py_env["PYTHONPATH"] = f"{src_path}:{py_env.get('PYTHONPATH', '')}" + if resolved_token: + py_env["GH_TOKEN"] = resolved_token + py_env["GITHUB_TOKEN"] = resolved_token + + return { + "backend": backend, + "pr_ref": pr_ref, + "strict_output": strict_output, + "resolved_token": resolved_token, + "baseline_case_dir": baseline_case_dir, + "baseline_after": baseline_after, + "base_commit": base_commit, + "run_root": run_root, + "py_env": py_env, + } + + +def _assert_step2_prereqs(env: dict[str, object]) -> None: + backend = str(env["backend"]) + resolved_token = env["resolved_token"] + baseline_after = env["baseline_after"] + + if backend in {"github", "actions"} and not resolved_token: + pytest.skip( + "GitHub backend requires GH_TOKEN/GITHUB_TOKEN or an authenticated " + "`gh auth token` context" + ) + + tracked_dirty = subprocess.run( + ["git", "diff", "--quiet", "--exit-code"], cwd=REPO_ROOT, check=False + ) + if tracked_dirty.returncode != 0: + pytest.skip("Tracked files are dirty; integration test requires clean tracked worktree") + + if isinstance(baseline_after, Path) and not baseline_after.exists(): + pytest.skip(f"Missing baseline artifact: {baseline_after}") + + +def _run_pipeline_case( + context: Step2RunContext, + pipeline: str, + base_commit: str, +) -> tuple[int, str, str, bool]: + label = f"{context.backend}_{context.pr_ref}_{pipeline}" + case_dir = context.run_root / label + case_dir.mkdir(parents=True, exist_ok=True) + + cmd = build_release_announcement_cmd( + backend=context.backend, + pipeline=pipeline, + start=context.pr_ref, + ) + code = _run_command(cmd, case_dir / "stdout-stderr.log", context.py_env) + + after_text = (REPO_ROOT / "ReleaseAnnouncement.md").read_text(encoding="utf-8") + (case_dir / "ReleaseAnnouncement.after.md").write_text(after_text, encoding="utf-8") + diff_text = subprocess.check_output( + ["git", "--no-pager", "diff", "--", "ReleaseAnnouncement.md"], + cwd=REPO_ROOT, + text=True, + ) + (case_dir / "git-diff.txt").write_text(diff_text, encoding="utf-8") + + subprocess.run(["git", "reset", "--hard", base_commit], cwd=REPO_ROOT, check=True) + changed = bool(diff_text.strip()) + return ( + code, + after_text, + (case_dir / "stdout-stderr.log").read_text(encoding="utf-8"), + changed, + ) + + +@pytest.mark.integration +def test_step2_stub_legacy_and_staged_match_baseline_for_one_real_pr() -> None: + """Compare staged and legacy Step 2 output against one saved real-PR baseline.""" + if os.getenv("RA_RUN_STEP2_E2E") != "1": + pytest.skip("Set RA_RUN_STEP2_E2E=1 to run networked Step 2 integration checks") + + env = _prepare_step2_environment() + _assert_step2_prereqs(env) + + try: + context = Step2RunContext( + backend=str(env["backend"]), + pr_ref=str(env["pr_ref"]), + py_env=env["py_env"], + run_root=env["run_root"], + ) + legacy_code, legacy_after, legacy_log, legacy_changed = _run_pipeline_case( + context=context, + pipeline="legacy", + base_commit=str(env["base_commit"]), + ) + staged_code, staged_after, staged_log, staged_changed = _run_pipeline_case( + context=context, + pipeline="staged", + base_commit=str(env["base_commit"]), + ) + + assert legacy_code == 0 + assert staged_code == 0 + + baseline_diff_text = (env["baseline_case_dir"] / "git-diff.txt").read_text( + encoding="utf-8" + ) + baseline_changed = bool(baseline_diff_text.strip()) + + # Default verification is behavioral parity rather than byte-identity. + assert legacy_changed == baseline_changed + assert staged_changed == baseline_changed + assert staged_changed == legacy_changed + + if env["strict_output"]: + baseline_text = env["baseline_after"].read_text(encoding="utf-8") + assert legacy_after == baseline_text + assert staged_after == legacy_after + + assert "staged.chunking.start" in staged_log + assert "staged.extraction.start" in staged_log + assert "staged.consolidation.start" in staged_log + assert "staged.classification.start" in staged_log + assert ( + "staged preprocessing returned no context, falling back to legacy mode" + in staged_log + ) + assert "staged.chunking.start" not in legacy_log + finally: + subprocess.run( + ["git", "reset", "--hard", str(env["base_commit"])], + cwd=REPO_ROOT, + check=False, + ) + if (REPO_ROOT / "ReleaseAnnouncement.md").exists() and not env["baseline_after"].exists(): + shutil.copy2(REPO_ROOT / "ReleaseAnnouncement.md", + env["run_root"] / "ReleaseAnnouncement.last.md") diff --git a/tools/release_announcement/tests/test_step3_cli_capabilities.py b/tools/release_announcement/tests/test_step3_cli_capabilities.py new file mode 100644 index 0000000000..2179674aef --- /dev/null +++ b/tools/release_announcement/tests/test_step3_cli_capabilities.py @@ -0,0 +1,477 @@ +"""Tests CLI backend capability resolution and default model selection behavior.""" + +from __future__ import annotations + +import argparse +from dataclasses import dataclass + +import pytest + +from release_announcement import main as ra_main +from release_announcement.cli_config import build_arg_parser +from release_announcement.registry import ModelNotFoundError + + +def _parse(cli_args: list[str]) -> argparse.Namespace: + parser = build_arg_parser() + return parser.parse_args(cli_args + ["--file", "ReleaseAnnouncement.md", "pr3429"]) + + +def _resolve(cli_args: list[str]) -> ra_main.BackendConfig: + args = _parse(cli_args) + return ra_main.resolve_backend_config(args) + + +def test_defaults_to_ollama_chat_and_no_embedding(monkeypatch: pytest.MonkeyPatch) -> None: + """Use Ollama defaults when no backend or model overrides are provided.""" + monkeypatch.delenv("OLLAMA_MODEL", raising=False) + config = _resolve([]) + + assert config.backend == "ollama" + assert config.chat_model == "mistral-large-3:675b-cloud" + assert config.embedding_model is None + assert config.chat_model_source == "backend-default" + assert config.embedding_model_source == "backend-default" + + +def test_backend_ollama_defaults(monkeypatch: pytest.MonkeyPatch) -> None: + """Resolve Ollama backend defaults when the backend is selected explicitly.""" + monkeypatch.delenv("OLLAMA_MODEL", raising=False) + config = _resolve(["--backend", "ollama"]) + + assert config.chat_model == "mistral-large-3:675b-cloud" + assert config.embedding_model is None + + +def test_backend_github_defaults() -> None: + """Resolve GitHub backend defaults for chat and embedding models.""" + config = _resolve(["--backend", "github"]) + + assert config.chat_model == "openai/gpt-4o" + assert config.embedding_model == "openai/text-embedding-3-small" + + +def test_ollama_chat_model_override(monkeypatch: pytest.MonkeyPatch) -> None: + """Honor an explicit Ollama chat model override.""" + monkeypatch.delenv("OLLAMA_MODEL", raising=False) + config = _resolve(["--backend", "ollama", "--chat-model", "mistral-large"]) + + assert config.chat_model == "mistral-large" + assert config.embedding_model is None + + +def test_ollama_embedding_model_override_with_prefix(monkeypatch: pytest.MonkeyPatch) -> None: + """Strip the Ollama prefix when an embedding override is provided.""" + monkeypatch.delenv("OLLAMA_MODEL", raising=False) + config = _resolve(["--backend", "ollama", "--embedding-model", "ollama/nomic-embed-text"]) + + assert config.embedding_model == "nomic-embed-text" + assert config.chat_model == "mistral-large-3:675b-cloud" + + +def test_github_embedding_model_override_from_ollama_backend( + monkeypatch: pytest.MonkeyPatch) -> None: + """Allow an embedding model override to switch to the GitHub backend.""" + monkeypatch.delenv("OLLAMA_MODEL", raising=False) + config = _resolve( + ["--backend", "ollama", "--embedding-model", "github/text-embedding-3-small"] + ) + + assert config.embedding_model == "text-embedding-3-small" + assert config.chat_model == "mistral-large-3:675b-cloud" + + +def test_github_backend_with_ollama_chat_override() -> None: + """Allow a GitHub backend selection with an Ollama chat model override.""" + config = _resolve(["--backend", "github", "--chat-model", "ollama/mistral-large"]) + + assert config.chat_model == "mistral-large" + assert config.embedding_model == "openai/text-embedding-3-small" + assert config.chat_model_backend == "ollama" + + +def test_no_backend_with_chat_and_embedding_flags() -> None: + """Default the backend to Ollama when only unprefixed model flags are provided.""" + config = _resolve(["--chat-model", "mistral-large", "--embedding-model", "nomic-embed-text"]) + + assert config.backend == "ollama" + assert config.chat_model == "mistral-large" + assert config.embedding_model == "nomic-embed-text" + + +def test_no_backend_with_prefixed_models() -> None: + """Resolve backend ownership from prefixed chat and embedding model values.""" + config = _resolve( + [ + "--chat-model", + "ollama/mistral-large", + "--embedding-model", + "github/text-embedding-3-small", + ] + ) + + assert config.backend == "ollama" + assert config.chat_model == "mistral-large" + assert config.embedding_model == "text-embedding-3-small" + assert config.chat_model_backend == "ollama" + assert config.embedding_model_backend == "github" + + +def test_chat_prefix_only_uses_backend_default() -> None: + """Use the backend default chat model when only a backend prefix is given.""" + config = _resolve(["--chat-model", "github/"]) + + assert config.chat_model == "openai/gpt-4o" + assert config.chat_model_backend == "github" + + +def test_embedding_prefix_only_uses_backend_default() -> None: + """Use the backend default embedding model when only a backend prefix is given.""" + config = _resolve(["--embedding-model", "ollama/"]) + + assert config.embedding_model is None + assert config.embedding_model_backend == "ollama" + + +def test_pipeline_staged_ollama_embedding_model_is_resolved() -> None: + """Resolve the embedding model override in staged Ollama mode.""" + config = _resolve( + [ + "--pipeline", + "staged", + "--backend", + "ollama", + "--embedding-model", + "ollama/nomic-embed-text", + ] + ) + + assert config.embedding_model == "nomic-embed-text" + assert config.embedding_model_backend == "ollama" + + +def test_pipeline_legacy_stores_embedding_model() -> None: + """Preserve the embedding model override in legacy pipeline mode.""" + config = _resolve(["--pipeline", "legacy", "--embedding-model", "nomic-embed-text"]) + + assert config.embedding_model == "nomic-embed-text" + + +def test_staged_mode_valid_with_default_staged_pipeline() -> None: + """Allow --staged-mode when staged is the default pipeline mode.""" + parser = build_arg_parser() + args = parser.parse_args([ + "--staged-mode", + "chat-only", + "--file", + "ReleaseAnnouncement.md", + "pr3429", + ]) + + ra_main.validate_cli_args(parser, args) + + +def test_chat_only_cannot_be_combined_with_embedding_model() -> None: + """Reject embedding models when staged chat-only mode is requested.""" + parser = build_arg_parser() + args = parser.parse_args([ + "--pipeline", + "staged", + "--staged-mode", + "chat-only", + "--embedding-model", + "nomic-embed-text", + "--file", + "ReleaseAnnouncement.md", + "pr3429", + ]) + + with pytest.raises(SystemExit): + ra_main.validate_cli_args(parser, args) + + +def test_chat_only_skips_embedding_probe(monkeypatch: pytest.MonkeyPatch) -> None: + """Skip embedding capability probes when chat-only staged mode is selected.""" + config = _resolve( + ["--pipeline", "staged", "--staged-mode", "chat-only", "--backend", "ollama"] + ) + calls: list[str] = [] + + class _FakeBackend: + def probe_chat(self, _model: str | None) -> bool: + calls.append("chat") + return True + + def probe_embeddings(self, _model: str | None) -> bool: + calls.append("embed") + return False + + monkeypatch.setattr(ra_main.registry, "get", lambda _name: _FakeBackend()) + + caps = ra_main.probe_capabilities(config) + + assert calls == ["chat"] + assert caps.supports_embeddings is False + assert config.capabilities.supports_embeddings is False + + +def test_probe_capabilities_ollama_same_backend_probes_embedding_with_expected_args( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Probe Ollama embeddings in startup probe when chat and embeddings share backend.""" + config = _resolve(["--backend", "ollama", "--embed", "github"]) + calls: list[tuple[str, str | None]] = [] + + class _FakeBackend: + def probe_chat(self, model: str | None) -> bool: + calls.append(("chat", model)) + return True + + def probe_embeddings(self, model: str | None) -> bool: + calls.append(("embed", model)) + return True + + monkeypatch.setattr(ra_main.registry, "get", lambda _name: _FakeBackend()) + + caps = ra_main.probe_capabilities(config) + + assert calls == [ + ("chat", "mistral-large-3:675b-cloud"), + ("embed", "github"), + ] + assert caps.supports_chat is True + assert caps.supports_embeddings is True + + +def test_probe_capabilities_cross_backend_makes_two_calls_with_expected_args( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Probe chat and embeddings separately when model backends differ.""" + config = _resolve( + [ + "--backend", + "ollama", + "--embed", + "github/text-embedding-3-small", + ] + ) + calls: list[tuple[str, str, str | None]] = [] + + class _ChatBackend: + def probe_chat(self, model: str | None) -> bool: + calls.append(("chat", "ollama", model)) + return True + + def probe_embeddings(self, model: str | None) -> bool: + calls.append(("embed", "ollama", model)) + return False + + class _EmbedBackend: + def probe_chat(self, model: str | None) -> bool: + calls.append(("chat", "github", model)) + return False + + def probe_embeddings(self, model: str | None) -> bool: + calls.append(("embed", "github", model)) + return True + + def _get_backend(name: str): + if name == "ollama": + return _ChatBackend() + if name == "github": + return _EmbedBackend() + return None + + monkeypatch.setattr(ra_main.registry, "get", _get_backend) + + caps = ra_main.probe_capabilities(config) + + assert calls == [ + ("chat", "ollama", "mistral-large-3:675b-cloud"), + ("embed", "github", "text-embedding-3-small"), + ] + + assert caps.supports_chat is True + assert caps.supports_embeddings is True + + +@dataclass +class _ProbeError(RuntimeError): + message: str + endpoint: str | None = None + request_payload: dict | None = None + status_code: int | None = None + headers: dict | None = None + body: str | None = None + + def __str__(self) -> str: + return self.message + + +@dataclass +class _GenericProbeError(Exception): + message: str + endpoint: str | None = None + request_payload: dict | None = None + status_code: int | None = None + headers: dict | None = None + body: str | None = None + + def __str__(self) -> str: + return self.message + + +def test_probe_capabilities_chat_failure_raises_runtimeerror_with_full_diagnostics( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Chat probe failures must surface full request/response diagnostics.""" + config = _resolve(["--backend", "github"]) + + class _FailingBackend: + def probe_chat(self, _model: str | None) -> bool: + raise _ProbeError( + "GitHub Models HTTP error", + endpoint="https://models.github.ai/inference/chat/completions", + request_payload={ + "model": "openai/gpt-4o", + "messages": [{"role": "user", "content": "Reply with 'ok'."}], + }, + status_code=403, + headers={"Content-Type": "application/json"}, + body='{"error":"forbidden"}', + ) + + def probe_embeddings(self, _model: str | None) -> bool: + return True + + monkeypatch.setattr(ra_main.registry, "get", lambda _name: _FailingBackend()) + + with pytest.raises(RuntimeError) as exc_info: + ra_main.probe_capabilities(config) + + msg = str(exc_info.value) + assert "phase=chat" in msg + assert "backend=github" in msg + assert "request_payload=" in msg + assert "response_status=403" in msg + assert "response_headers=" in msg + assert "response_body={\"error\":\"forbidden\"}" in msg + + +def test_probe_capabilities_chat_non_runtime_exception_is_wrapped( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Non-RuntimeError backend probe exceptions should still be wrapped with diagnostics.""" + config = _resolve(["--backend", "github"]) + + class _FailingBackend: + def probe_chat(self, _model: str | None) -> bool: + raise _GenericProbeError( + "GitHub Models HTTP error", + endpoint="https://models.github.ai/inference/chat/completions", + request_payload={ + "model": "openai/gpt-4o", + "messages": [{"role": "user", "content": "Reply with 'ok'."}], + }, + status_code=403, + headers={"Content-Type": "application/json"}, + body=( + '{"error":{"message":"Unable to proceed with model usage. ' + 'This account has reached its budget limit."}}' + ), + ) + + def probe_embeddings(self, _model: str | None) -> bool: + return True + + monkeypatch.setattr(ra_main.registry, "get", lambda _name: _FailingBackend()) + + with pytest.raises(RuntimeError) as exc_info: + ra_main.probe_capabilities(config) + + msg = str(exc_info.value) + assert "phase=chat" in msg + assert "backend=github" in msg + assert "request_payload=" in msg + assert "response_status=403" in msg + assert "budget limit" in msg + + +def test_probe_capabilities_embedding_failure_warns_and_falls_back( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + """Embedding probe failures should warn and continue with embeddings disabled.""" + config = _resolve(["--backend", "github", "--embed", "ollama/github"]) + + class _ChatBackend: + def probe_chat(self, _model: str | None) -> bool: + return True + + def probe_embeddings(self, _model: str | None) -> bool: + return False + + class _FailingEmbedBackend: + def probe_chat(self, _model: str | None) -> bool: + return False + + def probe_embeddings(self, _model: str | None) -> bool: + raise _ProbeError( + "embed failed", + endpoint="http://localhost:11434/api/embed", + request_payload={"model": "github", "input": ["test"]}, + status_code=404, + headers={"Content-Type": "application/json"}, + body='{"error":"model not found"}', + ) + + def _get_backend(name: str): + if name == "github": + return _ChatBackend() + if name == "ollama": + return _FailingEmbedBackend() + return None + + monkeypatch.setattr(ra_main.registry, "get", _get_backend) + + caps = ra_main.probe_capabilities(config) + captured = capsys.readouterr() + out = captured.out + captured.err + + assert caps.supports_chat is True + assert caps.supports_embeddings is False + assert "Embedding capability probe failed" in out + assert "phase=embedding" in out + assert "request_payload=" in out + assert "response_status=404" in out + + +def test_probe_capabilities_embedding_model_not_found_warns_and_falls_back( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + """Missing embedding models should warn and continue in chat-only mode.""" + config = _resolve(["--backend", "github", "--embed", "github/foo"]) + + class _BackendWithMissingEmbeddingModel: + def probe_chat(self, _model: str | None) -> bool: + return True + + def probe_embeddings(self, _model: str | None) -> bool: + raise ModelNotFoundError("model 'foo' not found on backend 'github'") + + monkeypatch.setattr( + ra_main.registry, + "get", + lambda _name: _BackendWithMissingEmbeddingModel(), + ) + + caps = ra_main.probe_capabilities(config) + captured = capsys.readouterr() + out = captured.out + captured.err + + assert caps.supports_chat is True + assert caps.supports_embeddings is False + assert "Embedding capability probe failed" in out + assert "phase=embedding" in out + assert "model 'foo' not found on backend 'github'" in out diff --git a/tools/release_announcement/tests/test_step4_process_commit_error_handling.py b/tools/release_announcement/tests/test_step4_process_commit_error_handling.py new file mode 100644 index 0000000000..9bf32a4675 --- /dev/null +++ b/tools/release_announcement/tests/test_step4_process_commit_error_handling.py @@ -0,0 +1,151 @@ +"""Step 4 regression tests for commit-stage error handling and messaging.""" + +from __future__ import annotations + +import argparse +import subprocess +import sys + +import pytest + +from release_announcement import main as ra_main +from release_announcement.cli_config import BackendConfig +from tests.assertions import patch_cli_startup_happy_path + + +def test_process_prs_reports_result_before_git_add_failure( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + """Print process outcome before staging and raise user-facing error on git failure.""" + args = argparse.Namespace( + delay_secs=0, + dry_run=False, + file="/tmp/ra.md", + prompt="unused.prompt.yml", + ) + config = BackendConfig(backend="ollama") + + monkeypatch.setattr(ra_main, "process_single_pr", lambda *_args: "committed") + + def _fake_run(cmd, check, capture_output, text): + del check, capture_output, text + if cmd[:2] == ["git", "add"]: + raise subprocess.CalledProcessError( + 128, + cmd, + stderr="fatal: /tmp/ra.md: '/tmp/ra.md' is outside repository", + ) + return subprocess.CompletedProcess(cmd, 0, "", "") + + monkeypatch.setattr(subprocess, "run", _fake_run) + + process_prs = getattr(ra_main, "_process_prs") + with pytest.raises(RuntimeError) as exc_info: + process_prs( + args, + config, + [{"number": 3502, "title": "Add MIDI GUI tab and learn function"}], + ) + + out = capsys.readouterr().out + assert "Updated release announcement for #3502. Preparing git commit." in out + msg = str(exc_info.value) + assert "Could not stage or commit updates for PR #3502." in msg + assert "Command failed: git add /tmp/ra.md." in msg + assert "outside repository" in msg + + +def test_main_exits_cleanly_on_process_prs_runtime_error( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + """Main should surface processing errors as a single critical line and exit 1.""" + monkeypatch.setattr( + sys, + "argv", + [ + "release_announcement", + "--delay-secs", + "0", + "--file", + "ReleaseAnnouncement.md", + "pr3502", + ], + ) + + patch_cli_startup_happy_path( + monkeypatch, + ra_main, + [{"number": 3502, "title": "Test PR"}], + ) + monkeypatch.setattr( + ra_main, + "_process_prs", + lambda _args, _config, _prs: (_ for _ in ()).throw( + RuntimeError("Could not stage or commit updates for PR #3502.") + ), + ) + + with pytest.raises(SystemExit) as exc_info: + ra_main.main() + + assert exc_info.value.code == 1 + err = capsys.readouterr().err + assert "Could not stage or commit updates for PR #3502." in err + + +def test_process_prs_wraps_process_single_pr_exceptions( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Convert per-PR exceptions into user-facing RuntimeError context.""" + args = argparse.Namespace( + delay_secs=0, + dry_run=False, + file="ReleaseAnnouncement.md", + prompt="unused.prompt.yml", + ) + config = BackendConfig(backend="ollama") + + monkeypatch.setattr( + ra_main, + "process_single_pr", + lambda *_args: (_ for _ in ()).throw(RuntimeError("boom")), + ) + + process_prs = getattr(ra_main, "_process_prs") + with pytest.raises(RuntimeError) as exc_info: + process_prs(args, config, [{"number": 3502, "title": "Test PR"}]) + + assert "Failed while processing PR #3502 (Test PR): boom" in str(exc_info.value) + + +def test_main_exits_cleanly_on_unexpected_startup_error( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + """Unexpected startup exceptions should be reported as a single critical line.""" + monkeypatch.setattr( + sys, + "argv", + [ + "release_announcement", + "--delay-secs", + "0", + "--file", + "ReleaseAnnouncement.md", + "pr3502", + ], + ) + monkeypatch.setattr( + ra_main, + "_setup_backend_token", + lambda _backend: (_ for _ in ()).throw(ValueError("token setup failed")), + ) + + with pytest.raises(SystemExit) as exc_info: + ra_main.main() + + assert exc_info.value.code == 1 + err = capsys.readouterr().err + assert "Unexpected startup error: token setup failed" in err diff --git a/tools/release_announcement/tests/test_step4_startup_probe_ordering.py b/tools/release_announcement/tests/test_step4_startup_probe_ordering.py new file mode 100644 index 0000000000..2bfac70e4f --- /dev/null +++ b/tools/release_announcement/tests/test_step4_startup_probe_ordering.py @@ -0,0 +1,93 @@ +"""Step 4 regression tests for startup capability probe ordering. + +These tests ensure runtime capability probing happens once at startup, before +any PR boundary/timeframe resolution work begins. +""" + +from __future__ import annotations + +import sys + +import pytest + +from release_announcement import main as ra_main +from release_announcement.cli_config import BackendCapabilities + + +def _set_argv(monkeypatch: pytest.MonkeyPatch, extra: list[str] | None = None) -> None: + argv = [ + "release_announcement", + "--file", + "ReleaseAnnouncement.md", + "--delay-secs", + "0", + "pr3502", + ] + if extra: + argv[1:1] = extra + monkeypatch.setattr(sys, "argv", argv) + + +def test_startup_probes_before_timeframe_resolution(monkeypatch: pytest.MonkeyPatch) -> None: + """Run probe/validation before resolving PR boundaries.""" + call_order: list[str] = [] + + _set_argv(monkeypatch) + + monkeypatch.setattr(ra_main, "_setup_backend_token", lambda _backend: None) + + def _probe(_config): + call_order.append("probe") + return BackendCapabilities(supports_chat=True, supports_embeddings=False) + + def _validate(_config): + call_order.append("validate") + + def _resolve_timeframe(_args): + call_order.append("timeframe") + return ( + "pr3502~", + "pr3502", + "2026-01-01T00:00:00+00:00", + "2026-01-02T00:00:00+00:00", + ) + + monkeypatch.setattr(ra_main, "probe_capabilities", _probe) + monkeypatch.setattr(ra_main, "validate_mode", _validate) + monkeypatch.setattr(ra_main, "_resolve_timeframe", _resolve_timeframe) + monkeypatch.setattr(ra_main, "get_ordered_pr_list", lambda _start, _end: []) + + ra_main.main() + + assert call_order == ["probe", "validate", "timeframe"] + + +def test_probe_failure_exits_before_timeframe_resolution(monkeypatch: pytest.MonkeyPatch) -> None: + """Exit immediately on probe failure, before PR resolution starts.""" + _set_argv(monkeypatch, ["--backend", "ollama", "--embed", "github"]) + + monkeypatch.setattr(ra_main, "_setup_backend_token", lambda _backend: None) + + def _raise_probe(_config): + raise RuntimeError("probe failed") + + timeframe_called = False + + def _resolve_timeframe(_args): + nonlocal timeframe_called + timeframe_called = True + return ( + "pr3502~", + "pr3502", + "2026-01-01T00:00:00+00:00", + "2026-01-02T00:00:00+00:00", + ) + + monkeypatch.setattr(ra_main, "probe_capabilities", _raise_probe) + monkeypatch.setattr(ra_main, "_resolve_timeframe", _resolve_timeframe) + + with pytest.raises(SystemExit) as exc: + ra_main.main() + + assert exc.value.code == 1 + assert timeframe_called is False diff --git a/tools/release_announcement/tests/test_step8_output_parity.py b/tools/release_announcement/tests/test_step8_output_parity.py new file mode 100644 index 0000000000..fccfc963a4 --- /dev/null +++ b/tools/release_announcement/tests/test_step8_output_parity.py @@ -0,0 +1,83 @@ +"""Step 8 output parity tests using CLI entrypoint invocation.""" + +from __future__ import annotations + +import sys +from pathlib import Path + +import pytest + +from release_announcement import main as ra_main +from tests import dummy_backend +from tests.assertions import patch_cli_startup_happy_path + + +def _invoke_main_with_args( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, + pipeline: str, +) -> bytes: + """Import modules, then execute CLI main with argv and return output bytes.""" + dummy_backend.register_dummy_backend() + + announcement_path = tmp_path / f"ReleaseAnnouncement.{pipeline}.md" + announcement_path.write_text("seed\n", encoding="utf-8") + + prompt_path = tmp_path / "release-announcement.prompt.yml" + prompt_path.write_text( + "messages:\n" + " - role: system\n" + " content: test-system-prompt\n", + encoding="utf-8", + ) + + sample_pr_data = { + "number": 3429, + "title": "Test PR", + "body": "Test body", + "comments": ["Comment 1", "Comment 2"], + "reviews": [], + } + + patch_cli_startup_happy_path( + monkeypatch, + ra_main, + [{"number": 3429, "title": "Test PR"}], + ) + monkeypatch.setattr(ra_main, "_fetch_pr_data", lambda _pr_num, _config: sample_pr_data) + monkeypatch.setattr( + ra_main, + "_process_with_llm", + lambda _ai_prompt, _config: "Generated release notes\n", + ) + monkeypatch.setattr(ra_main, "_check_for_changes", lambda _path: "no_changes") + + argv = [ + "release_announcement", + "--backend", + "dummy", + "--pipeline", + pipeline, + "--delay-secs", + "0", + "--file", + str(announcement_path), + "--prompt", + str(prompt_path), + "pr3429", + ] + monkeypatch.setattr(sys, "argv", argv) + + ra_main.main() + return announcement_path.read_bytes() + + +def test_entrypoint_legacy_vs_staged_are_byte_identical( + monkeypatch: pytest.MonkeyPatch, + tmp_path: Path, +) -> None: + """Legacy and staged entrypoint runs should emit identical announcement bytes.""" + legacy_bytes = _invoke_main_with_args(monkeypatch, tmp_path, pipeline="legacy") + staged_bytes = _invoke_main_with_args(monkeypatch, tmp_path, pipeline="staged") + + assert legacy_bytes == staged_bytes diff --git a/tools/update-release-announcement.sh b/tools/update-release-announcement.sh new file mode 100755 index 0000000000..25e5ab94e3 --- /dev/null +++ b/tools/update-release-announcement.sh @@ -0,0 +1,43 @@ +#!/bin/bash +############################################################################## +# Copyright (c) 2026 +# +# Author(s): +# The Jamulus Development Team +# +############################################################################## +# +# This program is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free Software +# Foundation; either version 2 of the License, or (at your option) any later +# version. +# +# This program is distributed in the hope that it will be useful, but WITHOUT +# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along with +# this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +# +############################################################################## + +set -eu -o pipefail + +for required_path in ".git" "tools/release_announcement/prompts" "tools/release_announcement"; do + if [[ ! -e "$required_path" ]]; then + echo "Error: expected '$required_path' in current working directory '$PWD'." >&2 + echo "Run this script from the repository root." >&2 + exit 1 + fi +done + +python3 -m venv /tmp/release-annoucement.venv +# This is the python venv, no point running shellcheck on it +# shellcheck disable=SC1091 +source /tmp/release-annoucement.venv/bin/activate +DELAY_SECS="${DELAY_SECS:-5}" +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +python3 -m pip install -q "${SCRIPT_DIR}/release_announcement" +RELEASE_ANNOUNCEMENT_PROG="$(basename "${BASH_SOURCE[0]}")" python3 -m release_announcement --delay-secs "$DELAY_SECS" "$@"