diff --git a/.cargo/config.toml b/.cargo/config.toml index e0e3f5bef..23928ed63 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -1,8 +1,19 @@ -[alias] -test_details = ["test", "--target", "aarch64-apple-darwin"] +# No global [build] target — the workspace contains adapters for multiple targets: +# trusted-server-adapter-fastly → wasm32-wasip1 (Fastly Compute) +# trusted-server-adapter-axum → native (dev server) +# Future: trusted-server-adapter-cloudflare → wasm32-unknown-unknown +# +# Both adapters are workspace members so `-p` resolves both. +# default-members = [fastly] — required so Viceroy can locate the binary via `cargo run --bin`. +# Use the aliases below to target each adapter with the correct toolchain. -[build] -target = "wasm32-wasip1" +[alias] +# Fastly adapter + shared crates (wasm32-wasip1 via Viceroy) +test-fastly = ["test", "--workspace", "--exclude", "trusted-server-adapter-axum", "--target", "wasm32-wasip1"] +# Axum dev server adapter (native) +test-axum = ["test", "-p", "trusted-server-adapter-axum"] +# CI convenience — runs both in sequence (cargo aliases can't chain commands; use a script or CI steps) +# cargo test-fastly && cargo test-axum [target.'cfg(all(target_arch = "wasm32"))'] runner = "viceroy run -C ../../fastly.toml -- " diff --git a/.claude/agents/issue-creator.md b/.claude/agents/issue-creator.md index ed99dd373..f3f1d4052 100644 --- a/.claude/agents/issue-creator.md +++ b/.claude/agents/issue-creator.md @@ -79,7 +79,7 @@ Output the issue URL and type. - Use issue **types**, not labels, for categorization. - Every issue should have clear done-when / acceptance criteria. - Use the affected area dropdown values from the templates: - - Core (synthetic IDs, cookies, GDPR) + - Core (Edge Cookies, GDPR) - Integrations (prebid, lockr, permutive, etc.) - HTML processing / JS injection - Ad serving (Equativ) diff --git a/.claude/agents/repo-explorer.md b/.claude/agents/repo-explorer.md index 570686f8e..98a3ef09f 100644 --- a/.claude/agents/repo-explorer.md +++ b/.claude/agents/repo-explorer.md @@ -11,7 +11,7 @@ implementation details. This is a Rust workspace with three crates: -- `crates/trusted-server-core/` — core library (integrations, HTML processing, synthetic IDs, GDPR) +- `crates/trusted-server-core/` — core library (integrations, HTML processing, Edge Cookies, GDPR) - `crates/trusted-server-adapter-fastly/` — Fastly Compute entry point - `crates/js/` — TypeScript/JS build pipeline (per-integration IIFE bundles) diff --git a/.github/actions/setup-integration-test-env/action.yml b/.github/actions/setup-integration-test-env/action.yml index d44858f35..bd87d493e 100644 --- a/.github/actions/setup-integration-test-env/action.yml +++ b/.github/actions/setup-integration-test-env/action.yml @@ -17,6 +17,10 @@ inputs: description: Build the trusted-server WASM binary for integration tests. required: false default: "true" + build-axum: + description: Build the trusted-server-axum native binary for integration tests. + required: false + default: "true" build-test-images: description: Build the framework Docker images used by integration tests. required: false @@ -45,6 +49,15 @@ runs: shell: bash run: echo "node-version=$(grep '^nodejs ' .tool-versions | awk '{print $2}')" >> "$GITHUB_OUTPUT" + - name: Retrieve Viceroy version + id: viceroy-version + if: ${{ inputs.install-viceroy == 'true' }} + shell: bash + # `.tool-versions` is the single source of truth for the Viceroy pin.
+ # The pin matters because upstream Viceroy > v0.16.4 has bumped MSRV + # beyond the rustc pin in `rust-toolchain.toml`. + run: echo "viceroy-version=$(grep '^viceroy ' .tool-versions | awk '{print $2}')" >> "$GITHUB_OUTPUT" + - name: Set up Rust toolchain uses: actions-rust-lang/setup-rust-toolchain@v1 with: @@ -58,12 +71,12 @@ runs: uses: actions/cache@v4 with: path: ~/.cargo/bin/viceroy - key: viceroy-${{ runner.os }}-v0.16.4 + key: viceroy-${{ runner.os }}-v${{ steps.viceroy-version.outputs.viceroy-version }} - name: Install Viceroy if: ${{ inputs.install-viceroy == 'true' && steps.cache-viceroy.outputs.cache-hit != 'true' }} shell: bash - run: cargo install --git https://github.com/fastly/Viceroy --tag v0.16.4 viceroy + run: cargo install --git https://github.com/fastly/Viceroy --tag v${{ steps.viceroy-version.outputs.viceroy-version }} viceroy - name: Build WASM binary if: ${{ inputs.build-wasm == 'true' }} @@ -71,10 +84,20 @@ runs: env: TRUSTED_SERVER__PUBLISHER__ORIGIN_URL: http://127.0.0.1:${{ inputs.origin-port }} TRUSTED_SERVER__PUBLISHER__PROXY_SECRET: integration-test-proxy-secret - TRUSTED_SERVER__SYNTHETIC__SECRET_KEY: integration-test-secret-key + TRUSTED_SERVER__EDGE_COOKIE__SECRET_KEY: integration-test-secret-key TRUSTED_SERVER__PROXY__CERTIFICATE_CHECK: "false" run: cargo build --package trusted-server-adapter-fastly --release --target wasm32-wasip1 + - name: Build Axum native binary + if: ${{ inputs.build-axum == 'true' }} + shell: bash + env: + TRUSTED_SERVER__PUBLISHER__ORIGIN_URL: http://127.0.0.1:${{ inputs.origin-port }} + TRUSTED_SERVER__PUBLISHER__PROXY_SECRET: integration-test-proxy-secret + TRUSTED_SERVER__EDGE_COOKIE__SECRET_KEY: integration-test-secret-key + TRUSTED_SERVER__PROXY__CERTIFICATE_CHECK: "false" + run: cargo build -p trusted-server-adapter-axum + - name: Build WordPress test container if: ${{ inputs.build-test-images == 'true' }} shell: bash diff --git a/.github/workflows/integration-tests.yml b/.github/workflows/integration-tests.yml index da467583c..2c570ef31 100644 --- a/.github/workflows/integration-tests.yml +++ b/.github/workflows/integration-tests.yml @@ -14,6 +14,7 @@ env: ORIGIN_PORT: 8888 ARTIFACTS_DIR: /tmp/integration-test-artifacts WASM_ARTIFACT_PATH: /tmp/integration-test-artifacts/wasm/trusted-server-adapter-fastly.wasm + AXUM_ARTIFACT_PATH: /tmp/integration-test-artifacts/axum/trusted-server-axum DOCKER_ARTIFACT_PATH: /tmp/integration-test-artifacts/docker/test-images.tar jobs: @@ -32,8 +33,9 @@ jobs: - name: Package integration test artifacts run: | - mkdir -p "$(dirname "$WASM_ARTIFACT_PATH")" "$(dirname "$DOCKER_ARTIFACT_PATH")" + mkdir -p "$(dirname "$WASM_ARTIFACT_PATH")" "$(dirname "$AXUM_ARTIFACT_PATH")" "$(dirname "$DOCKER_ARTIFACT_PATH")" cp target/wasm32-wasip1/release/trusted-server-adapter-fastly.wasm "$WASM_ARTIFACT_PATH" + cp target/debug/trusted-server-axum "$AXUM_ARTIFACT_PATH" docker save \ --output "$DOCKER_ARTIFACT_PATH" \ test-wordpress:latest test-nextjs:latest @@ -69,6 +71,9 @@ jobs: name: integration-test-artifacts path: ${{ env.ARTIFACTS_DIR }} + - name: Make binaries executable + run: chmod +x "$AXUM_ARTIFACT_PATH" + - name: Load integration test Docker images run: docker load --input "$DOCKER_ARTIFACT_PATH" @@ -80,6 +85,7 @@ jobs: -- --include-ignored --skip test_wordpress_fastly --skip test_nextjs_fastly --test-threads=1 env: WASM_BINARY_PATH: ${{ env.WASM_ARTIFACT_PATH }} + AXUM_BINARY_PATH: ${{ env.AXUM_ARTIFACT_PATH }} INTEGRATION_ORIGIN_PORT: ${{ env.ORIGIN_PORT }} RUST_LOG: info diff --git
a/.github/workflows/test.yml b/.github/workflows/test.yml index 5eea36a74..d27ded915 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -21,6 +21,15 @@ jobs: run: echo "rust-version=$(grep '^rust ' .tool-versions | awk '{print $2}')" >> $GITHUB_OUTPUT shell: bash + - name: Retrieve Viceroy version + id: viceroy-version + # `.tool-versions` is the single source of truth so this workflow and + # `.github/actions/setup-integration-test-env/action.yml` can't drift. + # The pin matters because upstream Viceroy > v0.16.4 has bumped MSRV + # beyond the rustc pin in `rust-toolchain.toml`. + run: echo "viceroy-version=$(grep '^viceroy ' .tool-versions | awk '{print $2}')" >> $GITHUB_OUTPUT + shell: bash + - name: Set up Rust toolchain uses: actions-rust-lang/setup-rust-toolchain@v1 with: @@ -28,23 +37,42 @@ jobs: target: wasm32-wasip1 cache-shared-key: cargo-${{ runner.os }} - - name: Get Viceroy cache key - id: viceroy-rev - run: echo "sha=$(git ls-remote https://github.com/fastly/Viceroy HEAD | cut -f1)" >> $GITHUB_OUTPUT - - name: Cache Viceroy binary id: cache-viceroy uses: actions/cache@v4 with: path: ~/.cargo/bin/viceroy - key: viceroy-${{ runner.os }}-${{ steps.viceroy-rev.outputs.sha }} + key: viceroy-${{ runner.os }}-v${{ steps.viceroy-version.outputs.viceroy-version }} - - name: Install Viceroy (from main since 0.14.3 is broken) + - name: Install Viceroy if: steps.cache-viceroy.outputs.cache-hit != 'true' - run: cargo install --git https://github.com/fastly/Viceroy viceroy + run: cargo install --git https://github.com/fastly/Viceroy --tag v${{ steps.viceroy-version.outputs.viceroy-version }} viceroy - name: Run tests - run: cargo test --workspace + run: cargo test-fastly + + test-axum: + name: cargo test (axum native) + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Retrieve Rust version + id: rust-version + run: echo "rust-version=$(grep '^rust ' .tool-versions | awk '{print $2}')" >> $GITHUB_OUTPUT + shell: bash + + - name: Set up Rust toolchain + uses: actions-rust-lang/setup-rust-toolchain@v1 + with: + toolchain: ${{ steps.rust-version.outputs.rust-version }} + cache-shared-key: cargo-${{ runner.os }} + + - name: Build Axum adapter + run: cargo build -p trusted-server-adapter-axum + + - name: Run Axum adapter tests + run: cargo test-axum test-typescript: name: vitest diff --git a/.gitignore b/.gitignore index af70c452a..e9e08c452 100644 --- a/.gitignore +++ b/.gitignore @@ -2,6 +2,9 @@ /bin /pkg /target + +# EdgeZero local KV store (created by axum dev server) +.edgezero/ /crates/integration-tests/target # env diff --git a/.tool-versions b/.tool-versions index 0828da5bc..8d8751b80 100644 --- a/.tool-versions +++ b/.tool-versions @@ -1,3 +1,4 @@ fastly 13.3.0 rust 1.91.1 nodejs 24.12.0 +viceroy 0.16.4 diff --git a/AGENTS.md b/AGENTS.md index bcbcd179f..8e2df32c5 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -17,7 +17,10 @@ If you cannot read `CLAUDE.md`, follow these rules: 1. Present a plan and get approval before coding. 2. Keep changes minimal — do not refactor unrelated code. -3. Run `cargo test --workspace` after every code change. +3. Run tests after every code change — use the workspace aliases defined in `.cargo/config.toml`: + - `cargo test-fastly` — Fastly adapter + core (wasm32-wasip1 via Viceroy) + - `cargo test-axum` — Axum dev server adapter (native) + Do NOT use bare `cargo test --workspace` — it will attempt to compile the Fastly adapter for the host target. 4. 
Run `cargo fmt --all -- --check` and `cargo clippy --workspace --all-targets --all-features -- -D warnings`. 5. Run JS tests with `cd crates/js/lib && npx vitest run` when touching JS/TS code. 6. Use `error-stack` (`Report`) for error handling — not anyhow, eyre, or thiserror. diff --git a/CLAUDE.md b/CLAUDE.md index 329576491..5d988621e 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -6,7 +6,7 @@ ## Project Overview Rust-based edge computing application targeting **Fastly Compute**. Handles -privacy-preserving synthetic ID generation, ad serving with GDPR compliance, +privacy-preserving Edge Cookie (EC) ID generation, ad serving with GDPR compliance, real-time bidding integration, and publisher-side JavaScript injection. ## Workspace Layout @@ -15,6 +15,7 @@ real-time bidding integration, and publisher-side JavaScript injection. crates/ trusted-server-core/ # Core library — shared logic, integrations, HTML processing trusted-server-adapter-fastly/ # Fastly Compute entry point (wasm32-wasip1 binary) + trusted-server-adapter-axum/ # Axum dev server entry point (native binary) js/ # TypeScript/JS build — per-integration IIFE bundles lib/ # TS source, Vitest tests, esbuild pipeline ``` @@ -49,13 +50,21 @@ fastly compute serve # Deploy to Fastly fastly compute publish + +# Run Axum dev server (native — no Viceroy) +cargo run -p trusted-server-adapter-axum + +# Test Axum adapter only +cargo test-axum ``` ### Testing & Quality ```bash -# Run all Rust tests (uses viceroy) -cargo test --workspace +# Run all Rust tests — use workspace aliases (see .cargo/config.toml) +# default-members = [fastly] so Viceroy can locate the binary via `cargo run --bin`. +cargo test-fastly # Fastly adapter + core (wasm32-wasip1 via Viceroy) +cargo test-axum # Axum dev server adapter (native) # Format cargo fmt --all -- --check @@ -366,7 +375,7 @@ both runtime behavior and build/tooling changes. | `crates/trusted-server-core/src/tsjs.rs` | Script tag generation with module IDs | | `crates/trusted-server-core/src/html_processor.rs` | Injects `"#; + + let mut output = Vec::new(); + pipeline + .process(Cursor::new(html_input.as_bytes()), &mut output) + .expect("should process with small chunks"); + let processed = String::from_utf8_lossy(&output); + + assert!( + processed.contains("/integrations/google_tag_manager/gtm.js"), + "should rewrite fragmented GTM URL. Got: {processed}" + ); + assert!( + !processed.contains("googletagmanager.com"), + "should not contain original GTM domain. Got: {processed}" + ); + } + + /// Regression test for the overlapping-rewriter bug: when both the GTM and + /// Next.js integrations are enabled and a `"#; + + let mut output = Vec::new(); + pipeline + .process(Cursor::new(html_input.as_bytes()), &mut output) + .expect("should process with small chunks"); + let processed = String::from_utf8_lossy(&output); + + assert!( + processed.contains("test.example.com") && processed.contains("/reviews"), + "Next.js rewrite must survive when GTM is also enabled. Got: {processed}" + ); + assert!( + !processed.contains("origin.example.com/reviews"), + "origin host must not leak through. Got: {processed}" + ); + } + + /// Regression test for PR #618 P1: fragmented `__NEXT_DATA__` where an + /// intermediate fragment boundary lands on a short tail like `g` (which + /// used to be treated as a plausible GTM prefix) must NOT trigger GTM + /// accumulation. Otherwise GTM would claim the script and overwrite + /// `NextJs`'s URL rewrite with an unchanged Replace. 
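+    ///
+    /// Old failure in miniature (hypothetical fragment values): a fragment
+    /// ending in `…"slug":"thing` has a tail ending in `g`; the old check
+    /// accepted any non-empty marker prefix, so GTM buffered the script and
+    /// later emitted its unchanged Replace over the NextJs rewrite.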
+ /// + /// The payload is crafted so a 32-byte chunk boundary lands at the end + /// of a word ending in `g` ("config"/"img"/"slug"/"thing"), and the + /// rewritable origin URL appears later in the payload. + #[test] + fn fragmented_next_data_with_trailing_g_survives_gtm() { + use crate::streaming_processor::{Compression, PipelineConfig, StreamingPipeline}; + use std::io::Cursor; + + let mut settings = make_settings(); + settings + .integrations + .insert_config( + "google_tag_manager", + &serde_json::json!({ + "enabled": true, + "container_id": "GTM-GTAIL1" + }), + ) + .expect("should update gtm config"); + settings + .integrations + .insert_config( + "nextjs", + &serde_json::json!({ + "enabled": true, + "rewrite_attributes": ["href", "link", "url"], + }), + ) + .expect("should update nextjs config"); + + let registry = IntegrationRegistry::new(&settings).expect("should create registry"); + let config = config_from_settings(&settings, ®istry); + let processor = create_html_processor(config); + + // chunk_size=32 with this payload produces fragments whose tails + // include "config", "img", "slug", and "thing" — all ending in `g`. + let pipeline_config = PipelineConfig { + input_compression: Compression::None, + output_compression: Compression::None, + chunk_size: 32, + }; + let mut pipeline = StreamingPipeline::new(pipeline_config, processor); + + let html_input = r#""#; + + let mut output = Vec::new(); + pipeline + .process(Cursor::new(html_input.as_bytes()), &mut output) + .expect("should process with small chunks"); + let processed = String::from_utf8_lossy(&output); + + assert!( + processed.contains("test.example.com") && processed.contains("/reviews"), + "Next.js rewrite must survive when fragments end in short `g`-tails. Got: {processed}" + ); + assert!( + !processed.contains("origin.example.com/reviews"), + "origin host must not leak through. Got: {processed}" + ); + } + + #[test] + fn might_contain_gtm_prefix_detects_full_match_and_boundary_prefix() { + // Full marker present. + assert!(might_contain_gtm_prefix("xxx googletagmanager.com yyy")); + assert!(might_contain_gtm_prefix("x google-analytics.com")); + + // Boundary: text ends with a proper prefix of length ≥ GTM_MIN_PREFIX_LEN. + // "google" itself (6 bytes) is the shortest accepted trailing prefix. + assert!(might_contain_gtm_prefix("src='https://www.google")); + assert!(might_contain_gtm_prefix("src='https://www.googletag")); + assert!(might_contain_gtm_prefix( + "src='https://www.googletagmanager" + )); + } + + #[test] + fn might_contain_gtm_prefix_rejects_short_ambiguous_tails() { + // Short tails (< GTM_MIN_PREFIX_LEN) are ambiguous with ordinary + // English or minified tokens and must NOT engage GTM accumulation. + // Previously these returned true because any non-empty prefix of a + // marker was accepted, which let GTM claim and clobber fragments + // from overlapping script rewriters (see PR #618 P1). 
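+        // GTM_MIN_PREFIX_LEN is 6 (the length of `google`), so each of these
+        // tails is either shorter than that or overlaps no marker at all.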
+ for text in [ + "x", // "g"-less + "img", // ends in 'g' + "slug", // ends in 'g' + "config", // ends in 'g' + "thing", // ends in 'g' + "y go", // ends in 'go' + "xgoo", // ends in 'goo' + "xgoog", // ends in 'goog' + "xgoogl", // ends in 'googl' + "console.log('hi');", // no tail match at all + "", + "}", + ] { + assert!( + !might_contain_gtm_prefix(text), + "`{text}` should not engage GTM accumulation" + ); + } + } } diff --git a/crates/trusted-server-core/src/integrations/gpt.rs b/crates/trusted-server-core/src/integrations/gpt.rs index 818eeda8d..dab62d4d8 100644 --- a/crates/trusted-server-core/src/integrations/gpt.rs +++ b/crates/trusted-server-core/src/integrations/gpt.rs @@ -133,7 +133,7 @@ impl GptIntegration { .with_streaming() .without_forward_headers(); config.follow_redirects = false; - config.forward_synthetic_id = false; + config.forward_ec_id = false; Self::apply_request_header_allowlist(config, req) } @@ -660,7 +660,7 @@ mod tests { // -- GPT proxy configuration -- #[test] - fn build_proxy_config_uses_streaming_without_synthetic_forwarding_or_redirects() { + fn build_proxy_config_uses_streaming_without_ec_forwarding_or_redirects() { let req = build_http_request( Method::GET, "https://edge.example.com/integrations/gpt/script", @@ -675,8 +675,8 @@ mod tests { "should stream GPT assets verbatim without rewrite processing" ); assert!( - !config.forward_synthetic_id, - "should not append synthetic_id to GPT asset requests" + !config.forward_ec_id, + "should not append EC ID to GPT asset requests" ); assert!( !config.follow_redirects, diff --git a/crates/trusted-server-core/src/integrations/mod.rs b/crates/trusted-server-core/src/integrations/mod.rs index a22b2fdbf..d925813b8 100644 --- a/crates/trusted-server-core/src/integrations/mod.rs +++ b/crates/trusted-server-core/src/integrations/mod.rs @@ -47,28 +47,14 @@ pub(crate) fn ensure_integration_backend( url: &str, integration: &'static str, ) -> Result> { - let parsed = Url::parse(url).change_context(TrustedServerError::Integration { - integration: integration.to_string(), - message: "Invalid upstream URL".to_string(), - })?; - services .backend() - .ensure(&PlatformBackendSpec { - scheme: parsed.scheme().to_string(), - host: parsed - .host_str() - .ok_or_else(|| { - Report::new(TrustedServerError::Integration { - integration: integration.to_string(), - message: "Upstream URL missing host".to_string(), - }) - })? - .to_string(), - port: parsed.port(), - certificate_check: true, - first_byte_timeout: std::time::Duration::from_secs(15), - }) + .ensure(&integration_backend_spec( + url, + integration, + true, + std::time::Duration::from_secs(15), + )?) .change_context(TrustedServerError::Integration { integration: integration.to_string(), message: "Failed to register backend".to_string(), @@ -92,28 +78,14 @@ pub(crate) fn ensure_integration_backend_with_timeout( integration: &'static str, first_byte_timeout: Duration, ) -> Result> { - let parsed = Url::parse(url).change_context(TrustedServerError::Integration { - integration: integration.to_string(), - message: "Invalid upstream URL".to_string(), - })?; - services .backend() - .ensure(&PlatformBackendSpec { - scheme: parsed.scheme().to_string(), - host: parsed - .host_str() - .ok_or_else(|| { - Report::new(TrustedServerError::Integration { - integration: integration.to_string(), - message: "Upstream URL missing host".to_string(), - }) - })? 
- .to_string(), - port: parsed.port(), - certificate_check: true, + .ensure(&integration_backend_spec( + url, + integration, + true, first_byte_timeout, - }) + )?) .change_context(TrustedServerError::Integration { integration: integration.to_string(), message: "Failed to register backend".to_string(), @@ -122,36 +94,59 @@ pub(crate) fn ensure_integration_backend_with_timeout( /// Compute the deterministic backend name for a URL without registering a backend. /// -/// Uses the same naming convention as [`crate::platform::PlatformBackend::predict_name`]: -/// `backend_{scheme}_{host}_{port}{cert_suffix}_t{timeout_ms}` with `.` and `:` -/// replaced by `_`. +/// Parses `url`, builds a [`PlatformBackendSpec`], and delegates to +/// [`crate::platform::PlatformBackend::predict_name`]. +/// +/// # Errors /// -/// Returns `None` when the URL cannot be parsed or is missing a host. -pub(crate) fn predict_backend_name_for_url( +/// Returns an error when the URL cannot be parsed, is missing a host, or the +/// platform backend cannot predict a name for the spec. +pub(crate) fn predict_integration_backend_name( + services: &RuntimeServices, url: &str, + integration: &'static str, certificate_check: bool, first_byte_timeout: Duration, -) -> Option { - let parsed = Url::parse(url).ok()?; - let scheme = parsed.scheme(); - let host = parsed.host_str()?; - - let default_port = if scheme.eq_ignore_ascii_case("https") { - 443u16 - } else { - 80u16 - }; - let port = parsed.port().unwrap_or(default_port); +) -> Result> { + services + .backend() + .predict_name(&integration_backend_spec( + url, + integration, + certificate_check, + first_byte_timeout, + )?) + .change_context(TrustedServerError::Integration { + integration: integration.to_string(), + message: "Failed to predict backend name".to_string(), + }) +} - let name_base = format!("{}_{}_{}", scheme, host, port); - let cert_suffix = if certificate_check { "" } else { "_nocert" }; - let timeout_ms = first_byte_timeout.as_millis(); - Some(format!( - "backend_{}{}_t{}", - name_base.replace(['.', ':'], "_"), - cert_suffix, - timeout_ms - )) +fn integration_backend_spec( + url: &str, + integration: &'static str, + certificate_check: bool, + first_byte_timeout: Duration, +) -> Result> { + let parsed = Url::parse(url).change_context(TrustedServerError::Integration { + integration: integration.to_string(), + message: format!("Invalid upstream URL: {url}"), + })?; + Ok(PlatformBackendSpec { + scheme: parsed.scheme().to_string(), + host: parsed + .host_str() + .ok_or_else(|| { + Report::new(TrustedServerError::Integration { + integration: integration.to_string(), + message: "Upstream URL missing host".to_string(), + }) + })? + .to_string(), + port: parsed.port(), + certificate_check, + first_byte_timeout, + }) } /// Maximum body size accepted by integration proxy endpoints (256 KiB). diff --git a/crates/trusted-server-core/src/integrations/nextjs/mod.rs b/crates/trusted-server-core/src/integrations/nextjs/mod.rs index 502444388..6414284d0 100644 --- a/crates/trusted-server-core/src/integrations/nextjs/mod.rs +++ b/crates/trusted-server-core/src/integrations/nextjs/mod.rs @@ -599,4 +599,128 @@ mod tests { final_html ); } + + /// Regression test: with a small chunk size, `lol_html` fragments the + /// `__NEXT_DATA__` text node across chunks. The rewriter must accumulate + /// fragments and produce correct output. + #[test] + fn small_chunk_next_data_rewrite_survives_fragmentation() { + // Build a __NEXT_DATA__ payload large enough to cross a 32-byte chunk boundary. 
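+        // chunk_size is 32 in the PipelineConfig below, so lol_html sees the
+        // document 32 bytes at a time and delivers the __NEXT_DATA__ text
+        // node to the rewriter as multiple fragments.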
+ let html = r#""#; + + let mut settings = create_test_settings(); + settings + .integrations + .insert_config( + "nextjs", + &json!({ + "enabled": true, + "rewrite_attributes": ["href", "link", "url"], + }), + ) + .expect("should update nextjs config"); + let registry = IntegrationRegistry::new(&settings).expect("should create registry"); + let config = config_from_settings(&settings, ®istry); + let processor = create_html_processor(config); + + // Use a very small chunk size to force fragmentation. + let pipeline_config = PipelineConfig { + input_compression: Compression::None, + output_compression: Compression::None, + chunk_size: 32, + }; + let mut pipeline = StreamingPipeline::new(pipeline_config, processor); + + let mut output = Vec::new(); + pipeline + .process(Cursor::new(html.as_bytes()), &mut output) + .expect("should process with small chunks"); + + let processed = String::from_utf8_lossy(&output); + assert!( + processed.contains("test.example.com") && processed.contains("/reviews"), + "should rewrite fragmented __NEXT_DATA__ href. Got: {processed}" + ); + assert!( + !processed.contains("origin.example.com/reviews"), + "should not contain original origin href. Got: {processed}" + ); + assert!( + processed.contains("Hello World"), + "should preserve non-URL content. Got: {processed}" + ); + } + + /// Regression test: a fragmented `self.__next_f.push([1, "…"])` RSC script + /// must still have its origin URLs rewritten after going through the full + /// streaming pipeline into the accumulating post-processor. Exercises the + /// "fallback" branch of `NextJsHtmlPostProcessor` where no placeholders + /// were captured during streaming (because every fragment returned `Keep` + /// on `!is_last`) and `post_process_rsc_html_in_place_with_limit` has to + /// re-parse the accumulated HTML to find RSC push scripts. + #[test] + fn small_chunk_rsc_push_survives_fragmentation_via_post_processor_fallback() { + // Build an RSC push script whose payload contains multiple origin URLs. + // With chunk_size = 128, this script's text node will be fragmented at + // chunk boundaries by the streaming input, so NextJsRscPlaceholderRewriter + // will return Keep on every fragment and the post-processor fallback + // has to rewrite on the accumulated HTML. + let html = format!( + r#""#, + "x".repeat(400), // pad to guarantee chunk-boundary fragmentation + ); + + let mut settings = create_test_settings(); + settings + .integrations + .insert_config( + "nextjs", + &json!({ + "enabled": true, + "rewrite_attributes": ["href", "link", "url"], + }), + ) + .expect("should update nextjs config"); + let registry = IntegrationRegistry::new(&settings).expect("should create registry"); + let config = config_from_settings(&settings, ®istry); + let processor = create_html_processor(config); + + let pipeline_config = PipelineConfig { + input_compression: Compression::None, + output_compression: Compression::None, + chunk_size: 128, + }; + let mut pipeline = StreamingPipeline::new(pipeline_config, processor); + + let mut output = Vec::new(); + pipeline + .process(Cursor::new(html.as_bytes()), &mut output) + .expect("should process fragmented RSC push"); + let processed = String::from_utf8_lossy(&output); + + assert!( + !processed.contains("origin.example.com"), + "no origin host should leak through post-processor fallback. 
Got: {processed}" + ); + assert!( + processed.contains("test.example.com/a") + && processed.contains("test.example.com/img.png") + && processed.contains("test.example.com/deep/path?q=1"), + "all origin URLs must be rewritten to proxy host. Got: {processed}" + ); + assert!( + !processed.contains(RSC_PAYLOAD_PLACEHOLDER_PREFIX), + "no placeholder should leak to output. Got: {processed}" + ); + // Structural integrity: the push call envelope must still be present + // and the JS string literal must be properly terminated. + assert!( + processed.contains(r#"self.__next_f.push([1,""#), + "push call must survive. Got: {processed}" + ); + assert!( + processed.contains(r#""])"#), + "push call must close properly — `\"])` followed by . Got: {processed}" + ); + } } diff --git a/crates/trusted-server-core/src/integrations/nextjs/rsc_placeholders.rs b/crates/trusted-server-core/src/integrations/nextjs/rsc_placeholders.rs index 1aa0b3916..10101a70c 100644 --- a/crates/trusted-server-core/src/integrations/nextjs/rsc_placeholders.rs +++ b/crates/trusted-server-core/src/integrations/nextjs/rsc_placeholders.rs @@ -54,12 +54,13 @@ impl IntegrationScriptRewriter for NextJsRscPlaceholderRewriter { return ScriptRewriteAction::keep(); } - // Only process complete (unfragmented) scripts during streaming. - // Fragmented scripts are handled by the post-processor which re-parses the final HTML. - // This avoids corrupting non-RSC scripts that happen to be fragmented during streaming. + // Deliberately does not accumulate fragments (unlike NextJsNextDataRewriter + // and GoogleTagManagerIntegration which use Mutex buffers). RSC + // placeholder processing has a post-processor fallback that re-parses + // the final HTML at end-of-document, so fragmented scripts are safely + // deferred. Accumulation here would also risk corrupting non-RSC scripts + // that happen to be fragmented during streaming. if !ctx.is_last_in_text_node { - // Script is fragmented - skip placeholder processing. - // The post-processor will handle RSC scripts at end-of-document. return ScriptRewriteAction::keep(); } diff --git a/crates/trusted-server-core/src/integrations/nextjs/script_rewriter.rs b/crates/trusted-server-core/src/integrations/nextjs/script_rewriter.rs index 72617c3e6..eaf00a16c 100644 --- a/crates/trusted-server-core/src/integrations/nextjs/script_rewriter.rs +++ b/crates/trusted-server-core/src/integrations/nextjs/script_rewriter.rs @@ -1,4 +1,4 @@ -use std::sync::Arc; +use std::sync::{Arc, Mutex}; use error_stack::Report; use regex::{escape, Regex}; @@ -14,6 +14,14 @@ use super::{NextJsIntegrationConfig, NEXTJS_INTEGRATION_ID}; pub(super) struct NextJsNextDataRewriter { config: Arc, rewriter: UrlRewriter, + /// Accumulates text fragments when `lol_html` splits a text node across + /// chunk boundaries. Drained on `is_last_in_text_node`. + /// + /// Uses `Mutex` to satisfy the `Sync` bound on `IntegrationScriptRewriter`. + /// The pipeline is single-threaded (`lol_html::HtmlRewriter` is `!Send`), + /// so the lock is uncontended. `lol_html` delivers text chunks sequentially + /// per element — the buffer is always empty when a new element's text begins. 
+ accumulated_text: Mutex, } impl NextJsNextDataRewriter { @@ -23,6 +31,7 @@ impl NextJsNextDataRewriter { Ok(Self { rewriter: UrlRewriter::new(&config.rewrite_attributes)?, config, + accumulated_text: Mutex::new(String::new()), }) } @@ -65,7 +74,33 @@ impl IntegrationScriptRewriter for NextJsNextDataRewriter { return ScriptRewriteAction::keep(); } - self.rewrite_structured(content, ctx) + let mut buf = self + .accumulated_text + .lock() + .unwrap_or_else(std::sync::PoisonError::into_inner); + + if !ctx.is_last_in_text_node { + // Intermediate fragment — accumulate and suppress output. + buf.push_str(content); + return ScriptRewriteAction::RemoveNode; + } + + // Last fragment. If nothing was accumulated, process directly. + if buf.is_empty() { + return self.rewrite_structured(content, ctx); + } + + // Complete the accumulated text and process the full content. + // If rewrite_structured returns Keep, we must still emit the full + // accumulated text via Replace — intermediate fragments were already + // removed from lol_html's output via RemoveNode. + buf.push_str(content); + let full_content = std::mem::take(&mut *buf); + let action = self.rewrite_structured(&full_content, ctx); + if matches!(action, ScriptRewriteAction::Keep) { + return ScriptRewriteAction::replace(full_content); + } + action } } @@ -464,4 +499,119 @@ mod tests { assert!(rewritten.contains("https://proxy.example.com/news")); assert!(rewritten.contains("//proxy.example.com/assets/logo.png")); } + + #[test] + fn fragmented_next_data_is_accumulated_and_rewritten() { + let rewriter = NextJsNextDataRewriter::new(test_config()).expect("should build rewriter"); + let document_state = IntegrationDocumentState::default(); + + let fragment1 = r#"{"props":{"pageProps":{"href":"https://origin."#; + let fragment2 = r#"example.com/reviews"}}}"#; + + let ctx_intermediate = IntegrationScriptContext { + selector: "script#__NEXT_DATA__", + request_host: "ts.example.com", + request_scheme: "https", + origin_host: "origin.example.com", + is_last_in_text_node: false, + document_state: &document_state, + }; + let ctx_last = IntegrationScriptContext { + is_last_in_text_node: true, + ..ctx_intermediate + }; + + let action1 = rewriter.rewrite(fragment1, &ctx_intermediate); + assert_eq!( + action1, + ScriptRewriteAction::RemoveNode, + "should suppress intermediate fragment" + ); + + let action2 = rewriter.rewrite(fragment2, &ctx_last); + match action2 { + ScriptRewriteAction::Replace(rewritten) => { + assert!( + rewritten.contains("ts.example.com"), + "should rewrite origin to proxy host. Got: {rewritten}" + ); + assert!( + rewritten.contains("/reviews"), + "should preserve path. Got: {rewritten}" + ); + assert!( + !rewritten.contains("origin.example.com"), + "should not contain original host. 
Got: {rewritten}" + ); + } + other => panic!("expected Replace, got {other:?}"), + } + } + + #[test] + fn unfragmented_next_data_works_without_accumulation() { + let rewriter = NextJsNextDataRewriter::new(test_config()).expect("should build rewriter"); + let document_state = IntegrationDocumentState::default(); + let payload = r#"{"props":{"pageProps":{"href":"https://origin.example.com/page"}}}"#; + + let ctx_single = IntegrationScriptContext { + selector: "script#__NEXT_DATA__", + request_host: "ts.example.com", + request_scheme: "https", + origin_host: "origin.example.com", + is_last_in_text_node: true, + document_state: &document_state, + }; + + let action = rewriter.rewrite(payload, &ctx_single); + match action { + ScriptRewriteAction::Replace(rewritten) => { + assert!( + rewritten.contains("ts.example.com"), + "should rewrite. Got: {rewritten}" + ); + } + other => panic!("expected Replace, got {other:?}"), + } + } + + #[test] + fn fragmented_next_data_without_rewritable_urls_preserves_content() { + let rewriter = NextJsNextDataRewriter::new(test_config()).expect("should build rewriter"); + let document_state = IntegrationDocumentState::default(); + + // __NEXT_DATA__ JSON with no origin URLs — rewrite_structured returns Keep. + let fragment1 = r#"{"props":{"pageProps":{"title":"Hello"#; + let fragment2 = r#" World","count":42}}}"#; + + let ctx_intermediate = IntegrationScriptContext { + selector: "script#__NEXT_DATA__", + request_host: "ts.example.com", + request_scheme: "https", + origin_host: "origin.example.com", + is_last_in_text_node: false, + document_state: &document_state, + }; + let ctx_last = IntegrationScriptContext { + is_last_in_text_node: true, + ..ctx_intermediate + }; + + let action1 = rewriter.rewrite(fragment1, &ctx_intermediate); + assert_eq!(action1, ScriptRewriteAction::RemoveNode); + + // Last fragment: even though no URLs to rewrite, must emit full content + // because intermediate fragments were removed. 
+ let action2 = rewriter.rewrite(fragment2, &ctx_last); + match action2 { + ScriptRewriteAction::Replace(content) => { + let expected = format!("{fragment1}{fragment2}"); + assert_eq!( + content, expected, + "should emit full accumulated content unchanged" + ); + } + other => panic!("expected Replace with passthrough, got {other:?}"), + } + } } diff --git a/crates/trusted-server-core/src/integrations/prebid.rs b/crates/trusted-server-core/src/integrations/prebid.rs index 9ab07fbea..28bf92a24 100644 --- a/crates/trusted-server-core/src/integrations/prebid.rs +++ b/crates/trusted-server-core/src/integrations/prebid.rs @@ -21,7 +21,7 @@ use crate::cookies::{strip_cookies, CONSENT_COOKIE_NAMES}; use crate::error::TrustedServerError; use crate::http_util::RequestInfo; use crate::integrations::{ - collect_body, ensure_integration_backend_with_timeout, predict_backend_name_for_url, + collect_body, ensure_integration_backend_with_timeout, predict_integration_backend_name, AttributeRewriteAction, IntegrationAttributeContext, IntegrationAttributeRewriter, IntegrationEndpoint, IntegrationHeadInjector, IntegrationHtmlContext, IntegrationProxy, IntegrationRegistration, @@ -521,6 +521,7 @@ impl PrebidAuctionProvider { request: &AuctionRequest, context: &AuctionContext<'_>, signer: Option<(&RequestSigner, String, &SigningParams)>, + _request_info: RequestInfo, ) -> OpenRtbRequest { let imps = request .slots @@ -660,7 +661,7 @@ impl PrebidAuctionProvider { // EIDs will be populated by identity providers; consent gating // is applied via `gate_eids_by_consent` before they are set here. eids: None, - synthetic_fresh: Some(request.user.fresh_id.clone()), + ec_fresh: Some(request.user.fresh_id.clone()), } .to_ext(), ..Default::default() @@ -1053,23 +1054,26 @@ impl AuctionProvider for PrebidAuctionProvider { ) -> Result> { log::info!("Prebid: requesting bids for {} slots", request.slots.len()); + let request_info = RequestInfo::from_request(context.request, context.client_info); + // Create signer and compute signature if request signing is enabled - let signer_with_signature = if let Some(request_signing_config) = - &context.settings.request_signing - { - if request_signing_config.enabled { - let request_info = RequestInfo::from_request(context.request, context.client_info); - let signer = RequestSigner::from_services(context.services)?; - let params = - SigningParams::new(request.id.clone(), request_info.host, request_info.scheme); - let signature = signer.sign_request(¶ms)?; - Some((signer, signature, params)) + let signer_with_signature = + if let Some(request_signing_config) = &context.settings.request_signing { + if request_signing_config.enabled { + let signer = RequestSigner::from_services(context.services)?; + let params = SigningParams::new( + request.id.clone(), + request_info.host.clone(), + request_info.scheme.clone(), + ); + let signature = signer.sign_request(¶ms)?; + Some((signer, signature, params)) + } else { + None + } } else { None - } - } else { - None - }; + }; // Convert to OpenRTB with all enrichments let openrtb = self.to_openrtb( @@ -1078,6 +1082,7 @@ impl AuctionProvider for PrebidAuctionProvider { signer_with_signature .as_ref() .map(|(s, sig, params)| (s, sig.clone(), params)), + request_info, ); // An empty `imp` array violates the OpenRTB spec and wastes a network @@ -1213,19 +1218,21 @@ impl AuctionProvider for PrebidAuctionProvider { self.config.enabled } - fn backend_name(&self, timeout_ms: u32) -> Option { - let name = predict_backend_name_for_url( + fn backend_name(&self, 
services: &RuntimeServices, timeout_ms: u32) -> Option { + predict_integration_backend_name( + services, &self.config.server_url, + PREBID_INTEGRATION_ID, true, Duration::from_millis(u64::from(timeout_ms)), - ); - if name.is_none() { + ) + .inspect_err(|e| { log::error!( - "Failed to predict backend name for Prebid server URL '{}'", + "Failed to predict backend name for Prebid server URL '{}': {e:?}", self.config.server_url ); - } - name + }) + .ok() } } @@ -1277,16 +1284,24 @@ mod tests { use std::sync::Arc; use super::*; + use crate::auction::test_support::create_test_auction_context as shared_test_auction_context; use crate::auction::types::{ AdFormat, AdSlot, AuctionContext, AuctionRequest, DeviceInfo, PublisherInfo, UserInfo, }; + use crate::consent::ConsentContext; use crate::geo::GeoInfo; use crate::html_processor::{create_html_processor, HtmlProcessorConfig}; use crate::integrations::{ AttributeRewriteAction, IntegrationDocumentState, IntegrationRegistry, }; - use crate::platform::test_support::{build_services_with_http_client, StubHttpClient}; + use crate::platform::test_support::{ + build_services_with_http_client, NoopConfigStore, NoopGeo, NoopHttpClient, NoopSecretStore, + StubHttpClient, + }; + use crate::platform::{ + ClientInfo, PlatformBackend, PlatformBackendSpec, PlatformError, RuntimeServices, + }; use crate::settings::Settings; use crate::streaming_processor::{Compression, PipelineConfig, StreamingPipeline}; use crate::test_support::tests::crate_test_settings_str; @@ -1316,6 +1331,57 @@ mod tests { } } + struct PredictOnlyBackend; + + impl PlatformBackend for PredictOnlyBackend { + fn predict_name( + &self, + spec: &PlatformBackendSpec, + ) -> Result> { + Ok(format!( + "predicted_{}_{}_{}", + spec.scheme, + spec.host, + spec.first_byte_timeout.as_millis() + )) + } + + fn ensure(&self, _spec: &PlatformBackendSpec) -> Result> { + Ok("unused".to_string()) + } + } + + fn services_with_backend(backend: impl PlatformBackend + 'static) -> RuntimeServices { + RuntimeServices::builder() + .config_store(Arc::new(NoopConfigStore)) + .secret_store(Arc::new(NoopSecretStore)) + .kv_store(Arc::new(edgezero_core::key_value_store::NoopKvStore)) + .backend(Arc::new(backend)) + .http_client(Arc::new(NoopHttpClient)) + .geo(Arc::new(NoopGeo)) + .client_info(ClientInfo { + client_ip: None, + tls_protocol: None, + tls_cipher: None, + }) + .build() + } + + #[test] + fn prebid_backend_name_delegates_to_platform_backend_prediction() { + let provider = PrebidAuctionProvider::new(base_config()); + let services = services_with_backend(PredictOnlyBackend); + + let backend_name = provider + .backend_name(&services, 123) + .expect("should predict backend name through platform backend"); + + assert_eq!( + backend_name, "predicted_https_prebid.example_123", + "should use PlatformBackend::predict_name instead of duplicating the naming scheme" + ); + } + fn create_test_auction_request() -> AuctionRequest { AuctionRequest { id: "auction-123".to_string(), @@ -1397,17 +1463,11 @@ mod tests { request: &'a http::Request, client_info: &'a crate::platform::ClientInfo, ) -> AuctionContext<'a> { - use crate::platform::test_support::noop_services; - let services: &'static crate::platform::RuntimeServices = - Box::leak(Box::new(noop_services())); - AuctionContext { - settings, - request, - client_info, - timeout_ms: 1000, - provider_responses: None, - services, - } + shared_test_auction_context(settings, request, client_info, 1000) + } + + fn make_request_info(context: &AuctionContext<'_>) -> RequestInfo { + 
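+        // Mirrors the provider, which now computes RequestInfo once up front
+        // and threads it into `to_openrtb`.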
RequestInfo::from_request(context.request, context.client_info) } fn config_from_settings( @@ -1423,7 +1483,7 @@ mod tests { ) } - /// Shared TOML prefix for config-parsing tests (publisher + synthetic sections). + /// Shared TOML prefix for config-parsing tests (publisher + ec sections). const TOML_BASE: &str = r#" [[handlers]] path = "^/admin" @@ -1436,11 +1496,8 @@ cookie_domain = ".test-publisher.com" origin_url = "https://origin.test-publisher.com" proxy_secret = "test-secret" -[synthetic] -counter_store = "test-counter-store" -opid_store = "test-opid-store" +[edge_cookie] secret_key = "test-secret-key" -template = "{{client_ip}}:{{user_agent}}" "#; /// Parse a TOML string containing only the `[integrations.prebid]` section @@ -1835,7 +1892,12 @@ server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); assert_eq!( openrtb.test, None, @@ -1890,7 +1952,12 @@ server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); assert_eq!( openrtb.test, @@ -1927,7 +1994,12 @@ server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); assert_eq!( openrtb @@ -1962,7 +2034,12 @@ server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); assert_eq!( openrtb.test, None, @@ -2020,7 +2097,12 @@ server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); let imp = &openrtb.imp[0]; assert_eq!(imp.bidfloor, Some(1.5), "should set bidfloor from slot"); @@ -2048,7 +2130,12 @@ server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); let imp = &openrtb.imp[0]; assert_eq!(imp.bidfloor, None, "should omit bidfloor when not set"); @@ -2075,7 +2162,12 @@ server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); let imp = &openrtb.imp[0]; assert_eq!(imp.secure, Some(true), "should require HTTPS creatives"); @@ -2122,7 +2214,12 @@ server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); assert_eq!( openrtb.user.as_ref().and_then(|u| u.consent.as_deref()), @@ -2175,7 +2272,12 @@ server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); assert_eq!( openrtb.regs.as_ref().and_then(|r| r.gdpr), @@ -2216,7 +2318,12 @@ 
server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); assert!( openrtb.regs.is_none(), @@ -2247,7 +2354,12 @@ server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); assert_eq!( openrtb.regs.as_ref().and_then(|r| r.gdpr), @@ -2273,7 +2385,12 @@ server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); assert!(openrtb.regs.is_none(), "should omit regs entirely"); } @@ -2302,7 +2419,12 @@ server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); let regs = openrtb.regs.as_ref().expect("should have regs"); assert_eq!( @@ -2528,7 +2650,12 @@ server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); let device = openrtb.device.as_ref().expect("should have device"); assert_eq!(device.dnt, Some(true), "should set dnt from DNT header"); @@ -2560,7 +2687,12 @@ server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); let device = openrtb.device.as_ref().expect("should have device"); assert_eq!( @@ -2596,7 +2728,12 @@ server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); let device = openrtb.device.as_ref().expect("should have device"); assert_eq!( @@ -2635,7 +2772,12 @@ server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); assert!( openrtb.imp.is_empty(), @@ -2673,7 +2815,12 @@ server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); let geo = openrtb .device .as_ref() @@ -2709,7 +2856,12 @@ server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); assert_eq!( openrtb.tmax, @@ -2742,7 +2894,12 @@ server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); assert_eq!( openrtb.tmax, None, @@ -2772,7 +2929,12 @@ server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = 
provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); let formats = &openrtb.imp[0] .banner .as_ref() @@ -2809,7 +2971,12 @@ server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); let site = openrtb.site.as_ref().expect("should have site"); assert_eq!( @@ -2836,7 +3003,12 @@ server_url = "https://prebid.example" }, ); - let openrtb = provider.to_openrtb(&auction_request, &context, None); + let openrtb = provider.to_openrtb( + &auction_request, + &context, + None, + make_request_info(&context), + ); let publisher = openrtb .site .as_ref() @@ -3005,7 +3177,8 @@ server_url = "https://prebid.example" provider_responses: None, services: &services, }; - provider.to_openrtb(request, &context, None) + let request_info = make_request_info(&context); + provider.to_openrtb(request, &context, None, request_info) } fn bidder_params(ortb: &OpenRtbRequest) -> &serde_json::Map { diff --git a/crates/trusted-server-core/src/integrations/registry.rs b/crates/trusted-server-core/src/integrations/registry.rs index 0b648ae7f..c02f89857 100644 --- a/crates/trusted-server-core/src/integrations/registry.rs +++ b/crates/trusted-server-core/src/integrations/registry.rs @@ -8,12 +8,12 @@ use error_stack::Report; use http::{HeaderValue, Method, Request, Response}; use matchit::Router; -use crate::constants::HEADER_X_SYNTHETIC_ID; -use crate::cookies::set_synthetic_cookie; +use crate::constants::HEADER_X_TS_EC; +use crate::cookies::set_ec_cookie; +use crate::edge_cookie::get_or_generate_ec_id; use crate::error::TrustedServerError; use crate::platform::RuntimeServices; use crate::settings::Settings; -use crate::synthetic::get_or_generate_synthetic_id; /// Action returned by attribute rewriters to describe how the runtime should mutate the element. #[derive(Debug, Clone, PartialEq, Eq)] @@ -244,6 +244,13 @@ impl IntegrationEndpoint { } /// Trait implemented by integration proxies that expose HTTP endpoints. +/// +/// `Send + Sync` bounds are required so trait objects can be stored in +/// `Arc` and shared across the single-threaded WASM +/// request context. The `?Send` on the async methods is intentional — see the +/// `!Send` design rationale on [`PlatformPendingRequest`] for the full +/// explanation. On wasm32 these bounds are compatible because the runtime is +/// single-threaded. #[async_trait(?Send)] pub trait IntegrationProxy: Send + Sync { /// Integration identifier used for logging and optional URL namespace. @@ -646,8 +653,8 @@ impl IntegrationRegistry { /// Dispatch a proxy request when an integration handles the path. /// - /// This method automatically sets the `x-synthetic-id` header and - /// `synthetic_id` cookie on successful responses. + /// This method automatically sets the `x-ts-ec` header and + /// `ts-ec` cookie on successful responses. #[must_use] pub async fn handle_proxy( &self, @@ -658,47 +665,46 @@ impl IntegrationRegistry { mut req: Request, ) -> Option, Report>> { if let Some((proxy, _)) = self.find_route(method, path) { - let synthetic_id_result = get_or_generate_synthetic_id(settings, services, &req); + let ec_id_result = get_or_generate_ec_id(settings, services, &req); - if let Ok(ref synthetic_id) = synthetic_id_result { - match HeaderValue::from_str(synthetic_id) { + // Set EC ID header on the request so integrations can read it. 
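+            // The same ID is echoed on the response header and `ts-ec` cookie
+            // below once the proxy handler succeeds.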
+ // Header injection: HeaderValue::from_str rejects values containing \r, \n, or \0, + // so a crafted EC ID cannot inject additional request headers. + if let Ok(ref ec_id) = ec_id_result { + match HeaderValue::from_str(ec_id) { Ok(header_value) => { req.headers_mut() - .insert(HEADER_X_SYNTHETIC_ID.clone(), header_value); + .insert(HEADER_X_TS_EC.clone(), header_value); } Err(error) => { - log::warn!( - "Failed to build x-synthetic-id request header value: {}", - error - ); + log::warn!("Failed to build x-ts-ec request header value: {}", error); } } } let mut result = proxy.handle(settings, services, req).await; + // Set EC ID header on successful responses if let Ok(ref mut response) = result { - match synthetic_id_result { - Ok(ref synthetic_id) => { - match HeaderValue::from_str(synthetic_id) { + match ec_id_result { + Ok(ref ec_id) => { + match HeaderValue::from_str(ec_id) { Ok(header_value) => { response .headers_mut() - .insert(HEADER_X_SYNTHETIC_ID.clone(), header_value); + .insert(HEADER_X_TS_EC.clone(), header_value); } Err(error) => { log::warn!( - "Failed to build x-synthetic-id response header value: {}", + "Failed to build x-ts-ec response header value: {}", error ); } } - set_synthetic_cookie(settings, response, synthetic_id); + set_ec_cookie(settings, response, ec_id); } Err(ref err) => { - log::warn!( - "Failed to generate synthetic ID for integration response: {err:?}" - ); + log::warn!("Failed to generate EC ID for integration response: {err:?}"); } } } @@ -747,6 +753,15 @@ impl IntegrationRegistry { self.inner.script_rewriters.clone() } + /// Check whether any HTML post-processors are registered. + /// + /// Cheaper than [`html_post_processors()`](Self::html_post_processors) when + /// only the presence check is needed — avoids cloning `Vec>`. + #[must_use] + pub fn has_html_post_processors(&self) -> bool { + !self.inner.html_post_processors.is_empty() + } + /// Expose registered HTML post-processors. 
#[must_use] pub fn html_post_processors(&self) -> Vec> { @@ -1294,28 +1309,28 @@ mod tests { assert!(!registry.has_route(&Method::POST, "/integrations/test/users")); } - // Tests for synthetic ID header on proxy responses - use crate::constants::COOKIE_SYNTHETIC_ID; + // Tests for EC ID header on proxy responses + use crate::constants::COOKIE_TS_EC; use crate::test_support::tests::create_test_settings; /// Mock proxy that returns a simple 200 OK response - struct SyntheticIdTestProxy; + struct EcTestProxy; #[async_trait(?Send)] - impl IntegrationProxy for SyntheticIdTestProxy { + impl IntegrationProxy for EcTestProxy { fn integration_name(&self) -> &'static str { - "synthetic_id_test" + "ec_test" } fn routes(&self) -> Vec { vec![ IntegrationEndpoint { method: Method::GET, - path: "/integrations/test/synthetic".to_string(), + path: "/integrations/test/ec".to_string(), }, IntegrationEndpoint { method: Method::POST, - path: "/integrations/test/synthetic".to_string(), + path: "/integrations/test/ec".to_string(), }, ] } @@ -1334,14 +1349,14 @@ mod tests { } #[test] - fn handle_proxy_sets_synthetic_id_header_on_response() { + fn handle_proxy_sets_ec_id_header_on_response() { let settings = create_test_settings(); let routes = vec![( Method::GET, - "/integrations/test/synthetic", + "/integrations/test/ec", ( - Arc::new(SyntheticIdTestProxy) as Arc, - "synthetic_id_test", + Arc::new(EcTestProxy) as Arc, + "ec_test", ), )]; let registry = IntegrationRegistry::from_routes(routes); @@ -1356,7 +1371,7 @@ mod tests { // Call handle_proxy (uses futures executor in test environment) let result = futures::executor::block_on(registry.handle_proxy( &Method::GET, - "/integrations/test/synthetic", + "/integrations/test/ec", &settings, &noop_services(), req, @@ -1370,33 +1385,33 @@ mod tests { let response = response.unwrap(); assert!( - response.headers().get(&HEADER_X_SYNTHETIC_ID).is_some(), - "Response should have x-synthetic-id header" + response.headers().get(&HEADER_X_TS_EC).is_some(), + "Response should have x-ts-ec header" ); let set_cookie = response.headers().get(header::SET_COOKIE); assert!( set_cookie.is_some(), - "Response should have Set-Cookie header for synthetic_id" + "Response should have Set-Cookie header for ts-ec" ); let cookie_value = set_cookie.unwrap().to_str().unwrap(); assert!( - cookie_value.contains(COOKIE_SYNTHETIC_ID), - "Set-Cookie should contain synthetic_id cookie, got: {}", + cookie_value.contains(COOKIE_TS_EC), + "Set-Cookie should contain ts-ec cookie, got: {}", cookie_value ); } #[test] - fn handle_proxy_replaces_invalid_request_header_with_matching_response_cookie() { + fn handle_proxy_replaces_invalid_ec_request_header_with_matching_response_cookie() { let settings = create_test_settings(); let routes = vec![( Method::GET, - "/integrations/test/synthetic", + "/integrations/test/ec", ( - Arc::new(SyntheticIdTestProxy) as Arc, - "synthetic_id_test", + Arc::new(EcTestProxy) as Arc, + "ec_test", ), )]; let registry = IntegrationRegistry::from_routes(routes); @@ -1407,13 +1422,13 @@ mod tests { .body(EdgeBody::empty()) .expect("should build request"); req.headers_mut().insert( - HEADER_X_SYNTHETIC_ID.clone(), + HEADER_X_TS_EC.clone(), HeaderValue::from_static("evil;injected"), ); let result = futures::executor::block_on(registry.handle_proxy( &Method::GET, - "/integrations/test/synthetic", + "/integrations/test/ec", &settings, &noop_services(), req, @@ -1423,8 +1438,8 @@ mod tests { let response = result.expect("handler should succeed"); let response_header = response 
            .headers()
-            .get(&HEADER_X_SYNTHETIC_ID)
-            .expect("response should have x-synthetic-id header")
+            .get(&HEADER_X_TS_EC)
+            .expect("response should have x-ts-ec header")
            .to_str()
            .expect("header should be valid UTF-8")
            .to_string();
@@ -1435,9 +1450,9 @@ mod tests {
            .to_str()
            .expect("header should be valid UTF-8");
        let cookie_value = cookie_header
-            .strip_prefix(&format!("{}=", COOKIE_SYNTHETIC_ID))
+            .strip_prefix(&format!("{}=", COOKIE_TS_EC))
            .and_then(|s| s.split_once(';').map(|(value, _)| value))
-            .expect("should contain the synthetic_id cookie value");
+            .expect("should contain the ts-ec cookie value");

        assert_ne!(
            response_header, "evil;injected",
@@ -1445,7 +1460,7 @@ mod tests {
        );
        assert_eq!(
            response_header, cookie_value,
-            "response header and cookie should carry the same effective synthetic ID"
+            "response header and cookie should carry the same effective EC ID"
        );
    }

@@ -1454,11 +1469,8 @@ mod tests {
        let settings = create_test_settings();
        let routes = vec![(
            Method::GET,
-            "/integrations/test/synthetic",
-            (
-                Arc::new(SyntheticIdTestProxy) as Arc<dyn IntegrationProxy>,
-                "test",
-            ),
+            "/integrations/test/ec",
+            (Arc::new(EcTestProxy) as Arc<dyn IntegrationProxy>, "test"),
        )];
        let registry = IntegrationRegistry::from_routes(routes);

@@ -1472,7 +1484,7 @@ mod tests {
            header::COOKIE,
            HeaderValue::from_str(&format!(
                "{}={}",
-                crate::constants::COOKIE_SYNTHETIC_ID,
+                COOKIE_TS_EC,
                crate::test_support::tests::VALID_SYNTHETIC_ID
            ))
            .expect("should build Cookie header"),
        );

        let result = futures::executor::block_on(registry.handle_proxy(
            &Method::GET,
-            "/integrations/test/synthetic",
+            "/integrations/test/ec",
            &settings,
            &noop_services(),
            req,
@@ -1490,8 +1502,8 @@ mod tests {

        let response = result.expect("proxy handle should succeed");
        assert!(
-            response.headers().get(&HEADER_X_SYNTHETIC_ID).is_some(),
-            "Response should still have x-synthetic-id header"
+            response.headers().get(&HEADER_X_TS_EC).is_some(),
+            "Response should still have x-ts-ec header"
        );

        let set_cookie = response.headers().get(header::SET_COOKIE);
@@ -1504,8 +1516,8 @@ mod tests {
        if let Some(cookie) = set_cookie {
            let cookie_str = cookie.to_str().unwrap_or("");
            assert!(
-                cookie_str.contains(COOKIE_SYNTHETIC_ID),
-                "Should contain synthetic_id cookie, got: {}",
+                cookie_str.contains(COOKIE_TS_EC),
+                "Should contain ts-ec cookie, got: {}",
                cookie_str
            );
        }
@@ -1516,10 +1528,10 @@ mod tests {
        let settings = create_test_settings();
        let routes = vec![(
            Method::POST,
-            "/integrations/test/synthetic",
+            "/integrations/test/ec",
            (
-                Arc::new(SyntheticIdTestProxy) as Arc<dyn IntegrationProxy>,
-                "synthetic_id_test",
+                Arc::new(EcTestProxy) as Arc<dyn IntegrationProxy>,
+                "ec_test",
            ),
        )];
        let registry = IntegrationRegistry::from_routes(routes);
@@ -1532,7 +1544,7 @@ mod tests {

        let result = futures::executor::block_on(registry.handle_proxy(
            &Method::POST,
-            "/integrations/test/synthetic",
+            "/integrations/test/ec",
            &settings,
            &noop_services(),
            req,
@@ -1544,8 +1556,8 @@ mod tests {
        let response = response.unwrap();

        assert!(
-            response.headers().get(&HEADER_X_SYNTHETIC_ID).is_some(),
-            "POST response should have x-synthetic-id header"
+            response.headers().get(&HEADER_X_TS_EC).is_some(),
+            "POST response should have x-ts-ec header"
        );
    }
diff --git a/crates/trusted-server-core/src/integrations/testlight.rs b/crates/trusted-server-core/src/integrations/testlight.rs
index ce30ca621..36b35995f 100644
--- a/crates/trusted-server-core/src/integrations/testlight.rs
+++ b/crates/trusted-server-core/src/integrations/testlight.rs
@@ -9,15 +9,16 @@
 use serde::{Deserialize, Serialize};
 use serde_json::{Map, Value};
 use validator::Validate;

+use crate::edge_cookie::get_ec_id;
 use crate::error::TrustedServerError;
 use crate::integrations::{
-    collect_body, AttributeRewriteAction, IntegrationAttributeContext,
+    collect_body, collect_body_bounded, AttributeRewriteAction, IntegrationAttributeContext,
     IntegrationAttributeRewriter, IntegrationEndpoint, IntegrationProxy, IntegrationRegistration,
+    INTEGRATION_MAX_BODY_BYTES,
 };
 use crate::platform::RuntimeServices;
 use crate::proxy::{proxy_request, ProxyRequestConfig};
 use crate::settings::{IntegrationConfig, Settings};
-use crate::synthetic::get_synthetic_id;
 use crate::tsjs;

 const TESTLIGHT_INTEGRATION_ID: &str = "testlight";
@@ -178,23 +179,25 @@ impl IntegrationProxy for TestlightIntegration {
        req: http::Request<EdgeBody>,
    ) -> Result<http::Response<EdgeBody>, Report<TrustedServerError>> {
        let (parts, body) = req.into_parts();
-        let payload_bytes = collect_body(body, TESTLIGHT_INTEGRATION_ID).await?;
+        let payload_bytes =
+            collect_body_bounded(body, INTEGRATION_MAX_BODY_BYTES, TESTLIGHT_INTEGRATION_ID)
+                .await?;
        let req = http::Request::from_parts(parts, EdgeBody::empty());

-        // Read synthetic ID from header (set by registry) or cookie
-        let synthetic_id = get_synthetic_id(&req)
-            .change_context(Self::error("Failed to read synthetic ID"))?
+        // Read EC ID from header (set by registry) or cookie
+        let ec_id = get_ec_id(&req)
+            .change_context(Self::error("Failed to read EC ID"))?
            .ok_or_else(|| {
                Report::new(Self::error(
-                    "Synthetic ID not found in request header or cookie — \
+                    "EC ID not found in request header or cookie — \
                     check that the integration registry propagated it",
                ))
            })?;

-        let payload_bytes = Self::rewrite_request_body(&payload_bytes, &synthetic_id)?;
+        let payload_bytes = Self::rewrite_request_body(&payload_bytes, &ec_id)?;

        let mut proxy_config = ProxyRequestConfig::new(&self.config.endpoint);
-        proxy_config.forward_synthetic_id = false;
+        proxy_config.forward_ec_id = false;
        proxy_config.body = Some(payload_bytes);
        proxy_config.stream_passthrough = true;
        proxy_config.headers.push((
@@ -424,54 +427,52 @@ mod tests {
        );
    }

-    #[test]
-    fn handle_uses_platform_http_client_with_http_request() {
-        futures::executor::block_on(async {
-            let stub = Arc::new(StubHttpClient::new());
-            stub.push_response(200, br#"{"ok":true}"#.to_vec());
-            let services = build_services_with_http_client(
-                Arc::clone(&stub) as Arc<dyn PlatformHttpClient>
-            );
-            let settings = create_test_settings();
-            let integration = TestlightIntegration::new(TestlightConfig {
-                enabled: true,
-                endpoint: "https://example.com/openrtb".to_string(),
-                timeout_ms: 1000,
-                shim_src: tsjs::tsjs_unified_script_src(),
-                rewrite_scripts: true,
-            });
-            let mut req = http::Request::builder()
-                .method(Method::POST)
-                .uri("https://edge.example.com/integrations/testlight/auction")
-                .body(EdgeBody::from(br#"{"imp":[{"id":"slot-1"}]}"#.to_vec()))
-                .expect("should build request");
-            req.headers_mut().insert(
-                crate::constants::HEADER_X_SYNTHETIC_ID.clone(),
-                http::HeaderValue::from_static(VALID_SYNTHETIC_ID),
-            );
+    #[tokio::test]
+    async fn handle_uses_platform_http_client_with_http_request() {
+        let stub = Arc::new(StubHttpClient::new());
+        stub.push_response(200, br#"{"ok":true}"#.to_vec());
+        let services = build_services_with_http_client(
+            Arc::clone(&stub) as Arc<dyn PlatformHttpClient>
+        );
+        let settings = create_test_settings();
+        let integration = TestlightIntegration::new(TestlightConfig {
+            enabled: true,
+            endpoint: "https://example.com/openrtb".to_string(),
+            timeout_ms: 1000,
+            shim_src: tsjs::tsjs_unified_script_src(),
+            rewrite_scripts: true,
+        });
+        let mut req = http::Request::builder()
+            .method(Method::POST)
+            .uri("https://edge.example.com/integrations/testlight/auction")
+            .body(EdgeBody::from(br#"{"imp":[{"id":"slot-1"}]}"#.to_vec()))
+            .expect("should build request");
+        req.headers_mut().insert(
+            crate::constants::HEADER_X_TS_EC.clone(),
+            http::HeaderValue::from_static(VALID_SYNTHETIC_ID),
+        );

-            let response = integration
-                .handle(&settings, &services, req)
-                .await
-                .expect("should proxy Testlight request");
+        let response = integration
+            .handle(&settings, &services, req)
+            .await
+            .expect("should proxy Testlight request");

-            assert_eq!(
-                response.status(),
-                http::StatusCode::OK,
-                "should return stubbed upstream status"
-            );
-            assert_eq!(
-                stub.recorded_backend_names(),
-                vec!["stub-backend".to_string()],
-                "should route outbound request through PlatformHttpClient"
-            );
-            let response_json: serde_json::Value =
-                serde_json::from_slice(&response.into_body().into_bytes())
-                    .expect("should parse JSON response");
-            assert_eq!(
-                response_json["ok"], true,
-                "should preserve the upstream JSON response body"
-            );
-        });
+        assert_eq!(
+            response.status(),
+            http::StatusCode::OK,
+            "should return stubbed upstream status"
+        );
+        assert_eq!(
+            stub.recorded_backend_names(),
+            vec!["stub-backend".to_string()],
+            "should route outbound request through PlatformHttpClient"
+        );
+        let response_json: serde_json::Value =
+            serde_json::from_slice(&response.into_body().into_bytes())
+                .expect("should parse JSON response");
+        assert_eq!(
+            response_json["ok"], true,
+            "should preserve the upstream JSON response body"
+        );
    }
 }
diff --git a/crates/trusted-server-core/src/lib.rs b/crates/trusted-server-core/src/lib.rs
index 3c0e0141b..03c71c107 100644
--- a/crates/trusted-server-core/src/lib.rs
+++ b/crates/trusted-server-core/src/lib.rs
@@ -16,8 +16,7 @@
 //! - [`privacy`]: Privacy utilities and helpers
 //! - [`settings`]: Configuration management and validation
 //! - [`streaming_replacer`]: Streaming URL replacement for large responses
-//! - [`synthetic`]: Synthetic ID generation using HMAC
-//! - [`templates`]: Handlebars template handling
+//! - [`edge_cookie`]: Edge Cookie (EC) ID generation using HMAC
 //! - [`test_support`]: Testing utilities and mocks
 //! - [`why`]: Debugging and introspection utilities
@@ -35,11 +34,13 @@
 pub mod auction;
 pub mod auction_config_types;
 pub mod auth;
+pub(crate) mod backend;
 pub mod consent;
 pub mod consent_config;
 pub mod constants;
 pub mod cookies;
 pub mod creative;
+pub mod edge_cookie;
 pub mod error;
 pub mod geo;
 pub(crate) mod host_rewrite;
@@ -56,9 +57,9 @@
 pub mod request_signing;
 pub mod rsc_flight;
 pub mod settings;
 pub mod settings_data;
+pub mod storage;
 pub mod streaming_processor;
 pub mod streaming_replacer;
-pub mod synthetic;
 pub mod test_support;
 pub mod tsjs;
diff --git a/crates/trusted-server-core/src/migration_guards.rs b/crates/trusted-server-core/src/migration_guards.rs
index b02be5223..7cfa8d0e2 100644
--- a/crates/trusted-server-core/src/migration_guards.rs
+++ b/crates/trusted-server-core/src/migration_guards.rs
@@ -27,7 +27,6 @@ fn migrated_utility_and_handler_modules_do_not_depend_on_fastly_request_response
     let sources = [
         ("auth.rs", include_str!("auth.rs")),
         ("cookies.rs", include_str!("cookies.rs")),
-        ("synthetic.rs", include_str!("synthetic.rs")),
         ("http_util.rs", include_str!("http_util.rs")),
         ("geo.rs", include_str!("geo.rs")),
         ("publisher.rs", include_str!("publisher.rs")),
diff --git a/crates/trusted-server-core/src/openrtb.rs b/crates/trusted-server-core/src/openrtb.rs
index b7a8849e8..3c9be932e 100644
--- a/crates/trusted-server-core/src/openrtb.rs
+++ b/crates/trusted-server-core/src/openrtb.rs
@@ -49,8 +49,12 @@ pub struct UserExt {
     /// Gated by TCF Purpose 1 (storage) and Purpose 4 (personalized ads).
     #[serde(skip_serializing_if = "Option::is_none")]
     pub eids: Option<Vec<Eid>>,
+    /// Whether this EC ID was freshly generated for this request.
+    ///
+    /// **Breaking change:** this wire field was previously named `synthetic_fresh`.
+    /// Downstream PBS modules or analytics reading the old name must be updated.
     #[serde(skip_serializing_if = "Option::is_none")]
-    pub synthetic_fresh: Option<String>,
+    pub ec_fresh: Option<String>,
 }

 impl ToExt for UserExt {}
@@ -332,7 +336,7 @@ mod tests {
                 consented_providers: Some("2~2628.2316~dv.".to_string()),
             }),
             eids: None,
-            synthetic_fresh: None,
+            ec_fresh: None,
         }
         .to_ext(),
         ..Default::default()
@@ -393,4 +397,24 @@ mod tests {
             "ext should be omitted when None"
         );
     }
+
+    #[test]
+    fn user_ext_serializes_ec_fresh_not_synthetic_fresh() {
+        let ext = UserExt {
+            consent: None,
+            consented_providers_settings: None,
+            eids: None,
+            ec_fresh: Some("true".to_string()),
+        };
+
+        let serialized = serde_json::to_value(&ext).expect("should serialize UserExt");
+        assert_eq!(
+            serialized["ec_fresh"], "true",
+            "ec_fresh should be present in serialized output"
+        );
+        assert!(
+            serialized.get("synthetic_fresh").is_none(),
+            "synthetic_fresh should not appear — field was renamed to ec_fresh"
+        );
+    }
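+
+    // Wire-format impact of the rename, on an illustrative payload
+    // (values are examples, not taken from this diff):
+    //   before: {"user":{"ext":{"synthetic_fresh":"true"}}}
+    //   after:  {"user":{"ext":{"ec_fresh":"true"}}}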
 }
diff --git a/crates/trusted-server-core/src/platform/http.rs b/crates/trusted-server-core/src/platform/http.rs
index f12bf3050..b6efe1b4e 100644
--- a/crates/trusted-server-core/src/platform/http.rs
+++ b/crates/trusted-server-core/src/platform/http.rs
@@ -193,4 +193,20 @@ pub trait PlatformHttpClient: Send + Sync {
         &self,
         pending_requests: Vec<PlatformPendingRequest>,
     ) -> Result<PlatformSelectResult, Report<PlatformError>>;
+
+    /// Wait for a single in-flight request to complete.
+    ///
+    /// This is a convenience wrapper around [`select`](Self::select) for the
+    /// common case where only one request is in flight.
+    ///
+    /// # Errors
+    ///
+    /// Returns `PlatformError::HttpClient` if the underlying `select` fails or
+    /// the response itself contains an error.
+    async fn wait(
+        &self,
+        pending: PlatformPendingRequest,
+    ) -> Result<PlatformResponse, Report<PlatformError>> {
+        self.select(vec![pending]).await?.ready
+    }
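+
+    // Usage sketch (illustrative; `client` and `req` are stand-ins):
+    //
+    //     let pending = client.send_async(req).await?;
+    //     let response = client.wait(pending).await?;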
 }
diff --git a/crates/trusted-server-core/src/platform/mod.rs b/crates/trusted-server-core/src/platform/mod.rs
index 474aabbb6..6662e8374 100644
--- a/crates/trusted-server-core/src/platform/mod.rs
+++ b/crates/trusted-server-core/src/platform/mod.rs
@@ -16,22 +16,18 @@
 //! ## Platform-Agnostic Components
 //!
 //! The following components were evaluated for platform-specific behavior
-//! (PR 8) and found to have a platform-agnostic rewriting pipeline. No
-//! platform trait is required; future adapters (PR 16/17) need not provide
+//! (verified 2026-03-31; see `docs/superpowers/plans/2026-03-31-pr8-content-rewriting-verification.md`)
+//! and found to have a platform-agnostic rewriting pipeline. No
+//! platform trait is required; future adapters (Cloudflare Workers, Axum, Spin) need not provide
 //! any content-rewriting implementation:
 //!
 //! - **Content rewriting** — `html_processor`, `streaming_processor`,
 //!   `streaming_replacer`, and `rsc_flight` modules use only standard Rust
 //!   (`std::io::Read`/`Write`, `lol_html`, `flate2`, `brotli`). The pipeline
-//!   is accessed via `StreamingPipeline::process` which
-//!   accepts any reader including platform body types (which implement
+//!   is accessed via [`StreamingPipeline::process`](crate::streaming_processor::StreamingPipeline::process) which
+//!   accepts any reader, including `fastly::Body` (which implements
 //!   `std::io::Read`).
 //!
-//! The `publisher.rs` handler module is platform-coupled at its handler
-//! layer — it accepts and returns `EdgeBody` in function signatures.
-//! This is an HTTP-type coupling that will be addressed in future PRs.
-//! It is not a content-rewriting concern.
-//!
 //! No `PlatformContentRewriter` trait exists or is needed.
 //!
@@ -58,9 +54,56 @@
 #[cfg(test)]
 mod tests {
-    use super::test_support::noop_services;
+    use std::net::{IpAddr, Ipv4Addr};
+    use std::sync::Arc;
+    use std::time::Duration;
+
+    use async_trait::async_trait;
+    use bytes::Bytes;
+    use edgezero_core::key_value_store::KvPage;
+
+    use super::test_support::{noop_services, noop_services_with_client_ip};
     use super::*;

+    struct MarkerKvStore(&'static str);
+
+    #[async_trait(?Send)]
+    impl PlatformKvStore for MarkerKvStore {
+        async fn get_bytes(&self, key: &str) -> Result<Option<Bytes>, KvError> {
+            if key == "marker" {
+                Ok(Some(Bytes::from(self.0.to_string())))
+            } else {
+                Ok(None)
+            }
+        }
+
+        async fn put_bytes(&self, _key: &str, _value: Bytes) -> Result<(), KvError> {
+            Ok(())
+        }
+
+        async fn put_bytes_with_ttl(
+            &self,
+            _key: &str,
+            _value: Bytes,
+            _ttl: Duration,
+        ) -> Result<(), KvError> {
+            Ok(())
+        }
+
+        async fn delete(&self, _key: &str) -> Result<(), KvError> {
+            Ok(())
+        }
+
+        async fn list_keys_page(
+            &self,
+            _prefix: &str,
+            _cursor: Option<&str>,
+            _limit: usize,
+        ) -> Result<KvPage, KvError> {
+            Ok(KvPage::default())
+        }
+    }
+
     fn _assert_config_store_object_safe(_: &dyn PlatformConfigStore) {}
     fn _assert_secret_store_object_safe(_: &dyn PlatformSecretStore) {}
     fn _assert_kv_store_object_safe(_: &dyn PlatformKvStore) {}
@@ -101,6 +144,34 @@ mod tests {
         assert!(result.is_none(), "should return None when no IP is present");
     }

+    #[test]
+    fn runtime_services_with_kv_store_replaces_only_the_new_clone() {
+        let services = noop_services_with_client_ip(IpAddr::V4(Ipv4Addr::new(198, 51, 100, 7)));
+        let replaced = services
+            .clone()
+            .with_kv_store(Arc::new(MarkerKvStore("replaced")));
+
+        let original_value = futures::executor::block_on(services.kv_store().get_bytes("marker"))
+            .expect("should query the original noop store");
+        let replaced_value = futures::executor::block_on(replaced.kv_store().get_bytes("marker"))
+            .expect("should query the replaced marker store");
+
+        assert_eq!(
+            original_value, None,
+            "should keep the original RuntimeServices KV store unchanged"
+        );
+        assert_eq!(
+            replaced_value,
+            Some(Bytes::from_static(b"replaced")),
+            "should expose the replacement KV store through kv_store()"
+        );
+        assert_eq!(
+            replaced.client_info().client_ip,
+            services.client_info().client_ip,
+            "should preserve client_info through with_kv_store"
+        );
+    }
+
     #[test]
     fn platform_pending_request_downcasts_and_preserves_backend_name() {
         let pending = PlatformPendingRequest::new(7_u8).with_backend_name("origin");
diff --git a/crates/trusted-server-core/src/platform/test_support.rs b/crates/trusted-server-core/src/platform/test_support.rs
index 15ca210c0..b77f53d8a 100644
--- a/crates/trusted-server-core/src/platform/test_support.rs
+++ b/crates/trusted-server-core/src/platform/test_support.rs
@@ -1,14 +1,21 @@
-use std::collections::VecDeque;
+use std::collections::{HashMap, VecDeque};
 use std::net::IpAddr;
 use std::sync::{Arc, Mutex};
+use std::time::Duration;

+use base64::{engine::general_purpose, Engine as _};
+use ed25519_dalek::SigningKey;
 use error_stack::{Report, ResultExt};
+use rand::rngs::OsRng;
+
+use edgezero_core::key_value_store::{KvError, KvPage, KvStore as PlatformKvStore};

 use super::{
     ClientInfo, GeoInfo, PlatformBackend, PlatformBackendSpec, PlatformConfigStore, PlatformError,
     PlatformGeo, PlatformHttpClient, PlatformHttpRequest, PlatformPendingRequest,
     PlatformResponse, PlatformSecretStore, PlatformSelectResult, RuntimeServices, StoreId,
     StoreName,
 };
+use crate::request_signing::{JWKS_STORE_NAME, SIGNING_STORE_NAME};
 pub(crate) struct NoopConfigStore;
@@ -56,6 +63,74 @@ impl PlatformSecretStore for NoopSecretStore {
     }
 }

+pub(crate) struct HashMapConfigStore {
+    data: HashMap<String, String>,
+}
+
+impl HashMapConfigStore {
+    pub(crate) fn new(data: HashMap<String, String>) -> Self {
+        Self { data }
+    }
+}
+
+impl PlatformConfigStore for HashMapConfigStore {
+    fn get(&self, _store_name: &StoreName, key: &str) -> Result<String, Report<PlatformError>> {
+        self.data
+            .get(key)
+            .cloned()
+            .ok_or_else(|| Report::new(PlatformError::ConfigStore))
+    }
+
+    fn put(
+        &self,
+        _store_id: &StoreId,
+        _key: &str,
+        _value: &str,
+    ) -> Result<(), Report<PlatformError>> {
+        Err(Report::new(PlatformError::Unsupported))
+    }
+
+    fn delete(&self, _store_id: &StoreId, _key: &str) -> Result<(), Report<PlatformError>> {
+        Err(Report::new(PlatformError::Unsupported))
+    }
+}
+
+pub(crate) struct HashMapSecretStore {
+    data: HashMap<String, Vec<u8>>,
+}
+
+impl HashMapSecretStore {
+    pub(crate) fn new(data: HashMap<String, Vec<u8>>) -> Self {
+        Self { data }
+    }
+}
+
+impl PlatformSecretStore for HashMapSecretStore {
+    fn get_bytes(
+        &self,
+        _store_name: &StoreName,
+        key: &str,
+    ) -> Result<Vec<u8>, Report<PlatformError>> {
+        self.data
+            .get(key)
+            .cloned()
+            .ok_or_else(|| Report::new(PlatformError::SecretStore))
+    }
+
+    fn create(
+        &self,
+        _store_id: &StoreId,
+        _name: &str,
+        _value: &str,
+    ) -> Result<(), Report<PlatformError>> {
+        Err(Report::new(PlatformError::Unsupported))
+    }
+
+    fn delete(&self, _store_id: &StoreId, _name: &str) -> Result<(), Report<PlatformError>> {
+        Err(Report::new(PlatformError::Unsupported))
+    }
+}
+
 pub(crate) struct NoopBackend;

 impl PlatformBackend for NoopBackend {
@@ -118,7 +193,7 @@ impl PlatformBackend for StubBackend {
 // StubHttpClient
 // ---------------------------------------------------------------------------

-/// Canned response carried by a [`StubPendingResponse`] through `send_async`
+/// Canned response carried by a [`PlatformPendingRequest`] through `send_async`
 /// and resolved by [`StubHttpClient::select`].
 struct StubPendingResponse {
     backend_name: String,
@@ -142,6 +217,8 @@ pub(crate) struct StubHttpClient {
     calls: Mutex<Vec<String>>,
     // (status_code, body_bytes) — kept Send by avoiding Body::Stream
     responses: Mutex<VecDeque<(u16, Vec<u8>)>>,
+    // Headers captured per send call, stored as (name, value) string pairs.
+    request_headers: Mutex<Vec<Vec<(String, String)>>>,
 }

 impl StubHttpClient {
@@ -149,6 +226,7 @@ impl StubHttpClient {
         Self {
             calls: Mutex::new(Vec::new()),
             responses: Mutex::new(VecDeque::new()),
+            request_headers: Mutex::new(Vec::new()),
         }
     }

@@ -164,6 +242,16 @@ impl StubHttpClient {
     pub fn recorded_backend_names(&self) -> Vec<String> {
         self.calls.lock().expect("should lock calls").clone()
     }
+
+    /// Return the request headers captured per `send` call, in order.
+    ///
+    /// Each entry is the set of `(name, value)` pairs from one call.
+    pub fn recorded_request_headers(&self) -> Vec<Vec<(String, String)>> {
+        self.request_headers
+            .lock()
+            .expect("should lock request_headers")
+            .clone()
+    }
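+
+    // Usage sketch (illustrative; asserts one recorded call carrying one header):
+    //
+    //     let headers = stub.recorded_request_headers();
+    //     assert_eq!(headers.len(), 1);
+    //     assert!(headers[0].contains(&("accept".to_string(), "*/*".to_string())));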
 }

 // ?Send matches PlatformHttpClient. See http.rs for the full rationale.
@@ -178,6 +266,22 @@ impl PlatformHttpClient for StubHttpClient {
             .expect("should lock calls")
             .push(request.backend_name.clone());

+        let headers: Vec<(String, String)> = request
+            .request
+            .headers()
+            .iter()
+            .filter_map(|(name, value)| {
+                value
+                    .to_str()
+                    .ok()
+                    .map(|v| (name.as_str().to_string(), v.to_string()))
+            })
+            .collect();
+        self.request_headers
+            .lock()
+            .expect("should lock request_headers")
+            .push(headers);
+
         let (status, body_bytes) = self
             .responses
             .lock()
@@ -218,6 +322,12 @@ impl PlatformHttpClient for StubHttpClient {
         Ok(PlatformPendingRequest::new(pending).with_backend_name(backend_name))
     }

+    /// Always marks the first pending request in the input as ready (FIFO order).
+    ///
+    /// This differs from Fastly's production `select()`, which returns whichever
+    /// request completes first and makes no ordering guarantees. Tests that rely on
+    /// this stub should not depend on "first-pushed = first-ready" semantics, and
+    /// should document their ordering assumptions explicitly if order matters.
     async fn select(
         &self,
         mut pending_requests: Vec<PlatformPendingRequest>,
@@ -249,6 +359,67 @@ impl PlatformHttpClient for StubHttpClient {
     }
 }

+// ---------------------------------------------------------------------------
+// RecordingKvStore
+// ---------------------------------------------------------------------------
+
+/// Test stub for [`PlatformKvStore`] that records `delete()` keys for assertion.
+///
+/// All other operations are no-ops: reads return `Ok(None)`, writes return `Ok(())`.
+pub(crate) struct RecordingKvStore {
+    deleted: Mutex<Vec<String>>,
+}
+
+impl RecordingKvStore {
+    pub(crate) fn new() -> Self {
+        Self {
+            deleted: Mutex::new(Vec::new()),
+        }
+    }
+
+    /// Return the keys passed to `delete()`, in call order.
+    pub(crate) fn deleted_keys(&self) -> Vec<String> {
+        self.deleted.lock().expect("should lock deleted").clone()
+    }
+}
+
+#[async_trait::async_trait(?Send)]
+impl PlatformKvStore for RecordingKvStore {
+    async fn get_bytes(&self, _key: &str) -> Result<Option<bytes::Bytes>, KvError> {
+        Ok(None)
+    }
+
+    async fn put_bytes(&self, _key: &str, _value: bytes::Bytes) -> Result<(), KvError> {
+        Ok(())
+    }
+
+    async fn put_bytes_with_ttl(
+        &self,
+        _key: &str,
+        _value: bytes::Bytes,
+        _ttl: Duration,
+    ) -> Result<(), KvError> {
+        Ok(())
+    }
+
+    async fn delete(&self, key: &str) -> Result<(), KvError> {
+        self.deleted
+            .lock()
+            .expect("should lock deleted")
+            .push(key.to_owned());
+        Ok(())
+    }
+
+    async fn list_keys_page(
+        &self,
+        _prefix: &str,
+        _cursor: Option<&str>,
+        _limit: usize,
+    ) -> Result<KvPage, KvError> {
+        Ok(KvPage::default())
+    }
+}
+
 pub(crate) struct NoopGeo;

 impl PlatformGeo for NoopGeo {
@@ -280,6 +451,28 @@ pub(crate) fn build_services_with_config_and_secret(
         .build()
 }

+pub(crate) fn build_request_signing_services() -> RuntimeServices {
+    let signing_key = SigningKey::generate(&mut OsRng);
+    let key_b64 = general_purpose::STANDARD.encode(signing_key.as_bytes());
+    let x_b64 = general_purpose::URL_SAFE_NO_PAD.encode(signing_key.verifying_key().as_bytes());
+    let jwk_json = format!(
+        r#"{{"kty":"OKP","crv":"Ed25519","x":"{}","kid":"test-kid","alg":"EdDSA"}}"#,
+        x_b64
+    );
+
+    let mut config_data = HashMap::new();
+    config_data.insert("current-kid".to_string(), "test-kid".to_string());
+    config_data.insert("test-kid".to_string(), jwk_json);
+
+    let mut secret_data = HashMap::new();
+    secret_data.insert("test-kid".to_string(), key_b64.into_bytes());
+
+    build_services_with_config_and_secret(
+        HashMapConfigStore::new(config_data),
+        HashMapSecretStore::new(secret_data),
+    )
+}
+
 pub(crate) fn build_services_with_config(
     config_store: impl PlatformConfigStore + 'static,
 ) -> RuntimeServices {
@@ -341,8 +534,7 @@ pub(crate) fn build_services_with_http_client(

 #[cfg(test)]
 mod tests {
-    use std::time::Duration;
-
+    use crate::backend::DEFAULT_FIRST_BYTE_TIMEOUT;
     use edgezero_core::body::Body;
     use edgezero_core::http::request_builder;

@@ -476,7 +668,7 @@ mod tests {
             host: "example.com".to_string(),
             port: None,
             certificate_check: true,
-            first_byte_timeout: Duration::from_secs(15),
+            first_byte_timeout: DEFAULT_FIRST_BYTE_TIMEOUT,
         };
         let name = stub.ensure(&spec).expect("should return a backend name");
         assert_eq!(name, "stub-backend", "should return fixed name");
@@ -502,4 +694,53 @@ mod tests {
             "should delegate to injected secret store"
         );
     }
+
+    #[test]
+    fn hash_map_stores_return_preset_values() {
+        let mut config = HashMap::new();
+        config.insert("current-kid".to_string(), "test-kid".to_string());
+
+        let mut secrets = HashMap::new();
+        secrets.insert("test-kid".to_string(), b"secret-material".to_vec());
+
+        let services = build_services_with_config_and_secret(
+            HashMapConfigStore::new(config),
+            HashMapSecretStore::new(secrets),
+        );
+
+        assert_eq!(
+            services
+                .config_store()
+                .get(&JWKS_STORE_NAME, "current-kid")
+                .expect("should read current-kid from config test store"),
+            "test-kid"
+        );
+        assert_eq!(
+            services
+                .secret_store()
+                .get_bytes(&SIGNING_STORE_NAME, "test-kid")
+                .expect("should read signing key bytes from secret test store"),
+            b"secret-material".to_vec()
+        );
+    }
+
+    #[test]
+    fn build_request_signing_services_provides_current_kid_and_signing_key() {
+        let services = build_request_signing_services();
+
+        let kid = services
+            .config_store()
+            .get(&JWKS_STORE_NAME, "current-kid")
+            .expect("should expose current-kid in config store");
+        let key_bytes = services
+            .secret_store()
+            .get_bytes(&SIGNING_STORE_NAME, &kid)
+            .expect("should expose signing key bytes in secret store");
+
+        assert_eq!(kid, "test-kid", "should use the standard signing test kid");
+        assert!(
+            !key_bytes.is_empty(),
+            "should provide key material for the current signing key"
+        );
+    }
 }
diff --git a/crates/trusted-server-core/src/platform/types.rs b/crates/trusted-server-core/src/platform/types.rs
index 0eaa3a0c0..3b17ee3b6 100644
--- a/crates/trusted-server-core/src/platform/types.rs
+++ b/crates/trusted-server-core/src/platform/types.rs
@@ -134,10 +134,11 @@ pub struct RuntimeServices {
     pub(crate) config_store: Arc<dyn PlatformConfigStore>,
     /// Access to encrypted secret stores.
     pub(crate) secret_store: Arc<dyn PlatformSecretStore>,
-    /// KV store for the primary (opid) store.
+    /// KV store service selected for the current request path.
     ///
-    /// Additional stores (`counter_store`, `creative_store`) are opened on
-    /// demand in individual handlers until multi-store support lands here.
+    /// Adapters may replace this with a different concrete store on a
+    /// per-request basis by cloning [`RuntimeServices`] with
+    /// [`RuntimeServices::with_kv_store`].
     pub(crate) kv_store: Arc<dyn PlatformKvStore>,
     /// Dynamic backend registration and name prediction.
     pub(crate) backend: Arc<dyn PlatformBackend>,
@@ -186,6 +187,12 @@ impl RuntimeServices {
         &*self.secret_store
     }

+    /// Returns the KV store service.
+    #[must_use]
+    pub fn kv_store(&self) -> &dyn PlatformKvStore {
+        &*self.kv_store
+    }
+
     /// Returns the dynamic backend service.
     #[must_use]
     pub fn backend(&self) -> &dyn PlatformBackend {
@@ -216,6 +223,19 @@ impl RuntimeServices {
     pub fn kv_handle(&self) -> super::KvHandle {
         super::KvHandle::new(self.kv_store.clone())
     }
+
+    /// Returns a clone of this instance with the KV store replaced by `store`.
+    ///
+    /// Adapters use this to lazily inject the request-specific KV store for
+    /// handlers that require one without rebuilding the rest of the runtime
+    /// services graph.
+    #[must_use]
+    pub fn with_kv_store(self, store: Arc<dyn PlatformKvStore>) -> Self {
+        Self {
+            kv_store: store,
+            ..self
+        }
+    }
 }

 impl fmt::Debug for RuntimeServices {
diff --git a/crates/trusted-server-core/src/proxy.rs b/crates/trusted-server-core/src/proxy.rs
index a1568556a..1d5f42ab8 100644
--- a/crates/trusted-server-core/src/proxy.rs
+++ b/crates/trusted-server-core/src/proxy.rs
@@ -1,3 +1,4 @@
+use crate::backend::DEFAULT_FIRST_BYTE_TIMEOUT;
 use crate::http_util::{compute_encrypted_sha256_token, ct_str_eq};
 use edgezero_core::body::Body as EdgeBody;
 use edgezero_core::http::{request_builder as edge_request_builder, Uri as EdgeUri};
@@ -12,11 +13,11 @@ use crate::constants::{
     HEADER_USER_AGENT, HEADER_X_FORWARDED_FOR,
 };
 use crate::creative::{CreativeCssProcessor, CreativeHtmlProcessor};
+use crate::edge_cookie::get_ec_id;
 use crate::error::TrustedServerError;
 use crate::platform::{PlatformBackendSpec, PlatformHttpRequest, RuntimeServices};
 use crate::settings::Settings;
 use crate::streaming_processor::{Compression, PipelineConfig, StreamProcessor, StreamingPipeline};
-use crate::synthetic::get_synthetic_id;

 /// Chunk size used for streaming content through the rewrite pipeline.
 const STREAMING_CHUNK_SIZE: usize = 8192;
@@ -25,7 +26,13 @@
 fn body_as_reader(body: EdgeBody) -> Cursor<Vec<u8>> {
     Cursor::new(body.into_bytes())
 }

-/// Headers copied from the original client request to the upstream proxy request.
+/// Headers copied from the original client request to the upstream proxy request
+/// when `copy_request_headers` is enabled.
+///
+/// `Accept-Encoding` is also overridden in the same code path, but with a fixed
+/// value ([`SUPPORTED_ENCODINGS`]) rather than forwarding the client's preference.
+/// Both forwarded headers and the Accept-Encoding override are applied together in
+/// the `copy_request_headers` branch of the proxy request builder.
 const PROXY_FORWARD_HEADERS: [header::HeaderName; 5] = [
     HEADER_USER_AGENT,
     HEADER_ACCEPT,
@@ -52,8 +59,8 @@ pub struct ProxyRequestConfig<'a> {
     pub target_url: &'a str,
     /// Whether redirects should be followed automatically.
     pub follow_redirects: bool,
-    /// Whether to append the caller's synthetic ID as a query param.
-    pub forward_synthetic_id: bool,
+    /// Whether to append the caller's EC ID as a query param.
+    pub forward_ec_id: bool,
     /// Optional body to send to the origin.
     pub body: Option<Vec<u8>>,
     /// Additional headers to forward to the origin.
@@ -62,25 +69,35 @@ pub struct ProxyRequestConfig<'a> {
     pub copy_request_headers: bool,
     /// When true, stream the origin response without HTML/CSS rewrites.
     pub stream_passthrough: bool,
-    /// Domain allowlist enforced on the initial target and every redirect hop.
+    /// Domains allowed for the initial request and any redirects.
+    ///
+    /// **Open mode** (`&[]`): every host is permitted. Integration proxies pass `&[]`
+    /// because their target URLs originate from operator-controlled configuration
+    /// (e.g. `trusted-server.toml` integration settings) and are therefore trusted at
+    /// operator setup time rather than at request time.
+    ///
+    /// **Restricted mode** (non-empty slice): only hosts matching a listed pattern are
+    /// permitted. First-party proxy handlers pass `&settings.proxy.allowed_domains`
+    /// because they follow redirect chains that may originate from untrusted
+    /// creative-supplied URLs.
     ///
-    /// An empty slice disables allowlist enforcement (open mode).
-    /// Integration proxies should pass `&[]`; first-party proxy passes
-    /// `&settings.proxy.allowed_domains`.
+    /// **Behavior change from pre-PR-14**: `proxy_with_redirects` previously always
+    /// enforced `&settings.proxy.allowed_domains` regardless of the caller. After PR 14,
+    /// only [`handle_first_party_proxy`] and its siblings enforce the operator allowlist;
+    /// integration proxies use open mode. This is intentional: applying the operator
+    /// domain allowlist to integration redirects would require every operator to enumerate
+    /// every integration CDN in their config, which is impractical.
     pub allowed_domains: &'a [String],
 }

 impl<'a> ProxyRequestConfig<'a> {
-    /// Build a proxy configuration that follows redirects and forwards the synthetic ID.
-    ///
-    /// `allowed_domains` defaults to `&[]` (open mode). Override it for the
-    /// first-party proxy by setting `allowed_domains` directly.
+    /// Build a proxy configuration that follows redirects and forwards the EC ID.
     #[must_use]
     pub fn new(target_url: &'a str) -> Self {
         Self {
             target_url,
             follow_redirects: true,
-            forward_synthetic_id: true,
+            forward_ec_id: true,
             body: None,
             headers: Vec::new(),
             copy_request_headers: true,
@@ -407,16 +424,23 @@ fn finalize_response(
     }
 }

+/// Bundles per-request header configuration and [`RuntimeServices`] for the proxy redirect loop.
 struct ProxyRequestHeaders<'a> {
     additional_headers: &'a [(header::HeaderName, HeaderValue)],
     copy_request_headers: bool,
     services: &'a RuntimeServices,
 }

+struct ProxyRedirectPolicy<'a> {
+    follow_redirects: bool,
+    stream_passthrough: bool,
+    allowed_domains: &'a [String],
+}
+
 /// Proxy a request to a clear target URL while reusing creative rewrite logic.
 ///
 /// This forwards a curated header set, follows redirects when enabled, and can append
-/// the caller's synthetic ID as a `synthetic_id` query parameter to the target URL.
+/// the caller's EC ID as a `ts-ec` query parameter to the target URL.
 /// Optional bodies/headers can be supplied via [`ProxyRequestConfig`].
 ///
 /// # Errors
@@ -432,12 +456,12 @@ pub async fn proxy_request(
     let ProxyRequestConfig {
         target_url,
         follow_redirects,
-        forward_synthetic_id,
+        forward_ec_id,
         body,
         headers,
         copy_request_headers,
         stream_passthrough,
-        allowed_domains: _,
+        allowed_domains,
     } = config;

     let mut target_url_parsed = url::Url::parse(target_url).map_err(|_| {
@@ -446,60 +470,64 @@ pub async fn proxy_request(
         })
     })?;

-    if forward_synthetic_id {
-        append_synthetic_id(&req, &mut target_url_parsed);
+    if forward_ec_id {
+        append_ec_id(&req, &mut target_url_parsed);
     }

     proxy_with_redirects(
         settings,
         &req,
         target_url_parsed,
-        follow_redirects,
         body.as_deref(),
         ProxyRequestHeaders {
             additional_headers: &headers,
             copy_request_headers,
             services,
         },
-        stream_passthrough,
+        ProxyRedirectPolicy {
+            follow_redirects,
+            stream_passthrough,
+            allowed_domains,
+        },
     )
     .await
 }

-fn append_synthetic_id(req: &Request<EdgeBody>, target_url_parsed: &mut url::Url) {
-    let synthetic_id_param = match get_synthetic_id(req) {
+/// Upserts the `ts-ec` query parameter on a URL, replacing any existing value.
+fn upsert_ec_query_param(url: &mut url::Url, ec_id: &str) {
+    let mut pairs: Vec<(String, String)> = url
+        .query_pairs()
+        .filter(|(k, _)| k.as_ref() != "ts-ec")
+        .map(|(k, v)| (k.into_owned(), v.into_owned()))
+        .collect();
+
+    pairs.push(("ts-ec".to_string(), ec_id.to_string()));
+
+    url.set_query(None);
+    let mut serializer = url::form_urlencoded::Serializer::new(String::new());
+    for (k, v) in &pairs {
+        serializer.append_pair(k, v);
+    }
+    url.set_query(Some(&serializer.finish()));
+}
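+
+// Illustrative before/after for `upsert_ec_query_param` (example values only):
+//
+//     https://origin.example/p?a=1&ts-ec=old   =>   https://origin.example/p?a=1&ts-ec=new-id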
+
+fn append_ec_id(req: &Request<EdgeBody>, target_url_parsed: &mut url::Url) {
+    let ec_id_param = match get_ec_id(req) {
         Ok(id) => id,
         Err(e) => {
-            log::warn!("failed to extract synthetic ID for forwarding: {:?}", e);
+            log::warn!("failed to extract EC ID for forwarding: {:?}", e);
             None
         }
     };

-    if let Some(synthetic_id) = synthetic_id_param {
-        let mut pairs: Vec<(String, String)> = target_url_parsed
-            .query_pairs()
-            .filter(|(k, _)| k.as_ref() != "synthetic_id")
-            .map(|(k, v)| (k.into_owned(), v.into_owned()))
-            .collect();
-
-        pairs.push(("synthetic_id".to_string(), synthetic_id));
-
-        target_url_parsed.set_query(None);
-        if !pairs.is_empty() {
-            let mut serializer = url::form_urlencoded::Serializer::new(String::new());
-            for (k, v) in &pairs {
-                serializer.append_pair(k, v);
-            }
-            let query_str = serializer.finish();
-            target_url_parsed.set_query(Some(&query_str));
-        }
-
+    if let Some(ec_id) = ec_id_param {
+        upsert_ec_query_param(target_url_parsed, &ec_id);
         log::debug!(
-            "forwarding synthetic_id to origin url {}",
+            "forwarding EC ID to origin url {}",
             target_url_parsed.as_str()
         );
     } else {
-        log::debug!("no synthetic_id to forward to origin");
+        log::debug!("no EC ID to forward to origin");
     }
 }

@@ -539,10 +567,9 @@ async fn proxy_with_redirects(
     settings: &Settings,
     req: &Request<EdgeBody>,
     target_url_parsed: url::Url,
-    follow_redirects: bool,
     body: Option<&[u8]>,
     request_headers: ProxyRequestHeaders<'_>,
-    stream_passthrough: bool,
+    redirect_policy: ProxyRedirectPolicy<'_>,
 ) -> Result<Response<EdgeBody>, Report<TrustedServerError>> {
     const MAX_REDIRECTS: usize = 4;
@@ -570,7 +597,7 @@ async fn proxy_with_redirects(
             }));
         }

-        if !redirect_is_permitted(&settings.proxy.allowed_domains, host) {
+        if !redirect_is_permitted(redirect_policy.allowed_domains, host) {
             log::warn!(
                 "request to `{}` blocked: host not in proxy allowed_domains",
                 host
@@ -588,7 +615,7 @@ async fn proxy_with_redirects(
                 host: host.to_string(),
                 port: parsed_url.port(),
                 certificate_check: settings.proxy.certificate_check,
-                first_byte_timeout: Duration::from_secs(15),
+                first_byte_timeout: DEFAULT_FIRST_BYTE_TIMEOUT,
             })
             .change_context(TrustedServerError::Proxy {
                 message: "backend registration failed".to_string(),
@@ -602,19 +629,26 @@ async fn proxy_with_redirects(
             })?,
         );

+        // Collect outbound headers using insert-semantics so additional_headers override any
+        // header set by copy_request_headers, matching the old set_header() replace behavior.
+        let mut outbound_headers = http::HeaderMap::new();
         if request_headers.copy_request_headers {
             for header_name in PROXY_FORWARD_HEADERS {
                 if let Some(v) = req.headers().get(&header_name) {
-                    builder = builder.header(header_name.as_str(), v.as_bytes());
+                    outbound_headers.insert(header_name, v.clone());
                 }
             }
-            builder = builder.header(
-                HEADER_ACCEPT_ENCODING.as_str(),
-                SUPPORTED_ENCODINGS.as_bytes(),
+            outbound_headers.insert(
+                HEADER_ACCEPT_ENCODING,
+                HeaderValue::from_static(SUPPORTED_ENCODINGS),
             );
         }
         for (name, value) in request_headers.additional_headers {
-            builder = builder.header(name.clone(), value.clone());
+            // insert() replaces any existing value, matching set_header() semantics.
+            outbound_headers.insert(name.clone(), value.clone());
+        }
+        for (name, value) in &outbound_headers {
+            builder = builder.header(name, value);
         }
         let body_bytes = body.map(<[u8]>::to_vec).unwrap_or_default();
         let edge_req =
@@ -635,8 +669,14 @@ async fn proxy_with_redirects(

         let beresp = platform_resp.response;

-        if !follow_redirects {
-            return finalize_response(settings, req, &current_url, beresp, stream_passthrough);
+        if !redirect_policy.follow_redirects {
+            return finalize_response(
+                settings,
+                req,
+                &current_url,
+                beresp,
+                redirect_policy.stream_passthrough,
+            );
         }

         let status = beresp.status();
@@ -650,7 +690,13 @@ async fn proxy_with_redirects(
         );

         if !is_redirect {
-            return finalize_response(settings, req, &current_url, beresp, stream_passthrough);
+            return finalize_response(
+                settings,
+                req,
+                &current_url,
+                beresp,
+                redirect_policy.stream_passthrough,
+            );
         }

         let Some(location) = beresp
@@ -659,7 +705,13 @@ async fn proxy_with_redirects(
             .and_then(|h| h.to_str().ok())
             .filter(|value| !value.is_empty())
         else {
-            return finalize_response(settings, req, &current_url, beresp, stream_passthrough);
+            return finalize_response(
+                settings,
+                req,
+                &current_url,
+                beresp,
+                redirect_policy.stream_passthrough,
+            );
         };

         if redirect_attempt == MAX_REDIRECTS {
@@ -680,7 +732,13 @@ async fn proxy_with_redirects(

         let next_scheme = next_url.scheme().to_ascii_lowercase();
         if next_scheme != "http" && next_scheme != "https" {
-            return finalize_response(settings, req, &current_url, beresp, stream_passthrough);
+            return finalize_response(
+                settings,
+                req,
+                &current_url,
+                beresp,
+                redirect_policy.stream_passthrough,
+            );
         }

         let next_host = match next_url.host_str() {
@@ -691,7 +749,7 @@ async fn proxy_with_redirects(
                 }));
             }
         };
-        if !redirect_is_permitted(&settings.proxy.allowed_domains, next_host) {
+        if !redirect_is_permitted(redirect_policy.allowed_domains, next_host) {
             log::warn!(
                 "redirect to `{}` blocked: host not in proxy allowed_domains",
                 next_host
@@ -750,7 +808,7 @@ pub async fn handle_first_party_proxy(
         ProxyRequestConfig {
             target_url: &target_url,
             follow_redirects: true,
-            forward_synthetic_id: true,
+            forward_ec_id: true,
             body: None,
             headers: Vec::new(),
             copy_request_headers: true,
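+            // Mode sketch (illustrative): `&[]` here would mean open mode; the
+            // first-party handler instead passes the operator allowlist, i.e.
+            //     allowed_domains: &settings.proxy.allowed_domains,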
@@ -783,44 +841,24 @@ pub async fn handle_first_party_click(
         had_params,
     } = reconstruct_and_validate_signed_target(settings, &req.uri().to_string())?;

-    let synthetic_id = match get_synthetic_id(&req) {
+    let ec_id = match get_ec_id(&req) {
         Ok(id) => id,
         Err(e) => {
-            log::warn!("failed to extract synthetic ID for forwarding: {:?}", e);
+            log::warn!("failed to extract EC ID for forwarding: {:?}", e);
             None
         }
     };

     let mut redirect_target = full_for_token.clone();
-    if let Some(ref synthetic_id_value) = synthetic_id {
+    if let Some(ref ec_id_value) = ec_id {
         match url::Url::parse(&redirect_target) {
             Ok(mut url) => {
-                let mut pairs: Vec<(String, String)> = url
-                    .query_pairs()
-                    .filter(|(k, _)| k.as_ref() != "synthetic_id")
-                    .map(|(k, v)| (k.into_owned(), v.into_owned()))
-                    .collect();
-                pairs.push(("synthetic_id".to_string(), synthetic_id_value.clone()));
-
-                url.set_query(None);
-                if !pairs.is_empty() {
-                    let mut serializer = url::form_urlencoded::Serializer::new(String::new());
-                    for (k, v) in &pairs {
-                        serializer.append_pair(k, v);
-                    }
-                    let query_str = serializer.finish();
-                    url.set_query(Some(&query_str));
-                }
-
-                let final_target = url.to_string();
-                log::debug!("forwarding synthetic_id to target url {}", final_target);
-                redirect_target = final_target;
+                upsert_ec_query_param(&mut url, ec_id_value);
+                redirect_target = url.to_string();
+                log::debug!("forwarding EC ID to target url {}", redirect_target);
             }
             Err(e) => {
-                log::warn!(
-                    "failed to parse target url for synthetic forwarding: {:?}",
-                    e
-                );
+                log::warn!("failed to parse target url for EC ID forwarding: {:?}", e);
             }
         }
     }
@@ -837,13 +875,13 @@ pub async fn handle_first_party_click(
         .and_then(|h| h.to_str().ok())
         .unwrap_or("");
     log::info!(
-        "redirect tsurl={} params_present={} target={} referer={} ua={} synthetic_id={}",
+        "redirect tsurl={} params_present={} target={} referer={} ua={} ec_id={}",
         tsurl,
         had_params,
         redirect_target,
         referer,
         ua,
-        synthetic_id.as_deref().unwrap_or("")
+        ec_id.as_deref().unwrap_or("")
     );

     // 302 redirect to target URL
@@ -1292,17 +1330,27 @@ fn reconstruct_and_validate_signed_target(

 #[cfg(test)]
 mod tests {
+    use std::collections::VecDeque;
+    use std::sync::{Arc, Mutex};
+
     use super::{
         handle_first_party_click, handle_first_party_proxy, handle_first_party_proxy_rebuild,
         handle_first_party_proxy_sign, is_host_allowed, proxy_request, rebuild_response_with_body,
         reconstruct_and_validate_signed_target, redirect_is_permitted, ProxyRequestConfig,
+        SUPPORTED_ENCODINGS,
     };
     use crate::constants::HEADER_ACCEPT;
     use crate::creative;
     use crate::error::{IntoHttpResponse, TrustedServerError};
-    use crate::platform::test_support::noop_services;
+    use crate::platform::test_support::{build_services_with_http_client, noop_services};
+    use crate::platform::{
+        PlatformError, PlatformHttpClient, PlatformHttpRequest, PlatformPendingRequest,
+        PlatformResponse, PlatformSelectResult,
+    };
     use crate::test_support::tests::create_test_settings;
+    use bytes::Bytes;
     use edgezero_core::body::Body as EdgeBody;
+    use edgezero_core::http::response_builder as edge_response_builder;
     use error_stack::Report;
     use http::{header, HeaderValue, Method, Request as HttpRequest, Response, StatusCode};
@@ -1365,6 +1413,79 @@ mod tests {
             .expect("response body should be valid UTF-8")
     }

+    struct QueuedHttpResponse {
+        status: u16,
+        headers: Vec<(header::HeaderName, HeaderValue)>,
+        body: Vec<u8>,
+    }
+
+    #[derive(Default)]
+    struct HeaderAwareStubHttpClient {
+        responses: Mutex<VecDeque<QueuedHttpResponse>>,
+    }
+
+    impl HeaderAwareStubHttpClient {
+        fn new() -> Self {
+            Self::default()
+        }
+
+        fn push_response(
+            &self,
+            status: u16,
+            headers: Vec<(header::HeaderName, HeaderValue)>,
+            body: Vec<u8>,
+        ) {
+            self.responses
+                .lock()
+                .expect("should lock queued responses")
+                .push_back(QueuedHttpResponse {
+                    status,
+                    headers,
+                    body,
+                });
+        }
+    }
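+
+    // Usage sketch (illustrative status/header values):
+    //
+    //     let stub = HeaderAwareStubHttpClient::new();
+    //     stub.push_response(
+    //         302,
+    //         vec![(header::LOCATION, HeaderValue::from_static("https://next.example/"))],
+    //         Vec::new(),
+    //     );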
+
+    #[async_trait::async_trait(?Send)]
+    impl PlatformHttpClient for HeaderAwareStubHttpClient {
+        async fn send(
+            &self,
+            _request: PlatformHttpRequest,
+        ) -> Result<PlatformResponse, Report<PlatformError>> {
+            let queued = self
+                .responses
+                .lock()
+                .expect("should lock queued responses")
+                .pop_front()
+                .ok_or_else(|| Report::new(PlatformError::HttpClient))?;
+
+            let mut builder = edgezero_core::http::response_builder().status(queued.status);
+            for (name, value) in queued.headers {
+                builder = builder.header(name, value);
+            }
+
+            let response = builder
+                .body(EdgeBody::from(queued.body))
+                .expect("should build stub HTTP response");
+
+            Ok(PlatformResponse::new(response))
+        }
+
+        async fn send_async(
+            &self,
+            _request: PlatformHttpRequest,
+        ) -> Result<PlatformPendingRequest, Report<PlatformError>> {
+            Err(Report::new(PlatformError::Unsupported))
+        }
+
+        async fn select(
+            &self,
+            _pending_requests: Vec<PlatformPendingRequest>,
+        ) -> Result<PlatformSelectResult, Report<PlatformError>> {
+            Err(Report::new(PlatformError::Unsupported))
+        }
+    }

     fn build_http_response(status: StatusCode, body: EdgeBody) -> Response<EdgeBody> {
         let mut response = Response::new(body);
         *response.status_mut() = status;
@@ -1378,95 +1499,124 @@ mod tests {
             .and_then(|value| value.to_str().ok())
     }

+    /// Test double that always returns a streaming (non-buffered) response body.
+    ///
+    /// Used to exercise the `Body::Stream` error path in
+    /// `platform_response_to_fastly`, which cannot materialise a streaming body
+    /// into a `fastly::Response`. Only `send` is implemented; `send_async` and
+    /// `select` return `PlatformError::Unsupported`.
+    struct StreamingResponseHttpClient;
+
+    #[async_trait::async_trait(?Send)]
+    impl PlatformHttpClient for StreamingResponseHttpClient {
+        async fn send(
+            &self,
+            _request: PlatformHttpRequest,
+        ) -> Result<PlatformResponse, Report<PlatformError>> {
+            let edge_response = edge_response_builder()
+                .status(StatusCode::OK)
+                .body(EdgeBody::stream(futures::stream::iter(vec![
+                    Bytes::from_static(b"chunk"),
+                ])))
+                .expect("should build streaming test response");
+
+            Ok(PlatformResponse::new(edge_response).with_backend_name("stub-backend"))
+        }
+
+        async fn send_async(
+            &self,
+            _request: PlatformHttpRequest,
+        ) -> Result<PlatformPendingRequest, Report<PlatformError>> {
+            Err(Report::new(PlatformError::Unsupported))
+        }
+
+        async fn select(
+            &self,
+            _pending_requests: Vec<PlatformPendingRequest>,
+        ) -> Result<PlatformSelectResult, Report<PlatformError>> {
+            Err(Report::new(PlatformError::Unsupported))
+        }
+    }

-    #[test]
-    fn proxy_missing_param_returns_400() {
-        futures::executor::block_on(async {
-            let settings = create_test_settings();
-            let req = build_http_request(Method::GET, "https://example.com/first-party/proxy");
-            let err: Report<TrustedServerError> =
-                handle_first_party_proxy(&settings, &noop_services(), req)
-                    .await
-                    .expect_err("expected error");
-            assert_eq!(err.current_context().status_code(), StatusCode::BAD_GATEWAY);
-        });
+    #[tokio::test]
+    async fn proxy_missing_param_returns_400() {
+        let settings = create_test_settings();
+        let req = build_http_request(Method::GET, "https://example.com/first-party/proxy");
+        let err: Report<TrustedServerError> =
+            handle_first_party_proxy(&settings, &noop_services(), req)
+                .await
+                .expect_err("expected error");
+        assert_eq!(err.current_context().status_code(), StatusCode::BAD_GATEWAY);
    }
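+
+    // Usage sketch for the streaming double (illustrative wiring):
+    //
+    //     let services = build_services_with_http_client(
+    //         Arc::new(StreamingResponseHttpClient) as Arc<dyn PlatformHttpClient>,
+    //     );
+    //     // Requests proxied through `services` now carry a streaming body,
+    //     // driving the Body::Stream branch in platform_response_to_fastly.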

-    #[test]
-    fn proxy_missing_or_invalid_token_returns_400() {
-        futures::executor::block_on(async {
-            let settings = create_test_settings();
-            // missing tstoken should 400
-            let req = build_http_request(
-                Method::GET,
-                "https://example.com/first-party/proxy?tsurl=https%3A%2F%2Fcdn.example%2Fa.png",
-            );
-            let err: Report<TrustedServerError> =
-                handle_first_party_proxy(&settings, &noop_services(), req)
-                    .await
-                    .expect_err("expected error");
-            assert_eq!(err.current_context().status_code(), StatusCode::BAD_GATEWAY);
-        });
+    #[tokio::test]
+    async fn proxy_missing_or_invalid_token_returns_400() {
+        let settings = create_test_settings();
+        // missing tstoken should 400
+        let req = build_http_request(
+            Method::GET,
+            "https://example.com/first-party/proxy?tsurl=https%3A%2F%2Fcdn.example%2Fa.png",
+        );
+        let err: Report<TrustedServerError> =
+            handle_first_party_proxy(&settings, &noop_services(), req)
+                .await
+                .expect_err("expected error");
+        assert_eq!(err.current_context().status_code(), StatusCode::BAD_GATEWAY);
    }

-    #[test]
-    fn proxy_sign_returns_signed_url() {
-        futures::executor::block_on(async {
-            let settings = create_test_settings();
-            let body = serde_json::json!({
-                "url": "https://cdn.example/asset.js?c=3&b=2",
-            });
-            let req = build_http_post_json_request("https://edge.example/first-party/sign", &body);
-            let resp = handle_first_party_proxy_sign(&settings, &noop_services(), req)
-                .await
-                .expect("sign ok");
-            assert_eq!(resp.status(), StatusCode::OK);
-            let json = response_body_string(resp);
-            assert!(json.contains("/first-party/proxy?tsurl="), "{}", json);
-            assert!(json.contains("tsexp"), "{}", json);
-            assert!(
-                json.contains("\"base\":\"https://cdn.example/asset.js\""),
-                "{}",
-                json
-            );
-        });
+    #[tokio::test]
+    async fn proxy_sign_returns_signed_url() {
+        let settings = create_test_settings();
+        let body = serde_json::json!({
+            "url": "https://cdn.example/asset.js?c=3&b=2",
+        });
+        let req = build_http_post_json_request("https://edge.example/first-party/sign", &body);
+        let resp = handle_first_party_proxy_sign(&settings, &noop_services(), req)
+            .await
+            .expect("sign ok");
+        assert_eq!(resp.status(), StatusCode::OK);
+        let json = response_body_string(resp);
+        assert!(json.contains("/first-party/proxy?tsurl="), "{}", json);
+        assert!(json.contains("tsexp"), "{}", json);
+        assert!(
+            json.contains("\"base\":\"https://cdn.example/asset.js\""),
+            "{}",
+            json
+        );
    }

-    #[test]
-    fn proxy_sign_rejects_invalid_url() {
-        futures::executor::block_on(async {
-            let settings = create_test_settings();
-            let body = serde_json::json!({
-                "url": "data:image/png;base64,AAAA",
-            });
-            let req = build_http_post_json_request("https://edge.example/first-party/sign", &body);
-            let err: Report<TrustedServerError> =
-                handle_first_party_proxy_sign(&settings, &noop_services(), req)
-                    .await
-                    .expect_err("expected error");
-            assert_eq!(err.current_context().status_code(), StatusCode::BAD_GATEWAY);
-        });
+    #[tokio::test]
+    async fn proxy_sign_rejects_invalid_url() {
+        let settings = create_test_settings();
+        let body = serde_json::json!({
+            "url": "data:image/png;base64,AAAA",
+        });
+        let req = build_http_post_json_request("https://edge.example/first-party/sign", &body);
+        let err: Report<TrustedServerError> =
+            handle_first_party_proxy_sign(&settings, &noop_services(), req)
+                .await
+                .expect_err("expected error");
+        assert_eq!(err.current_context().status_code(), StatusCode::BAD_GATEWAY);
    }

-    #[test]
-    fn proxy_sign_preserves_non_standard_port() {
-        futures::executor::block_on(async {
-            let settings = create_test_settings();
-            let body = serde_json::json!({
-                "url": "https://cdn.example.com:9443/img/300x250.svg",
-            });
-            let req = build_http_post_json_request("https://edge.example/first-party/sign", &body);
-            let resp = handle_first_party_proxy_sign(&settings, &noop_services(), req)
+    #[tokio::test]
+    async fn proxy_sign_preserves_non_standard_port() {
+        let settings = create_test_settings();
+        let body = serde_json::json!({
+            "url": "https://cdn.example.com:9443/img/300x250.svg",
+        });
+        let req = build_http_post_json_request("https://edge.example/first-party/sign", &body);
+        let resp = handle_first_party_proxy_sign(&settings, &noop_services(), req)
+            .await
+            .expect("should sign URL with non-standard port");
-                .await
-                .expect("should sign URL with non-standard port");
-            assert_eq!(resp.status(), StatusCode::OK);
-            let json = response_body_string(resp);
-            // Port 9443 should be preserved (URL-encoded as %3A9443)
-            assert!(
-                json.contains("%3A9443"),
-                "Port should be preserved in signed URL: {}",
-                json
-            );
-        });
+        assert_eq!(resp.status(), StatusCode::OK);
+        let json = response_body_string(resp);
+        // Port 9443 should be preserved (URL-encoded as %3A9443)
+        assert!(
+            json.contains("%3A9443"),
+            "Port should be preserved in signed URL: {}",
+            json
+        );
    }

     #[test]
@@ -1482,10 +1632,7 @@ mod tests {
         assert_eq!(cfg.target_url, "https://example.com/asset");
         assert!(cfg.follow_redirects, "should follow redirects by default");
-        assert!(
-            cfg.forward_synthetic_id,
-            "should forward synthetic id by default"
-        );
+        assert!(cfg.forward_ec_id, "should forward EC ID by default");
         assert_eq!(cfg.body.as_deref(), Some(&[1, 2, 3][..]));
         assert_eq!(cfg.headers.len(), 1, "should include custom header");
         assert!(
@@ -1508,183 +1655,167 @@ mod tests {
         );
     }

-    #[test]
-    fn reconstruct_rejects_expired_tsexp() {
-        futures::executor::block_on(async {
-            use std::time::{Duration, SystemTime, UNIX_EPOCH};
-
-            let settings = create_test_settings();
-            let tsurl = "https://cdn.example/asset.js";
-            let expired = SystemTime::now()
-                .checked_sub(Duration::from_secs(60))
-                .unwrap_or(UNIX_EPOCH)
-                .duration_since(UNIX_EPOCH)
-                .unwrap_or_else(|_| Duration::from_secs(0))
-                .as_secs();
-            let canonical = format!("{}?tsexp={}", tsurl, expired);
-            let sig = crate::http_util::compute_encrypted_sha256_token(&settings, &canonical);
-            let tsurl_encoded =
-                url::form_urlencoded::byte_serialize(tsurl.as_bytes()).collect::<String>();
-            let url = format!(
-                "https://edge.example/first-party/proxy?tsurl={}&tsexp={}&tstoken={}",
-                tsurl_encoded, expired, sig
-            );
-
-            let err: Report<TrustedServerError> =
-                reconstruct_and_validate_signed_target(&settings, &url)
-                    .expect_err("expected expiration failure");
-            assert_eq!(err.current_context().status_code(), StatusCode::BAD_GATEWAY);
-        });
+    #[tokio::test]
+    async fn reconstruct_rejects_expired_tsexp() {
+        use std::time::{Duration, SystemTime, UNIX_EPOCH};
+
+        let settings = create_test_settings();
+        let tsurl = "https://cdn.example/asset.js";
+        let expired = SystemTime::now()
+            .checked_sub(Duration::from_secs(60))
+            .unwrap_or(UNIX_EPOCH)
+            .duration_since(UNIX_EPOCH)
+            .unwrap_or_else(|_| Duration::from_secs(0))
+            .as_secs();
+        let canonical = format!("{}?tsexp={}", tsurl, expired);
+        let sig = crate::http_util::compute_encrypted_sha256_token(&settings, &canonical);
+        let tsurl_encoded =
+            url::form_urlencoded::byte_serialize(tsurl.as_bytes()).collect::<String>();
+        let url = format!(
+            "https://edge.example/first-party/proxy?tsurl={}&tsexp={}&tstoken={}",
+            tsurl_encoded, expired, sig
+        );
+
+        let err: Report<TrustedServerError> =
+            reconstruct_and_validate_signed_target(&settings, &url)
+                .expect_err("expected expiration failure");
+        assert_eq!(err.current_context().status_code(), StatusCode::BAD_GATEWAY);
    }

-    #[test]
-    fn reconstruct_rejects_tampered_tstoken() {
-        futures::executor::block_on(async {
-            let settings = create_test_settings();
-            let tsurl = "https://cdn.example/asset.js";
-            let tsurl_encoded =
-                url::form_urlencoded::byte_serialize(tsurl.as_bytes()).collect::<String>();
-            // Syntactically valid base64url token of the right length, but not the correct signature
-            let bad_token = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
-            let url = format!(
-                "https://edge.example/first-party/proxy?tsurl={}&tstoken={}",
-                tsurl_encoded, bad_token
-            );
-
-            let err: Report<TrustedServerError> =
-                reconstruct_and_validate_signed_target(&settings, &url)
-                    .expect_err("should reject tampered token");
-            assert_eq!(
-                err.current_context().status_code(),
-                StatusCode::FORBIDDEN,
-                "should return 403 for invalid tstoken"
-            );
-        });
+    #[tokio::test]
+    async fn reconstruct_rejects_tampered_tstoken() {
+        let settings = create_test_settings();
+        let tsurl = "https://cdn.example/asset.js";
+        let tsurl_encoded =
+            url::form_urlencoded::byte_serialize(tsurl.as_bytes()).collect::<String>();
+        // Syntactically valid base64url token of the right length, but not the correct signature
+        let bad_token = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
+        let url = format!(
+            "https://edge.example/first-party/proxy?tsurl={}&tstoken={}",
+            tsurl_encoded, bad_token
+        );

+        let err: Report<TrustedServerError> =
+            reconstruct_and_validate_signed_target(&settings, &url)
+                .expect_err("should reject tampered token");
+        assert_eq!(
+            err.current_context().status_code(),
+            StatusCode::FORBIDDEN,
+            "should return 403 for invalid tstoken"
+        );
    }

-    #[test]
-    fn click_missing_params_returns_400() {
-        futures::executor::block_on(async {
-            let settings = create_test_settings();
-            let req = build_http_request(Method::GET, "https://edge.example/first-party/click");
-            let err: Report<TrustedServerError> =
-                handle_first_party_click(&settings, &noop_services(), req)
-                    .await
-                    .expect_err("expected error");
-            assert_eq!(err.current_context().status_code(), StatusCode::BAD_GATEWAY);
-        });
+    #[tokio::test]
+    async fn click_missing_params_returns_400() {
+        let settings = create_test_settings();
+        let req = build_http_request(Method::GET, "https://edge.example/first-party/click");
+        let err: Report<TrustedServerError> =
+            handle_first_party_click(&settings, &noop_services(), req)
+                .await
+                .expect_err("expected error");
+        assert_eq!(err.current_context().status_code(), StatusCode::BAD_GATEWAY);
    }

-    #[test]
-    fn click_valid_token_redirects() {
-        futures::executor::block_on(async {
-            let settings = create_test_settings();
-            let tsurl = "https://cdn.example/a.png";
-            let params = "foo=1&bar=2";
-            let full = format!("{}?{}", tsurl, params);
-            let sig = crate::http_util::compute_encrypted_sha256_token(&settings, &full);
-            let req = build_http_request(
-                Method::GET,
-                format!(
-                    "https://edge.example/first-party/click?tsurl={}&{}&tstoken={}",
-                    url::form_urlencoded::byte_serialize(tsurl.as_bytes()).collect::<String>(),
-                    params,
-                    sig
-                ),
-            );
-            let resp = handle_first_party_click(&settings, &noop_services(), req)
-                .await
-                .expect("should redirect");
-            assert_eq!(resp.status(), StatusCode::FOUND);
-            let loc = resp
-                .headers()
-                .get(http::header::LOCATION)
-                .and_then(|h| h.to_str().ok())
-                .unwrap_or("");
-            assert_eq!(loc, full);
-        });
+    #[tokio::test]
+    async fn click_valid_token_redirects() {
+        let settings = create_test_settings();
+        let tsurl = "https://cdn.example/a.png";
+        let params = "foo=1&bar=2";
+        let full = format!("{}?{}", tsurl, params);
+        let sig = crate::http_util::compute_encrypted_sha256_token(&settings, &full);
+        let req = build_http_request(
+            Method::GET,
+            format!(
+                "https://edge.example/first-party/click?tsurl={}&{}&tstoken={}",
+                url::form_urlencoded::byte_serialize(tsurl.as_bytes()).collect::<String>(),
+                params,
+                sig
+            ),
+        );
+        let resp = handle_first_party_click(&settings, &noop_services(), req)
+            .await
+            .expect("should redirect");
+        assert_eq!(resp.status(), StatusCode::FOUND);
+        let loc = resp
+            .headers()
+            .get(http::header::LOCATION)
+            .and_then(|h| h.to_str().ok())
+            .unwrap_or("");
+        assert_eq!(loc, full);
    }

-    #[test]
-    fn click_appends_synthetic_id_when_present() {
-        futures::executor::block_on(async {
-            let settings = create_test_settings();
-            let tsurl = "https://cdn.example/a.png";
-            let params = "foo=1";
-            let full = format!("{}?{}", tsurl, params);
format!("{}?{}", tsurl, params); - let sig = crate::http_util::compute_encrypted_sha256_token(&settings, &full); - let mut req = build_http_request( - Method::GET, - format!( - "https://edge.example/first-party/click?tsurl={}&{}&tstoken={}", - url::form_urlencoded::byte_serialize(tsurl.as_bytes()).collect::(), - params, - sig - ), - ); - let valid_synthetic_id = crate::test_support::tests::VALID_SYNTHETIC_ID; - req.headers_mut().insert( - crate::constants::HEADER_X_SYNTHETIC_ID, - HeaderValue::from_static(valid_synthetic_id), - ); + #[tokio::test] + async fn click_appends_ec_id_when_present() { + let settings = create_test_settings(); + let tsurl = "https://cdn.example/a.png"; + let params = "foo=1"; + let full = format!("{}?{}", tsurl, params); + let sig = crate::http_util::compute_encrypted_sha256_token(&settings, &full); + let mut req = build_http_request( + Method::GET, + format!( + "https://edge.example/first-party/click?tsurl={}&{}&tstoken={}", + url::form_urlencoded::byte_serialize(tsurl.as_bytes()).collect::(), + params, + sig + ), + ); + req.headers_mut().insert( + crate::constants::HEADER_X_TS_EC, + HeaderValue::from_static("ec-123"), + ); - let resp = handle_first_party_click(&settings, &noop_services(), req) - .await - .expect("should redirect"); + let resp = handle_first_party_click(&settings, &noop_services(), req) + .await + .expect("should redirect"); - let loc = resp - .headers() - .get(header::LOCATION) - .and_then(|h| h.to_str().ok()) - .expect("Location header should be present and valid"); - let parsed = url::Url::parse(loc).expect("Location should be a valid URL"); - let mut pairs: std::collections::HashMap = parsed - .query_pairs() - .map(|(k, v)| (k.into_owned(), v.into_owned())) - .collect(); - assert_eq!(pairs.remove("foo").as_deref(), Some("1")); - assert_eq!( - pairs.remove("synthetic_id").as_deref(), - Some(valid_synthetic_id) - ); - assert!(pairs.is_empty()); - }); + let loc = resp + .headers() + .get(header::LOCATION) + .and_then(|h| h.to_str().ok()) + .expect("Location header should be present and valid"); + let parsed = url::Url::parse(loc).expect("Location should be a valid URL"); + let mut pairs: std::collections::HashMap = parsed + .query_pairs() + .map(|(k, v)| (k.into_owned(), v.into_owned())) + .collect(); + assert_eq!(pairs.remove("foo").as_deref(), Some("1")); + assert_eq!(pairs.remove("ts-ec").as_deref(), Some("ec-123")); + assert!(pairs.is_empty()); } - #[test] - fn proxy_rebuild_adds_and_removes_params() { - futures::executor::block_on(async { - let settings = create_test_settings(); - // Original canonical (no token) - let tsclick = "/first-party/click?tsurl=https%3A%2F%2Fcdn.example%2Flanding.html&x=1"; - let body = serde_json::json!({ - "tsclick": tsclick, - "add": {"y": "2"}, - "del": ["x"], - }); - let req = HttpRequest::builder() - .method(Method::POST) - .uri("https://edge.example/first-party/proxy-rebuild") - .body(EdgeBody::from( - serde_json::to_string(&body).expect("test JSON should serialize"), - )) - .expect("should build proxy rebuild request"); - let resp = handle_first_party_proxy_rebuild(&settings, &noop_services(), req) - .await - .expect("rebuild ok"); - assert_eq!(resp.status(), StatusCode::OK); - let json = response_body_string(resp); - assert!(json.contains("/first-party/click?tsurl=")); - assert!(json.contains("tstoken")); - // Diagnostics - assert!( - json.contains("\"base\":\"https://cdn.example/landing.html\""), - "{}", - json - ); - assert!(json.contains("\"added\":{\"y\":\"2\"}"), "{}", json); - 
assert!(json.contains("\"removed\":[\"x\"]"), "{}", json); + #[tokio::test] + async fn proxy_rebuild_adds_and_removes_params() { + let settings = create_test_settings(); + // Original canonical (no token) + let tsclick = "/first-party/click?tsurl=https%3A%2F%2Fcdn.example%2Flanding.html&x=1"; + let body = serde_json::json!({ + "tsclick": tsclick, + "add": {"y": "2"}, + "del": ["x"], }); + let req = HttpRequest::builder() + .method(Method::POST) + .uri("https://edge.example/first-party/proxy-rebuild") + .body(EdgeBody::from( + serde_json::to_string(&body).expect("test JSON should serialize"), + )) + .expect("should build proxy rebuild request"); + let resp = handle_first_party_proxy_rebuild(&settings, &noop_services(), req) + .await + .expect("rebuild ok"); + assert_eq!(resp.status(), StatusCode::OK); + let json = response_body_string(resp); + assert!(json.contains("/first-party/click?tsurl=")); + assert!(json.contains("tstoken")); + // Diagnostics + assert!( + json.contains("\"base\":\"https://cdn.example/landing.html\""), + "{}", + json + ); + assert!(json.contains("\"added\":{\"y\":\"2\"}"), "{}", json); + assert!(json.contains("\"removed\":[\"x\"]"), "{}", json); } // --- Additional tests covering helper + edge cases --- @@ -1709,98 +1840,86 @@ mod tests { } } - #[test] - fn reconstruct_valid_with_params_preserves_order() { - futures::executor::block_on(async { - let settings = create_test_settings(); - let clear = "https://cdn.example/asset.js?c=3&b=2&a=1"; - // Simulate creative-generated first-party URL - let first_party = creative::build_proxy_url(&settings, clear); - // Reconstruct and validate (need absolute URL for parsing) - let st = reconstruct_and_validate_signed_target( - &settings, - &format!("https://edge.example{}", first_party), - ) - .expect("reconstruct ok"); - assert_eq!(st.tsurl, "https://cdn.example/asset.js"); - assert!(st.had_params); - assert_eq!(st.target_url, canonical_clear_url(clear)); - }); + #[tokio::test] + async fn reconstruct_valid_with_params_preserves_order() { + let settings = create_test_settings(); + let clear = "https://cdn.example/asset.js?c=3&b=2&a=1"; + // Simulate creative-generated first-party URL + let first_party = creative::build_proxy_url(&settings, clear); + // Reconstruct and validate (need absolute URL for parsing) + let st = reconstruct_and_validate_signed_target( + &settings, + &format!("https://edge.example{}", first_party), + ) + .expect("reconstruct ok"); + assert_eq!(st.tsurl, "https://cdn.example/asset.js"); + assert!(st.had_params); + assert_eq!(st.target_url, canonical_clear_url(clear)); } - #[test] - fn reconstruct_valid_without_params() { - futures::executor::block_on(async { - let settings = create_test_settings(); - let clear = "https://cdn.example/asset.js"; - let first_party = creative::build_proxy_url(&settings, clear); - let st = reconstruct_and_validate_signed_target( - &settings, - &format!("https://edge.example{}", first_party), - ) - .expect("reconstruct ok"); - assert_eq!(st.tsurl, clear); - assert!(!st.had_params); - assert_eq!(st.target_url, clear); - }); + #[tokio::test] + async fn reconstruct_valid_without_params() { + let settings = create_test_settings(); + let clear = "https://cdn.example/asset.js"; + let first_party = creative::build_proxy_url(&settings, clear); + let st = reconstruct_and_validate_signed_target( + &settings, + &format!("https://edge.example{}", first_party), + ) + .expect("reconstruct ok"); + assert_eq!(st.tsurl, clear); + assert!(!st.had_params); + assert_eq!(st.target_url, clear); } - 
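+
+    // --- Editor's sketch: a model of the tsurl/tstoken signing scheme ---
+    //
+    // Illustrative only, not part of this patch. It assumes the token is a
+    // base64url-encoded HMAC-SHA256 over the canonical "{tsurl}?tsexp={expiry}"
+    // string; the real construction lives in
+    // `http_util::compute_encrypted_sha256_token` and may differ. The `hmac`,
+    // `sha2`, and `base64` crates are assumed to be available.
+    #[allow(dead_code)]
+    fn sketch_sign_canonical(key: &[u8], canonical: &str) -> String {
+        use base64::Engine;
+        use hmac::{Hmac, Mac};
+        let mut mac = Hmac::<sha2::Sha256>::new_from_slice(key)
+            .expect("HMAC accepts keys of any length");
+        mac.update(canonical.as_bytes());
+        base64::engine::general_purpose::URL_SAFE_NO_PAD.encode(mac.finalize().into_bytes())
+    }
+
+    // Verification recomputes the tag over the reconstructed canonical string
+    // and compares in constant time, then checks `tsexp` against the clock, so
+    // an expired or tampered URL fails before any origin fetch happens.
+    #[allow(dead_code)]
+    fn sketch_verify_canonical(key: &[u8], canonical: &str, token: &str) -> bool {
+        use base64::Engine;
+        use hmac::{Hmac, Mac};
+        let Ok(tag) = base64::engine::general_purpose::URL_SAFE_NO_PAD.decode(token) else {
+            return false;
+        };
+        let mut mac = Hmac::<sha2::Sha256>::new_from_slice(key)
+            .expect("HMAC accepts keys of any length");
+        mac.update(canonical.as_bytes());
+        mac.verify_slice(&tag).is_ok() // constant-time comparison
+    }
+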
#[test] - fn proxy_rejects_unsupported_scheme() { - futures::executor::block_on(async { - let settings = create_test_settings(); - let clear = "ftp://cdn.example/file.gif"; - // Build a first-party proxy URL with a token for the unsupported scheme - let first_party = creative::build_proxy_url(&settings, clear); - let req = - build_http_request(Method::GET, format!("https://edge.example{}", first_party)); - let err: Report = - handle_first_party_proxy(&settings, &noop_services(), req) - .await - .expect_err("expected error"); - assert_eq!(err.current_context().status_code(), StatusCode::BAD_GATEWAY); - }); + #[tokio::test] + async fn proxy_rejects_unsupported_scheme() { + let settings = create_test_settings(); + let clear = "ftp://cdn.example/file.gif"; + // Build a first-party proxy URL with a token for the unsupported scheme + let first_party = creative::build_proxy_url(&settings, clear); + let req = build_http_request(Method::GET, format!("https://edge.example{}", first_party)); + let err: Report = + handle_first_party_proxy(&settings, &noop_services(), req) + .await + .expect_err("expected error"); + assert_eq!(err.current_context().status_code(), StatusCode::BAD_GATEWAY); } - #[test] - fn proxy_invalid_target_url_errors() { - futures::executor::block_on(async { - let settings = create_test_settings(); - // Intentionally malformed target (host missing) but signed consistently - let tsurl = "https://"; // invalid URL - // Manually construct first-party URL matching creative's format - let full_for_token = tsurl.to_string(); - let sig = crate::http_util::compute_encrypted_sha256_token(&settings, &full_for_token); - let url = format!( - "https://edge.example/first-party/proxy?tsurl={}&tstoken={}", - url::form_urlencoded::byte_serialize(tsurl.as_bytes()).collect::(), - sig - ); - let req = build_http_request(Method::GET, &url); - let err: Report = - handle_first_party_proxy(&settings, &noop_services(), req) - .await - .expect_err("expected error"); - assert_eq!(err.current_context().status_code(), StatusCode::BAD_GATEWAY); - }); + #[tokio::test] + async fn proxy_invalid_target_url_errors() { + let settings = create_test_settings(); + // Intentionally malformed target (host missing) but signed consistently + let tsurl = "https://"; // invalid URL + // Manually construct first-party URL matching creative's format + let full_for_token = tsurl.to_string(); + let sig = crate::http_util::compute_encrypted_sha256_token(&settings, &full_for_token); + let url = format!( + "https://edge.example/first-party/proxy?tsurl={}&tstoken={}", + url::form_urlencoded::byte_serialize(tsurl.as_bytes()).collect::(), + sig + ); + let req = build_http_request(Method::GET, &url); + let err: Report = + handle_first_party_proxy(&settings, &noop_services(), req) + .await + .expect_err("expected error"); + assert_eq!(err.current_context().status_code(), StatusCode::BAD_GATEWAY); } - #[test] - fn click_sets_cache_control_no_store_private() { - futures::executor::block_on(async { - let settings = create_test_settings(); - let clear = "https://cdn.example/landing.html?x=1"; - let first_party = creative::build_click_url(&settings, clear); - let req = - build_http_request(Method::GET, format!("https://edge.example{}", first_party)); - let resp = handle_first_party_click(&settings, &noop_services(), req) - .await - .expect("should redirect"); - assert_eq!(resp.status(), StatusCode::FOUND); - let cc = response_header(&resp, header::CACHE_CONTROL).unwrap_or(""); - assert!(cc.contains("no-store")); - 
assert!(cc.contains("private")); - }); + #[tokio::test] + async fn click_sets_cache_control_no_store_private() { + let settings = create_test_settings(); + let clear = "https://cdn.example/landing.html?x=1"; + let first_party = creative::build_click_url(&settings, clear); + let req = build_http_request(Method::GET, format!("https://edge.example{}", first_party)); + let resp = handle_first_party_click(&settings, &noop_services(), req) + .await + .expect("should redirect"); + assert_eq!(resp.status(), StatusCode::FOUND); + let cc = response_header(&resp, header::CACHE_CONTROL).unwrap_or(""); + assert!(cc.contains("no-store")); + assert!(cc.contains("private")); } // --- Finalization path tests (no network) --- @@ -2094,45 +2213,276 @@ mod tests { // --- Platform HTTP client integration --- + #[tokio::test] + async fn proxy_request_calls_platform_http_client_send() { + use crate::platform::test_support::StubHttpClient; + + let stub = Arc::new(StubHttpClient::new()); + stub.push_response(200, b"ok".to_vec()); + let services = build_services_with_http_client( + Arc::clone(&stub) as Arc + ); + let settings = create_test_settings(); + let req = build_http_request(Method::GET, "https://example.com/"); + + let result = proxy_request( + &settings, + req, + ProxyRequestConfig { + target_url: "https://example.com/resource", + follow_redirects: false, + forward_ec_id: false, + body: None, + headers: Vec::new(), + copy_request_headers: false, + stream_passthrough: false, + allowed_domains: &[], + }, + &services, + ) + .await; + + assert!(result.is_ok(), "should proxy successfully"); + let calls = stub.recorded_backend_names(); + assert_eq!(calls.len(), 1, "should call send exactly once"); + assert_eq!( + calls[0], "stub-backend", + "should use backend name from StubBackend" + ); + } + + #[tokio::test] + async fn proxy_request_allows_open_mode_when_settings_allowlist_is_non_empty() { + let mut settings = create_test_settings(); + settings.proxy.allowed_domains = vec!["allowed.example".to_string()]; + + let stub = Arc::new(HeaderAwareStubHttpClient::new()); + stub.push_response(200, Vec::new(), b"ok".to_vec()); + let services = build_services_with_http_client( + Arc::clone(&stub) as Arc + ); + let req = build_http_request(Method::GET, "https://edge.example/"); + + let response = proxy_request( + &settings, + req, + ProxyRequestConfig { + target_url: "https://blocked.example/resource.js", + follow_redirects: false, + forward_ec_id: false, + body: None, + headers: Vec::new(), + copy_request_headers: false, + stream_passthrough: false, + allowed_domains: &[], + }, + &services, + ) + .await + .expect("open mode should ignore settings.proxy.allowed_domains"); + + assert_eq!(response.status(), StatusCode::OK); + assert_eq!(response_body_string(response), "ok"); + } + + #[tokio::test] + async fn proxy_request_uses_config_allowlist_for_redirect_hops() { + let mut settings = create_test_settings(); + settings.proxy.allowed_domains = vec!["origin.example".to_string()]; + + let stub = Arc::new(HeaderAwareStubHttpClient::new()); + stub.push_response( + 302, + vec![( + header::LOCATION, + HeaderValue::from_static("https://redirected.example/final.js"), + )], + Vec::new(), + ); + stub.push_response(200, Vec::new(), b"redirected".to_vec()); + + let services = build_services_with_http_client( + Arc::clone(&stub) as Arc + ); + let req = build_http_request(Method::GET, "https://edge.example/"); + + let response = proxy_request( + &settings, + req, + ProxyRequestConfig { + target_url: "https://origin.example/start.js", + 
follow_redirects: true, + forward_ec_id: false, + body: None, + headers: Vec::new(), + copy_request_headers: false, + stream_passthrough: false, + allowed_domains: &[], + }, + &services, + ) + .await + .expect("open mode should allow redirect hops outside settings allowlist"); + + assert_eq!(response.status(), StatusCode::OK); + assert_eq!(response_body_string(response), "redirected"); + } + + #[tokio::test] + async fn proxy_request_forwards_curated_headers_when_copy_request_headers_is_true() { + use crate::platform::test_support::StubHttpClient; + + let stub = Arc::new(StubHttpClient::new()); + stub.push_response(200, b"ok".to_vec()); + let services = build_services_with_http_client( + Arc::clone(&stub) as Arc + ); + let settings = create_test_settings(); + let mut req = HttpRequest::builder() + .method(Method::GET) + .uri("https://example.com/") + .body(EdgeBody::empty()) + .expect("should build test request"); + req.headers_mut().insert( + header::USER_AGENT, + HeaderValue::from_static("test-agent/1.0"), + ); + req.headers_mut() + .insert(header::ACCEPT, HeaderValue::from_static("text/html")); + req.headers_mut() + .insert(header::ACCEPT_LANGUAGE, HeaderValue::from_static("en-US")); + + let result = proxy_request( + &settings, + req, + ProxyRequestConfig { + target_url: "https://example.com/resource", + follow_redirects: false, + forward_ec_id: false, + body: None, + headers: Vec::new(), + copy_request_headers: true, + stream_passthrough: false, + allowed_domains: &[], + }, + &services, + ) + .await; + + assert!(result.is_ok(), "should proxy successfully"); + let all_headers = stub.recorded_request_headers(); + assert_eq!(all_headers.len(), 1, "should have captured one request"); + let sent = &all_headers[0]; + + let header_value = |name: &str| -> Option { + sent.iter().find(|(n, _)| n == name).map(|(_, v)| v.clone()) + }; + + assert_eq!( + header_value("user-agent").as_deref(), + Some("test-agent/1.0"), + "should forward User-Agent" + ); + assert_eq!( + header_value("accept").as_deref(), + Some("text/html"), + "should forward Accept" + ); + assert_eq!( + header_value("accept-language").as_deref(), + Some("en-US"), + "should forward Accept-Language" + ); + assert_eq!( + header_value("accept-encoding").as_deref(), + Some(SUPPORTED_ENCODINGS), + "should override Accept-Encoding with supported encodings" + ); + } + + #[tokio::test] + async fn proxy_request_passes_through_streaming_platform_response_body() { + // HTTP types can carry streaming bodies; proxy_request returns Ok even when + // the origin sends a streaming body (unlike the old Fastly path which required + // materialising the body before wrapping it in fastly::Response). 
+ let services = build_services_with_http_client( + Arc::new(StreamingResponseHttpClient) as Arc + ); + let settings = create_test_settings(); + let req = HttpRequest::builder() + .method(Method::GET) + .uri("https://example.com/") + .body(EdgeBody::empty()) + .expect("should build test request"); + + let result = proxy_request( + &settings, + req, + ProxyRequestConfig { + target_url: "https://example.com/resource", + follow_redirects: false, + forward_ec_id: false, + body: None, + headers: Vec::new(), + copy_request_headers: false, + stream_passthrough: false, + allowed_domains: &[], + }, + &services, + ) + .await; + + assert!( + result.is_ok(), + "should pass streaming body through with HTTP types: {result:?}" + ); + assert_eq!( + result.expect("should succeed").status(), + StatusCode::OK, + "should preserve the origin status code" + ); + } + #[test] - fn proxy_request_calls_platform_http_client_send() { - futures::executor::block_on(async { - use crate::platform::test_support::{build_services_with_http_client, StubHttpClient}; - use std::sync::Arc; - - let stub = Arc::new(StubHttpClient::new()); - stub.push_response(200, b"ok".to_vec()); - let services = build_services_with_http_client( - Arc::clone(&stub) as Arc - ); - let settings = create_test_settings(); - let req = build_http_request(Method::GET, "https://example.com/"); + fn rebuild_response_with_body_preserves_multiple_set_cookie_headers() { + let mut beresp = Response::new(EdgeBody::empty()); + *beresp.status_mut() = StatusCode::OK; + beresp.headers_mut().append( + header::SET_COOKIE, + HeaderValue::from_static("a=1; Path=/; Secure"), + ); + beresp.headers_mut().append( + header::SET_COOKIE, + HeaderValue::from_static("b=2; Path=/; Secure"), + ); - let result = proxy_request( - &settings, - req, - ProxyRequestConfig { - target_url: "https://example.com/resource", - follow_redirects: false, - forward_synthetic_id: false, - body: None, - headers: Vec::new(), - copy_request_headers: false, - stream_passthrough: false, - allowed_domains: &[], - }, - &services, - ) - .await; - - assert!(result.is_ok(), "should proxy successfully"); - let calls = stub.recorded_backend_names(); - assert_eq!(calls.len(), 1, "should call send exactly once"); - assert_eq!( - calls[0], "stub-backend", - "should use backend name from StubBackend" - ); - }); + let rebuilt = rebuild_response_with_body( + beresp, + "text/html; charset=utf-8", + b"rewritten".to_vec(), + false, + ); + + let cookies: Vec = rebuilt + .headers() + .get_all(header::SET_COOKIE) + .into_iter() + .map(|value| { + value + .to_str() + .expect("should preserve UTF-8 Set-Cookie header values") + .to_string() + }) + .collect(); + + assert_eq!( + cookies, + vec![ + "a=1; Path=/; Secure".to_string(), + "b=2; Path=/; Secure".to_string(), + ], + "should preserve every Set-Cookie value when rebuilding the response" + ); } // --- is_host_allowed --- @@ -2350,45 +2700,80 @@ mod tests { // --- initial target allowlist enforcement (integration-level) --- // - // NOTE: A test for Nth-hop redirect blocking (i.e. exercising the - // `redirect_is_permitted` check that fires *after* receiving a 302 - // response) requires a Viceroy backend fixture that returns a redirect. - // That infrastructure is not available here. The unit tests above for - // `redirect_is_permitted` and `ip_literal_blocked_by_domain_allowlist` - // cover the blocking logic used at every hop. + // The unit tests above cover the host-matching logic itself. 
The tests + // below verify that proxy_request threads config.allowed_domains through + // the initial target check and redirect hops. + + #[tokio::test] + async fn proxy_initial_target_blocked_by_allowlist() { + use crate::http_util::compute_encrypted_sha256_token; + + let mut settings = create_test_settings(); + settings.proxy.allowed_domains = vec!["allowed.com".to_string()]; + + let target = "https://blocked.com/pixel.gif"; + let token = compute_encrypted_sha256_token(&settings, target); + let url = format!( + "https://edge.example/first-party/proxy?tsurl={}&tstoken={}", + urlencoding::encode(target), + token, + ); + let req = build_http_request(Method::GET, url); + let services = crate::platform::test_support::noop_services(); + let err = handle_first_party_proxy(&settings, &services, req) + .await + .expect_err("should block initial target not in allowlist"); + assert_eq!( + err.current_context().status_code(), + StatusCode::FORBIDDEN, + "should return 403 for allowlist violation" + ); + assert!( + matches!( + err.current_context(), + TrustedServerError::AllowlistViolation { .. } + ), + "should be AllowlistViolation error" + ); + } - #[test] - fn proxy_initial_target_blocked_by_allowlist() { - futures::executor::block_on(async { - use crate::http_util::compute_encrypted_sha256_token; - - let mut settings = create_test_settings(); - settings.proxy.allowed_domains = vec!["allowed.com".to_string()]; - - let target = "https://blocked.com/pixel.gif"; - let token = compute_encrypted_sha256_token(&settings, target); - let url = format!( - "https://edge.example/first-party/proxy?tsurl={}&tstoken={}", - urlencoding::encode(target), - token, - ); - let req = build_http_request(Method::GET, url); - let services = crate::platform::test_support::noop_services(); - let err = handle_first_party_proxy(&settings, &services, req) - .await - .expect_err("should block initial target not in allowlist"); - assert_eq!( - err.current_context().status_code(), - StatusCode::FORBIDDEN, - "should return 403 for allowlist violation" - ); - assert!( - matches!( - err.current_context(), - TrustedServerError::AllowlistViolation { .. 
} - ), - "should be AllowlistViolation error" - ); - }); + #[tokio::test] + async fn sign_rejects_oversized_body() { + let settings = create_test_settings(); + let oversized = vec![b'x'; 65537]; + let req = HttpRequest::builder() + .method(Method::POST) + .uri("https://edge.example/first-party/sign") + .header(header::CONTENT_TYPE, "application/json") + .body(EdgeBody::from(oversized)) + .expect("should build request"); + let err = handle_first_party_proxy_sign(&settings, &noop_services(), req) + .await + .expect_err("should reject oversized body"); + assert_eq!( + err.current_context().status_code(), + StatusCode::PAYLOAD_TOO_LARGE, + "should return 413 for oversized sign body" + ); + } + + #[tokio::test] + async fn rebuild_rejects_oversized_body() { + let settings = create_test_settings(); + let oversized = vec![b'x'; 65537]; + let req = HttpRequest::builder() + .method(Method::POST) + .uri("https://edge.example/first-party/proxy-rebuild") + .header(header::CONTENT_TYPE, "application/json") + .body(EdgeBody::from(oversized)) + .expect("should build request"); + let err = handle_first_party_proxy_rebuild(&settings, &noop_services(), req) + .await + .expect_err("should reject oversized body"); + assert_eq!( + err.current_context().status_code(), + StatusCode::PAYLOAD_TOO_LARGE, + "should return 413 for oversized rebuild body" + ); } } diff --git a/crates/trusted-server-core/src/publisher.rs b/crates/trusted-server-core/src/publisher.rs index 0918f3a0f..70c689509 100644 --- a/crates/trusted-server-core/src/publisher.rs +++ b/crates/trusted-server-core/src/publisher.rs @@ -1,14 +1,14 @@ +use std::io::Write; use std::time::Duration; use edgezero_core::body::Body as EdgeBody; use error_stack::{Report, ResultExt}; use http::{header, HeaderValue, Request, Response, StatusCode, Uri}; -use crate::consent::{ - allows_ssc_creation, build_consent_context, kv::ConsentKvOps, ConsentPipelineInput, -}; -use crate::constants::{COOKIE_SYNTHETIC_ID, HEADER_X_COMPRESS_HINT, HEADER_X_SYNTHETIC_ID}; +use crate::consent::{allows_ec_creation, build_consent_context, ConsentPipelineInput}; +use crate::constants::{COOKIE_TS_EC, HEADER_X_COMPRESS_HINT, HEADER_X_TS_EC}; use crate::cookies::handle_request_cookies; +use crate::edge_cookie::get_or_generate_ec_id; use crate::error::TrustedServerError; use crate::http_util::{serve_static_with_etag, RequestInfo}; use crate::integrations::IntegrationRegistry; @@ -17,8 +17,6 @@ use crate::rsc_flight::RscFlightUrlRewriter; use crate::settings::Settings; use crate::streaming_processor::{Compression, PipelineConfig, StreamProcessor, StreamingPipeline}; use crate::streaming_replacer::create_url_replacer; -use crate::synthetic::{get_or_generate_synthetic_id, is_valid_synthetic_id}; - const SUPPORTED_ENCODING_VALUES: [&str; 3] = ["gzip", "deflate", "br"]; const DEFAULT_PUBLISHER_FIRST_BYTE_TIMEOUT: Duration = Duration::from_secs(15); @@ -189,12 +187,21 @@ struct ProcessResponseParams<'a> { integration_registry: &'a IntegrationRegistry, } -/// Process response body in streaming fashion with compression preservation -fn process_response_streaming( +/// Process response body through the streaming pipeline. +/// +/// Selects the appropriate processor based on content type (HTML rewriter, +/// RSC Flight rewriter, or URL replacer) and pipes chunks from `body` +/// through it into `output`. The caller decides what `output` is — a +/// `Vec` for buffered responses, or a `StreamingBody` for streaming. 
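+///
+/// For example, the buffered route drains into a `Vec<u8>` (a sketch of the
+/// call shape; `body` and `params` construction elided, hence `ignore`):
+///
+/// ```ignore
+/// let mut output: Vec<u8> = Vec::new();
+/// process_response_streaming(body, &mut output, &params)?;
+/// let processed = EdgeBody::from(output);
+/// ```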
+///
+/// # Errors
+///
+/// Returns an error if processor creation or chunk processing fails.
+fn process_response_streaming<W: Write>(
     body: EdgeBody,
+    output: &mut W,
     params: &ProcessResponseParams,
-) -> Result<EdgeBody, Report<TrustedServerError>> {
-    // Check if this is HTML content
+) -> Result<(), Report<TrustedServerError>> {
     let is_html = params.content_type.contains("text/html");
     let is_rsc_flight = params.content_type.contains("text/x-component");
     log::debug!(
@@ -206,15 +213,14 @@ fn process_response_streaming(
         params.origin_host
     );

-    // Determine compression type
     let compression = Compression::from_content_encoding(params.content_encoding);
+    let config = PipelineConfig {
+        input_compression: compression,
+        output_compression: compression,
+        chunk_size: 8192,
+    };

-    // Create output body to collect results
-    let mut output = Vec::new();
-
-    // Choose processor based on content type
     if is_html {
-        // Use HTML rewriter for HTML content
         let processor = create_html_stream_processor(
             params.origin_host,
             params.request_host,
             params.request_scheme,
             params.settings,
             params.integration_registry,
         )?;
-
-        let config = PipelineConfig {
-            input_compression: compression,
-            output_compression: compression,
-            chunk_size: 8192,
-        };
-
-        let mut pipeline = StreamingPipeline::new(config, processor);
-        pipeline.process(body_as_reader(body), &mut output)?;
+        StreamingPipeline::new(config, processor).process(body_as_reader(body), output)?;
     } else if is_rsc_flight {
-        // RSC Flight responses are length-prefixed (T rows). A naive string replacement will
-        // corrupt the stream by changing byte lengths without updating the prefixes.
         let processor = RscFlightUrlRewriter::new(
             params.origin_host,
             params.origin_url,
             params.request_host,
             params.request_scheme,
         );
-
-        let config = PipelineConfig {
-            input_compression: compression,
-            output_compression: compression,
-            chunk_size: 8192,
-        };
-
-        let mut pipeline = StreamingPipeline::new(config, processor);
-        pipeline.process(body_as_reader(body), &mut output)?;
+        StreamingPipeline::new(config, processor).process(body_as_reader(body), output)?;
     } else {
-        // Use simple text replacer for non-HTML content
         let replacer = create_url_replacer(
             params.origin_host,
             params.origin_url,
             params.request_host,
             params.request_scheme,
         );
-
-        let config = PipelineConfig {
-            input_compression: compression,
-            output_compression: compression,
-            chunk_size: 8192,
-        };
-
-        let mut pipeline = StreamingPipeline::new(config, replacer);
-        pipeline.process(body_as_reader(body), &mut output)?;
+        StreamingPipeline::new(config, replacer).process(body_as_reader(body), output)?;
     }

-    log::debug!(
-        "Streaming processing complete - output size: {} bytes",
-        output.len()
-    );
-    Ok(EdgeBody::from(output))
+    Ok(())
 }

 /// Create a unified HTML stream processor
@@ -296,11 +271,133 @@ fn create_html_stream_processor(
     Ok(create_html_processor(config))
 }

-/// Proxies requests to the publisher's origin server.
+/// Result of publisher request handling, indicating whether the response
+/// body should be streamed or has already been buffered.
+pub enum PublisherResponse {
+    /// Response is fully buffered and ready to send via `send_to_client()`.
+    Buffered(Response<EdgeBody>),
+    /// Response headers are ready for a streaming response. Covers processable
+    /// content on any status (2xx or non-2xx — e.g., branded 404/500 HTML and
+    /// error JSON still get URL rewriting) where the encoding is supported
+    /// and either the content is non-HTML or no HTML post-processors are
+    /// registered. The caller must:
+    /// 1. Call `finalize_response()` on the response
+    /// 2. Call `response.stream_to_client()` to get a `StreamingBody`
+    /// 3. Call `stream_publisher_body()` with the body and streaming writer
+    /// 4. Call `StreamingBody::finish()`
+    Stream {
+        /// Response with all headers set (EC ID, cookies, etc.)
+        /// but body not yet written. `Content-Length` already removed.
+        response: Response<EdgeBody>,
+        /// Origin body to be piped through the streaming pipeline.
+        body: EdgeBody,
+        /// Parameters for `process_response_streaming`.
+        params: OwnedProcessResponseParams,
+    },
+    /// Non-processable 2xx response (images, fonts, video). The adapter must
+    /// reattach the body to the response before sending it.
+    /// `finalize_response()` and `send_to_client()` are applied at the outer
+    /// response-dispatch level, not in this arm.
+    ///
+    /// `Content-Length` is preserved — the body is unmodified.
+    PassThrough {
+        /// Response with all headers set but body not yet written.
+        response: Response<EdgeBody>,
+        /// Origin body to stream directly to the client.
+        body: EdgeBody,
+    },
+}
+
+/// Routing decision for a proxied response.
+///
+/// Computed purely from response metadata — no side effects, no body is
+/// consumed. [`handle_publisher_request`] calls [`classify_response_route`]
+/// once and dispatches to the matching [`PublisherResponse`] arm. Tests
+/// exercise the classifier directly so the gate formula lives in one place.
+#[derive(Debug, PartialEq, Eq)]
+pub(crate) enum ResponseRoute {
+    /// 2xx non-processable (images, fonts, video), not 204/205. Origin body
+    /// is streamed unmodified via [`PublisherResponse::PassThrough`].
+    PassThrough,
+    /// Processable content with supported encoding and either non-HTML or no
+    /// HTML post-processors registered. Covers both 2xx and non-2xx (e.g.,
+    /// branded 404/500 pages still get origin URL rewriting). Routed through
+    /// [`PublisherResponse::Stream`].
+    Stream,
+    /// Response returned unmodified via [`PublisherResponse::Buffered`] — covers
+    /// 204/205 (RFC-prohibited bodies), non-2xx non-processable content,
+    /// processable content with an empty request host, and unsupported
+    /// encodings.
+    BufferedUnmodified,
+    /// HTML with post-processors registered. Runs the full pipeline into a
+    /// buffer, then returns [`PublisherResponse::Buffered`] with the processed body.
+    BufferedProcessed,
+}
+
+/// Decide how a proxied response should be routed.
+///
+/// Pure: no header mutation, no body consumed. All inputs are extracted
+/// from the origin response at the call site.
+pub(crate) fn classify_response_route(
+    status: StatusCode,
+    content_type: &str,
+    content_encoding: &str,
+    request_host: &str,
+    has_post_processors: bool,
+) -> ResponseRoute {
+    // 204 No Content (RFC 9110 §15.3.5) and 205 Reset Content (§15.3.6)
+    // prohibit a message body. Excluded first so no later arm can emit one
+    // regardless of Content-Type or post-processor registration.
+    if status == StatusCode::NO_CONTENT || status == StatusCode::RESET_CONTENT {
+        return ResponseRoute::BufferedUnmodified;
+    }
+
+    let should_process = is_processable_content_type(content_type);
+
+    // Non-processable content: 2xx streams through unchanged; non-2xx falls
+    // back to buffered (the origin's error body reaches the client as-is).
+    if !should_process {
+        if status.is_success() {
+            return ResponseRoute::PassThrough;
+        }
+        return ResponseRoute::BufferedUnmodified;
+    }
+
+    // Processable content (2xx or non-2xx) still needs URL rewriting against
+    // a known request host — without one, fall back to unmodified.
+    if request_host.is_empty() {
+        return ResponseRoute::BufferedUnmodified;
+    }
+
+    // Unsupported Content-Encoding: we cannot decompress, so processing would
+    // treat compressed bytes as identity and produce garbled output.
+    if !is_supported_content_encoding(content_encoding) {
+        return ResponseRoute::BufferedUnmodified;
+    }
+
+    let is_html = content_type.contains("text/html");
+    if is_html && has_post_processors {
+        // HTML with post-processors: need the full document to inject.
+        return ResponseRoute::BufferedProcessed;
+    }
+
+    ResponseRoute::Stream
+}
+
+/// Owned version of [`ProcessResponseParams`] for returning from
+/// `handle_publisher_request` without lifetime issues.
+pub struct OwnedProcessResponseParams {
+    pub(crate) content_encoding: String,
+    pub(crate) origin_host: String,
+    pub(crate) origin_url: String,
+    pub(crate) request_host: String,
+    pub(crate) request_scheme: String,
+    pub(crate) content_type: String,
+}
+
+/// Stream the publisher response body through the processing pipeline.
 ///
-/// This function forwards incoming requests to the configured origin URL,
-/// preserving headers and request body. It's used as a fallback for routes
-/// not explicitly handled by the trusted server.
+/// Called by the adapter after `stream_to_client()` has committed the
+/// response headers. Writes processed chunks directly to `output`.
 ///
 /// This is `async` because it uses `services.http_client().send(...).await` rather
 /// than the synchronous Fastly SDK `req.send()`. The only caller wraps the entire
 ///
 /// # Errors
 ///
-/// Returns a [`TrustedServerError`] if:
-/// - The proxy request fails
-/// - The origin backend is unreachable
+/// Returns an error if processing fails mid-stream. Since headers are
+/// already committed, the caller should log the error and drop the
+/// `StreamingBody` (client sees a truncated response).
+pub fn stream_publisher_body<W: Write>(
+    body: EdgeBody,
+    output: &mut W,
+    params: &OwnedProcessResponseParams,
+    settings: &Settings,
+    integration_registry: &IntegrationRegistry,
+) -> Result<(), Report<TrustedServerError>> {
+    let borrowed = ProcessResponseParams {
+        content_encoding: &params.content_encoding,
+        origin_host: &params.origin_host,
+        origin_url: &params.origin_url,
+        request_host: &params.request_host,
+        request_scheme: &params.request_scheme,
+        settings,
+        content_type: &params.content_type,
+        integration_registry,
+    };
+    process_response_streaming(body, output, &borrowed)
+}
+
+/// Proxies requests to the publisher's origin server.
+///
+/// Returns a [`PublisherResponse`] indicating how the response should be sent:
+/// - [`PassThrough`](PublisherResponse::PassThrough) — 2xx non-processable content
+///   (images, fonts, video). Body reattached unmodified for `send_to_client()`.
+/// - [`Stream`](PublisherResponse::Stream) — processable content with supported
+///   `Content-Encoding` and either non-HTML or no HTML post-processors.
+///   Applies to both 2xx and non-2xx status (e.g., branded 404/500 HTML and
+///   error JSON still get origin URL rewriting). Body piped through the
+///   streaming pipeline.
+/// - [`Buffered`](PublisherResponse::Buffered) — 204/205, non-2xx non-processable
+///   responses, unsupported encodings, or HTML with post-processors that need
+///   the full document.
+///
+/// # Errors
+///
+/// Returns a [`TrustedServerError`] if the proxy request fails or the
+/// origin backend is unreachable.
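+///
+/// A sketch of adapter-side dispatch (hypothetical glue; `finalize_response`,
+/// `stream_to_client`, and `send_to_client` stand in for adapter calls whose
+/// exact signatures live in the adapter crates):
+///
+/// ```ignore
+/// match handle_publisher_request(&settings, &registry, &services, req).await? {
+///     PublisherResponse::Buffered(resp) => send_to_client(finalize_response(resp)),
+///     PublisherResponse::PassThrough { mut response, body } => {
+///         *response.body_mut() = body; // reattach the unmodified origin body
+///         send_to_client(finalize_response(response))
+///     }
+///     PublisherResponse::Stream { response, body, params } => {
+///         // Headers are committed here; a later failure can only truncate.
+///         let mut out = stream_to_client(finalize_response(response));
+///         if let Err(e) = stream_publisher_body(body, &mut out, &params, &settings, &registry) {
+///             log::error!("mid-stream processing failed: {e:?}");
+///         }
+///         out.finish()
+///     }
+/// }
+/// ```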
 pub async fn handle_publisher_request(
     settings: &Settings,
     integration_registry: &IntegrationRegistry,
     services: &RuntimeServices,
-    kv_ops: Option<&dyn ConsentKvOps>,
     mut req: Request<EdgeBody>,
-) -> Result<Response<EdgeBody>, Report<TrustedServerError>> {
+) -> Result<PublisherResponse, Report<TrustedServerError>> {
     log::debug!("Proxying request to publisher_origin");

     // Prebid.js requests are not intercepted here anymore. The HTML processor removes
@@ -338,21 +471,21 @@ pub async fn handle_publisher_request(
         req.headers().get("x-forwarded-proto"),
     );

-    // Parse cookies once for reuse by both consent extraction and synthetic ID logic.
+    // Parse cookies once for reuse by both consent extraction and EC ID logic.
     let cookie_jar = handle_request_cookies(&req)?;

-    // Capture the current SSC cookie value for revocation handling.
-    // This must come from the cookie itself (not the x-synthetic-id header)
+    // Capture the current EC cookie value for revocation handling.
+    // This must come from the cookie itself (not the x-ts-ec header)
     // to ensure KV deletion targets the same identifier being revoked.
-    let existing_ssc_cookie = cookie_jar
+    let existing_ec_cookie = cookie_jar
         .as_ref()
-        .and_then(|jar| jar.get(COOKIE_SYNTHETIC_ID))
+        .and_then(|jar| jar.get(COOKIE_TS_EC))
         .map(|cookie| cookie.value().to_owned());

-    // Generate synthetic identifiers before the request body is consumed.
+    // Generate EC identifiers before the request body is consumed.
     // Always generated for internal use (KV lookups, logging) even when
     // consent is absent — the cookie is only *set* when consent allows it.
-    let synthetic_id = get_or_generate_synthetic_id(settings, services, &req)?;
+    let ec_id = get_or_generate_ec_id(settings, services, &req)?;

     // Extract, decode, and log consent signals (TCF, GPP, US Privacy, GPC)
     // from the incoming request. The ConsentContext carries both raw strings
@@ -371,15 +504,15 @@ pub async fn handle_publisher_request(
         req: &req,
         config: &settings.consent,
         geo: geo.as_ref(),
-        synthetic_id: Some(synthetic_id.as_str()),
-        kv_ops,
+        ec_id: Some(ec_id.as_str()),
+        kv_store: settings
+            .consent
+            .consent_store
+            .as_deref()
+            .map(|_| services.kv_store()),
     });
-    let ssc_allowed = allows_ssc_creation(&consent_context);
-    log::debug!(
-        "Proxy synthetic IDs - trusted: {}, ssc_allowed: {}",
-        synthetic_id,
-        ssc_allowed,
-    );
+    let ec_allowed = allows_ec_creation(&consent_context);
+    log::debug!("Proxy ec_allowed: {}", ec_allowed);

     let parsed_origin = url::Url::parse(&settings.publisher.origin_url).change_context(
         TrustedServerError::Proxy {
@@ -440,13 +573,23 @@ pub async fn handle_publisher_request(
         })?
         .response;

-    // Log all response headers for debugging
     log::debug!("Response headers:");
     for (name, value) in response.headers() {
         log::debug!(" {}: {:?}", name, value);
     }

-    // Check if the response has a text-based content type that we should process
+    // Set EC ID / cookie headers BEFORE body processing.
+    // These are body-independent (computed from request cookies + consent).
+ apply_ec_headers( + settings, + services, + &mut response, + &ec_id, + ec_allowed, + existing_ec_cookie.as_deref(), + &consent_context, + ); + let content_type = response .headers() .get(header::CONTENT_TYPE) @@ -454,117 +597,179 @@ pub async fn handle_publisher_request( .unwrap_or_default() .to_string(); - let should_process = content_type.contains("text/") - || content_type.contains("application/javascript") - || content_type.contains("application/json"); - - if should_process && !request_host.is_empty() { - // Check if the response is compressed - let content_encoding = response - .headers() - .get(header::CONTENT_ENCODING) - .map(|h| h.to_str().unwrap_or_default()) - .unwrap_or_default() - .to_lowercase(); - - // Log response details for debugging - log::debug!( - "Processing response - Content-Type: {}, Content-Encoding: {}, Request Host: {}, Origin Host: {}", - content_type, content_encoding, request_host, origin_host - ); - - // Take the response body for streaming processing - let body = std::mem::replace(response.body_mut(), EdgeBody::empty()); - - // Process the body using streaming approach - let params = ProcessResponseParams { - content_encoding: &content_encoding, - origin_host: &origin_host, - origin_url: &settings.publisher.origin_url, - request_host, - request_scheme, - settings, - content_type: &content_type, - integration_registry, - }; - match process_response_streaming(body, ¶ms) { - Ok(processed_body) => { - // Set the processed body back - *response.body_mut() = processed_body; + let status = response.status(); + let content_encoding = response + .headers() + .get(header::CONTENT_ENCODING) + .map(|h| h.to_str().unwrap_or_default()) + .unwrap_or_default() + .to_lowercase(); + let has_post_processors = integration_registry.has_html_post_processors(); - // Remove Content-Length as the size has likely changed - response.headers_mut().remove(header::CONTENT_LENGTH); + let route = classify_response_route( + status, + &content_type, + &content_encoding, + request_host, + has_post_processors, + ); - // Keep Content-Encoding header since we're returning compressed content + match route { + ResponseRoute::PassThrough => { + log::debug!( + "Pass-through binary response - Content-Type: '{}', status: {}", + content_type, + status, + ); + let (parts, body) = response.into_parts(); + let response = Response::from_parts(parts, EdgeBody::empty()); + Ok(PublisherResponse::PassThrough { response, body }) + } + ResponseRoute::BufferedUnmodified => { + // Misconfiguration: processable content returned unrewritten because + // we have no Host header to rewrite URLs against. Surface at WARN so + // mis-proxied pages are visible in production logs. + if is_processable_content_type(&content_type) && request_host.is_empty() { + log::warn!( + "Empty request host — returning processable content unmodified (Content-Type: '{}', status: {}). 
Check proxy Host header.", + content_type, + status, + ); + } else if !is_supported_content_encoding(&content_encoding) { + log::warn!( + "Unsupported Content-Encoding '{}' - returning response unmodified", + content_encoding, + ); + } else { log::debug!( - "Preserved Content-Encoding: {} for compressed response", - content_encoding + "Skipping response processing - Content-Type: '{}', request_host: '{}', status: {}", + content_type, + request_host, + status, ); - - log::debug!("Completed streaming processing of response body"); - } - Err(e) => { - log::error!("Failed to process response body: {:?}", e); - // Return an error response - return Err(e); } + Ok(PublisherResponse::Buffered(response)) + } + ResponseRoute::Stream => { + log::debug!( + "Streaming response - Content-Type: {}, Content-Encoding: {}, Request Host: {}, Origin Host: {}", + content_type, content_encoding, request_host, origin_host + ); + + let body = std::mem::replace(response.body_mut(), EdgeBody::empty()); + response.headers_mut().remove(header::CONTENT_LENGTH); + + Ok(PublisherResponse::Stream { + response, + body, + params: OwnedProcessResponseParams { + content_encoding, + origin_host, + origin_url: settings.publisher.origin_url.clone(), + request_host: request_host.to_string(), + request_scheme: request_scheme.to_string(), + content_type, + }, + }) + } + ResponseRoute::BufferedProcessed => { + log::debug!( + "Buffered response - Content-Type: {}, Content-Encoding: {}, Request Host: {}, Origin Host: {}", + content_type, content_encoding, request_host, origin_host + ); + + let body = std::mem::replace(response.body_mut(), EdgeBody::empty()); + let params = ProcessResponseParams { + content_encoding: &content_encoding, + origin_host: &origin_host, + origin_url: &settings.publisher.origin_url, + request_host, + request_scheme, + settings, + content_type: &content_type, + integration_registry, + }; + let mut output = Vec::new(); + process_response_streaming(body, &mut output, ¶ms)?; + + response.headers_mut().insert( + header::CONTENT_LENGTH, + HeaderValue::from(output.len() as u64), + ); + *response.body_mut() = EdgeBody::from(output); + + Ok(PublisherResponse::Buffered(response)) } - } else { - log::debug!( - "Skipping response processing - should_process: {}, request_host: '{}'", - should_process, - request_host - ); } +} - // Consent-gated SSC creation: - // - Consent given → set synthetic ID header + cookie. - // - Consent absent + existing cookie → revoke (expire cookie + delete KV entry). - // - Consent absent + no cookie → do nothing. - if ssc_allowed { - match HeaderValue::from_str(synthetic_id.as_str()) { - Ok(header_value) => { - response - .headers_mut() - .insert(HEADER_X_SYNTHETIC_ID, header_value); +/// Whether the content type requires processing (URL rewriting, HTML injection). +/// +/// Text-based and JavaScript/JSON responses are processable; binary types +/// (images, fonts, video, etc.) pass through unchanged. +fn is_processable_content_type(content_type: &str) -> bool { + content_type.contains("text/") + || content_type.contains("application/javascript") + || content_type.contains("application/json") +} + +/// Whether the `Content-Encoding` is one the streaming pipeline can handle. +/// +/// Unsupported encodings (e.g. `zstd` from a misbehaving origin) bypass the +/// rewrite pipeline entirely and are returned unchanged. Processing such +/// bodies as identity-encoded would produce garbled output. 
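+///
+/// The accepted set mirrors `SUPPORTED_ENCODING_VALUES` plus the two
+/// identity cases:
+///
+/// ```ignore
+/// assert!(is_supported_content_encoding("gzip"));
+/// assert!(is_supported_content_encoding("")); // absent header = identity
+/// assert!(!is_supported_content_encoding("zstd")); // bypasses the rewriter
+/// ```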
+fn is_supported_content_encoding(encoding: &str) -> bool {
+    matches!(encoding, "" | "identity" | "gzip" | "deflate" | "br")
+}
+
+/// Apply EC ID and cookie headers to the response.
+///
+/// Extracted so headers can be set before streaming begins (headers must
+/// be finalized before `stream_to_client()` commits them).
+///
+/// Consent-gated EC creation:
+/// - Consent given → set EC ID header + cookie.
+/// - Consent absent + existing cookie → revoke (expire cookie + delete KV entry).
+/// - Consent absent + no cookie → do nothing.
+fn apply_ec_headers(
+    settings: &Settings,
+    services: &RuntimeServices,
+    response: &mut Response<EdgeBody>,
+    ec_id: &str,
+    ec_allowed: bool,
+    existing_ec_cookie: Option<&str>,
+    consent_context: &crate::consent::ConsentContext,
+) {
+    if ec_allowed {
+        // HeaderValue::from_str rejects \r, \n, and \0, so the EC ID
+        // cannot inject additional response headers.
+        match HeaderValue::from_str(ec_id) {
+            Ok(v) => {
+                response.headers_mut().insert(HEADER_X_TS_EC, v);
             }
             Err(_) => {
-                log::warn!(
-                    "Rejecting synthetic ID response header: value of {} bytes is not a valid header value",
-                    synthetic_id.len()
-                );
+                log::warn!("Rejecting EC ID response header: value is not a valid header value");
             }
         }

-        // Cookie persistence is skipped if the synthetic ID contains RFC 6265-illegal
+        // Cookie persistence is skipped if the EC ID contains RFC 6265-illegal
         // characters. The header is still emitted when consent allows it.
-        crate::cookies::set_synthetic_cookie(settings, &mut response, synthetic_id.as_str());
-    } else if let Some(cookie_synthetic_id) = existing_ssc_cookie.as_deref() {
-        // Always expire the cookie — consent is withdrawn regardless of whether the
-        // stored value is well-formed.
-        crate::cookies::expire_synthetic_cookie(settings, &mut response);
-        if is_valid_synthetic_id(cookie_synthetic_id) {
-            log::info!(
-                "SSC revoked: consent withdrawn (jurisdiction={})",
-                consent_context.jurisdiction,
-            );
-            if let Some(kv) = kv_ops {
-                kv.delete_entry(cookie_synthetic_id);
-            }
-        } else {
-            log::warn!(
-                "SSC cookie has invalid format, skipping KV deletion (len={}, jurisdiction={})",
-                cookie_synthetic_id.len(),
-                consent_context.jurisdiction,
-            );
+        crate::cookies::set_ec_cookie(settings, response, ec_id);
+    } else if let Some(cookie_ec_id) = existing_ec_cookie {
+        log::info!(
+            "EC revoked for '{}': consent withdrawn (jurisdiction={})",
+            cookie_ec_id,
+            consent_context.jurisdiction,
+        );
+        crate::cookies::expire_ec_cookie(settings, response);
+        if settings.consent.consent_store.is_some() {
+            crate::consent::kv::delete_consent_from_kv(services.kv_store(), cookie_ec_id);
         }
     } else {
         log::debug!(
-            "SSC skipped: no consent and no existing cookie (jurisdiction={})",
+            "EC skipped: no consent and no existing cookie (jurisdiction={})",
             consent_context.jurisdiction,
         );
     }
-
-    Ok(response)
 }

 #[cfg(test)]
@@ -574,7 +779,7 @@ mod tests {
     use crate::platform::test_support::{
         build_services_with_http_client, noop_services, StubHttpClient,
     };
-    use crate::test_support::tests::{create_test_settings, VALID_SYNTHETIC_ID};
+    use crate::test_support::tests::create_test_settings;
     use edgezero_core::body::Body as EdgeBody;
     use http::{header, Method, Request as HttpRequest, StatusCode};
     use std::sync::Arc;
@@ -610,21 +815,60 @@ mod tests {
         ("application/octet-stream", false),
         ];

-        for (content_type, should_process) in test_cases {
-            let result = content_type.contains("text/html")
-                || content_type.contains("text/css")
-                || content_type.contains("text/javascript")
-                ||
content_type.contains("application/javascript") - || content_type.contains("application/json"); - + for (content_type, expected) in test_cases { assert_eq!( - result, should_process, - "Content-Type '{}' should_process: expected {}, got {}", - content_type, should_process, result + is_processable_content_type(content_type), + expected, + "Content-Type '{content_type}' should_process: expected {expected}", ); } } + #[test] + fn supported_content_encoding_accepts_known_values() { + assert!(is_supported_content_encoding(""), "should accept empty"); + assert!( + is_supported_content_encoding("identity"), + "should accept identity" + ); + assert!(is_supported_content_encoding("gzip"), "should accept gzip"); + assert!( + is_supported_content_encoding("deflate"), + "should accept deflate" + ); + assert!(is_supported_content_encoding("br"), "should accept br"); + } + + #[test] + fn supported_content_encoding_rejects_unknown_values() { + assert!(!is_supported_content_encoding("zstd"), "should reject zstd"); + assert!( + !is_supported_content_encoding("compress"), + "should reject compress" + ); + assert!( + !is_supported_content_encoding("snappy"), + "should reject snappy" + ); + } + + #[test] + fn unsupported_encoding_response_is_returned_unmodified() { + // Processable (HTML) 2xx with unsupported encoding must route to + // BufferedUnmodified — feeding zstd-compressed bytes to the rewriter + // as identity would produce garbled output. + assert_eq!( + classify_response_route( + StatusCode::OK, + "text/html; charset=utf-8", + "zstd", + "example.com", + false, + ), + ResponseRoute::BufferedUnmodified, + ); + } + #[test] fn test_publisher_origin_host_extraction() { let settings = create_test_settings(); @@ -676,11 +920,252 @@ mod tests { } } - // Note: test_streaming_compressed_content removed as it directly tested private function - // process_response_streaming. The functionality is tested through handle_publisher_request. + // Gate tests — exercise `classify_response_route` directly, the same + // function `handle_publisher_request` calls. If the gate formula changes, + // both production and tests are affected identically: no silent drift. - // Note: test_streaming_brotli_content removed as it directly tested private function - // process_response_streaming. The functionality is tested through handle_publisher_request. + #[test] + fn route_streams_2xx_html_without_post_processors() { + assert_eq!( + classify_response_route( + StatusCode::OK, + "text/html; charset=utf-8", + "gzip", + "example.com", + false, + ), + ResponseRoute::Stream, + ); + } + + #[test] + fn route_buffers_html_with_post_processors_for_processing() { + assert_eq!( + classify_response_route( + StatusCode::OK, + "text/html; charset=utf-8", + "gzip", + "example.com", + true, + ), + ResponseRoute::BufferedProcessed, + ); + } + + #[test] + fn route_streams_non_html_even_with_post_processors_registered() { + // Post-processors only apply to HTML; JSON/JS can still stream. + assert_eq!( + classify_response_route( + StatusCode::OK, + "application/json", + "gzip", + "example.com", + true, + ), + ResponseRoute::Stream, + ); + } + + #[test] + fn route_buffers_unmodified_on_unsupported_encoding() { + // Unsupported encoding cannot be streamed (would be fed to rewriter + // as identity and produce garbled output). 
+        assert_eq!(
+            classify_response_route(StatusCode::OK, "text/html", "zstd", "example.com", false),
+            ResponseRoute::BufferedUnmodified,
+        );
+    }
+
+    #[test]
+    fn route_passes_through_non_processable_2xx() {
+        // Binary content (images, fonts) on 2xx streams the origin body directly.
+        assert_eq!(
+            classify_response_route(StatusCode::OK, "image/png", "", "example.com", false),
+            ResponseRoute::PassThrough,
+        );
+    }
+
+    #[test]
+    fn route_buffers_non_processable_error_responses() {
+        // Non-2xx never passes through — the response needs to reach the client
+        // as-is (with any error body the origin produced).
+        assert_eq!(
+            classify_response_route(StatusCode::NOT_FOUND, "image/png", "", "example.com", false),
+            ResponseRoute::BufferedUnmodified,
+        );
+    }
+
+    #[test]
+    fn route_excludes_204_from_pass_through() {
+        // 204 No Content (RFC 9110 §15.3.5) prohibits a message body.
+        assert_eq!(
+            classify_response_route(
+                StatusCode::NO_CONTENT,
+                "image/png",
+                "",
+                "example.com",
+                false,
+            ),
+            ResponseRoute::BufferedUnmodified,
+        );
+    }
+
+    #[test]
+    fn route_excludes_205_from_pass_through() {
+        // 205 Reset Content (RFC 9110 §15.3.6) prohibits a message body.
+        assert_eq!(
+            classify_response_route(
+                StatusCode::RESET_CONTENT,
+                "image/png",
+                "",
+                "example.com",
+                false,
+            ),
+            ResponseRoute::BufferedUnmodified,
+        );
+    }
+
+    #[test]
+    fn route_excludes_204_for_processable_content_types() {
+        // 204 must stay body-less even when Content-Type would otherwise route
+        // to Stream or BufferedProcessed.
+        assert_eq!(
+            classify_response_route(
+                StatusCode::NO_CONTENT,
+                "text/html; charset=utf-8",
+                "gzip",
+                "example.com",
+                false,
+            ),
+            ResponseRoute::BufferedUnmodified,
+            "204 + HTML must not route to Stream",
+        );
+        assert_eq!(
+            classify_response_route(
+                StatusCode::NO_CONTENT,
+                "text/html; charset=utf-8",
+                "gzip",
+                "example.com",
+                true,
+            ),
+            ResponseRoute::BufferedUnmodified,
+            "204 + HTML + post-processors must not route to BufferedProcessed",
+        );
+    }
+
+    #[test]
+    fn route_excludes_205_for_processable_content_types() {
+        assert_eq!(
+            classify_response_route(
+                StatusCode::RESET_CONTENT,
+                "application/json",
+                "",
+                "example.com",
+                false,
+            ),
+            ResponseRoute::BufferedUnmodified,
+            "205 + JSON must not route to Stream",
+        );
+    }
+
+    #[test]
+    fn route_streams_non_2xx_processable_content() {
+        // Branded 404 or 500 HTML with origin URLs must still be rewritten.
+        // This matches the pre-streaming behavior on main.
+        assert_eq!(
+            classify_response_route(
+                StatusCode::NOT_FOUND,
+                "text/html; charset=utf-8",
+                "gzip",
+                "example.com",
+                false,
+            ),
+            ResponseRoute::Stream,
+        );
+        assert_eq!(
+            classify_response_route(
+                StatusCode::INTERNAL_SERVER_ERROR,
+                "application/json",
+                "gzip",
+                "example.com",
+                false,
+            ),
+            ResponseRoute::Stream,
+        );
+    }
+
+    #[test]
+    fn route_processes_non_2xx_html_with_post_processors() {
+        // Non-2xx HTML with post-processors still needs full-document processing
+        // for head injection, same as 2xx.
+        assert_eq!(
+            classify_response_route(
+                StatusCode::NOT_FOUND,
+                "text/html; charset=utf-8",
+                "gzip",
+                "example.com",
+                true,
+            ),
+            ResponseRoute::BufferedProcessed,
+        );
+    }
+
+    #[test]
+    fn route_passes_through_non_processable_even_with_empty_request_host() {
+        // Empty request_host blocks URL rewriting, but pass-through does no
+        // rewriting, so a non-processable 2xx still streams through.
+        assert_eq!(
+            classify_response_route(StatusCode::OK, "image/png", "", "", false),
+            ResponseRoute::PassThrough,
+        );
+    }
+
+    #[test]
+    fn route_buffers_processable_content_with_empty_request_host() {
+        // Misconfiguration case — URL rewriting needs a host, so the
+        // processable response falls back to buffered, unmodified delivery.
+        assert_eq!(
+            classify_response_route(StatusCode::OK, "text/html", "gzip", "", false),
+            ResponseRoute::BufferedUnmodified,
+        );
+    }
+
+    #[test]
+    fn pass_through_preserves_body_and_content_length() {
+        // Simulate the PassThrough path: take body, reattach, send.
+        // Verify byte-for-byte identity and Content-Length preservation.
+        let image_bytes: Vec<u8> = (0..=255).cycle().take(4096).collect();
+
+        let mut response = Response::builder()
+            .status(StatusCode::OK)
+            .header(header::CONTENT_TYPE, "image/png")
+            .header(header::CONTENT_LENGTH, image_bytes.len() as u64)
+            .body(EdgeBody::from(image_bytes.clone()))
+            .expect("should build test response");
+
+        // Simulate PassThrough: take body then reattach
+        let body = std::mem::replace(response.body_mut(), EdgeBody::empty());
+        // Body is unmodified — Content-Length stays correct
+        assert_eq!(
+            response
+                .headers()
+                .get(header::CONTENT_LENGTH)
+                .and_then(|v| v.to_str().ok())
+                .expect("should have content-length"),
+            "4096",
+            "Content-Length should be preserved for pass-through"
+        );
+
+        // Reattach and verify body content
+        *response.body_mut() = body;
+        let (_, final_body) = response.into_parts();
+        let output = final_body.into_bytes();
+        assert_eq!(
+            output, image_bytes,
+            "pass-through should preserve body byte-for-byte"
+        );
+    }
 
     #[test]
     fn test_content_encoding_detection() {
@@ -803,40 +1288,95 @@ mod tests {
     }
 
     #[test]
-    fn revocation_targets_cookie_synthetic_id_not_header() {
+    fn revocation_targets_cookie_ec_id_not_header() {
         let settings = create_test_settings();
-        let cookie_synthetic_id =
-            "b2a1c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0b1a2.Zx98y7";
         let mut req = build_request(Method::GET, "https://test.example.com/page");
         req.headers_mut().insert(
-            header::HeaderName::from_static("x-synthetic-id"),
-            http::HeaderValue::from_static(VALID_SYNTHETIC_ID),
+            crate::constants::HEADER_X_TS_EC,
+            http::HeaderValue::from_static("header_id"),
         );
         req.headers_mut().insert(
             header::COOKIE,
-            http::HeaderValue::from_str(&format!(
-                "synthetic_id={cookie_synthetic_id}; other=value"
-            ))
-            .expect("cookie header should be valid"),
+            http::HeaderValue::from_static("ts-ec=cookie_id; other=value"),
         );
 
         let cookie_jar = handle_request_cookies(&req).expect("should parse cookies");
-        let existing_ssc_cookie = cookie_jar
+        let existing_ec_cookie = cookie_jar
             .as_ref()
-            .and_then(|jar| jar.get(COOKIE_SYNTHETIC_ID))
+            .and_then(|jar| jar.get(COOKIE_TS_EC))
             .map(|cookie| cookie.value().to_owned());
 
-        let resolved_synthetic_id = get_or_generate_synthetic_id(&settings, &noop_services(), &req)
-            .expect("should resolve synthetic id");
+        let resolved_ec_id =
+            get_or_generate_ec_id(&settings, &noop_services(), &req).expect("should resolve EC ID");
 
         assert_eq!(
-            existing_ssc_cookie.as_deref(),
-            Some(cookie_synthetic_id),
+            existing_ec_cookie.as_deref(),
+            Some("cookie_id"),
            "should read revocation target from cookie value"
         );
         assert_eq!(
-            resolved_synthetic_id, VALID_SYNTHETIC_ID,
-            "should still resolve request synthetic ID from header precedence"
+            resolved_ec_id, "header_id",
+            "should still resolve request EC ID from header precedence"
         );
     }
+
+    #[test]
+    fn revocation_deletes_kv_entry_for_cookie_ec_id() {
+        use crate::platform::test_support::RecordingKvStore;
+
+        let mut settings = create_test_settings();
+        settings.consent.consent_store = Some("test-consent-store".to_string());
+
+        let recording = Arc::new(RecordingKvStore::new());
+        let services = noop_services()
+            .with_kv_store(Arc::clone(&recording) as Arc<dyn PlatformKvStore>);
+
+        let mut response = Response::new(EdgeBody::empty());
+        let consent_ctx = crate::consent::ConsentContext::default();
+
+        apply_ec_headers(
+            &settings,
+            &services,
+            &mut response,
+            "new-ec-id",
+            false,
+            Some("cookie-ec-id"),
+            &consent_ctx,
+        );
+
+        assert_eq!(
+            recording.deleted_keys(),
+            vec!["cookie-ec-id"],
+            "should delete KV entry for the revoked EC cookie ID"
+        );
+    }
+
+    #[test]
+    fn revocation_does_not_delete_kv_when_consent_store_absent() {
+        use crate::platform::test_support::RecordingKvStore;
+
+        let settings = create_test_settings();
+
+        let recording = Arc::new(RecordingKvStore::new());
+        let services = noop_services()
+            .with_kv_store(Arc::clone(&recording) as Arc<dyn PlatformKvStore>);
+
+        let mut response = Response::new(EdgeBody::empty());
+        let consent_ctx = crate::consent::ConsentContext::default();
+
+        apply_ec_headers(
+            &settings,
+            &services,
+            &mut response,
+            "new-ec-id",
+            false,
+            Some("cookie-ec-id"),
+            &consent_ctx,
+        );
+
+        assert!(
+            recording.deleted_keys().is_empty(),
+            "should not delete KV entry when no consent_store is configured"
+        );
+    }
@@ -971,35 +1511,328 @@ mod tests {
         );
     }
 
+    #[tokio::test]
+    async fn publisher_request_uses_platform_http_client_with_http_types() {
+        let settings = create_test_settings();
+        let registry =
+            IntegrationRegistry::new(&settings).expect("should create integration registry");
+        let stub = Arc::new(StubHttpClient::new());
+        stub.push_response(200, b"origin response".to_vec());
+        let services = build_services_with_http_client(
+            Arc::clone(&stub) as Arc<dyn PlatformHttpClient>
+        );
+        let req = HttpRequest::builder()
+            .method(Method::GET)
+            .uri("https://publisher.example/page")
+            .header(header::HOST, "publisher.example")
+            .body(EdgeBody::empty())
+            .expect("should build request");
+
+        let pub_response = handle_publisher_request(&settings, &registry, &services, req)
+            .await
+            .expect("should proxy publisher request");
+        let response = match pub_response {
+            PublisherResponse::Buffered(r) => r,
+            PublisherResponse::PassThrough { mut response, body } => {
+                *response.body_mut() = body;
+                response
+            }
+            PublisherResponse::Stream { response, .. } => response,
+        };
+
+        assert_eq!(response.status(), StatusCode::OK);
+        assert_eq!(response_body_string(response), "origin response");
+        assert_eq!(
+            stub.recorded_backend_names(),
+            vec!["stub-backend".to_string()],
+            "should proxy through the platform http client"
+        );
+    }
+
     #[test]
-    fn publisher_request_uses_platform_http_client_with_http_types() {
-        futures::executor::block_on(async {
-            let settings = create_test_settings();
-            let registry =
-                IntegrationRegistry::new(&settings).expect("should create integration registry");
-            let stub = Arc::new(StubHttpClient::new());
-            stub.push_response(200, b"origin response".to_vec());
-            let services = build_services_with_http_client(
-                Arc::clone(&stub) as Arc<dyn PlatformHttpClient>
-            );
-            let req = HttpRequest::builder()
-                .method(Method::GET)
-                .uri("https://publisher.example/page")
-                .header(header::HOST, "publisher.example")
-                .body(EdgeBody::empty())
-                .expect("should build request");
-
-            let response = handle_publisher_request(&settings, &registry, &services, None, req)
-                .await
-                .expect("should proxy publisher request");
-
-            assert_eq!(response.status(), StatusCode::OK);
-            assert_eq!(response_body_string(response), "origin response");
-            assert_eq!(
-                stub.recorded_backend_names(),
-                vec!["stub-backend".to_string()],
-                "should proxy through the platform http client"
-            );
-        });
-    }
+    fn stream_publisher_body_preserves_gzip_round_trip() {
+        use flate2::write::GzEncoder;
+        use std::io::Write;
+
+        let settings = create_test_settings();
+        let registry =
+            IntegrationRegistry::new(&settings).expect("should create integration registry");
+
+        // Compress CSS containing an origin URL that should be rewritten.
+        // CSS uses the text URL replacer (not lol_html), so inline URLs are rewritten.
+        let css = b"body { background: url('https://origin.example.com/page'); }";
+        let mut compressed = Vec::new();
+        {
+            let mut encoder = GzEncoder::new(&mut compressed, flate2::Compression::default());
+            encoder.write_all(css).expect("should compress");
+            encoder.finish().expect("should finish compression");
+        }
+
+        let body = EdgeBody::from(compressed);
+        let params = OwnedProcessResponseParams {
+            content_encoding: "gzip".to_string(),
+            origin_host: "origin.example.com".to_string(),
+            origin_url: "https://origin.example.com".to_string(),
+            request_host: "proxy.example.com".to_string(),
+            request_scheme: "https".to_string(),
+            content_type: "text/css".to_string(),
+        };
+
+        let mut output = Vec::new();
+        stream_publisher_body(body, &mut output, &params, &settings, &registry)
+            .expect("should process gzip CSS");
+
+        // Decompress output
+        use flate2::read::GzDecoder;
+        use std::io::Read;
+        let mut decoder = GzDecoder::new(&output[..]);
+        let mut decompressed = String::new();
+        decoder
+            .read_to_string(&mut decompressed)
+            .expect("should decompress output");
+
+        assert!(
+            decompressed.contains("proxy.example.com"),
+            "should rewrite origin to proxy. Got: {decompressed}"
+        );
+        assert!(
+            !decompressed.contains("origin.example.com"),
+            "should not contain original host. Got: {decompressed}"
+        );
+    }
+
+    /// Empty origin body on the streaming route must produce no output
+    /// without erroring. Exercises the `Ok(0)` branch of `process_chunks`
+    /// plus the processor's `is_last=true, chunk=[]` terminal call.
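+    ///
+    /// Illustrative shape of the loop this exercises (names follow the doc
+    /// above, not the actual streaming module):
+    ///
+    /// ```ignore
+    /// loop {
+    ///     let n = reader.read(&mut buf)?;        // `Ok(0)` on an empty body
+    ///     if n == 0 {
+    ///         processor.process(&[], true)?;     // terminal `is_last = true` call
+    ///         break;
+    ///     }
+    ///     processor.process(&buf[..n], false)?;
+    /// }
+    /// ```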
+    #[test]
+    fn stream_publisher_body_handles_empty_body() {
+        let settings = create_test_settings();
+        let registry =
+            IntegrationRegistry::new(&settings).expect("should create integration registry");
+
+        let params = OwnedProcessResponseParams {
+            content_encoding: String::new(),
+            origin_host: "origin.example.com".to_string(),
+            origin_url: "https://origin.example.com".to_string(),
+            request_host: "proxy.example.com".to_string(),
+            request_scheme: "https".to_string(),
+            content_type: "text/html; charset=utf-8".to_string(),
+        };
+
+        let mut output = Vec::new();
+        stream_publisher_body(
+            EdgeBody::empty(),
+            &mut output,
+            &params,
+            &settings,
+            &registry,
+        )
+        .expect("should succeed on empty body");
+
+        assert!(
+            output.is_empty(),
+            "empty origin body should produce empty streaming output. Got: {output:?}"
+        );
+    }
+
+    /// Mid-stream decoder failure must surface as an error. The adapter
+    /// relies on this: once headers are committed, it logs and drops the
+    /// `StreamingBody` so the client sees a truncated response. If a decode
+    /// failure silently emitted bytes, the client would see a malformed
+    /// document instead.
+    #[test]
+    fn stream_publisher_body_surfaces_mid_stream_decode_error() {
+        let settings = create_test_settings();
+        let registry =
+            IntegrationRegistry::new(&settings).expect("should create integration registry");
+
+        // Claim gzip encoding but feed non-gzip bytes. The GzDecoder will
+        // error as soon as it tries to read the gzip header.
+        let params = OwnedProcessResponseParams {
+            content_encoding: "gzip".to_string(),
+            origin_host: "origin.example.com".to_string(),
+            origin_url: "https://origin.example.com".to_string(),
+            request_host: "proxy.example.com".to_string(),
+            request_scheme: "https".to_string(),
+            content_type: "text/html".to_string(),
+        };
+
+        let bogus_body = EdgeBody::from(b"not gzip".to_vec());
+        let mut output = Vec::new();
+        let result = stream_publisher_body(bogus_body, &mut output, &params, &settings, &registry);
+
+        assert!(
+            result.is_err(),
+            "decoding bogus gzip as gzip should return Err so the adapter can drop the stream"
+        );
+    }
+
+    /// Pass-through dispatch contract: the adapter handles `PublisherResponse::PassThrough`
+    /// by reattaching the origin body unchanged and letting Fastly emit it.
+    /// Simulate that step and assert byte identity plus Content-Length
+    /// preservation. Overlaps with `pass_through_preserves_body_and_content_length`,
+    /// but this one frames the take-then-reattach pattern as the adapter's
+    /// dispatch contract.
+    #[test]
+    fn publisher_response_pass_through_reattach_preserves_bytes() {
+        // Simulate a 2xx image/png response: Body::from(bytes), take_body(),
+        // then set_body(body). `classify_response_route` already picks
+        // PassThrough for this combination; this covers the adapter's
+        // reattachment half of the contract.
+        let image_bytes: Vec<u8> = (0..=127).cycle().take(2048).collect();
+
+        let mut response = Response::builder()
+            .status(StatusCode::OK)
+            .header(header::CONTENT_TYPE, "image/png")
+            .header(header::CONTENT_LENGTH, image_bytes.len() as u64)
+            .body(EdgeBody::from(image_bytes.clone()))
+            .expect("should build test response");
+
+        // Mirror adapter: take body, then reattach.
+        let body = std::mem::replace(response.body_mut(), EdgeBody::empty());
+        *response.body_mut() = body;
+
+        assert_eq!(
+            response
+                .headers()
+                .get(header::CONTENT_LENGTH)
+                .and_then(|v| v.to_str().ok())
+                .expect("content-length should survive"),
+            "2048"
+        );
+        let (_, final_body) = response.into_parts();
+        let round_trip = final_body.into_bytes();
+        assert_eq!(
+            round_trip, image_bytes,
+            "pass-through reattach must preserve bytes exactly"
+        );
+    }
+
+    /// Buffered-processed dispatch contract: HTML with a registered post-processor
+    /// routes through `BufferedProcessed`, and the handler path sets
+    /// `Content-Length` from the processed body length. Verify that invariant
+    /// via the classifier + `process_response_streaming` composition.
+    #[test]
+    fn buffered_processed_sets_content_length_from_processed_body() {
+        // Configure nextjs so a post-processor is registered.
+        let mut settings = create_test_settings();
+        settings
+            .integrations
+            .insert_config(
+                "nextjs",
+                &serde_json::json!({
+                    "enabled": true,
+                    "rewrite_attributes": ["href", "link", "url"],
+                }),
+            )
+            .expect("should update nextjs config");
+
+        let registry =
+            IntegrationRegistry::new(&settings).expect("should create integration registry");
+
+        assert!(
+            registry.has_html_post_processors(),
+            "nextjs integration must register an HTML post-processor"
+        );
+        assert_eq!(
+            classify_response_route(
+                StatusCode::OK,
+                "text/html; charset=utf-8",
+                "",
+                "proxy.example.com",
+                registry.has_html_post_processors(),
+            ),
+            ResponseRoute::BufferedProcessed,
+            "HTML with post-processors must route to BufferedProcessed"
+        );
+
+        // Feed a small HTML body through the same pipeline the
+        // BufferedProcessed arm uses (Vec<u8> output).
+        let html = b"<a href=\"https://origin.example.com/page\">link</a>";
+        let body = EdgeBody::from(html.to_vec());
+
+        let params = OwnedProcessResponseParams {
+            content_encoding: String::new(),
+            origin_host: "origin.example.com".to_string(),
+            origin_url: "https://origin.example.com".to_string(),
+            request_host: "proxy.example.com".to_string(),
+            request_scheme: "https".to_string(),
+            content_type: "text/html; charset=utf-8".to_string(),
+        };
+        let mut output = Vec::new();
+        stream_publisher_body(body, &mut output, &params, &settings, &registry)
+            .expect("should process buffered HTML");
+
+        assert!(
+            !output.is_empty(),
+            "buffered processed output must not be empty"
+        );
+        let as_str = std::str::from_utf8(&output).expect("output should be valid UTF-8");
+        assert!(
+            as_str.contains("proxy.example.com"),
+            "origin must be rewritten. Got: {as_str}"
+        );
+        assert!(
+            !as_str.contains("origin.example.com"),
+            "origin host must not leak. Got: {as_str}"
+        );
+    }
+
+    /// Document state survives from the streaming pass into the post-processor.
+    /// `NextJsRscPlaceholderRewriter` writes into `IntegrationDocumentState`
+    /// during streaming; `NextJsHtmlPostProcessor` reads it and substitutes.
+    /// Regression test: with post-processors registered, placeholders must
+    /// be inserted during streaming and substituted out of the final output.
+    #[test]
+    fn document_state_placeholders_substitute_through_accumulating_path() {
+        let mut settings = create_test_settings();
+        settings
+            .integrations
+            .insert_config(
+                "nextjs",
+                &serde_json::json!({
+                    "enabled": true,
+                    "rewrite_attributes": ["href", "link", "url"],
+                }),
+            )
+            .expect("should update nextjs config");
+        let registry =
+            IntegrationRegistry::new(&settings).expect("should create integration registry");
+
+        // Small, single-fragment RSC script — placeholder path (not fallback).
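+        // The exact flight-payload shape below is illustrative; what the test
+        // pins down is the mechanism: the streaming pass swaps the script
+        // payload for a `__ts_rsc_payload_*` placeholder, and the
+        // post-processor substitutes the rewritten payload back in.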
+        let html = br#"<script>self.__next_f.push([1,"{\"url\":\"https://origin.example.com/page\"}"])</script>"#;
+        let params = OwnedProcessResponseParams {
+            content_encoding: String::new(),
+            origin_host: "origin.example.com".to_string(),
+            origin_url: "https://origin.example.com".to_string(),
+            request_host: "proxy.example.com".to_string(),
+            request_scheme: "https".to_string(),
+            content_type: "text/html".to_string(),
+        };
+
+        let mut output = Vec::new();
+        stream_publisher_body(
+            EdgeBody::from(html.to_vec()),
+            &mut output,
+            &params,
+            &settings,
+            &registry,
+        )
+        .expect("should process RSC push");
+
+        let processed = String::from_utf8(output).expect("valid UTF-8");
+        assert!(
+            !processed.contains("__ts_rsc_payload_"),
+            "placeholder must be substituted before reaching output. Got: {processed}"
+        );
+        assert!(
+            processed.contains("proxy.example.com/page"),
+            "origin URL must be rewritten in the substituted payload. Got: {processed}"
+        );
+        assert!(
+            !processed.contains("origin.example.com"),
+            "origin host must not leak. Got: {processed}"
+        );
+    }
 }
diff --git a/crates/trusted-server-core/src/request_signing/endpoints.rs b/crates/trusted-server-core/src/request_signing/endpoints.rs
index 110fe3014..ca339fda0 100644
--- a/crates/trusted-server-core/src/request_signing/endpoints.rs
+++ b/crates/trusted-server-core/src/request_signing/endpoints.rs
@@ -8,7 +8,7 @@ use error_stack::{Report, ResultExt};
 use http::{header, Request, Response, StatusCode};
 use serde::{Deserialize, Serialize};
 
-use crate::error::TrustedServerError;
+use crate::error::{IntoHttpResponse, TrustedServerError};
 use crate::platform::RuntimeServices;
 use crate::request_signing::discovery::TrustedServerDiscovery;
 use crate::request_signing::rotation::KeyRotationManager;
@@ -59,34 +59,56 @@ pub fn handle_trusted_server_discovery(
     Ok(json_response(StatusCode::OK, json))
 }
 
+/// JSON request body for the signature verification endpoint.
 #[derive(Debug, Deserialize, Serialize)]
 pub struct VerifySignatureRequest {
+    /// Canonical payload that was signed.
     pub payload: String,
+    /// Base64-encoded Ed25519 signature to verify.
     pub signature: String,
+    /// Key identifier used to look up the public JWK.
     pub kid: String,
 }
 
+/// JSON response body for the signature verification endpoint.
 #[derive(Debug, Deserialize, Serialize)]
 pub struct VerifySignatureResponse {
+    /// Whether signature verification succeeded.
     pub verified: bool,
+    /// Key identifier that was used during verification.
     pub kid: String,
+    /// Human-readable verification result summary.
     pub message: String,
+    /// Error detail when verification fails unexpectedly.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub error: Option<String>,
 }
 
+const VERIFY_MAX_BODY_BYTES: usize = 4096;
+const ADMIN_MAX_BODY_BYTES: usize = 4096;
+
 /// Will verify a signature given a payload and kid
 /// Useful for testing integration with signatures
 ///
 /// # Errors
 ///
-/// Returns an error if the request body cannot be parsed as JSON or if verification fails.
+/// Returns an error if the request body exceeds [`VERIFY_MAX_BODY_BYTES`],
+/// cannot be parsed as JSON, or if the response body cannot be serialized.
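+///
+/// # Example request body
+///
+/// The values below are illustrative; `kid` must reference a key present in
+/// the JWKS config store.
+///
+/// ```json
+/// {
+///   "payload": "canonical-payload",
+///   "signature": "<base64 Ed25519 signature>",
+///   "kid": "ts-2024-01-01"
+/// }
+/// ```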
 pub fn handle_verify_signature(
     _settings: &Settings,
     services: &RuntimeServices,
     req: Request<EdgeBody>,
 ) -> Result<Response<EdgeBody>, Report<TrustedServerError>> {
     let body = req.into_body().into_bytes();
+    if body.len() > VERIFY_MAX_BODY_BYTES {
+        return Err(Report::new(TrustedServerError::RequestTooLarge {
+            message: format!(
+                "verify-signature payload {} exceeds limit of {}",
+                body.len(),
+                VERIFY_MAX_BODY_BYTES,
+            ),
+        }));
+    }
     let verify_req: VerifySignatureRequest =
         serde_json::from_slice(&body).change_context(TrustedServerError::Configuration {
             message: "invalid JSON request body".into(),
@@ -112,12 +134,15 @@ pub fn handle_verify_signature(
             message: "Signature verification failed".into(),
             error: Some("Invalid signature".into()),
         },
-        Err(e) => VerifySignatureResponse {
-            verified: false,
-            kid: verify_req.kid,
-            message: "Verification error".into(),
-            error: Some(format!("{}", e)),
-        },
+        Err(e) => {
+            log::warn!("signature verification failed: {e}");
+            VerifySignatureResponse {
+                verified: false,
+                kid: verify_req.kid,
+                message: "Verification error".into(),
+                error: Some("internal verification error".into()),
+            }
+        }
     };
 
     let response_json = serde_json::to_string(&response).map_err(|e| {
@@ -129,45 +154,112 @@ pub fn handle_verify_signature(
     Ok(json_response(StatusCode::OK, response_json))
 }
 
+/// JSON request body for the key-rotation endpoint.
 #[derive(Debug, Deserialize, Serialize)]
 pub struct RotateKeyRequest {
+    /// Optional explicit key identifier for the new signing key.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub kid: Option<String>,
 }
 
+/// JSON response body for the key-rotation endpoint.
 #[derive(Debug, Deserialize, Serialize)]
 pub struct RotateKeyResponse {
+    /// Whether the rotation operation succeeded.
     pub success: bool,
+    /// Human-readable summary of the rotation result.
     pub message: String,
+    /// Newly generated or supplied key identifier.
     pub new_kid: String,
+    /// Previously active key identifier, if one existed.
     pub previous_kid: Option<String>,
+    /// Active key identifiers after the rotation completes.
     pub active_kids: Vec<String>,
+    /// Public JWK associated with the newly active key.
     pub jwk: serde_json::Value,
+    /// Error detail when rotation fails.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub error: Option<String>,
 }
 
-/// Rotates the current active kid by generating and saving a new one
+struct SigningStoreIds<'a> {
+    config_store_id: &'a str,
+    secret_store_id: &'a str,
+}
+
+const MAX_KID_LENGTH: usize = 128;
+
+fn signing_store_ids(
+    settings: &Settings,
+) -> Result<SigningStoreIds<'_>, Report<TrustedServerError>> {
+    settings
+        .request_signing
+        .as_ref()
+        .map(|setting| SigningStoreIds {
+            config_store_id: setting.config_store_id.as_str(),
+            secret_store_id: setting.secret_store_id.as_str(),
+        })
+        .ok_or_else(|| {
+            TrustedServerError::Configuration {
+                message: "missing signing storage configuration".to_string(),
+            }
+            .into()
+        })
+}
+
+fn validate_kid(kid: &str) -> Result<(), Report<TrustedServerError>> {
+    if kid.is_empty() || kid.len() > MAX_KID_LENGTH {
+        return Err(Report::new(TrustedServerError::BadRequest {
+            message: format!("kid must be 1..={MAX_KID_LENGTH} characters"),
+        }));
+    }
+
+    if !kid
+        .chars()
+        .all(|c| c.is_ascii_alphanumeric() || matches!(c, '-' | '_' | '.' | ':'))
+    {
+        return Err(Report::new(TrustedServerError::BadRequest {
+            message: "kid must contain only ASCII alphanumerics, '-', '_', '.', ':'".into(),
+        }));
+    }
+
+    Ok(())
+}
+
+/// Rotates the current active kid by generating and saving a new one.
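+///
+/// # Example request body
+///
+/// The body is optional: an empty body generates a date-based kid, while an
+/// explicit `kid` (illustrative value below) must pass [`validate_kid`].
+///
+/// ```json
+/// { "kid": "my-rotated-key" }
+/// ```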
+///
+/// # Response contract
+///
+/// Returns `200 OK` with `success: true` on success, `400 Bad Request` for an
+/// invalid operator-supplied `kid`, or `500 Internal Server Error` when rotation
+/// fails. Failure responses include `success: false` and a populated `error`
+/// field. Unlike [`handle_verify_signature`], the error field contains internal
+/// detail — this is intentional because this endpoint is auth-gated and
+/// operator-facing only.
 ///
 /// # Errors
 ///
-/// Returns an error if the request signing settings are missing, JSON parsing fails, or key rotation fails.
+/// Returns an error if the request signing settings are missing, the payload
+/// exceeds [`ADMIN_MAX_BODY_BYTES`], or JSON parsing fails.
 pub fn handle_rotate_key(
     settings: &Settings,
     services: &RuntimeServices,
     req: Request<EdgeBody>,
 ) -> Result<Response<EdgeBody>, Report<TrustedServerError>> {
-    let (config_store_id, secret_store_id) = match &settings.request_signing {
-        Some(setting) => (&setting.config_store_id, &setting.secret_store_id),
-        None => {
-            return Err(TrustedServerError::Configuration {
-                message: "missing signing storage configuration".to_string(),
-            }
-            .into());
-        }
-    };
+    let SigningStoreIds {
+        config_store_id,
+        secret_store_id,
+    } = signing_store_ids(settings)?;
 
     let body = req.into_body().into_bytes();
+    if body.len() > ADMIN_MAX_BODY_BYTES {
+        return Err(Report::new(TrustedServerError::RequestTooLarge {
+            message: format!(
+                "rotate-key payload {} exceeds limit of {}",
+                body.len(),
+                ADMIN_MAX_BODY_BYTES,
+            ),
+        }));
+    }
     let rotate_req: RotateKeyRequest = if body.is_empty() {
         RotateKeyRequest { kid: None }
     } else {
@@ -177,8 +269,14 @@ pub fn handle_rotate_key(
     };
 
     let manager = KeyRotationManager::new(config_store_id, secret_store_id);
+    let validation_result = if let Some(kid) = rotate_req.kid.as_deref() {
+        validate_kid(kid)
+    } else {
+        Ok(())
+    };
+    let result = validation_result.and_then(|()| manager.rotate_key(services, rotate_req.kid));
 
-    match manager.rotate_key(services, rotate_req.kid) {
+    match result {
         Ok(result) => {
             let jwk_value = serde_json::to_value(&result.jwk).map_err(|e| {
                 Report::new(TrustedServerError::Configuration {
@@ -205,6 +303,7 @@ pub fn handle_rotate_key(
             Ok(json_response(StatusCode::OK, response_json))
         }
         Err(e) => {
+            let status = e.current_context().status_code();
             let response = RotateKeyResponse {
                 success: false,
                 message: "Key rotation failed".to_string(),
@@ -221,53 +320,73 @@ pub fn handle_rotate_key(
                 })
             })?;
 
-            Ok(json_response(
-                StatusCode::INTERNAL_SERVER_ERROR,
-                response_json,
-            ))
+            Ok(json_response(status, response_json))
         }
     }
 }
 
+/// JSON request body for the key-deactivation endpoint.
 #[derive(Debug, Deserialize, Serialize)]
 pub struct DeactivateKeyRequest {
+    /// Key identifier to deactivate or delete.
     pub kid: String,
+    /// Whether the key should be deleted from storage after deactivation.
     #[serde(default)]
     pub delete: bool,
 }
 
+/// JSON response body for the key-deactivation endpoint.
 #[derive(Debug, Deserialize, Serialize)]
 pub struct DeactivateKeyResponse {
+    /// Whether the deactivation or deletion succeeded.
     pub success: bool,
+    /// Human-readable summary of the operation result.
     pub message: String,
+    /// Key identifier that was deactivated or deleted.
     pub deactivated_kid: String,
+    /// Whether the key was deleted from storage.
     pub deleted: bool,
+    /// Active key identifiers remaining after the operation.
     pub remaining_active_kids: Vec<String>,
+    /// Error detail when the operation fails.
     #[serde(skip_serializing_if = "Option::is_none")]
     pub error: Option<String>,
 }
 
-/// Deactivates an active key
+/// Deactivates or deletes an active signing key.
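+///
+/// # Example request body
+///
+/// `delete` is optional and defaults to `false` (deactivate only); the kid
+/// value below is illustrative.
+///
+/// ```json
+/// { "kid": "ts-2024-01-01", "delete": true }
+/// ```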
+///
+/// # Response contract
+///
+/// Returns `200 OK` with `success: true` on success, `400 Bad Request` for an
+/// invalid operator-supplied `kid`, or `500 Internal Server Error` when
+/// deactivation fails. Failure responses include `success: false` and a populated
+/// `error` field. Like [`handle_rotate_key`] and unlike
+/// [`handle_verify_signature`], the error field contains internal detail — this
+/// is intentional because this endpoint is auth-gated and operator-facing only.
 ///
 /// # Errors
 ///
-/// Returns an error if the request signing settings are missing, JSON parsing fails, or key deactivation fails.
+/// Returns an error if the request signing settings are missing, the payload
+/// exceeds [`ADMIN_MAX_BODY_BYTES`], or JSON parsing fails.
 pub fn handle_deactivate_key(
     settings: &Settings,
     services: &RuntimeServices,
     req: Request<EdgeBody>,
 ) -> Result<Response<EdgeBody>, Report<TrustedServerError>> {
-    let (config_store_id, secret_store_id) = match &settings.request_signing {
-        Some(setting) => (&setting.config_store_id, &setting.secret_store_id),
-        None => {
-            return Err(TrustedServerError::Configuration {
-                message: "missing signing storage configuration".to_string(),
-            }
-            .into());
-        }
-    };
+    let SigningStoreIds {
+        config_store_id,
+        secret_store_id,
+    } = signing_store_ids(settings)?;
 
     let body = req.into_body().into_bytes();
+    if body.len() > ADMIN_MAX_BODY_BYTES {
+        return Err(Report::new(TrustedServerError::RequestTooLarge {
+            message: format!(
+                "deactivate-key payload {} exceeds limit of {}",
+                body.len(),
+                ADMIN_MAX_BODY_BYTES,
+            ),
+        }));
+    }
     let deactivate_req: DeactivateKeyRequest =
         serde_json::from_slice(&body).change_context(TrustedServerError::Configuration {
             message: "invalid JSON request body".into(),
@@ -275,11 +394,13 @@ pub fn handle_deactivate_key(
 
     let manager = KeyRotationManager::new(config_store_id, secret_store_id);
 
-    let result = if deactivate_req.delete {
-        manager.delete_key(services, &deactivate_req.kid)
-    } else {
-        manager.deactivate_key(services, &deactivate_req.kid)
-    };
+    let result = validate_kid(&deactivate_req.kid).and_then(|()| {
+        if deactivate_req.delete {
+            manager.delete_key(services, &deactivate_req.kid)
+        } else {
+            manager.deactivate_key(services, &deactivate_req.kid)
+        }
+    });
 
     match result {
         Ok(()) => {
@@ -310,6 +431,7 @@ pub fn handle_deactivate_key(
             Ok(json_response(StatusCode::OK, response_json))
         }
         Err(e) => {
+            let status = e.current_context().status_code();
             let response = DeactivateKeyResponse {
                 success: false,
                 message: if deactivate_req.delete {
@@ -329,27 +451,20 @@ pub fn handle_deactivate_key(
                 })
             })?;
 
-            Ok(json_response(
-                StatusCode::INTERNAL_SERVER_ERROR,
-                response_json,
-            ))
+            Ok(json_response(status, response_json))
         }
     }
 }
 
 #[cfg(test)]
 mod tests {
-    use std::collections::HashMap;
-
     use edgezero_core::body::Body as EdgeBody;
     use error_stack::Report;
     use http::{header, Method, Request as HttpRequest, StatusCode};
 
     use crate::platform::{
-        test_support::{
-            build_services_with_config, build_services_with_config_and_secret, noop_services,
-        },
-        PlatformConfigStore, PlatformError, PlatformSecretStore, StoreId, StoreName,
+        test_support::{build_request_signing_services, build_services_with_config, noop_services},
+        PlatformConfigStore, PlatformError, StoreId, StoreName,
     };
 
     use super::*;
@@ -383,67 +498,6 @@ mod tests {
         );
     }
 
-    /// Build `RuntimeServices` pre-loaded with a real Ed25519 keypair for
-    /// testing signature creation and verification in endpoint handlers.
-    fn build_signing_services_for_test() -> crate::platform::RuntimeServices {
-        use base64::{engine::general_purpose, Engine};
-        use ed25519_dalek::SigningKey;
-        use rand::rngs::OsRng;
-
-        struct MapConfigStore(HashMap<String, String>);
-        impl PlatformConfigStore for MapConfigStore {
-            fn get(&self, _: &StoreName, key: &str) -> Result<String, Report<PlatformError>> {
-                self.0
-                    .get(key)
-                    .cloned()
-                    .ok_or_else(|| Report::new(PlatformError::ConfigStore))
-            }
-            fn put(&self, _: &StoreId, _: &str, _: &str) -> Result<(), Report<PlatformError>> {
-                Err(Report::new(PlatformError::Unsupported))
-            }
-            fn delete(&self, _: &StoreId, _: &str) -> Result<(), Report<PlatformError>> {
-                Err(Report::new(PlatformError::Unsupported))
-            }
-        }
-
-        struct MapSecretStore(HashMap<String, Vec<u8>>);
-        impl PlatformSecretStore for MapSecretStore {
-            fn get_bytes(
-                &self,
-                _: &StoreName,
-                key: &str,
-            ) -> Result<Vec<u8>, Report<PlatformError>> {
-                self.0
-                    .get(key)
-                    .cloned()
-                    .ok_or_else(|| Report::new(PlatformError::SecretStore))
-            }
-            fn create(&self, _: &StoreId, _: &str, _: &str) -> Result<(), Report<PlatformError>> {
-                Err(Report::new(PlatformError::Unsupported))
-            }
-            fn delete(&self, _: &StoreId, _: &str) -> Result<(), Report<PlatformError>> {
-                Err(Report::new(PlatformError::Unsupported))
-            }
-        }
-
-        let signing_key = SigningKey::generate(&mut OsRng);
-        let key_b64 = general_purpose::STANDARD.encode(signing_key.as_bytes());
-        let x_b64 = general_purpose::URL_SAFE_NO_PAD.encode(signing_key.verifying_key().as_bytes());
-        let jwk_json = format!(
-            r#"{{"kty":"OKP","crv":"Ed25519","x":"{}","kid":"test-kid","alg":"EdDSA"}}"#,
-            x_b64
-        );
-
-        let mut cfg = HashMap::new();
-        cfg.insert("current-kid".to_string(), "test-kid".to_string());
-        cfg.insert("test-kid".to_string(), jwk_json);
-
-        let mut sec = HashMap::new();
-        sec.insert("test-kid".to_string(), key_b64.into_bytes());
-
-        build_services_with_config_and_secret(MapConfigStore(cfg), MapSecretStore(sec))
-    }
-
     /// Config store stub that returns a minimal JWKS with one Ed25519 key.
     struct StubJwksConfigStore;
 
@@ -471,7 +525,7 @@ mod tests {
     #[test]
     fn test_handle_verify_signature_valid() {
         let settings = crate::test_support::tests::create_test_settings();
-        let services = build_signing_services_for_test();
+        let services = build_request_signing_services();
 
         let payload = "test message";
         let signer = crate::request_signing::RequestSigner::from_services(&services)
@@ -510,7 +564,7 @@ mod tests {
     #[test]
     fn test_handle_verify_signature_invalid() {
         let settings = crate::test_support::tests::create_test_settings();
-        let services = build_signing_services_for_test();
+        let services = build_request_signing_services();
 
         let signer = crate::request_signing::RequestSigner::from_services(&services)
             .expect("should create signer from services");
@@ -549,6 +603,50 @@ mod tests {
         assert!(verify_resp.error.is_some());
     }
 
+    #[test]
+    fn test_handle_verify_signature_hides_internal_error_details() {
+        let settings = crate::test_support::tests::create_test_settings();
+
+        let verify_req = VerifySignatureRequest {
+            payload: "test message".to_string(),
+            signature: "any-signature".to_string(),
+            kid: "missing-kid".to_string(),
+        };
+
+        let body = serde_json::to_string(&verify_req).expect("should serialize verify request");
+        let req = build_request(
+            Method::POST,
+            "https://test.com/verify-signature",
+            Some(&body),
+        );
+
+        let services = noop_services();
+        let resp = handle_verify_signature(&settings, &services, req)
+            .expect("should return a verification response for internal errors");
+
+        assert_eq!(resp.status(), StatusCode::OK, "should return 200 OK");
+
+        let resp_body = response_body_string(resp);
+        let verify_resp: VerifySignatureResponse =
+            serde_json::from_str(&resp_body).expect("should deserialize verify response");
+
+        assert!(
+            !verify_resp.verified,
+            "should mark internal verification errors as unverified"
+        );
+        assert_eq!(verify_resp.kid, "missing-kid");
+        assert_eq!(verify_resp.message, "Verification error");
+        assert_eq!(
+            verify_resp.error.as_deref(),
+            Some("internal verification error"),
+            "should return a generic error to unauthenticated callers"
+        );
+        assert!(
+            !resp_body.contains("failed"),
+            "should not leak internal error details in the response body"
+        );
+    }
+
     #[test]
     fn test_handle_verify_signature_malformed_request() {
         let settings = crate::test_support::tests::create_test_settings();
@@ -568,20 +666,27 @@ mod tests {
         let settings = crate::test_support::tests::create_test_settings();
         let req = build_request(Method::POST, "https://test.com/admin/keys/rotate", None);
 
-        let result = handle_rotate_key(&settings, &noop_services(), req);
-        match result {
-            Ok(resp) => {
-                let body = response_body_string(resp);
-                let response: RotateKeyResponse =
-                    serde_json::from_str(&body).expect("should deserialize rotate response");
-                log::debug!(
-                    "Rotation response: success={}, message={}",
-                    response.success,
-                    response.message
-                );
-            }
-            Err(e) => log::debug!("Expected error in test environment: {}", e),
-        }
+        let resp = handle_rotate_key(&settings, &noop_services(), req)
+            .expect("should return a response even when stores are unavailable");
+
+        assert_eq!(
+            resp.status(),
+            StatusCode::INTERNAL_SERVER_ERROR,
+            "should return 500 when store writes fail"
+        );
+
+        let body = response_body_string(resp);
+        let response: RotateKeyResponse =
+            serde_json::from_str(&body).expect("should deserialize rotate response");
+
+        assert!(
+            !response.success,
+            "should report failure when store writes fail"
+        );
+        assert!(
+            response.error.is_some(),
+            "should include error detail in failure response"
+        );
     }
 
     #[test]
@@ -599,20 +704,27 @@ mod tests {
             Some(&body_json),
         );
 
-        let result = handle_rotate_key(&settings, &noop_services(), req);
-        match result {
-            Ok(resp) => {
-                let body = response_body_string(resp);
-                let response: RotateKeyResponse =
-                    serde_json::from_str(&body).expect("should deserialize rotate response");
-                log::debug!(
-                    "Custom KID rotation: success={}, new_kid={}",
-                    response.success,
-                    response.new_kid
-                );
-            }
-            Err(e) => log::debug!("Expected error in test environment: {}", e),
-        }
+        let resp = handle_rotate_key(&settings, &noop_services(), req)
+            .expect("should return a response even when stores are unavailable");
+
+        assert_eq!(
+            resp.status(),
+            StatusCode::INTERNAL_SERVER_ERROR,
+            "should return 500 when store writes fail"
+        );
+
+        let body = response_body_string(resp);
+        let response: RotateKeyResponse =
+            serde_json::from_str(&body).expect("should deserialize rotate response");
+
+        assert!(
+            !response.success,
+            "should report failure when store writes fail"
+        );
+        assert!(
+            response.error.is_some(),
+            "should include error detail in failure response"
+        );
     }
 
     #[test]
@@ -628,6 +740,47 @@ mod tests {
         assert!(result.is_err(), "Invalid JSON should return error");
     }
 
+    #[test]
+    fn test_handle_rotate_key_rejects_invalid_kid() {
+        let settings = crate::test_support::tests::create_test_settings();
+
+        let req_body = RotateKeyRequest {
+            kid: Some("bad,kid".to_string()),
+        };
+
+        let body_json = serde_json::to_string(&req_body).expect("should serialize rotate request");
+        let req = build_request(
+            Method::POST,
+            "https://test.com/admin/keys/rotate",
+            Some(&body_json),
+        );
+
+        let resp = handle_rotate_key(&settings, &noop_services(), req)
+            .expect("should return a response for invalid kid");
+
+        assert_eq!(
+            resp.status(),
+            StatusCode::BAD_REQUEST,
+            "should reject malformed kid as a bad request"
+        );
+
+        let body = response_body_string(resp);
+        let response: RotateKeyResponse =
+            serde_json::from_str(&body).expect("should deserialize rotate response");
+
+        assert!(
+            !response.success,
+            "should report failure when supplied kid is invalid"
+        );
+        assert!(
+            response
+                .error
+                .as_deref()
+                .is_some_and(|error| error.contains("kid must contain only")),
+            "should explain the kid character restrictions"
+        );
+    }
+
     #[test]
     fn test_handle_deactivate_key_request() {
         let settings = crate::test_support::tests::create_test_settings();
@@ -645,20 +798,27 @@ mod tests {
             Some(&body_json),
         );
 
-        let result = handle_deactivate_key(&settings, &noop_services(), req);
-        match result {
-            Ok(resp) => {
-                let body = response_body_string(resp);
-                let response: DeactivateKeyResponse =
-                    serde_json::from_str(&body).expect("should deserialize deactivate response");
-                log::debug!(
-                    "Deactivate response: success={}, message={}",
-                    response.success,
-                    response.message
-                );
-            }
-            Err(e) => log::debug!("Expected error in test environment: {}", e),
-        }
+        let resp = handle_deactivate_key(&settings, &noop_services(), req)
+            .expect("should return a response even when stores are unavailable");
+
+        assert_eq!(
+            resp.status(),
+            StatusCode::INTERNAL_SERVER_ERROR,
+            "should return 500 when active-kids cannot be read"
+        );
+
+        let body = response_body_string(resp);
+        let response: DeactivateKeyResponse =
+            serde_json::from_str(&body).expect("should deserialize deactivate response");
+
+        assert!(
+            !response.success,
+            "should report failure when store reads fail"
+        );
+        assert!(
+            response.error.is_some(),
+            "should include error detail in failure response"
+        );
     }
     #[test]
@@ -678,20 +838,31 @@ mod tests {
             Some(&body_json),
         );
 
-        let result = handle_deactivate_key(&settings, &noop_services(), req);
-        match result {
-            Ok(resp) => {
-                let body = response_body_string(resp);
-                let response: DeactivateKeyResponse =
-                    serde_json::from_str(&body).expect("should deserialize deactivate response");
-                log::debug!(
-                    "Delete response: success={}, deleted={}",
-                    response.success,
-                    response.deleted
-                );
-            }
-            Err(e) => log::debug!("Expected error in test environment: {}", e),
-        }
+        let resp = handle_deactivate_key(&settings, &noop_services(), req)
+            .expect("should return a response even when stores are unavailable");
+
+        assert_eq!(
+            resp.status(),
+            StatusCode::INTERNAL_SERVER_ERROR,
+            "should return 500 when active-kids cannot be read"
+        );
+
+        let body = response_body_string(resp);
+        let response: DeactivateKeyResponse =
+            serde_json::from_str(&body).expect("should deserialize deactivate response");
+
+        assert!(
+            !response.success,
+            "should report failure when store reads fail"
+        );
+        assert!(
+            !response.deleted,
+            "should not report deletion when the operation failed"
+        );
+        assert!(
+            response.error.is_some(),
+            "should include error detail in failure response"
+        );
     }
 
     #[test]
@@ -707,6 +878,75 @@ mod tests {
         assert!(result.is_err(), "Invalid JSON should return error");
     }
 
+    #[test]
+    fn test_handle_deactivate_key_rejects_invalid_kid() {
+        let settings = crate::test_support::tests::create_test_settings();
+
+        let req_body = DeactivateKeyRequest {
+            kid: "bad kid".to_string(),
+            delete: false,
+        };
+
+        let body_json =
+            serde_json::to_string(&req_body).expect("should serialize deactivate request");
+        let req = build_request(
+            Method::POST,
+            "https://test.com/admin/keys/deactivate",
+            Some(&body_json),
+        );
+
+        let resp = handle_deactivate_key(&settings, &noop_services(), req)
+            .expect("should return a response for invalid kid");
+
+        assert_eq!(
+            resp.status(),
+            StatusCode::BAD_REQUEST,
+            "should reject malformed kid as a bad request"
+        );
+
+        let body = response_body_string(resp);
+        let response: DeactivateKeyResponse =
+            serde_json::from_str(&body).expect("should deserialize deactivate response");
+
+        assert!(
+            !response.success,
+            "should report failure when supplied kid is invalid"
+        );
+        assert!(
+            response
+                .error
+                .as_deref()
+                .is_some_and(|error| error.contains("kid must contain only")),
+            "should explain the kid character restrictions"
+        );
+    }
+
+    #[test]
+    fn validate_kid_accepts_allowed_operator_supplied_ids() {
+        validate_kid("azAZ09-_.:").expect("should accept allowed kid characters");
+    }
+
+    #[test]
+    fn validate_kid_rejects_empty_ids() {
+        let result = validate_kid("");
+
+        assert!(result.is_err(), "should reject empty kid values");
+    }
+
+    #[test]
+    fn validate_kid_rejects_overlong_ids() {
+        let result = validate_kid(&"a".repeat(129));
+
+        assert!(result.is_err(), "should reject kids longer than 128 chars");
+    }
+
+    #[test]
+    fn validate_kid_rejects_csv_separator() {
+        let result = validate_kid("kid-a,kid-b");
+
+        assert!(result.is_err(), "should reject commas in kid values");
+    }
+
     #[test]
     fn test_rotate_key_request_deserialization() {
         let json = r#"{"kid":"custom-key"}"#;
@@ -733,28 +973,14 @@ mod tests {
             None,
         );
 
-        let services = noop_services();
-        let result = handle_trusted_server_discovery(&settings, &services, req);
-        match result {
-            Ok(resp) => {
-                assert_eq!(resp.status(), StatusCode::OK);
-                assert_json_content_type(&resp);
-                let body = response_body_string(resp);
-
-                // Parse the discovery document
-                let discovery: serde_json::Value =
-                    serde_json::from_str(&body).expect("should parse discovery document");
-
-                // Verify structure - only version and jwks
-                assert_eq!(discovery["version"], "1.0");
-                assert!(discovery["jwks"].is_object());
-
-                // Verify no extra fields
-                assert!(discovery.get("endpoints").is_none());
-                assert!(discovery.get("capabilities").is_none());
-            }
-            Err(e) => log::debug!("Expected error in test environment: {}", e),
-        }
+        // noop_services() config store always returns Err, so the discovery
+        // handler propagates the error rather than absorbing it into a 500.
+        let result = handle_trusted_server_discovery(&settings, &noop_services(), req);
+
+        assert!(
+            result.is_err(),
+            "should propagate store errors when JWKS cannot be retrieved"
+        );
     }
 
     #[test]
diff --git a/crates/trusted-server-core/src/request_signing/jwks.rs b/crates/trusted-server-core/src/request_signing/jwks.rs
index 5c4dda94e..8d2066390 100644
--- a/crates/trusted-server-core/src/request_signing/jwks.rs
+++ b/crates/trusted-server-core/src/request_signing/jwks.rs
@@ -3,8 +3,6 @@
 //! This module provides functionality for generating, storing, and retrieving
 //! Ed25519 keypairs in JWK format for request signing.
 
-use std::sync::LazyLock;
-
 use ed25519_dalek::{SigningKey, VerifyingKey};
 use error_stack::{Report, ResultExt};
 use jose_jwk::{
@@ -14,11 +12,8 @@ use rand::rngs::OsRng;
 
 use crate::error::TrustedServerError;
-use crate::platform::{RuntimeServices, StoreName};
-use crate::request_signing::JWKS_CONFIG_STORE_NAME;
-
-static JWKS_STORE_NAME: LazyLock<StoreName> =
-    LazyLock::new(|| StoreName::from(JWKS_CONFIG_STORE_NAME));
+use crate::platform::RuntimeServices;
+use crate::request_signing::{read_active_kids, JWKS_STORE_NAME};
 
 /// An Ed25519 keypair used for request signing.
 pub struct Keypair {
@@ -75,25 +70,12 @@
 /// cannot be read. The underlying [`crate::platform::PlatformError`] is
 /// preserved as context in the error chain.
 pub fn get_active_jwks(services: &RuntimeServices) -> Result<Jwks, Report<TrustedServerError>> {
-    let active_kids_str = services
-        .config_store()
-        .get(&JWKS_STORE_NAME, "active-kids")
-        .change_context(TrustedServerError::Configuration {
-            message: "failed to read active-kids from config store".into(),
-        })
-        .attach("while fetching active kids list")?;
-
-    let active_kids: Vec<&str> = active_kids_str
-        .split(',')
-        .map(str::trim)
-        .filter(|s| !s.is_empty())
-        .collect();
-
+    let active_kids = read_active_kids(services)?;
     let mut jwks = Vec::new();
     for kid in active_kids {
         let jwk = services
             .config_store()
-            .get(&JWKS_STORE_NAME, kid)
+            .get(&JWKS_STORE_NAME, &kid)
             .change_context(TrustedServerError::Configuration {
                 message: format!("failed to get JWK for kid: {}", kid),
             })?;
diff --git a/crates/trusted-server-core/src/request_signing/mod.rs b/crates/trusted-server-core/src/request_signing/mod.rs
index 415079404..d5d0e79ce 100644
--- a/crates/trusted-server-core/src/request_signing/mod.rs
+++ b/crates/trusted-server-core/src/request_signing/mod.rs
@@ -5,15 +5,27 @@
 //!
 //! # Store names vs store IDs
 //!
-//! Fastly stores have two identifiers:
+//! Platform stores have two identifiers:
 //!
 //! - **Store name** ([`JWKS_CONFIG_STORE_NAME`], [`SIGNING_SECRET_STORE_NAME`]):
-//!   used at the edge for reads via `ConfigStore::open` / `SecretStore::open`.
-//!   These are configured in `fastly.toml`.
+//!   used for runtime reads via [`crate::platform::PlatformConfigStore::get`]
+//!   and [`crate::platform::PlatformSecretStore::get_bytes`] through
+//!   [`crate::platform::RuntimeServices`]. These names are configured in
+//!   `fastly.toml` for the Fastly adapter.
 //!
-//! - **Store ID** (`RequestSigning::config_store_id`, `RequestSigning::secret_store_id`):
-//!   used by the Fastly management API for writes (creating, updating, and
-//!   deleting items). These are set in `trusted-server.toml`.
+//! - **Store ID**: used for write operations via
+//!   [`crate::platform::PlatformConfigStore::put`] /
+//!   [`crate::platform::PlatformConfigStore::delete`] and
+//!   [`crate::platform::PlatformSecretStore::create`] /
+//!   [`crate::platform::PlatformSecretStore::delete`]. These identifiers come
+//!   from the request-signing settings in `trusted-server.toml`.
+
+use std::sync::LazyLock;
+
+use error_stack::{Report, ResultExt};
+
+use crate::error::TrustedServerError;
+use crate::platform::{RuntimeServices, StoreName};
 
 pub mod discovery;
 pub mod endpoints;
@@ -21,20 +33,97 @@ pub mod jwks;
 pub mod rotation;
 pub mod signing;
 
-/// Config store name for JWKS public keys (edge reads via `ConfigStore::open`).
+/// Config store name for JWKS public keys used by runtime read operations.
 ///
 /// This must match the store name declared in `fastly.toml` under
 /// `[local_server.config_stores]`.
 pub const JWKS_CONFIG_STORE_NAME: &str = "jwks_store";
 
-/// Secret store name for Ed25519 signing keys (edge reads via `SecretStore::open`).
+/// Secret store name for Ed25519 signing keys used by runtime read operations.
 ///
 /// This must match the store name declared in `fastly.toml` under
 /// `[local_server.secret_stores]`.
 pub const SIGNING_SECRET_STORE_NAME: &str = "signing_keys";
 
+/// Lazily constructed [`StoreName`] for JWKS config-store reads.
+pub(crate) static JWKS_STORE_NAME: LazyLock<StoreName> =
+    LazyLock::new(|| StoreName::from(JWKS_CONFIG_STORE_NAME));
+
+/// Lazily constructed [`StoreName`] for signing-key secret-store reads.
+pub(crate) static SIGNING_STORE_NAME: LazyLock<StoreName> =
+    LazyLock::new(|| StoreName::from(SIGNING_SECRET_STORE_NAME));
+
+fn parse_active_kids(active_kids: &str) -> Vec<String> {
+    active_kids
+        .split(',')
+        .map(|kid| kid.trim().to_string())
+        .filter(|kid| !kid.is_empty())
+        .collect()
+}
+
+fn read_active_kids(services: &RuntimeServices) -> Result<Vec<String>, Report<TrustedServerError>> {
+    services
+        .config_store()
+        .get(&JWKS_STORE_NAME, "active-kids")
+        .change_context(TrustedServerError::Configuration {
+            message: "failed to read active-kids from config store".into(),
+        })
+        .attach("while fetching active kids list")
+        .map(|active_kids| parse_active_kids(&active_kids))
+}
+
 pub use discovery::*;
 pub use endpoints::*;
 pub use jwks::*;
 pub use rotation::*;
 pub use signing::*;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn parse_active_kids_splits_comma_separated_kids() {
+        let result = parse_active_kids("kid-a,kid-b,kid-c");
+        assert_eq!(result, vec!["kid-a", "kid-b", "kid-c"]);
+    }
+
+    #[test]
+    fn parse_active_kids_trims_whitespace_around_each_kid() {
+        let result = parse_active_kids(" kid-a , kid-b ");
+        assert_eq!(result, vec!["kid-a", "kid-b"]);
+    }
+
+    #[test]
+    fn parse_active_kids_skips_empty_segments() {
+        let result = parse_active_kids("kid-a,,kid-b");
+        assert_eq!(result, vec!["kid-a", "kid-b"]);
+    }
+
+    #[test]
+    fn parse_active_kids_skips_whitespace_only_segments() {
+        let result = parse_active_kids(" kid-a ,  , kid-b ");
+        assert_eq!(result, vec!["kid-a", "kid-b"]);
+    }
+
+    #[test]
+    fn parse_active_kids_returns_empty_for_empty_string() {
+        let result = parse_active_kids("");
+        assert!(result.is_empty(), "should return no kids for empty input");
+    }
+
+    #[test]
+    fn parse_active_kids_returns_empty_for_only_commas() {
+        let result = parse_active_kids(",,,");
+        assert!(
+            result.is_empty(),
+            "should return no kids when input is only commas"
+        );
+    }
+
+    #[test]
+    fn parse_active_kids_handles_single_kid() {
+        let result = parse_active_kids("only-kid");
+        assert_eq!(result, vec!["only-kid"]);
+    }
+}
diff --git a/crates/trusted-server-core/src/request_signing/rotation.rs b/crates/trusted-server-core/src/request_signing/rotation.rs
index 3059dfd71..f78ac882f 100644
--- a/crates/trusted-server-core/src/request_signing/rotation.rs
+++ b/crates/trusted-server-core/src/request_signing/rotation.rs
@@ -4,33 +4,34 @@
 //! lifecycle, and storing keys via platform store primitives through
 //! [`RuntimeServices`].
 
-use std::sync::LazyLock;
-
 use base64::{engine::general_purpose, Engine};
+use chrono::Utc;
 use ed25519_dalek::SigningKey;
 use error_stack::{Report, ResultExt};
 use jose_jwk::Jwk;
+use uuid::Uuid;
 
+use super::{read_active_kids, Keypair};
 use crate::error::TrustedServerError;
-use crate::platform::{RuntimeServices, StoreId, StoreName};
-use crate::request_signing::JWKS_CONFIG_STORE_NAME;
-
-use super::Keypair;
-
-static JWKS_STORE_NAME: LazyLock<StoreName> =
-    LazyLock::new(|| StoreName::from(JWKS_CONFIG_STORE_NAME));
+use crate::platform::{RuntimeServices, StoreId};
+use crate::request_signing::JWKS_STORE_NAME;
 
+/// Result of a key rotation operation.
 #[derive(Debug, Clone)]
 pub struct KeyRotationResult {
+    /// Newly generated or supplied key identifier.
     pub new_kid: String,
+    /// Previously active key identifier, if one existed.
     pub previous_kid: Option<String>,
+    /// Active key identifiers after rotation completes.
     pub active_kids: Vec<String>,
+    /// Public JWK associated with the newly active key.
     pub jwk: Jwk,
 }
 
 /// Manages signing key lifecycle using platform store primitives.
 ///
-/// Reads use the edge-visible store name ([`JWKS_CONFIG_STORE_NAME`]).
+/// Reads use the edge-visible store name ([`super::JWKS_CONFIG_STORE_NAME`]).
 /// Writes use the management API store identifiers supplied at construction.
 pub struct KeyRotationManager {
     /// Management API store ID for config store writes.
@@ -44,13 +45,13 @@ impl KeyRotationManager {
     ///
     /// The `config_store_id` and `secret_store_id` are platform management API
     /// identifiers used for write operations. Edge reads use the store names
-    /// defined in [`JWKS_CONFIG_STORE_NAME`] and
+    /// defined in [`super::JWKS_CONFIG_STORE_NAME`] and
     /// [`crate::request_signing::SIGNING_SECRET_STORE_NAME`].
     #[must_use]
-    pub fn new(config_store_id: impl Into<String>, secret_store_id: impl Into<String>) -> Self {
+    pub fn new(config_store_id: &str, secret_store_id: &str) -> Self {
         Self {
-            config_store_id: StoreId::from(config_store_id.into()),
-            secret_store_id: StoreId::from(secret_store_id.into()),
+            config_store_id: StoreId::from(config_store_id),
+            secret_store_id: StoreId::from(secret_store_id),
         }
     }
 
@@ -64,25 +65,80 @@ impl KeyRotationManager {
         services: &RuntimeServices,
         kid: Option<String>,
     ) -> Result<KeyRotationResult, Report<TrustedServerError>> {
-        let new_kid = kid.unwrap_or_else(generate_date_based_kid);
-
-        let keypair = Keypair::generate();
-        let jwk = keypair.get_jwk(new_kid.clone());
         let previous_kid = services
             .config_store()
             .get(&JWKS_STORE_NAME, "current-kid")
             .ok();
 
+        let active_kids = read_active_kids(services).unwrap_or_default();
+        let new_kid = match kid {
+            Some(kid) => {
+                if self.key_exists(services, &kid, &active_kids) {
+                    return Err(Report::new(TrustedServerError::Configuration {
+                        message: format!("kid '{}' already exists; choose a unique kid", kid),
+                    }));
+                }
+                kid
+            }
+            None => self.generate_unique_date_based_kid(services, &active_kids),
+        };
+
+        let keypair = Keypair::generate();
+        let jwk = keypair.get_jwk(new_kid.clone());
+
+        // Step 1: write private key. Nothing to roll back on failure.
         self.store_private_key(services, &new_kid, &keypair.signing_key)?;
-        self.store_public_jwk(services, &new_kid, &jwk)?;
 
-        let active_kids = match &previous_kid {
-            Some(prev) if prev != &new_kid => vec![prev.clone(), new_kid.clone()],
-            _ => vec![new_kid.clone()],
-        };
+        // Step 2: write public JWK. Roll back the private key on failure so no
+        // orphaned key material is left in the secret store.
+        if let Err(err) = self.store_public_jwk(services, &new_kid, &jwk) {
+            if let Err(rollback_err) = services
+                .secret_store()
+                .delete(&self.secret_store_id, &new_kid)
+            {
+                log::warn!(
+                    "rotate_key: rollback of private key '{}' failed after JWK write error: {}",
+                    new_kid,
+                    rollback_err
+                );
+            }
+            return Err(err);
+        }
+
+        let mut active_kids = active_kids;
+        if !active_kids.iter().any(|kid| kid == &new_kid) {
+            active_kids.push(new_kid.clone());
+        }
+
+        // Step 3: publish the new kid in active-kids BEFORE flipping current-kid.
+        // Roll back both artifacts on failure so the new kid never appears in JWKS
+        // without a reachable private key.
+        if let Err(err) = self.update_active_kids(services, &active_kids) {
+            if let Err(rollback_err) = services
+                .config_store()
+                .delete(&self.config_store_id, &new_kid)
+            {
+                log::warn!(
+                    "rotate_key: rollback of JWK '{}' failed after active-kids write error: {}",
+                    new_kid,
+                    rollback_err
+                );
+            }
+            if let Err(rollback_err) = services
+                .secret_store()
+                .delete(&self.secret_store_id, &new_kid)
+            {
+                log::warn!(
+                    "rotate_key: rollback of private key '{}' failed after active-kids write error: {}",
+                    new_kid,
+                    rollback_err
+                );
+            }
+            return Err(err);
+        }
+
+        // Step 4: flip current-kid last. A failure here leaves the old kid still
+        // active and the new kid visible in JWKS but unused — a recoverable state.
         self.update_current_kid(services, &new_kid)?;
-        self.update_active_kids(services, &active_kids)?;
 
         Ok(KeyRotationResult {
             new_kid,
@@ -92,12 +148,33 @@ impl KeyRotationManager {
         })
     }
 
+    fn key_exists(&self, services: &RuntimeServices, kid: &str, active_kids: &[String]) -> bool {
+        active_kids.iter().any(|active_kid| active_kid == kid)
+            || services.config_store().get(&JWKS_STORE_NAME, kid).is_ok()
+    }
+
+    fn generate_unique_date_based_kid(
+        &self,
+        services: &RuntimeServices,
+        active_kids: &[String],
+    ) -> String {
+        let base_kid = generate_date_based_kid();
+        if !self.key_exists(services, &base_kid, active_kids) {
+            return base_kid;
+        }
+
+        format!("{base_kid}-{}", Uuid::new_v4().simple())
+    }
+
     fn store_private_key(
         &self,
         services: &RuntimeServices,
         kid: &str,
         signing_key: &SigningKey,
     ) -> Result<(), Report<TrustedServerError>> {
+        // The platform secret-store write interface is string-based, so signing
+        // keys are persisted as base64 text. The Fastly adapter applies its own
+        // transport-level base64 encoding when calling the management API.
         let key_b64 = general_purpose::STANDARD.encode(signing_key.as_bytes());
 
         services
@@ -165,18 +242,7 @@ impl KeyRotationManager {
         &self,
         services: &RuntimeServices,
     ) -> Result<Vec<String>, Report<TrustedServerError>> {
-        let active_kids_str = services
-            .config_store()
-            .get(&JWKS_STORE_NAME, "active-kids")
-            .change_context(TrustedServerError::Configuration {
-                message: "failed to read active-kids from config store".into(),
-            })?;
-
-        Ok(active_kids_str
-            .split(',')
-            .map(|s| s.trim().to_string())
-            .filter(|s| !s.is_empty())
-            .collect())
+        read_active_kids(services)
     }
 
     /// Deactivates a key by removing it from the active keys list.
@@ -189,6 +255,8 @@ impl KeyRotationManager {
         services: &RuntimeServices,
         kid: &str,
     ) -> Result<(), Report<TrustedServerError>> {
+        self.ensure_not_current_key(services, kid, "deactivate")?;
+
         let mut active_kids = self.list_active_keys(services)?;
         active_kids.retain(|k| k != kid);
 
@@ -211,37 +279,63 @@ impl KeyRotationManager {
         services: &RuntimeServices,
         kid: &str,
     ) -> Result<(), Report<TrustedServerError>> {
+        self.ensure_not_current_key(services, kid, "delete")?;
         self.deactivate_key(services, kid)?;
 
+        // Delete the private key first. A failure here leaves the JWK in the
+        // config store but no private key — the key is verifiable but cannot
+        // sign, which is safer than orphaned key material with no JWK. Both
+        // deletes treat 404 as success so retries converge after partial failures.
         services
-            .config_store()
-            .delete(&self.config_store_id, kid)
+            .secret_store()
+            .delete(&self.secret_store_id, kid)
             .change_context(TrustedServerError::Configuration {
-                message: "failed to delete JWK from config store".into(),
+                message: "failed to delete signing key from secret store".into(),
             })?;
 
         services
-            .secret_store()
-            .delete(&self.secret_store_id, kid)
+            .config_store()
+            .delete(&self.config_store_id, kid)
             .change_context(TrustedServerError::Configuration {
-                message: "failed to delete signing key from secret store".into(),
+                message: "failed to delete JWK from config store".into(),
            })?;
 
         Ok(())
     }
+
+    fn ensure_not_current_key(
+        &self,
+        services: &RuntimeServices,
+        kid: &str,
+        operation: &str,
+    ) -> Result<(), Report<TrustedServerError>> {
+        if services
+            .config_store()
+            .get(&JWKS_STORE_NAME, "current-kid")
+            .is_ok_and(|current| current == kid)
+        {
+            return Err(Report::new(TrustedServerError::Configuration {
+                message: format!(
+                    "cannot {operation} '{kid}' because it is the current signing key; rotate first"
+                ),
+            }));
+        }
+
+        Ok(())
+    }
 }
 
 /// Generates a date-based key ID in the format `ts-YYYY-MM-DD`.
 #[must_use]
 pub fn generate_date_based_kid() -> String {
-    use chrono::Utc;
     format!("ts-{}", Utc::now().format("%Y-%m-%d"))
 }
 
 #[cfg(test)]
 mod tests {
     use std::collections::HashMap;
-    use std::sync::Mutex;
+    use std::sync::atomic::{AtomicUsize, Ordering};
+    use std::sync::{Arc, Mutex};
 
     use error_stack::Report;
 
@@ -257,25 +351,61 @@ mod tests {
     // Spy stores: record put/create/delete calls, serve preset get values
     // ---------------------------------------------------------------------------
 
+    #[derive(Clone)]
     struct SpyConfigStore {
+        inner: Arc<SpyConfigStoreInner>,
+    }
+
+    struct SpyConfigStoreInner {
         data: Mutex<HashMap<String, String>>,
         puts: Mutex<Vec<(String, String, String)>>,
         deletes: Mutex<Vec<(String, String)>>,
+        /// Fail `put` after this many successful calls. `usize::MAX` means never fail.
+        fail_after_n_puts: AtomicUsize,
     }
 
     impl SpyConfigStore {
         fn new(initial: HashMap<String, String>) -> Self {
             Self {
-                data: Mutex::new(initial),
-                puts: Mutex::new(vec![]),
-                deletes: Mutex::new(vec![]),
+                inner: Arc::new(SpyConfigStoreInner {
+                    data: Mutex::new(initial),
+                    puts: Mutex::new(vec![]),
+                    deletes: Mutex::new(vec![]),
+                    fail_after_n_puts: AtomicUsize::new(usize::MAX),
+                }),
             }
         }
+
+        /// Returns a store whose `put` succeeds for the first `n` calls, then
+        /// returns an error. Use `n = 0` to fail immediately.
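+        ///
+        /// Rotation issues its config-store puts in a fixed order (JWK, then
+        /// active-kids, then current-kid), so `with_put_failure_after(0)`
+        /// fails the JWK write and `with_put_failure_after(1)` fails the
+        /// active-kids write.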
+        fn with_put_failure_after(n: usize) -> Self {
+            Self {
+                inner: Arc::new(SpyConfigStoreInner {
+                    data: Mutex::new(HashMap::new()),
+                    puts: Mutex::new(vec![]),
+                    deletes: Mutex::new(vec![]),
+                    fail_after_n_puts: AtomicUsize::new(n),
+                }),
+            }
+        }
+
+        fn puts(&self) -> Vec<(String, String, String)> {
+            self.inner.puts.lock().expect("should lock puts").clone()
+        }
+
+        fn deletes(&self) -> Vec<(String, String)> {
+            self.inner
+                .deletes
+                .lock()
+                .expect("should lock deletes")
+                .clone()
+        }
     }
 
     impl PlatformConfigStore for SpyConfigStore {
         fn get(&self, _: &StoreName, key: &str) -> Result<String, Report<PlatformError>> {
-            self.data
+            self.inner
+                .data
                 .lock()
                 .expect("should lock data")
                 .get(key)
@@ -289,12 +419,20 @@ mod tests {
             key: &str,
             value: &str,
         ) -> Result<(), Report<PlatformError>> {
-            self.puts.lock().expect("should lock puts").push((
+            let remaining = self.inner.fail_after_n_puts.load(Ordering::SeqCst);
+            if remaining == 0 {
+                return Err(Report::new(PlatformError::ConfigStore));
+            }
+            if remaining != usize::MAX {
+                self.inner.fail_after_n_puts.fetch_sub(1, Ordering::SeqCst);
+            }
+            self.inner.puts.lock().expect("should lock puts").push((
                 store_id.to_string(),
                 key.to_string(),
                 value.to_string(),
             ));
-            self.data
+            self.inner
+                .data
                 .lock()
                 .expect("should lock data")
                 .insert(key.to_string(), value.to_string());
@@ -302,27 +440,70 @@ mod tests {
         }
 
         fn delete(&self, store_id: &StoreId, key: &str) -> Result<(), Report<PlatformError>> {
-            self.deletes
+            self.inner
+                .deletes
                 .lock()
                 .expect("should lock deletes")
                 .push((store_id.to_string(), key.to_string()));
-            self.data.lock().expect("should lock data").remove(key);
+            self.inner
+                .data
+                .lock()
+                .expect("should lock data")
+                .remove(key);
             Ok(())
         }
     }
 
+    #[derive(Clone)]
     struct SpySecretStore {
+        inner: Arc<SpySecretStoreInner>,
+    }
+
+    struct SpySecretStoreInner {
         creates: Mutex<Vec<(String, String, String)>>,
         deletes: Mutex<Vec<(String, String)>>,
+        /// Fail `create` after this many successful calls. `usize::MAX` means never fail.
+        fail_after_n_creates: AtomicUsize,
     }
 
     impl SpySecretStore {
         fn new() -> Self {
             Self {
-                creates: Mutex::new(vec![]),
-                deletes: Mutex::new(vec![]),
+                inner: Arc::new(SpySecretStoreInner {
+                    creates: Mutex::new(vec![]),
+                    deletes: Mutex::new(vec![]),
+                    fail_after_n_creates: AtomicUsize::new(usize::MAX),
+                }),
            }
         }
+
+        /// Returns a store whose `create` succeeds for the first `n` calls, then
+        /// returns an error. Use `n = 0` to fail immediately.
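+        ///
+        /// The private-key write is the only secret-store `create` during
+        /// rotation, so `with_create_failure_after(0)` fails rotation at
+        /// step 1, before any config-store writes happen.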
+ fn with_create_failure_after(n: usize) -> Self { + Self { + inner: Arc::new(SpySecretStoreInner { + creates: Mutex::new(vec![]), + deletes: Mutex::new(vec![]), + fail_after_n_creates: AtomicUsize::new(n), + }), } } + + fn creates(&self) -> Vec<(String, String, String)> { + self.inner + .creates + .lock() + .expect("should lock creates") + .clone() + } + + fn deletes(&self) -> Vec<(String, String)> { + self.inner + .deletes + .lock() + .expect("should lock deletes") + .clone() + } } impl PlatformSecretStore for SpySecretStore { @@ -336,16 +517,26 @@ mod tests { name: &str, value: &str, ) -> Result<(), Report> { - self.creates.lock().expect("should lock creates").push(( - store_id.to_string(), - name.to_string(), - value.to_string(), - )); + let remaining = self.inner.fail_after_n_creates.load(Ordering::SeqCst); + if remaining == 0 { + return Err(Report::new(PlatformError::SecretStore)); + } + if remaining != usize::MAX { + self.inner + .fail_after_n_creates + .fetch_sub(1, Ordering::SeqCst); + } + self.inner + .creates + .lock() + .expect("should lock creates") + .push((store_id.to_string(), name.to_string(), value.to_string())); Ok(()) } fn delete(&self, store_id: &StoreId, name: &str) -> Result<(), Report> { - self.deletes + self.inner + .deletes .lock() .expect("should lock deletes") .push((store_id.to_string(), name.to_string())); @@ -400,6 +591,129 @@ mod tests { ); } + #[test] + fn rotate_key_preserves_existing_active_kids() { + let mut data = HashMap::new(); + data.insert("current-kid".to_string(), "kid-b".to_string()); + data.insert("active-kids".to_string(), "kid-a, kid-b".to_string()); + + let config_store = SpyConfigStore::new(data); + let secret_store = SpySecretStore::new(); + let services = build_services_with_config_and_secret(config_store, secret_store); + + let manager = KeyRotationManager::new("cfg-id", "sec-id"); + let rotation = manager + .rotate_key(&services, Some("kid-c".to_string())) + .expect("should rotate key successfully"); + + assert_eq!( + rotation.active_kids, + vec![ + "kid-a".to_string(), + "kid-b".to_string(), + "kid-c".to_string() + ], + "should preserve previously active keys and append the new kid" + ); + + let active_kids = manager + .list_active_keys(&services) + .expect("should read back updated active kids"); + assert_eq!( + active_kids, + vec![ + "kid-a".to_string(), + "kid-b".to_string(), + "kid-c".to_string() + ], + "should store the full active kid list after rotation" + ); + } + + #[test] + fn rotate_key_does_not_reactivate_deactivated_previous_kid() { + let mut data = HashMap::new(); + data.insert("current-kid".to_string(), "kid-a".to_string()); + data.insert("active-kids".to_string(), "kid-b".to_string()); + + let config_store = SpyConfigStore::new(data); + let secret_store = SpySecretStore::new(); + let services = build_services_with_config_and_secret(config_store, secret_store); + + let manager = KeyRotationManager::new("cfg-id", "sec-id"); + let rotation = manager + .rotate_key(&services, Some("kid-c".to_string())) + .expect("should rotate key successfully"); + + assert_eq!( + rotation.active_kids, + vec!["kid-b".to_string(), "kid-c".to_string()], + "should not resurrect a previous kid that is no longer active" + ); + } + + #[test] + fn rotate_key_rejects_explicit_kid_that_is_already_active() { + let mut data = HashMap::new(); + data.insert("current-kid".to_string(), "kid-b".to_string()); + data.insert("active-kids".to_string(), "kid-a,kid-b".to_string()); + + let config_store = SpyConfigStore::new(data); + let secret_store = 
SpySecretStore::new(); + let services = + build_services_with_config_and_secret(config_store.clone(), secret_store.clone()); + + let manager = KeyRotationManager::new("cfg-id", "sec-id"); + let result = manager.rotate_key(&services, Some("kid-a".to_string())); + + assert!( + result.is_err(), + "should reject explicit rotation to an existing kid" + ); + assert!( + secret_store.creates().is_empty(), + "should reject duplicate kids before writing private key material" + ); + assert!( + config_store.puts().is_empty(), + "should reject duplicate kids before writing config store entries" + ); + } + + #[test] + fn rotate_key_uniquifies_generated_kid_when_date_based_kid_is_active() { + let base_kid = generate_date_based_kid(); + let mut data = HashMap::new(); + data.insert("current-kid".to_string(), base_kid.clone()); + data.insert("active-kids".to_string(), base_kid.clone()); + + let config_store = SpyConfigStore::new(data); + let secret_store = SpySecretStore::new(); + let services = build_services_with_config_and_secret(config_store, secret_store); + + let manager = KeyRotationManager::new("cfg-id", "sec-id"); + let rotation = manager + .rotate_key(&services, None) + .expect("should rotate with a uniquified generated kid"); + + assert_ne!( + rotation.new_kid, base_kid, + "should not reuse an active date-based kid" + ); + assert!( + rotation.new_kid.starts_with(&format!("{base_kid}-")), + "should preserve the date-based kid prefix for generated collisions" + ); + assert!( + rotation.active_kids.contains(&base_kid), + "should keep the existing kid active" + ); + assert!( + rotation.active_kids.contains(&rotation.new_kid), + "should add the uniquified generated kid" + ); + } + #[test] fn deactivate_key_fails_when_only_one_key_remains() { let mut data = HashMap::new(); @@ -432,4 +746,142 @@ mod tests { assert_eq!(result.active_kids.len(), 2); assert_eq!(result.jwk.prm.kid, Some("test-key".to_string())); } + + #[test] + fn rotate_key_fails_when_private_key_store_write_fails() { + let config_store = SpyConfigStore::new(HashMap::new()); + let secret_store = SpySecretStore::with_create_failure_after(0); + let services = build_services_with_config_and_secret(config_store, secret_store); + + let manager = KeyRotationManager::new("cfg-id", "sec-id"); + let result = manager.rotate_key(&services, Some("new-kid".to_string())); + + assert!( + result.is_err(), + "should fail when the secret store rejects the private key write" + ); + } + + #[test] + fn rotate_key_rolls_back_secret_when_jwk_write_fails() { + let config_store = SpyConfigStore::with_put_failure_after(0); + let secret_store = SpySecretStore::new(); + let services = + build_services_with_config_and_secret(config_store.clone(), secret_store.clone()); + + let manager = KeyRotationManager::new("cfg-id", "sec-id"); + let result = manager.rotate_key(&services, Some("rollback-kid".to_string())); + + assert!(result.is_err(), "should fail when JWK write fails"); + assert_eq!( + secret_store.deletes(), + vec![("sec-id".to_string(), "rollback-kid".to_string())], + "should roll back private key material after JWK write failure" + ); + assert!( + config_store.deletes().is_empty(), + "should not roll back a JWK that was never stored" + ); + } + + #[test] + fn rotate_key_rolls_back_secret_and_jwk_when_active_kids_write_fails() { + let config_store = SpyConfigStore::with_put_failure_after(1); + let secret_store = SpySecretStore::new(); + let services = + build_services_with_config_and_secret(config_store.clone(), secret_store.clone()); + + let manager = 
KeyRotationManager::new("cfg-id", "sec-id"); + let result = manager.rotate_key(&services, Some("rollback-kid".to_string())); + + assert!(result.is_err(), "should fail when active-kids write fails"); + assert_eq!( + config_store.deletes(), + vec![("cfg-id".to_string(), "rollback-kid".to_string())], + "should roll back the stored JWK after active-kids write failure" + ); + assert_eq!( + secret_store.deletes(), + vec![("sec-id".to_string(), "rollback-kid".to_string())], + "should roll back private key material after active-kids write failure" + ); + } + + #[test] + fn deactivate_key_rejects_current_kid() { + let mut data = HashMap::new(); + data.insert("current-kid".to_string(), "kid-a".to_string()); + data.insert("active-kids".to_string(), "kid-a,kid-b".to_string()); + + let config_store = SpyConfigStore::new(data); + let secret_store = SpySecretStore::new(); + let services = + build_services_with_config_and_secret(config_store.clone(), secret_store.clone()); + + let manager = KeyRotationManager::new("cfg-id", "sec-id"); + let result = manager.deactivate_key(&services, "kid-a"); + + assert!(result.is_err(), "should reject deactivating current-kid"); + assert!( + config_store.puts().is_empty(), + "should reject current-kid deactivation before updating active-kids" + ); + assert!( + secret_store.deletes().is_empty(), + "should not touch secret store during failed deactivation" + ); + } + + #[test] + fn delete_key_rejects_current_kid_before_deleting_storage() { + let mut data = HashMap::new(); + data.insert("current-kid".to_string(), "kid-a".to_string()); + data.insert("active-kids".to_string(), "kid-a,kid-b".to_string()); + + let config_store = SpyConfigStore::new(data); + let secret_store = SpySecretStore::new(); + let services = + build_services_with_config_and_secret(config_store.clone(), secret_store.clone()); + + let manager = KeyRotationManager::new("cfg-id", "sec-id"); + let result = manager.delete_key(&services, "kid-a"); + + assert!(result.is_err(), "should reject deleting current-kid"); + assert!( + secret_store.deletes().is_empty(), + "should reject current-kid deletion before deleting private key material" + ); + assert!( + config_store.deletes().is_empty(), + "should reject current-kid deletion before deleting JWK storage" + ); + } + + #[test] + fn delete_key_removes_secret_before_jwk() { + let mut data = HashMap::new(); + data.insert("active-kids".to_string(), "kid-a, kid-b".to_string()); + data.insert( + "kid-a".to_string(), + r#"{"kty":"OKP","crv":"Ed25519"}"#.to_string(), + ); + + let config_store = SpyConfigStore::new(data); + let secret_store = SpySecretStore::new(); + let services = build_services_with_config_and_secret(config_store, secret_store); + + let manager = KeyRotationManager::new("cfg-id", "sec-id"); + manager + .delete_key(&services, "kid-a") + .expect("should delete key successfully"); + + // After deletion, the JWK entry should be gone from the config store. + let jwk_gone = services + .config_store() + .get(&crate::request_signing::JWKS_STORE_NAME, "kid-a"); + assert!( + jwk_gone.is_err(), + "should remove JWK from the config store after deletion" + ); + } } diff --git a/crates/trusted-server-core/src/request_signing/signing.rs b/crates/trusted-server-core/src/request_signing/signing.rs index 6d78feab6..176f75e11 100644 --- a/crates/trusted-server-core/src/request_signing/signing.rs +++ b/crates/trusted-server-core/src/request_signing/signing.rs @@ -3,22 +3,14 @@ //! This module provides Ed25519-based signing and verification of HTTP requests //! 
using keys stored via platform store primitives. -use std::sync::LazyLock; - use base64::{engine::general_purpose, Engine}; use ed25519_dalek::{Signature, Signer as Ed25519Signer, SigningKey, Verifier, VerifyingKey}; use error_stack::{Report, ResultExt}; use serde::Serialize; use crate::error::TrustedServerError; -use crate::platform::{RuntimeServices, StoreName}; -use crate::request_signing::{JWKS_CONFIG_STORE_NAME, SIGNING_SECRET_STORE_NAME}; - -static JWKS_STORE_NAME: LazyLock = - LazyLock::new(|| StoreName::from(JWKS_CONFIG_STORE_NAME)); - -static SIGNING_STORE_NAME: LazyLock = - LazyLock::new(|| StoreName::from(SIGNING_SECRET_STORE_NAME)); +use crate::platform::RuntimeServices; +use crate::request_signing::{JWKS_STORE_NAME, SIGNING_STORE_NAME}; /// Retrieves the current active key ID from the config store. /// @@ -36,28 +28,33 @@ pub fn get_current_key_id( }) } -fn parse_ed25519_signing_key(key_bytes: Vec) -> Result> { - let bytes = if key_bytes.len() > 32 { - general_purpose::STANDARD.decode(&key_bytes).map_err(|_| { - Report::new(TrustedServerError::Configuration { - message: "Failed to decode base64 key".into(), - }) - })? - } else { - key_bytes - }; +/// Parses an Ed25519 signing key from secret-store bytes. +/// +/// Request-signing rotation always stores private keys as standard base64 text +/// via [`crate::request_signing::rotation::KeyRotationManager`]. A non-base64 +/// value in the secret store indicates data corruption and is surfaced as an +/// explicit error rather than silently falling back to a length heuristic. +fn parse_ed25519_signing_key(key_bytes: &[u8]) -> Result> { + let bytes = general_purpose::STANDARD.decode(key_bytes).map_err(|_| { + Report::new(TrustedServerError::Configuration { + message: "signing key is not valid base64 — corrupt key material in secret store" + .into(), + }) + })?; let key_array: [u8; 32] = bytes.try_into().map_err(|_| { Report::new(TrustedServerError::Configuration { - message: "Invalid key length (expected 32 bytes for Ed25519)".into(), + message: "signing key must be 32 bytes after base64 decoding".into(), }) })?; Ok(SigningKey::from_bytes(&key_array)) } +/// Signs request payloads using the current Ed25519 private key. pub struct RequestSigner { key: SigningKey, + /// Key identifier associated with the loaded private key. pub kid: String, } @@ -81,9 +78,13 @@ struct SigningPayload<'a> { /// Parameters for enhanced request signing #[derive(Debug, Clone)] pub struct SigningParams { + /// Request identifier to bind into the signature payload. pub request_id: String, + /// Host header value expected by the receiving service. pub request_host: String, + /// Request scheme bound into the signature payload. pub request_scheme: String, + /// Signature timestamp in Unix milliseconds. pub timestamp: u64, } @@ -104,8 +105,8 @@ impl SigningParams { /// Builds the canonical payload string for signing. /// - /// The payload is a JSON-serialized [`SigningPayload`] to prevent signature - /// confusion attacks that could exploit delimiter-based formats. + /// The payload is JSON-serialized to prevent signature confusion attacks + /// that could exploit delimiter-based formats. /// /// # Errors /// @@ -134,10 +135,8 @@ impl RequestSigner { /// /// Returns an error if the key ID cannot be retrieved or the key cannot be parsed. 
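+    ///
+    /// A call sketch (service construction elided; the stores must hold a
+    /// `current-kid` entry and the matching base64 private key):
+    ///
+    /// ```ignore
+    /// let signer = RequestSigner::from_services(&services)?;
+    /// let sig = signer.sign(b"payload")?; // URL-safe base64, made with `signer.kid`
+    /// ```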
pub fn from_services(services: &RuntimeServices) -> Result> { - let key_id = services - .config_store() - .get(&JWKS_STORE_NAME, "current-kid") - .change_context(TrustedServerError::Configuration { + let key_id = + get_current_key_id(services).change_context(TrustedServerError::Configuration { message: "failed to get current-kid".into(), })?; @@ -148,7 +147,7 @@ impl RequestSigner { message: format!("failed to get signing key for kid: {}", key_id), })?; - let signing_key = parse_ed25519_signing_key(key_bytes)?; + let signing_key = parse_ed25519_signing_key(&key_bytes)?; Ok(Self { key: signing_key, @@ -256,89 +255,13 @@ pub fn verify_signature( #[cfg(test)] mod tests { - use std::collections::HashMap; - - use error_stack::Report; - - use crate::platform::test_support::build_services_with_config_and_secret; - use crate::platform::{ - PlatformConfigStore, PlatformError, PlatformSecretStore, StoreId, StoreName, - }; + use crate::platform::test_support::build_request_signing_services; use super::*; - // --------------------------------------------------------------------------- - // Stub stores with preset data - // --------------------------------------------------------------------------- - - struct StubConfigStore(HashMap); - - impl PlatformConfigStore for StubConfigStore { - fn get(&self, _: &StoreName, key: &str) -> Result> { - self.0 - .get(key) - .cloned() - .ok_or_else(|| Report::new(PlatformError::ConfigStore)) - } - - fn put(&self, _: &StoreId, _: &str, _: &str) -> Result<(), Report> { - Err(Report::new(PlatformError::Unsupported)) - } - - fn delete(&self, _: &StoreId, _: &str) -> Result<(), Report> { - Err(Report::new(PlatformError::Unsupported)) - } - } - - struct StubSecretStore(HashMap>); - - impl PlatformSecretStore for StubSecretStore { - fn get_bytes(&self, _: &StoreName, key: &str) -> Result, Report> { - self.0 - .get(key) - .cloned() - .ok_or_else(|| Report::new(PlatformError::SecretStore)) - } - - fn create(&self, _: &StoreId, _: &str, _: &str) -> Result<(), Report> { - Err(Report::new(PlatformError::Unsupported)) - } - - fn delete(&self, _: &StoreId, _: &str) -> Result<(), Report> { - Err(Report::new(PlatformError::Unsupported)) - } - } - - fn build_signing_services() -> crate::platform::RuntimeServices { - use base64::{engine::general_purpose, Engine}; - use ed25519_dalek::SigningKey; - use rand::rngs::OsRng; - - let signing_key = SigningKey::generate(&mut OsRng); - let key_b64 = general_purpose::STANDARD.encode(signing_key.as_bytes()); - let verifying_key = signing_key.verifying_key(); - let x_b64 = general_purpose::URL_SAFE_NO_PAD.encode(verifying_key.as_bytes()); - let jwk_json = format!( - r#"{{"kty":"OKP","crv":"Ed25519","x":"{}","kid":"test-kid","alg":"EdDSA"}}"#, - x_b64 - ); - - let mut config_data = HashMap::new(); - config_data.insert("current-kid".to_string(), "test-kid".to_string()); - config_data.insert("test-kid".to_string(), jwk_json); - - let mut secret_data = HashMap::new(); - secret_data.insert("test-kid".to_string(), key_b64.into_bytes()); - - build_services_with_config_and_secret( - StubConfigStore(config_data), - StubSecretStore(secret_data), - ) - } - #[test] fn from_services_loads_kid_from_config_store() { - let services = build_signing_services(); + let services = build_request_signing_services(); let signer = RequestSigner::from_services(&services).expect("should create signer from services"); @@ -347,7 +270,7 @@ mod tests { #[test] fn sign_produces_non_empty_url_safe_base64_signature() { - let services = build_signing_services(); + let services = 
build_request_signing_services(); let signer = RequestSigner::from_services(&services).expect("should create signer from services"); @@ -364,7 +287,7 @@ mod tests { #[test] fn sign_and_verify_roundtrip_succeeds() { - let services = build_signing_services(); + let services = build_request_signing_services(); let signer = RequestSigner::from_services(&services).expect("should create signer from services"); let payload = b"test payload for verification"; @@ -378,7 +301,7 @@ mod tests { #[test] fn verify_returns_false_for_wrong_payload() { - let services = build_signing_services(); + let services = build_request_signing_services(); let signer = RequestSigner::from_services(&services).expect("should create signer from services"); let signature = signer.sign(b"original").expect("should sign"); @@ -391,7 +314,7 @@ mod tests { #[test] fn verify_errors_for_unknown_kid() { - let services = build_signing_services(); + let services = build_request_signing_services(); let signer = RequestSigner::from_services(&services).expect("should create signer from services"); let signature = signer.sign(b"payload").expect("should sign"); @@ -403,7 +326,7 @@ mod tests { #[test] fn verify_errors_for_malformed_signature() { - let services = build_signing_services(); + let services = build_request_signing_services(); let signer = RequestSigner::from_services(&services).expect("should create signer from services"); @@ -460,7 +383,7 @@ mod tests { #[test] fn sign_request_enhanced_produces_verifiable_signature() { - let services = build_signing_services(); + let services = build_request_signing_services(); let signer = RequestSigner::from_services(&services).expect("should create signer from services"); let params = SigningParams::new( @@ -482,7 +405,7 @@ mod tests { #[test] fn sign_request_different_hosts_produce_different_signatures() { - let services = build_signing_services(); + let services = build_request_signing_services(); let signer = RequestSigner::from_services(&services).expect("should create signer from services"); diff --git a/crates/trusted-server-core/src/rsc_flight.rs b/crates/trusted-server-core/src/rsc_flight.rs index 309e95056..6bd173667 100644 --- a/crates/trusted-server-core/src/rsc_flight.rs +++ b/crates/trusted-server-core/src/rsc_flight.rs @@ -1,3 +1,7 @@ +//! RSC flight data processor. +//! +//! See [`crate::platform`] module doc for platform notes. + use std::io; use crate::host_rewrite::rewrite_bare_host_at_boundaries; diff --git a/crates/trusted-server-core/src/settings.rs b/crates/trusted-server-core/src/settings.rs index ec54d72d7..e62153d7e 100644 --- a/crates/trusted-server-core/src/settings.rs +++ b/crates/trusted-server-core/src/settings.rs @@ -31,6 +31,18 @@ pub struct Publisher { } impl Publisher { + /// Known placeholder values that must not be used in production. + pub const PROXY_SECRET_PLACEHOLDERS: &[&str] = &["change-me-proxy-secret", "proxy-secret"]; + + /// Returns `true` if `proxy_secret` matches a known placeholder value + /// (case-insensitive). + #[must_use] + pub fn is_placeholder_proxy_secret(proxy_secret: &str) -> bool { + Self::PROXY_SECRET_PLACEHOLDERS + .iter() + .any(|p| p.eq_ignore_ascii_case(proxy_secret)) + } + /// Extracts the host (including port if present) from the `origin_url`. /// /// # Examples @@ -191,18 +203,27 @@ impl DerefMut for IntegrationSettings { } } +/// Edge Cookie configuration. 
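+///
+/// A minimal TOML sketch (mirrors the test fixture later in this diff; the
+/// value is a placeholder that production builds must override via
+/// `TRUSTED_SERVER__EDGE_COOKIE__SECRET_KEY`):
+///
+/// ```toml
+/// [edge_cookie]
+/// secret_key = "some-strong-random-secret"
+/// ```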
#[allow(unused)] #[derive(Debug, Default, Clone, Deserialize, Serialize, Validate)] -pub struct Synthetic { - pub counter_store: String, - pub opid_store: String, - #[validate(custom(function = Synthetic::validate_secret_key))] +pub struct EdgeCookie { + #[validate(custom(function = EdgeCookie::validate_secret_key))] pub secret_key: Redacted, - #[validate(length(min = 1))] - pub template: String, } -impl Synthetic { +impl EdgeCookie { + /// Known placeholder values that must not be used in production. + pub const SECRET_KEY_PLACEHOLDERS: &[&str] = &["secret-key", "secret_key", "trusted-server"]; + + /// Returns `true` if `secret_key` matches a known placeholder value + /// (case-insensitive). + #[must_use] + pub fn is_placeholder_secret_key(secret_key: &str) -> bool { + Self::SECRET_KEY_PLACEHOLDERS + .iter() + .any(|p| p.eq_ignore_ascii_case(secret_key)) + } + /// Validates that the secret key is not empty. /// /// # Errors @@ -384,7 +405,7 @@ pub struct Settings { pub publisher: Publisher, #[serde(default)] #[validate(nested)] - pub synthetic: Synthetic, + pub edge_cookie: EdgeCookie, #[serde(default)] pub integrations: IntegrationSettings, #[serde(default, deserialize_with = "vec_from_seq_or_map")] @@ -798,10 +819,7 @@ mod tests { settings.publisher.origin_url, "https://origin.test-publisher.com" ); - assert_eq!(settings.synthetic.counter_store, "test-counter-store"); - assert_eq!(settings.synthetic.opid_store, "test-opid-store"); - assert_eq!(settings.synthetic.secret_key.expose(), "test-secret-key"); - assert!(settings.synthetic.template.contains("{{client_ip}}")); + assert_eq!(settings.edge_cookie.secret_key.expose(), "test-secret-key"); settings.validate().expect("Failed to validate settings"); } @@ -847,6 +865,62 @@ mod tests { ); } + #[test] + fn is_placeholder_secret_key_rejects_all_known_placeholders() { + for placeholder in EdgeCookie::SECRET_KEY_PLACEHOLDERS { + assert!( + EdgeCookie::is_placeholder_secret_key(placeholder), + "should detect placeholder secret_key '{placeholder}'" + ); + } + } + + #[test] + fn is_placeholder_secret_key_is_case_insensitive() { + assert!( + EdgeCookie::is_placeholder_secret_key("SECRET-KEY"), + "should detect case-insensitive placeholder secret_key" + ); + assert!( + EdgeCookie::is_placeholder_secret_key("Trusted-Server"), + "should detect mixed-case placeholder secret_key" + ); + } + + #[test] + fn is_placeholder_secret_key_accepts_non_placeholder() { + assert!( + !EdgeCookie::is_placeholder_secret_key("test-secret-key"), + "should accept non-placeholder secret_key" + ); + } + + #[test] + fn is_placeholder_proxy_secret_rejects_all_known_placeholders() { + for placeholder in Publisher::PROXY_SECRET_PLACEHOLDERS { + assert!( + Publisher::is_placeholder_proxy_secret(placeholder), + "should detect placeholder proxy_secret '{placeholder}'" + ); + } + } + + #[test] + fn is_placeholder_proxy_secret_is_case_insensitive() { + assert!( + Publisher::is_placeholder_proxy_secret("CHANGE-ME-PROXY-SECRET"), + "should detect case-insensitive placeholder proxy_secret" + ); + } + + #[test] + fn is_placeholder_proxy_secret_accepts_non_placeholder() { + assert!( + !Publisher::is_placeholder_proxy_secret("unit-test-proxy-secret"), + "should accept non-placeholder proxy_secret" + ); + } + #[test] fn test_settings_empty_toml() { let toml_str = ""; @@ -1754,11 +1828,8 @@ mod tests { origin_url = "https://origin.test-publisher.com" proxy_secret = "unit-test-proxy-secret" - [synthetic] - counter_store = "test-counter-store" - opid_store = "test-opid-store" + 
[edge_cookie] secret_key = "test-secret-key" - template = "{{client_ip}}" [request_signing] config_store_id = "test-config-store-id" diff --git a/crates/trusted-server-core/src/settings_data.rs b/crates/trusted-server-core/src/settings_data.rs index e66b1da0b..f69fc7ba2 100644 --- a/crates/trusted-server-core/src/settings_data.rs +++ b/crates/trusted-server-core/src/settings_data.rs @@ -3,7 +3,7 @@ use error_stack::{Report, ResultExt}; use validator::Validate; use crate::error::TrustedServerError; -use crate::settings::Settings; +use crate::settings::{EdgeCookie, Publisher, Settings}; pub use crate::auction_config_types::AuctionConfig; @@ -40,17 +40,17 @@ pub fn get_settings() -> Result> { ); } - if settings.synthetic.secret_key.expose() == "trusted-server" { + if EdgeCookie::is_placeholder_secret_key(settings.edge_cookie.secret_key.expose()) { log::warn!( - "INSECURE: synthetic.secret_key is set to the default placeholder — \ + "INSECURE: edge_cookie.secret_key is set to a default placeholder — \ HMAC-SHA256 signatures can be forged. \ - Override via TRUSTED_SERVER__SYNTHETIC__SECRET_KEY at build time" + Override via TRUSTED_SERVER__EDGE_COOKIE__SECRET_KEY at build time" ); } - if settings.publisher.proxy_secret.expose() == "change-me-proxy-secret" { + if Publisher::is_placeholder_proxy_secret(settings.publisher.proxy_secret.expose()) { log::warn!( - "INSECURE: publisher.proxy_secret is set to the default placeholder — \ + "INSECURE: publisher.proxy_secret is set to a default placeholder — \ XChaCha20-Poly1305 encrypted URLs can be decrypted by anyone. \ Override via TRUSTED_SERVER__PUBLISHER__PROXY_SECRET at build time" ); @@ -69,13 +69,8 @@ mod tests { // "change-me-proxy-secret"). This is expected — production builds override // them via TRUSTED_SERVER__* env vars at build time. let settings = get_settings().expect("should load settings from embedded TOML"); - // Verify basic structure is loaded assert!(!settings.publisher.domain.is_empty()); assert!(!settings.publisher.cookie_domain.is_empty()); assert!(!settings.publisher.origin_url.is_empty()); - assert!(!settings.synthetic.counter_store.is_empty()); - assert!(!settings.synthetic.opid_store.is_empty()); - assert!(!settings.synthetic.secret_key.expose().is_empty()); - assert!(!settings.synthetic.template.is_empty()); } } diff --git a/crates/trusted-server-core/src/consent/kv.rs b/crates/trusted-server-core/src/storage/kv_store.rs similarity index 60% rename from crates/trusted-server-core/src/consent/kv.rs rename to crates/trusted-server-core/src/storage/kv_store.rs index af22ef897..c118005a4 100644 --- a/crates/trusted-server-core/src/consent/kv.rs +++ b/crates/trusted-server-core/src/storage/kv_store.rs @@ -1,26 +1,29 @@ //! KV Store consent persistence. //! -//! Stores and retrieves consent data from a platform-neutral KV Store, keyed -//! by Synthetic ID. This provides consent continuity for returning users -//! whose browsers may not have consent cookies on every request. +//! Stores and retrieves consent data from a KV Store, keyed by EC ID. This +//! provides consent continuity for returning users whose browsers may not +//! have consent cookies on every request. //! //! # Storage layout //! -//! Each entry is a single JSON body ([`KvConsentEntry`]) containing raw consent -//! strings, context flags, and a compact fingerprint for change detection. +//! Each entry uses a single JSON body ([`KvConsentEntry`]) containing the raw +//! consent strings, context flags, and a fingerprint for write-on-change +//! detection. //! //! 
# Change detection //! //! Writes only occur when consent signals have actually changed. //! [`consent_fingerprint`] hashes the raw strings into a compact fingerprint -//! stored inside the body. On the next request, the existing fingerprint is -//! compared before writing. +//! stored in the body's `fp` field. On the next request, the existing +//! fingerprint is compared before writing. +use bytes::Bytes; use serde::{Deserialize, Serialize}; use sha2::{Digest, Sha256}; -use super::jurisdiction::Jurisdiction; -use super::types::{ConsentContext, ConsentSource}; +use crate::consent::jurisdiction::Jurisdiction; +use crate::consent::types::{ConsentContext, ConsentSource}; +use crate::platform::PlatformKvStore; // --------------------------------------------------------------------------- // KV body (JSON, stored as value) @@ -28,12 +31,23 @@ use super::types::{ConsentContext, ConsentSource}; /// Consent data stored in the KV Store body. /// -/// Contains the raw consent strings needed to reconstruct a [`ConsentContext`], -/// plus a compact fingerprint used for write-on-change detection. +/// Contains the raw consent strings needed to reconstruct a [`ConsentContext`]. /// Decoded data (TCF, GPP, US Privacy) is not stored — it is re-decoded on /// read to avoid stale decoded state. +/// +/// The `fp` field holds the consent fingerprint for write-on-change detection. +/// Entries written before PR5 lack this field; `#[serde(default)]` treats them +/// as having an empty fingerprint, which always triggers a self-healing +/// re-write. #[derive(Debug, Clone, Serialize, Deserialize)] pub struct KvConsentEntry { + /// Fingerprint of consent signals for write-on-change detection. + /// + /// Written by [`save_consent_to_kv`]. Entries written before PR5 lack + /// this field; `#[serde(default)]` treats them as having an empty + /// fingerprint, which always triggers a self-healing re-write. + #[serde(default)] + pub fp: String, /// Raw TC String from `euconsent-v2` cookie. #[serde(skip_serializing_if = "Option::is_none")] pub raw_tc_string: Option, @@ -59,44 +73,6 @@ pub struct KvConsentEntry { /// When this entry was stored (deciseconds since Unix epoch). pub stored_at_ds: u64, - - /// SHA-256 fingerprint (first 16 hex chars) of all raw consent signals. - /// - /// Used for write-on-change detection. If the fingerprint of the stored - /// entry equals the fingerprint of the current request's consent signals, - /// no write is needed. - #[serde(skip_serializing_if = "Option::is_none")] - pub fp: Option, -} - -// --------------------------------------------------------------------------- -// Platform-neutral KV operations trait -// --------------------------------------------------------------------------- - -/// Synchronous KV operations required for consent persistence. -/// -/// Implemented by the platform adapter (e.g., Fastly KV store). Synchronous -/// to remain compatible with the non-async [`super::build_consent_context`] -/// pipeline. -pub trait ConsentKvOps: Send + Sync { - /// Load a consent entry from the KV store. - /// - /// Returns `None` on a cache miss or deserialization failure. Errors are - /// logged internally and never propagated — KV failures must not break - /// the request pipeline. - fn load_entry(&self, key: &str) -> Option; - - /// Save a consent entry with a time-to-live. - /// - /// Errors are logged internally and never propagated. - fn save_entry_with_ttl(&self, key: &str, entry: &KvConsentEntry, ttl: std::time::Duration); - - /// Delete a consent entry. 
- /// - /// Called when consent is revoked (SSC cookie expiry). Errors are logged - /// internally and never propagated — KV failures must not break the - /// request pipeline. - fn delete_entry(&self, key: &str); } // --------------------------------------------------------------------------- @@ -106,11 +82,12 @@ pub trait ConsentKvOps: Send + Sync { /// Builds a [`KvConsentEntry`] from a [`ConsentContext`]. /// /// Captures only the raw strings and contextual flags. Decoded data is -/// intentionally omitted — it will be re-decoded on read. The entry includes -/// a fingerprint for write-on-change detection on subsequent requests. +/// intentionally omitted — it will be re-decoded on read. The `fp` field is +/// initialized to an empty string and must be set by the caller before writing. #[must_use] pub fn entry_from_context(ctx: &ConsentContext, now_ds: u64) -> KvConsentEntry { KvConsentEntry { + fp: String::new(), raw_tc_string: ctx.raw_tc_string.clone(), raw_gpp_string: ctx.raw_gpp_string.clone(), gpp_section_ids: ctx.gpp_section_ids.clone(), @@ -120,15 +97,14 @@ pub fn entry_from_context(ctx: &ConsentContext, now_ds: u64) -> KvConsentEntry { gpc: ctx.gpc, jurisdiction: ctx.jurisdiction.to_string(), stored_at_ds: now_ds, - fp: Some(consent_fingerprint(ctx)), } } -/// Converts a [`KvConsentEntry`] into [`super::types::RawConsentSignals`] -/// suitable for re-decoding via [`super::build_context_from_signals`]. +/// Converts a [`KvConsentEntry`] into [`crate::consent::types::RawConsentSignals`] +/// suitable for re-decoding via [`crate::consent::build_context_from_signals`]. #[must_use] -pub fn signals_from_entry(entry: &KvConsentEntry) -> super::types::RawConsentSignals { - super::types::RawConsentSignals { +pub fn signals_from_entry(entry: &KvConsentEntry) -> crate::consent::types::RawConsentSignals { + crate::consent::types::RawConsentSignals { raw_tc_string: entry.raw_tc_string.clone(), raw_gpp_string: entry.raw_gpp_string.clone(), raw_gpp_sid: entry.gpp_section_ids.as_ref().map(|ids| { @@ -150,7 +126,7 @@ pub fn signals_from_entry(entry: &KvConsentEntry) -> super::types::RawConsentSig #[must_use] pub fn context_from_entry(entry: &KvConsentEntry) -> ConsentContext { let signals = signals_from_entry(entry); - let mut ctx = super::build_context_from_signals(&signals); + let mut ctx = crate::consent::build_context_from_signals(&signals); // Restore context fields that aren't derived from raw signals. ctx.gdpr_applies = entry.gdpr_applies; @@ -222,87 +198,168 @@ fn parse_jurisdiction(s: &str) -> Jurisdiction { } // --------------------------------------------------------------------------- -// KV Store operations (platform-neutral) +// KV Store operations // --------------------------------------------------------------------------- -/// Loads consent data from the KV Store for a given key. +/// Checks whether the stored consent fingerprint matches the current one. +/// +/// Returns `true` when the stored body's `fp` field equals `new_fp`, meaning +/// no write is needed. Returns `false` when the key is absent, the body +/// cannot be deserialized, or the fingerprint differs. +/// +/// Entries written before PR5 have an empty `fp` (via `#[serde(default)]`), +/// which never matches a computed fingerprint and triggers a self-healing +/// re-write. 
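+///
+/// A sketch of the two body shapes (the legacy literal matches the
+/// deserialization test below; the hash value is illustrative):
+///
+/// ```ignore
+/// // Pre-PR5 body: no `fp` field, deserializes with fp == "" and re-writes.
+/// let legacy = r#"{"gdpr_applies":true,"gpc":false,"jurisdiction":"GDPR","stored_at_ds":0}"#;
+/// // Current body: fingerprint embedded, so unchanged consent skips the write.
+/// let current = r#"{"fp":"3f2a9c0d1e2b4a6f","gdpr_applies":true,"gpc":false,"jurisdiction":"GDPR","stored_at_ds":17500000000}"#;
+/// ```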
+fn fingerprint_unchanged(store: &dyn PlatformKvStore, key: &str, new_fp: &str) -> bool { + let bytes = match futures::executor::block_on(store.get_bytes(key)) { + Ok(Some(bytes)) => bytes, + _ => return false, + }; + + serde_json::from_slice::(&bytes) + .map(|entry| entry.fp == new_fp) + .unwrap_or(false) +} + +/// Loads consent data from the KV store for a given EC ID. /// /// Returns `Some(ConsentContext)` if a valid entry is found, [`None`] if the /// key does not exist or deserialization fails. Errors are logged but never -/// propagated — KV Store failures must not break the request pipeline. +/// propagated — KV failures must not break the request pipeline. /// /// # Arguments /// -/// * `kv` — Platform KV implementation for consent operations. -/// * `key` — The Synthetic ID used as the KV Store key. +/// * `store` — KV store opened by the adapter. +/// * `ec_id` — Edge Cookie ID used as the KV key. #[must_use] -pub fn load_consent(kv: &dyn ConsentKvOps, key: &str) -> Option { - let entry = kv.load_entry(key)?; - log::info!( - "Loaded consent from KV store for '{key}' (stored_at_ds={})", - entry.stored_at_ds - ); - Some(context_from_entry(&entry)) +pub fn load_consent_from_kv(store: &dyn PlatformKvStore, ec_id: &str) -> Option { + let bytes = match futures::executor::block_on(store.get_bytes(ec_id)) { + Ok(Some(bytes)) => bytes, + Ok(None) => { + log::debug!("Consent KV lookup miss for '{ec_id}'"); + return None; + } + Err(e) => { + log::debug!("Consent KV lookup error for '{ec_id}': {e}"); + return None; + } + }; + + match serde_json::from_slice::(&bytes) { + Ok(entry) => { + log::info!( + "Loaded consent from KV store for '{ec_id}' (stored_at_ds={})", + entry.stored_at_ds + ); + Some(context_from_entry(&entry)) + } + Err(e) => { + log::warn!("Failed to deserialize consent KV entry for '{ec_id}': {e}"); + None + } + } } -/// Saves consent data to the KV Store, writing only when signals have changed. +/// Saves consent data to the KV store, writing only when signals have changed. /// -/// Compares the fingerprint of the current consent signals against the stored -/// body. If they match, the write is skipped. Otherwise, the entry is written -/// with the configured TTL. +/// Compares the fingerprint of current consent signals against the fingerprint +/// embedded in the stored entry. If they match, the write is skipped. +/// The fingerprint is embedded in the body so no KV metadata is required. /// /// # Arguments /// -/// * `kv` — Platform KV implementation for consent operations. -/// * `key` — The Synthetic ID used as the KV Store key. -/// * `ctx` — The current request's consent context. +/// * `store` — KV store opened by the adapter. +/// * `ec_id` — Edge Cookie ID used as the KV key. +/// * `ctx` — Current request's consent context. /// * `max_age_days` — TTL for the entry, matching `max_consent_age_days`. -pub fn save_consent(kv: &dyn ConsentKvOps, key: &str, ctx: &ConsentContext, max_age_days: u32) { +pub fn save_consent_to_kv( + store: &dyn PlatformKvStore, + ec_id: &str, + ctx: &ConsentContext, + max_age_days: u32, +) { if ctx.is_empty() { log::debug!("Skipping consent KV write: consent is empty"); return; } - let new_fp = consent_fingerprint(ctx); - // Load existing entry once; check fp to skip write when unchanged. 
- let existing_fp = kv.load_entry(key).and_then(|e| e.fp); - if existing_fp.as_deref() == Some(new_fp.as_str()) { - log::debug!("Consent unchanged for '{key}' (fp={new_fp}), skipping write"); + + let fp = consent_fingerprint(ctx); + + if fingerprint_unchanged(store, ec_id, &fp) { + log::debug!("Consent unchanged for '{ec_id}' (fp={fp}), skipping write"); return; } - let entry = entry_from_context(ctx, super::now_deciseconds()); + + let mut entry = entry_from_context(ctx, crate::consent::now_deciseconds()); + entry.fp = fp.clone(); + + let body = match serde_json::to_vec(&entry) { + Ok(body) => Bytes::from(body), + Err(e) => { + log::warn!("Failed to serialize consent entry for '{ec_id}': {e}"); + return; + } + }; + let ttl = std::time::Duration::from_secs(u64::from(max_age_days) * 86_400); - kv.save_entry_with_ttl(key, &entry, ttl); - log::info!("Saved consent to KV store for '{key}' (fp={new_fp}, ttl={max_age_days}d)"); + + match futures::executor::block_on(store.put_bytes_with_ttl(ec_id, body, ttl)) { + Ok(()) => { + log::info!("Saved consent to KV store for '{ec_id}' (fp={fp}, ttl={max_age_days}d)"); + } + Err(e) => { + log::warn!("Failed to write consent to KV store for '{ec_id}': {e}"); + } + } +} + +/// Deletes a consent entry from the KV store for a given EC ID. +/// +/// Used when a user revokes consent — the existing EC cookie is being +/// expired, so the persisted consent data must also be removed. +/// +/// Errors are logged but never propagated — KV failures must not +/// break the request pipeline. +pub fn delete_consent_from_kv(store: &dyn PlatformKvStore, ec_id: &str) { + match futures::executor::block_on(store.delete(ec_id)) { + Ok(()) => { + log::info!("Deleted consent KV entry for '{ec_id}' (consent revoked)"); + } + Err(e) => { + log::warn!("Failed to delete consent KV entry for '{ec_id}': {e}"); + } + } } // --------------------------------------------------------------------------- // Tests // --------------------------------------------------------------------------- +#[cfg(test)] +fn make_test_context() -> ConsentContext { + ConsentContext { + raw_tc_string: Some("CPXxGfAPXxGfA".to_owned()), + raw_gpp_string: Some("DBACNYA~CPXxGfA".to_owned()), + gpp_section_ids: Some(vec![2, 6]), + raw_us_privacy: Some("1YNN".to_owned()), + raw_ac_string: None, + gdpr_applies: true, + tcf: None, + gpp: None, + us_privacy: None, + expired: false, + gpc: false, + jurisdiction: Jurisdiction::Gdpr, + source: ConsentSource::Cookie, + } +} + #[cfg(test)] mod tests { use super::*; use crate::consent::jurisdiction::Jurisdiction; use crate::consent::types::{ConsentContext, ConsentSource}; - fn make_test_context() -> ConsentContext { - ConsentContext { - raw_tc_string: Some("CPXxGfAPXxGfA".to_owned()), - raw_gpp_string: Some("DBACNYA~CPXxGfA".to_owned()), - gpp_section_ids: Some(vec![2, 6]), - raw_us_privacy: Some("1YNN".to_owned()), - raw_ac_string: None, - gdpr_applies: true, - tcf: None, - gpp: None, - us_privacy: None, - expired: false, - gpc: false, - jurisdiction: Jurisdiction::Gdpr, - source: ConsentSource::Cookie, - } - } - #[test] fn entry_roundtrip() { let ctx = make_test_context(); @@ -320,6 +377,34 @@ mod tests { assert_eq!(restored.stored_at_ds, 1_000_000); } + #[test] + fn kv_consent_entry_roundtrip_preserves_fp() { + let ctx = make_test_context(); + let fp = consent_fingerprint(&ctx); + let mut entry = entry_from_context(&ctx, 1_000_000); + entry.fp = fp.clone(); + let json = serde_json::to_string(&entry).expect("should serialize"); + let restored: KvConsentEntry = 
serde_json::from_str(&json).expect("should deserialize"); + + assert_eq!( + restored.fp, fp, + "should preserve fingerprint through roundtrip" + ); + } + + #[test] + fn entry_fits_in_2000_bytes() { + let ctx = make_test_context(); + let mut entry = entry_from_context(&ctx, 1_000_000); + entry.fp = consent_fingerprint(&ctx); + let json = serde_json::to_string(&entry).expect("should serialize"); + assert!( + json.len() <= 2000, + "entry JSON must fit in 2000 bytes, was {} bytes", + json.len() + ); + } + #[test] fn context_roundtrip_via_entry() { let original = make_test_context(); @@ -443,133 +528,43 @@ mod tests { "AC string should survive roundtrip" ); } +} - // --- ConsentKvOps integration tests using a stub --- - - struct StubKvOps { - stored: std::sync::Mutex>, - } - - impl StubKvOps { - fn new() -> Self { - Self { - stored: std::sync::Mutex::new(std::collections::HashMap::new()), - } - } - } - - impl ConsentKvOps for StubKvOps { - fn load_entry(&self, key: &str) -> Option { - self.stored - .lock() - .expect("should lock stub KV store") - .get(key) - .cloned() - } - - fn save_entry_with_ttl( - &self, - key: &str, - entry: &KvConsentEntry, - _ttl: std::time::Duration, - ) { - self.stored - .lock() - .expect("should lock stub KV store") - .insert(key.to_owned(), entry.clone()); - } +#[cfg(test)] +mod new_api_tests { + use super::*; + use edgezero_core::key_value_store::NoopKvStore; - fn delete_entry(&self, key: &str) { - self.stored - .lock() - .expect("should lock stub KV store") - .remove(key); - } + fn noop() -> NoopKvStore { + NoopKvStore } #[test] - fn load_consent_returns_none_on_miss() { - let kv = StubKvOps::new(); - let result = load_consent(&kv, "missing-key"); - assert!(result.is_none(), "should return None on cache miss"); + fn load_returns_none_when_key_absent() { + let result = load_consent_from_kv(&noop(), "some-ec-id"); + assert!(result.is_none(), "should return None when key is absent"); } #[test] - fn save_and_load_consent_roundtrip() { - let kv = StubKvOps::new(); + fn save_does_not_panic_with_noop_store() { let ctx = make_test_context(); - save_consent(&kv, "user-1", &ctx, 30); - let loaded = load_consent(&kv, "user-1").expect("should load saved consent"); - assert_eq!( - loaded.raw_tc_string, ctx.raw_tc_string, - "should restore raw TC string" - ); + save_consent_to_kv(&noop(), "some-ec-id", &ctx, 30); } #[test] - fn save_consent_skips_write_when_fingerprint_unchanged() { - let kv = StubKvOps::new(); - let ctx = make_test_context(); - - // First write. - save_consent(&kv, "user-1", &ctx, 30); - assert_eq!( - kv.stored.lock().expect("should lock").len(), - 1, - "should have one entry" - ); - - // Track the stored timestamp to verify no new write happens. - let stored_ts = kv - .stored - .lock() - .expect("should lock") - .get("user-1") - .map(|e| e.stored_at_ds) - .expect("should find entry after first write"); - - // Second write with same context — fingerprint unchanged. 
- save_consent(&kv, "user-1", &ctx, 30); - let ts_after = kv - .stored - .lock() - .expect("should lock") - .get("user-1") - .map(|e| e.stored_at_ds) - .expect("should find entry after second write"); - - assert_eq!( - stored_ts, ts_after, - "should not overwrite when fingerprint is unchanged" - ); + fn delete_does_not_panic_with_noop_store() { + delete_consent_from_kv(&noop(), "some-ec-id"); } #[test] - fn save_consent_writes_when_fingerprint_changes() { - let kv = StubKvOps::new(); - let ctx1 = make_test_context(); - save_consent(&kv, "user-1", &ctx1, 30); - - let mut ctx2 = make_test_context(); - ctx2.raw_tc_string = Some("DIFFERENT".to_owned()); - save_consent(&kv, "user-1", &ctx2, 30); - - let loaded = load_consent(&kv, "user-1").expect("should load updated entry"); + fn kv_consent_entry_missing_fp_deserialises_as_empty() { + let json = r#"{"gdpr_applies":true,"gpc":false,"jurisdiction":"GDPR","stored_at_ds":0}"#; + let entry: KvConsentEntry = + serde_json::from_str(json).expect("should deserialize legacy entry"); assert_eq!( - loaded.raw_tc_string, - Some("DIFFERENT".to_owned()), - "should reflect updated TC string" - ); - } - - #[test] - fn save_consent_skips_empty_consent() { - let kv = StubKvOps::new(); - let ctx = ConsentContext::default(); - save_consent(&kv, "user-1", &ctx, 30); - assert!( - kv.stored.lock().expect("should lock").is_empty(), - "should not write empty consent" + entry.fp, + String::new(), + "should default fp to empty string for legacy entries" ); } } diff --git a/crates/trusted-server-core/src/storage/mod.rs b/crates/trusted-server-core/src/storage/mod.rs new file mode 100644 index 000000000..0c6998b6d --- /dev/null +++ b/crates/trusted-server-core/src/storage/mod.rs @@ -0,0 +1 @@ +pub mod kv_store; diff --git a/crates/trusted-server-core/src/streaming_processor.rs b/crates/trusted-server-core/src/streaming_processor.rs index 8a4ec2fe4..256d3fa5a 100644 --- a/crates/trusted-server-core/src/streaming_processor.rs +++ b/crates/trusted-server-core/src/streaming_processor.rs @@ -8,18 +8,27 @@ //! //! # Platform notes //! -//! This module is **platform-agnostic** (verified in PR 8). It has zero +//! This module is **platform-agnostic** (verified 2026-03-31; see +//! `docs/superpowers/plans/2026-03-31-pr8-content-rewriting-verification.md`). It has zero //! `fastly` imports. [`StreamingPipeline::process`] is generic over //! `R: Read + W: Write` — any reader or writer works, including //! any platform body type (which implements `std::io::Read`) or standard //! `std::io::Cursor<&[u8]>`. //! -//! Future adapters (PR 16/17) do not need to implement any compression or +//! Future adapters (Cloudflare Workers, Axum, Spin) do not need to implement any compression or //! streaming interface. See `crate::platform` module doc for the //! authoritative note. 
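+//!
+//! A caller sketch (pipeline construction elided; `body` is any `&[u8]`):
+//!
+//! ```ignore
+//! let mut out = Vec::new();
+//! // Any Read/Write pair works: here an in-memory cursor in, a Vec out.
+//! pipeline.process(std::io::Cursor::new(body), &mut out)?;
+//! ```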
-use error_stack::{Report, ResultExt}; +use std::cell::RefCell; use std::io::{self, Read, Write}; +use std::rc::Rc; + +use brotli::enc::writer::CompressorWriter; +use brotli::enc::BrotliEncoderParams; +use brotli::Decompressor; +use error_stack::{Report, ResultExt}; +use flate2::read::{GzDecoder, ZlibDecoder}; +use flate2::write::{GzEncoder, ZlibEncoder}; use crate::error::TrustedServerError; @@ -44,7 +53,7 @@ pub trait StreamProcessor { } /// Compression type for the stream -#[derive(Debug, Clone, Copy, PartialEq)] +#[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum Compression { None, Gzip, @@ -65,7 +74,21 @@ impl Compression { } } -/// Configuration for the streaming pipeline +/// Configuration for the streaming pipeline. +/// +/// # Supported compression combinations +/// +/// | Input | Output | Behavior | +/// |-------|--------|----------| +/// | None | None | Pass-through processing | +/// | Gzip | Gzip | Decompress → process → recompress | +/// | Gzip | None | Decompress → process | +/// | Deflate | Deflate | Decompress → process → recompress | +/// | Deflate | None | Decompress → process | +/// | Brotli | Brotli | Decompress → process → recompress | +/// | Brotli | None | Decompress → process | +/// +/// All other combinations return an error at runtime. pub struct PipelineConfig { /// Input compression type pub input_compression: Compression, @@ -103,6 +126,10 @@ impl StreamingPipeline
     /// Process a stream from input to output
     ///
+    /// Handles all supported compression transformations by wrapping the raw
+    /// reader/writer in the appropriate decoder/encoder, then delegating to
+    /// [`Self::process_chunks`].
+    ///
     /// # Errors
     ///
     /// Returns an error if the compression transformation is unsupported or if reading/writing fails.
@@ -115,44 +142,88 @@ impl<P: StreamProcessor> StreamingPipeline<P> {
             self.config.input_compression,
             self.config.output_compression,
         ) {
-            (Compression::None, Compression::None) => self.process_uncompressed(input, output),
-            (Compression::Gzip, Compression::Gzip) => self.process_gzip_to_gzip(input, output),
-            (Compression::Gzip, Compression::None) => self.process_gzip_to_none(input, output),
+            (Compression::None, Compression::None) => self.process_chunks(input, output),
+            (Compression::Gzip, Compression::Gzip) => {
+                let decoder = GzDecoder::new(input);
+                let mut encoder = GzEncoder::new(output, flate2::Compression::default());
+                self.process_chunks(decoder, &mut encoder)?;
+                encoder.finish().change_context(TrustedServerError::Proxy {
+                    message: "Failed to finalize gzip encoder".to_string(),
+                })?;
+                Ok(())
+            }
+            (Compression::Gzip, Compression::None) => {
+                self.process_chunks(GzDecoder::new(input), output)
+            }
             (Compression::Deflate, Compression::Deflate) => {
-                self.process_deflate_to_deflate(input, output)
+                let decoder = ZlibDecoder::new(input);
+                let mut encoder = ZlibEncoder::new(output, flate2::Compression::default());
+                self.process_chunks(decoder, &mut encoder)?;
+                encoder.finish().change_context(TrustedServerError::Proxy {
+                    message: "Failed to finalize deflate encoder".to_string(),
+                })?;
+                Ok(())
             }
             (Compression::Deflate, Compression::None) => {
-                self.process_deflate_to_none(input, output)
+                self.process_chunks(ZlibDecoder::new(input), output)
             }
             (Compression::Brotli, Compression::Brotli) => {
-                self.process_brotli_to_brotli(input, output)
+                let decoder = Decompressor::new(input, 4096);
+                let params = BrotliEncoderParams {
+                    quality: 4,
+                    lgwin: 22,
+                    ..Default::default()
+                };
+                let mut encoder = CompressorWriter::with_params(output, 4096, &params);
+                self.process_chunks(decoder, &mut encoder)?;
+                // CompressorWriter emits the brotli stream trailer via flush(),
+                // which process_chunks already called. into_inner() avoids a
+                // redundant flush on drop and makes finalization explicit.
+                // Note: unlike flate2's finish(), CompressorWriter has no
+                // fallible finalization method — flush() is the only option.
+                let _ = encoder.into_inner();
+                Ok(())
+            }
+            (Compression::Brotli, Compression::None) => {
+                self.process_chunks(Decompressor::new(input, 4096), output)
             }
-            (Compression::Brotli, Compression::None) => self.process_brotli_to_none(input, output),
             _ => Err(Report::new(TrustedServerError::Proxy {
                 message: "Unsupported compression transformation".to_string(),
             })),
         }
     }

-    /// Process uncompressed stream
-    fn process_uncompressed<R: Read, W: Write>(
+    /// Read chunks from `reader`, pass each through the processor, and write output to `writer`.
+    ///
+    /// This is the single unified chunk loop used by all compression paths.
+    /// The method calls `writer.flush()` before returning. For the `None → None`
+    /// path this is the only finalization needed. For compressed paths, the caller
+    /// must still call the encoder's type-specific finalization after this returns:
+    /// - **flate2** (`GzEncoder`, `ZlibEncoder`): call `finish()` — `flush()` does
+    ///   not write the gzip/deflate trailer.
+    /// - **brotli** (`CompressorWriter`): `flush()` does finalize the stream, so
+    ///   the caller only needs `into_inner()` to reclaim the writer.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if reading, processing, or writing any chunk fails.
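+    ///
+    /// Caller-side sketch of that contract (gzip case, as in `process` above):
+    ///
+    /// ```ignore
+    /// let decoder = GzDecoder::new(input);
+    /// let mut encoder = GzEncoder::new(output, flate2::Compression::default());
+    /// self.process_chunks(decoder, &mut encoder)?;
+    /// encoder.finish()?; // flush() alone would not write the gzip trailer
+    /// ```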
+    fn process_chunks<R: Read, W: Write>(
         &mut self,
-        mut input: R,
-        mut output: W,
+        mut reader: R,
+        mut writer: W,
     ) -> Result<(), Report<TrustedServerError>> {
         let mut buffer = vec![0u8; self.config.chunk_size];

         loop {
-            match input.read(&mut buffer) {
+            match reader.read(&mut buffer) {
                 Ok(0) => {
-                    // End of stream - process any remaining data
                     let final_chunk = self.processor.process_chunk(&[], true).change_context(
                         TrustedServerError::Proxy {
                             message: "Failed to process final chunk".to_string(),
                         },
                     )?;
                     if !final_chunk.is_empty() {
-                        output.write_all(&final_chunk).change_context(
+                        writer.write_all(&final_chunk).change_context(
                             TrustedServerError::Proxy {
                                 message: "Failed to write final chunk".to_string(),
                             },
                         )?;
@@ -161,7 +232,6 @@ impl<P: StreamProcessor> StreamingPipeline<P> {
                     break;
                 }
                 Ok(n) => {
-                    // Process this chunk
                     let processed = self
                         .processor
                         .process_chunk(&buffer[..n], false)
@@ -169,7 +239,7 @@ impl<P: StreamProcessor> StreamingPipeline<P> {
                         message: "Failed to process chunk".to_string(),
                     })?;
                 if !processed.is_empty() {
-                    output
+                    writer
                         .write_all(&processed)
                         .change_context(TrustedServerError::Proxy {
                             message: "Failed to write processed chunk".to_string(),
@@ -178,309 +248,98 @@ impl<P: StreamProcessor> StreamingPipeline<P> {
{ } Err(e) => { return Err(Report::new(TrustedServerError::Proxy { - message: format!("Failed to read from input: {}", e), + message: format!("Failed to read: {e}"), })); } } } - output.flush().change_context(TrustedServerError::Proxy { + writer.flush().change_context(TrustedServerError::Proxy { message: "Failed to flush output".to_string(), })?; Ok(()) } +} - /// Process gzip compressed stream - fn process_gzip_to_gzip<R: Read, W: Write>( - &mut self, - input: R, - output: W, - ) -> Result<(), Report<TrustedServerError>> { - use flate2::read::GzDecoder; - use flate2::write::GzEncoder; - use flate2::Compression; - - // Decompress input - let mut decoder = GzDecoder::new(input); - let mut decompressed = Vec::new(); - decoder - .read_to_end(&mut decompressed) - .change_context(TrustedServerError::Proxy { - message: "Failed to decompress gzip".to_string(), - })?; - - log::info!("Decompressed size: {} bytes", decompressed.len()); - - // Process the decompressed content - let processed = self - .processor - .process_chunk(&decompressed, true) - .change_context(TrustedServerError::Proxy { - message: "Failed to process content".to_string(), - })?; - - log::info!("Processed size: {} bytes", processed.len()); - - // Recompress the output - let mut encoder = GzEncoder::new(output, Compression::default()); - encoder - .write_all(&processed) - .change_context(TrustedServerError::Proxy { - message: "Failed to write to gzip encoder".to_string(), - })?; - encoder.finish().change_context(TrustedServerError::Proxy { - message: "Failed to finish gzip encoder".to_string(), - })?; - - Ok(()) - } - - /// Decompress input, process content, and write uncompressed output. - fn decompress_and_process<R: Read, W: Write>( - &mut self, - mut decoder: R, - mut output: W, - codec_name: &str, - ) -> Result<(), Report<TrustedServerError>> { - let mut decompressed = Vec::new(); - decoder - .read_to_end(&mut decompressed) - .change_context(TrustedServerError::Proxy { - message: format!("Failed to decompress {codec_name}"), - })?; - - log::info!( - "{codec_name} decompressed size: {} bytes", - decompressed.len() - ); - - let processed = self - .processor - .process_chunk(&decompressed, true) - .change_context(TrustedServerError::Proxy { - message: "Failed to process content".to_string(), - })?; - - log::info!("{codec_name} processed size: {} bytes", processed.len()); - - output - .write_all(&processed) - .change_context(TrustedServerError::Proxy { - message: "Failed to write output".to_string(), - })?; - - Ok(()) - } - - /// Process gzip compressed input to uncompressed output (decompression only) - fn process_gzip_to_none<R: Read, W: Write>( - &mut self, - input: R, - output: W, - ) -> Result<(), Report<TrustedServerError>> { - use flate2::read::GzDecoder; - - self.decompress_and_process(GzDecoder::new(input), output, "gzip") - } - - /// Process deflate compressed stream - fn process_deflate_to_deflate<R: Read, W: Write>( - &mut self, - input: R, - output: W, - ) -> Result<(), Report<TrustedServerError>> { - use flate2::read::ZlibDecoder; - use flate2::write::ZlibEncoder; - use flate2::Compression; - - let decoder = ZlibDecoder::new(input); - let encoder = ZlibEncoder::new(output, Compression::default()); - - self.process_through_compression(decoder, encoder) - } - - /// Process deflate compressed input to uncompressed output (decompression only) - fn process_deflate_to_none<R: Read, W: Write>( - &mut self, - input: R, - output: W, - ) -> Result<(), Report<TrustedServerError>> { - use flate2::read::ZlibDecoder; - - self.decompress_and_process(ZlibDecoder::new(input), output, "deflate") - } - - /// Process brotli compressed stream - fn process_brotli_to_brotli<R: Read, W: Write>( - &mut self, - input: R, - output: W, - ) -> Result<(), Report<TrustedServerError>> { - use brotli::enc::writer::CompressorWriter; - use brotli::enc::BrotliEncoderParams; - use brotli::Decompressor; - - let decoder = Decompressor::new(input, 4096); - let params = BrotliEncoderParams { - quality: 4, - lgwin: 22, - ..Default::default() - }; - let encoder = CompressorWriter::with_params(output, 4096, &params); - - self.process_through_compression(decoder, encoder) - } - - /// Process brotli compressed input to uncompressed output (decompression only) - fn process_brotli_to_none<R: Read, W: Write>( - &mut self, - input: R, - output: W, - ) -> Result<(), Report<TrustedServerError>> { - use brotli::Decompressor; - - self.decompress_and_process(Decompressor::new(input, 4096), output, "brotli") - } - - /// Generic processing through compression layers - fn process_through_compression<R: Read, W: Write>( - &mut self, - mut decoder: R, - mut encoder: W, - ) -> Result<(), Report<TrustedServerError>> { - let mut buffer = vec![0u8; self.config.chunk_size]; - - loop { - match decoder.read(&mut buffer) { - Ok(0) => { - // End of stream - let final_chunk = self.processor.process_chunk(&[], true).change_context( - TrustedServerError::Proxy { - message: "Failed to process final chunk".to_string(), - }, - )?; - if !final_chunk.is_empty() { - encoder.write_all(&final_chunk).change_context( - TrustedServerError::Proxy { - message: "Failed to write final chunk".to_string(), - }, - )?; - } - break; - } - Ok(n) => { - let processed = self - .processor - .process_chunk(&buffer[..n], false) - .change_context(TrustedServerError::Proxy { - message: "Failed to process chunk".to_string(), - })?; - if !processed.is_empty() { - encoder.write_all(&processed).change_context( - TrustedServerError::Proxy { - message: "Failed to write processed chunk".to_string(), - }, - )?; - } - } - Err(e) => { - return Err(Report::new(TrustedServerError::Proxy { - message: format!("Failed to read from decoder: {}", e), - })); - } - } - } - - // Flush encoder (this also finishes compression) - encoder.flush().change_context(TrustedServerError::Proxy { - message: "Failed to flush encoder".to_string(), - })?; - - // For GzEncoder and similar, we need to finish() to properly close the stream - // The flush above might not be enough - drop(encoder); +/// Shared output buffer used as an [`lol_html::OutputSink`]. +/// +/// The `HtmlRewriter` invokes [`OutputSink::handle_chunk`] synchronously during +/// each [`HtmlRewriter::write`] call, so the buffer is drained after every +/// `process_chunk` invocation to emit output incrementally. +struct RcVecSink(Rc<RefCell<Vec<u8>>>); - Ok(()) +impl lol_html::OutputSink for RcVecSink { + fn handle_chunk(&mut self, chunk: &[u8]) { + self.0.borrow_mut().extend_from_slice(chunk); } } -/// Adapter to use `lol_html` `HtmlRewriter` as a `StreamProcessor` -/// Important: Due to `lol_html`'s ownership model, we must accumulate input -/// and process it all at once when the stream ends. This is a limitation -/// of the `lol_html` library's API design. +/// Adapter to use `lol_html` [`HtmlRewriter`](lol_html::HtmlRewriter) as a [`StreamProcessor`]. +/// +/// Output is emitted incrementally on every [`process_chunk`](StreamProcessor::process_chunk) +/// call. Script rewriters that receive text from `lol_html` must be fragment-safe — +/// they accumulate text fragments internally until `is_last_in_text_node` is true. +/// +/// The adapter is single-use: one adapter per request. Calling [`StreamProcessor::reset`] +/// is a no-op because the rewriter consumes its settings on construction. pub struct HtmlRewriterAdapter { - settings: lol_html::Settings<'static, 'static>, - accumulated_input: Vec<u8>, + rewriter: Option<lol_html::HtmlRewriter<'static, RcVecSink>>, + output: Rc<RefCell<Vec<u8>>>, } impl HtmlRewriterAdapter { - /// Create a new HTML rewriter adapter + /// Create a new HTML rewriter adapter that streams output per chunk. #[must_use] pub fn new(settings: lol_html::Settings<'static, 'static>) -> Self { + let output = Rc::new(RefCell::new(Vec::new())); + let sink = RcVecSink(Rc::clone(&output)); + let rewriter = lol_html::HtmlRewriter::new(settings, sink); Self { - settings, - accumulated_input: Vec::new(), + rewriter: Some(rewriter), + output, } } } impl StreamProcessor for HtmlRewriterAdapter { fn process_chunk(&mut self, chunk: &[u8], is_last: bool) -> Result<Vec<u8>, io::Error> { - // Accumulate input chunks - self.accumulated_input.extend_from_slice(chunk); - - if !chunk.is_empty() { - log::debug!( - "Buffering chunk: {} bytes, total buffered: {} bytes", - chunk.len(), - self.accumulated_input.len() - ); + match &mut self.rewriter { + Some(rewriter) => { + if !chunk.is_empty() { + rewriter.write(chunk).map_err(|e| { + log::error!("Failed to process HTML chunk: {e}"); + io::Error::other(format!("HTML processing failed: {e}")) + })?; + } + } + None if !chunk.is_empty() => { + log::warn!( + "HtmlRewriterAdapter: {} bytes received after finalization, data will be lost", + chunk.len() + ); + } + None => {} } - // Only process when we have all the input if is_last { - log::info!( - "Processing complete document: {} bytes", - self.accumulated_input.len() - ); - - // Process all accumulated input at once - let mut output = Vec::new(); - - // Create rewriter with output sink - let mut rewriter = lol_html::HtmlRewriter::new( - std::mem::take(&mut self.settings), - |chunk: &[u8]| { - output.extend_from_slice(chunk); - }, - ); - - // Process the entire document - rewriter.write(&self.accumulated_input).map_err(|e| { - log::error!("Failed to process HTML: {}", e); - io::Error::other(format!("HTML processing failed: {}", e)) - })?; - - // Finalize the rewriter - rewriter.end().map_err(|e| { - log::error!("Failed to finalize: {}", e); - io::Error::other(format!("HTML finalization failed: {}", e)) - })?; - - log::debug!("Output size: {} bytes", output.len()); - self.accumulated_input.clear(); - Ok(output) - } else { - // Return empty until we have all input - // This is a limitation of lol_html's API - Ok(Vec::new()) + if let Some(rewriter) = self.rewriter.take() { + rewriter.end().map_err(|e| { + log::error!("Failed to finalize HTML: {e}"); + io::Error::other(format!("HTML finalization failed: {e}")) + })?; + } } - } - fn reset(&mut self) { - self.accumulated_input.clear(); + // Drain whatever lol_html produced since the last call + Ok(std::mem::take(&mut *self.output.borrow_mut())) } + + /// No-op. `HtmlRewriterAdapter` is single-use: the rewriter consumes its + /// [`Settings`](lol_html::Settings) on construction and cannot be recreated. + /// Calling [`process_chunk`](StreamProcessor::process_chunk) after finalization + /// (`is_last = true`) will produce empty output — the rewriter is already done. + fn reset(&mut self) {} }
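To make the single-use, drain-per-chunk contract concrete, here is a minimal caller sketch (illustrative only: the driver function, its name, and the default settings are assumptions, not part of this diff):

```rust
use lol_html::Settings;

// Hypothetical driver: feed chunks as they arrive and forward whatever the
// rewriter has emitted so far, instead of waiting for the final chunk.
fn rewrite_chunks(chunks: &[&[u8]]) -> std::io::Result<Vec<u8>> {
    let mut adapter = HtmlRewriterAdapter::new(Settings::default());
    let mut out = Vec::new();
    for (i, chunk) in chunks.iter().enumerate() {
        let is_last = i + 1 == chunks.len();
        // Each call drains the shared sink buffer; `is_last = true` takes and
        // finalizes the rewriter, after which the adapter is spent.
        out.extend(adapter.process_chunk(chunk, is_last)?);
    }
    Ok(out)
}
```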
/// Adapter to use our existing `StreamingReplacer` as a `StreamProcessor` @@ -497,6 +356,57 @@ mod tests { use super::*; use crate::streaming_replacer::{Replacement, StreamingReplacer}; + /// Verify that `lol_html` fragments text nodes when input chunks split + /// mid-text-node. Script rewriters must be fragment-safe — they accumulate + /// text fragments internally until `is_last_in_text_node` is true. + #[test] + fn lol_html_fragments_text_across_chunk_boundaries() { + use std::cell::RefCell; + use std::rc::Rc; + + let fragments: Rc<RefCell<Vec<(String, bool)>>> = Rc::new(RefCell::new(Vec::new())); + let fragments_clone = Rc::clone(&fragments); + + let mut rewriter = lol_html::HtmlRewriter::new( + lol_html::Settings { + element_content_handlers: vec![lol_html::text!("script", move |text| { + fragments_clone + .borrow_mut() + .push((text.as_str().to_string(), text.last_in_text_node())); + Ok(()) + })], + ..lol_html::Settings::default() + }, + |_chunk: &[u8]| {}, + ); + + // Split "googletagmanager.com/gtm.js" across two chunks + rewriter + .write(b"<script>https://googletagman") + .expect("should write chunk1"); + rewriter + .write(b"ager.com/gtm.js</script>") + .expect("should write chunk2"); + rewriter.end().expect("should end"); + + let frags = fragments.borrow(); + // lol_html should emit at least 2 text fragments since input was split + assert!( + frags.len() >= 2, + "should fragment text across chunk boundaries, got {} fragments: {:?}", + frags.len(), + *frags + ); + // No single fragment should contain the full domain + assert!( + !frags + .iter() + .any(|(text, _)| text.contains("googletagmanager.com")), + "no individual fragment should contain the full domain when split across chunks: {:?}", + *frags + ); + } + #[test] fn test_uncompressed_pipeline() { let replacer = StreamingReplacer::new(vec![Replacement { @@ -546,7 +456,7 @@ mod tests { } #[test] - fn test_html_rewriter_adapter_accumulates_until_last() { + fn test_html_rewriter_adapter_streams_incrementally() { use lol_html::{element, Settings}; // Create a simple HTML rewriter that replaces text @@ -560,32 +470,40 @@ let mut adapter = HtmlRewriterAdapter::new(settings); - // Test that intermediate chunks return empty let chunk1 = b"<html>"; let result1 = adapter .process_chunk(chunk1, false) .expect("should process chunk1"); - assert_eq!(result1.len(), 0, "Should return empty for non-last chunk"); let chunk2 = b"<body><p>original</p></body>"; let result2 = adapter .process_chunk(chunk2, false) .expect("should process chunk2"); - assert_eq!(result2.len(), 0, "Should return empty for non-last chunk"); - // Test that last chunk processes everything let chunk3 = b"</html>"; let result3 = adapter .process_chunk(chunk3, true) .expect("should process final chunk"); + + // Concatenate all outputs and verify the final HTML is correct + let mut all_output = result1; + all_output.extend_from_slice(&result2); + all_output.extend_from_slice(&result3); + assert!( - !result3.is_empty(), - "Should return processed content for last chunk" + !all_output.is_empty(), + "should produce non-empty concatenated output" ); - let output = String::from_utf8(result3).expect("output should be valid UTF-8"); - assert!(output.contains("replaced"), "Should have replaced content"); - assert!(output.contains("</html>"), "Should have complete HTML"); + let output = String::from_utf8(all_output).expect("output should be valid UTF-8"); + assert!( + output.contains("replaced"), + "should have replaced content in concatenated output" + ); + assert!( + output.contains("</html>"), + "should have complete HTML in concatenated output" + ); } #[test] @@ -602,59 +520,294 @@ mod tests { } large_html.push_str("</body></html>"); - // Process in chunks + // Process in chunks and collect all output let chunk_size = 1024; let bytes = large_html.as_bytes(); - let mut chunks = bytes.chunks(chunk_size); - let mut last_chunk = chunks.next().unwrap_or(&[]); + let mut chunks = bytes.chunks(chunk_size).peekable(); + let mut all_output = Vec::new(); - for chunk in chunks { + while let Some(chunk) = chunks.next() { + let is_last = chunks.peek().is_none(); let result = adapter - .process_chunk(last_chunk, false) - .expect("should process intermediate chunk"); - assert_eq!(result.len(), 0, "Intermediate chunks should return empty"); - last_chunk = chunk; + .process_chunk(chunk, is_last) + .expect("should process chunk"); + all_output.extend_from_slice(&result); } - // Process last chunk - let result = adapter - .process_chunk(last_chunk, true) - .expect("should process last chunk"); - assert!(!result.is_empty(), "Last chunk should return content"); + assert!( + !all_output.is_empty(), + "should produce non-empty output for large document" ); - let output = String::from_utf8(result).expect("output should be valid UTF-8"); + let output = String::from_utf8(all_output).expect("output should be valid UTF-8"); assert!( output.contains("Paragraph 999"), - "Should contain all content" + "should contain all content from large document" ); } #[test] - fn test_html_rewriter_adapter_reset() { + fn test_html_rewriter_adapter_reset_then_finalize() { use lol_html::Settings; let settings = Settings::default(); let mut adapter = HtmlRewriterAdapter::new(settings); - // Process some content - adapter - .process_chunk(b"<html>", false) - .expect("should process html tag"); - adapter - .process_chunk(b"<body>test</body>", false) - .expect("should process body"); + let result1 = adapter + .process_chunk(b"<html>test", false) + .expect("should process html"); - // Reset should clear accumulated input + // reset() is a documented no-op — adapter is single-use adapter.reset(); - // After reset, adapter should be ready for new input - let result = adapter - .process_chunk(b"<html><body>new</body></html>", true) - .expect("should process new content after reset"); - let output = String::from_utf8(result).expect("output should be valid UTF-8"); + // Finalize still works; the rewriter is still alive + let result2 = adapter + .process_chunk(b"</html>", true) + .expect("should finalize after reset"); + + let mut all_output = result1; + all_output.extend_from_slice(&result2); + let output = String::from_utf8(all_output).expect("output should be valid UTF-8"); + assert!( + output.contains("test"), + "should produce correct output despite no-op reset" + ); + } + + #[test] + fn test_deflate_round_trip_produces_valid_output() { + // Verify that deflate-to-deflate produces valid output that decompresses + // correctly, confirming that encoder finalization works. + use flate2::read::ZlibDecoder; + use flate2::write::ZlibEncoder; + use std::io::{Read as _, Write as _}; + + let input_data = b"hello world"; + + // Compress input + let mut compressed_input = Vec::new(); + { + let mut enc = ZlibEncoder::new(&mut compressed_input, flate2::Compression::default()); + enc.write_all(input_data) + .expect("should compress test input"); + enc.finish().expect("should finish compression"); + } + + let replacer = StreamingReplacer::new(vec![Replacement { + find: "hello".to_string(), + replace_with: "hi".to_string(), + }]); + + let config = PipelineConfig { + input_compression: Compression::Deflate, + output_compression: Compression::Deflate, + chunk_size: 8192, + }; + + let mut pipeline = StreamingPipeline::new(config, replacer); + let mut output = Vec::new(); + + pipeline + .process(&compressed_input[..], &mut output) + .expect("should process deflate-to-deflate"); + + // Decompress output and verify correctness + let mut decompressed = Vec::new(); + ZlibDecoder::new(&output[..]) + .read_to_end(&mut decompressed) + .expect("should decompress output — implies encoder was finalized correctly"); + assert_eq!( - output, "<html><body>new</body></html>", - "Should only contain new input after reset" + String::from_utf8(decompressed).expect("should be valid UTF-8"), + "hi world", + "should have replaced content through deflate round-trip" + ); + } + + #[test] + fn test_gzip_to_gzip_produces_correct_output() { + use flate2::read::GzDecoder; + use flate2::write::GzEncoder; + use std::io::{Read as _, Write as _}; + + // Arrange + let input_data = b"hello world"; + + let mut compressed_input = Vec::new(); + { + let mut enc = GzEncoder::new(&mut compressed_input, flate2::Compression::default()); + enc.write_all(input_data) + .expect("should compress test input"); + enc.finish().expect("should finish compression"); + } + + let replacer = StreamingReplacer::new(vec![Replacement { + find: "hello".to_string(), + replace_with: "hi".to_string(), + }]); + + let config = PipelineConfig { + input_compression: Compression::Gzip, + output_compression: Compression::Gzip, + chunk_size: 8192, + }; + + let mut pipeline = StreamingPipeline::new(config, replacer); + let mut output = Vec::new(); + + // Act + pipeline + .process(&compressed_input[..], &mut output) + .expect("should process gzip-to-gzip"); + + // Assert + let mut decompressed = Vec::new(); + GzDecoder::new(&output[..]) + .read_to_end(&mut decompressed) + .expect("should decompress output — implies encoder was finalized correctly"); + + assert_eq!( + String::from_utf8(decompressed).expect("should be valid UTF-8"), + "hi world", + "should have replaced content through gzip round-trip" + ); + } + + #[test] + fn test_gzip_to_none_produces_correct_output() { + use flate2::write::GzEncoder; + use std::io::Write as _; + + // Arrange + let input_data = b"hello world"; + + let mut compressed_input = Vec::new(); + { + let mut enc = GzEncoder::new(&mut compressed_input, flate2::Compression::default()); + enc.write_all(input_data) + .expect("should compress test input"); + enc.finish().expect("should finish compression"); + } + + let replacer = StreamingReplacer::new(vec![Replacement { + find: "hello".to_string(), + replace_with: "hi".to_string(), + }]); + + let config = PipelineConfig { + input_compression: Compression::Gzip, + output_compression: Compression::None, + chunk_size: 8192, + }; + + let mut pipeline = StreamingPipeline::new(config, replacer); + let mut output = Vec::new(); + + // Act + pipeline + .process(&compressed_input[..], &mut output) + .expect("should process gzip-to-none"); + + // Assert + let result = String::from_utf8(output).expect("should be valid UTF-8 uncompressed output"); + assert_eq!( + result, "hi world", + "should have replaced content after gzip decompression" + ); + }
+ + #[test] + fn test_brotli_round_trip_produces_valid_output() { + use brotli::enc::writer::CompressorWriter; + use brotli::Decompressor; + use std::io::{Read as _, Write as _}; + + let input_data = b"hello world"; + + // Compress input with brotli + let mut compressed_input = Vec::new(); + { + let mut enc = CompressorWriter::new(&mut compressed_input, 4096, 4, 22); + enc.write_all(input_data) + .expect("should compress test input"); + enc.flush().expect("should flush brotli encoder"); + } + + let replacer = StreamingReplacer::new(vec![Replacement { + find: "hello".to_string(), + replace_with: "hi".to_string(), + }]); + + let config = PipelineConfig { + input_compression: Compression::Brotli, + output_compression: Compression::Brotli, + chunk_size: 8192, + }; + + let mut pipeline = StreamingPipeline::new(config, replacer); + let mut output = Vec::new(); + + pipeline + .process(&compressed_input[..], &mut output) + .expect("should process brotli-to-brotli"); + + // Decompress output and verify correctness + let mut decompressed = Vec::new(); + Decompressor::new(&output[..], 4096) + .read_to_end(&mut decompressed) + .expect("should decompress output — implies encoder was finalized correctly"); + + assert_eq!( + String::from_utf8(decompressed).expect("should be valid UTF-8"), + "hi world", + "should have replaced content through brotli round-trip" + ); + } + + #[test] + fn test_html_rewriter_adapter_emits_output_per_chunk() { + use lol_html::Settings; + + let settings = Settings::default(); + let mut adapter = HtmlRewriterAdapter::new(settings); + + // Send three chunks — lol_html may buffer internally, so individual + // chunk outputs may vary by version. The contract is that concatenated + // output is correct, and that output is not deferred entirely to is_last. + let result1 = adapter + .process_chunk(b"<html>", false) + .expect("should process chunk1"); + let result2 = adapter + .process_chunk(b"<body><p>hello</p></body>", false) + .expect("should process chunk2"); + let result3 = adapter + .process_chunk(b"</html>", true) + .expect("should process final chunk"); + + // At least one intermediate chunk should produce output (verifies + // we're not deferring everything to is_last like the old adapter). + assert!( + !result1.is_empty() || !result2.is_empty(), + "should emit some output before is_last" + ); + + // Concatenated output must be correct + let mut all_output = result1; + all_output.extend_from_slice(&result2); + all_output.extend_from_slice(&result3); + + let output = String::from_utf8(all_output).expect("output should be valid UTF-8"); + assert!( + output.contains("<html>"), + "should contain html tag in concatenated output" + ); + assert!( + output.contains("<p>hello</p>"), + "should contain paragraph in concatenated output" + ); + assert!( + output.contains("</html>"), + "should contain closing html tag in concatenated output" ); } @@ -695,4 +848,61 @@ mod tests { "Should not contain original URL" ); } + + #[test] + fn test_gzip_pipeline_with_html_rewriter() { + use flate2::read::GzDecoder; + use flate2::write::GzEncoder; + use lol_html::{element, Settings}; + use std::io::{Read as _, Write as _}; + + let settings = Settings { + element_content_handlers: vec![element!("a[href]", |el| { + if let Some(href) = el.get_attribute("href") { + if href.contains("example.com") { + el.set_attribute("href", &href.replace("example.com", "test.com"))?; + } + } + Ok(()) + })], + ..Settings::default() + }; + + let input = b"<html><body><a href=\"https://example.com/page\">Link</a></body></html>"; + + let mut compressed_input = Vec::new(); + { + let mut enc = GzEncoder::new(&mut compressed_input, flate2::Compression::default()); + enc.write_all(input).expect("should compress test input"); + enc.finish().expect("should finish compression"); + } + + let adapter = HtmlRewriterAdapter::new(settings); + let config = PipelineConfig { + input_compression: Compression::Gzip, + output_compression: Compression::Gzip, + chunk_size: 8192, + }; + let mut pipeline = StreamingPipeline::new(config, adapter); + let mut output = Vec::new(); + + pipeline + .process(&compressed_input[..], &mut output) + .expect("pipeline should process gzip HTML"); + + let mut decompressed = Vec::new(); + GzDecoder::new(&output[..]) + .read_to_end(&mut decompressed) + .expect("should decompress output"); + + let result = String::from_utf8(decompressed).expect("output should be valid UTF-8"); + assert!( + result.contains("https://test.com"), + "should have replaced URL through gzip HTML pipeline" + ); + assert!( + !result.contains("example.com"), + "should not contain original URL after gzip HTML pipeline" + ); + } } diff --git a/crates/trusted-server-core/src/streaming_replacer.rs b/crates/trusted-server-core/src/streaming_replacer.rs index faf8f9a20..1e7291e57 100644 --- a/crates/trusted-server-core/src/streaming_replacer.rs +++ b/crates/trusted-server-core/src/streaming_replacer.rs @@ -2,6 +2,8 @@ //! //! This module provides functionality for replacing patterns in content //! in streaming fashion, handling content that may be split across multiple chunks. +//! +//! See [`crate::platform`] module doc for platform notes. // Note: std::io::{Read, Write} were previously used by stream_process function // which has been removed in favor of StreamingPipeline
diff --git a/crates/trusted-server-core/src/synthetic.rs b/crates/trusted-server-core/src/synthetic.rs deleted file mode 100644 index a2d410932..000000000 --- a/crates/trusted-server-core/src/synthetic.rs +++ /dev/null @@ -1,582 +0,0 @@ -//! Synthetic ID generation using HMAC. -//! -//! This module provides functionality for generating privacy-preserving synthetic IDs -//! based on various request parameters and a secret key. - -use std::net::IpAddr; - -use edgezero_core::body::Body as EdgeBody; -use error_stack::{Report, ResultExt}; -use handlebars::Handlebars; -use hmac::{Hmac, Mac}; -use http::header; -use http::Request; -use rand::Rng; -use serde_json::json; -use sha2::Sha256; -use uuid::Uuid; - -use crate::constants::{COOKIE_SYNTHETIC_ID, HEADER_X_SYNTHETIC_ID}; -use crate::cookies::handle_request_cookies; -use crate::error::TrustedServerError; -use crate::platform::RuntimeServices; -use crate::settings::Settings; - -type HmacSha256 = Hmac<Sha256>; - -const ALPHANUMERIC_CHARSET: &[u8] = - b"abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; - -/// Expected byte length of a valid synthetic ID: 64 hex chars + '.' + 6 alphanumeric chars. -const SYNTHETIC_ID_LEN: usize = 71; - -/// Validates that `value` matches the canonical synthetic ID format. -/// -/// The format is `<hmac>.<suffix>` where `<hmac>` is exactly 64 **lowercase** hex -/// characters (HMAC-SHA256 output via [`hex::encode`]) and `<suffix>` is exactly -/// 6 ASCII alphanumeric characters. Uppercase hex is rejected — the generator -/// never produces it and intermediaries that normalise case would produce an ID -/// that no longer matches its HMAC. -/// -/// The total length is checked first so that oversized attacker-supplied -/// strings are rejected in O(1) before any character scanning occurs. -pub(crate) fn is_valid_synthetic_id(value: &str) -> bool { - if value.len() != SYNTHETIC_ID_LEN { - return false; - } - match value.split_once('.') { - Some((hmac_part, suffix_part)) => { - hmac_part.len() == 64 - && hmac_part - .bytes() - .all(|b| matches!(b, b'0'..=b'9' | b'a'..=b'f')) - && suffix_part.bytes().all(|b| b.is_ascii_alphanumeric()) - } - None => false, - } -} - -/// Normalizes an IP address for stable synthetic ID generation. -/// -/// For IPv6 addresses, masks to /64 prefix to handle Privacy Extensions -/// where devices rotate their interface identifier (lower 64 bits). -/// IPv4 addresses are returned unchanged. -fn normalize_ip(ip: IpAddr) -> String { - match ip { - IpAddr::V4(ipv4) => ipv4.to_string(), - IpAddr::V6(ipv6) => { - let segments = ipv6.segments(); - // Keep only the first 4 segments (64 bits) for /64 prefix - format!( - "{:x}:{:x}:{:x}:{:x}::", - segments[0], segments[1], segments[2], segments[3] - ) - } - } -} - -/// Generates a random alphanumeric string of the specified length. -fn generate_random_suffix(length: usize) -> String { - let mut rng = rand::thread_rng(); - (0..length) - .map(|_| { - let idx = rng.gen_range(0..ALPHANUMERIC_CHARSET.len()); - ALPHANUMERIC_CHARSET[idx] as char - }) - .collect() -} - -/// Generates a fresh synthetic ID based on request parameters. -/// -/// Creates an HMAC-SHA256-based ID using the configured secret key and request -/// attributes, then appends a random suffix for additional uniqueness. -/// -/// # Errors -/// -/// - [`TrustedServerError::Template`] if the template rendering fails -/// - [`TrustedServerError::SyntheticId`] if HMAC generation fails -pub fn generate_synthetic_id( - settings: &Settings, - services: &RuntimeServices, - req: &Request<EdgeBody>, -) -> Result<String, Report<TrustedServerError>> { - let client_ip = services.client_info.client_ip.map(normalize_ip); - let user_agent = req - .headers() - .get(header::USER_AGENT) - .map(|h| h.to_str().unwrap_or("unknown")); - let accept_language = req - .headers() - .get(header::ACCEPT_LANGUAGE) - .and_then(|h| h.to_str().ok()) - .map(|lang| lang.split(',').next().unwrap_or("unknown")); - let accept_encoding = req - .headers() - .get(header::ACCEPT_ENCODING) - .and_then(|h| h.to_str().ok()); - let random_uuid = Uuid::new_v4().to_string(); - - let handlebars = Handlebars::new(); - let data = &json!({ - "client_ip": client_ip.unwrap_or("unknown".to_string()), - "user_agent": user_agent.unwrap_or("unknown"), - "accept_language": accept_language.unwrap_or("unknown"), - "accept_encoding": accept_encoding.unwrap_or("unknown"), - "random_uuid": random_uuid - }); - - let input_string = handlebars - .render_template(&settings.synthetic.template, data) - .change_context(TrustedServerError::Template { - message: "Failed to render synthetic ID template".to_string(), - })?; - - log::debug!("Generating fresh synthetic ID from template inputs"); - - let mut mac = HmacSha256::new_from_slice(settings.synthetic.secret_key.expose().as_bytes()) - .change_context(TrustedServerError::SyntheticId { - message: "Failed to create HMAC instance".to_string(), - })?; - mac.update(input_string.as_bytes()); - let hmac_hash = hex::encode(mac.finalize().into_bytes()); - - // Append random 6-character alphanumeric suffix for additional uniqueness - let random_suffix = generate_random_suffix(6); - let synthetic_id = format!("{}.{}", hmac_hash, random_suffix); - - debug_assert!( - is_valid_synthetic_id(&synthetic_id), - "should generate a synthetic ID matching the expected format" - ); - - log::debug!("Generated fresh synthetic ID"); - - Ok(synthetic_id) -} - -/// Reads a validated synthetic ID from the request, if one is present. -/// -/// Checks the `x-synthetic-id` header first, then the `synthetic_id` cookie. -/// Values that do not match the canonical format (`<64-hex>.<6-alphanumeric>`) -/// are discarded and a warning is logged — the raw invalid value is never -/// included in log output. -/// -/// Note: a non-UTF-8 `x-synthetic-id` header value is silently discarded and -/// the cookie is checked next, whereas a non-UTF-8 `Cookie` header propagates -/// as an error. -/// -/// Returns `Ok(None)` when no valid ID is found, allowing the caller to -/// generate a fresh one. -/// -/// # Errors -/// -/// - [`TrustedServerError::InvalidHeaderValue`] if the Cookie header contains invalid UTF-8 -pub fn get_synthetic_id( - req: &Request<EdgeBody>, -) -> Result<Option<String>, Report<TrustedServerError>> { - if let Some(raw) = req - .headers() - .get(HEADER_X_SYNTHETIC_ID) - .and_then(|h| h.to_str().ok()) - { - if is_valid_synthetic_id(raw) { - log::debug!("Using existing synthetic ID from header"); - return Ok(Some(raw.to_string())); - } - log::warn!( - "Rejecting synthetic ID from header: invalid format (len={})", - raw.len() - ); - } - - match handle_request_cookies(req)? { - Some(jar) => { - if let Some(cookie) = jar.get(COOKIE_SYNTHETIC_ID) { - let raw = cookie.value(); - if is_valid_synthetic_id(raw) { - log::debug!("Using existing synthetic ID from cookie"); - return Ok(Some(raw.to_string())); - } - log::warn!( - "Rejecting synthetic ID from cookie: invalid format (len={})", - raw.len() - ); - } - } - None => { - log::debug!("No cookie header found in request"); - } - } - - Ok(None) -} - -/// Gets a validated synthetic ID from the request, or generates a fresh one. -/// -/// Checks the `x-synthetic-id` header then the `synthetic_id` cookie via -/// [`get_synthetic_id`]. Values that fail format validation are discarded — a -/// warning is logged and a fresh ID is generated in their place, -/// identical to the no-ID-present path. -/// -/// # Errors -/// -/// - [`TrustedServerError::Template`] if template rendering fails during generation -/// - [`TrustedServerError::SyntheticId`] if HMAC generation fails -pub fn get_or_generate_synthetic_id( - settings: &Settings, - services: &RuntimeServices, - req: &Request<EdgeBody>, -) -> Result<String, Report<TrustedServerError>> { - if let Some(id) = get_synthetic_id(req)? { - return Ok(id); - } - - // If no existing Synthetic ID found, generate a fresh one - let synthetic_id = generate_synthetic_id(settings, services, req)?; - log::debug!("No existing synthetic ID found, generated a fresh one"); - Ok(synthetic_id) -} - -#[cfg(test)] -mod tests { - use super::*; - use http::{HeaderName, HeaderValue}; - use std::net::{Ipv4Addr, Ipv6Addr}; - - use crate::platform::test_support::{noop_services, noop_services_with_client_ip}; - use crate::test_support::tests::{create_test_settings, VALID_SYNTHETIC_ID}; - - #[test] - fn test_normalize_ip_ipv4_unchanged() { - let ipv4 = IpAddr::V4(Ipv4Addr::new(192, 168, 1, 100)); - assert_eq!(normalize_ip(ipv4), "192.168.1.100"); - } - - #[test] - fn test_normalize_ip_ipv6_masks_to_64() { - // Full IPv6 address with interface identifier - let ipv6 = IpAddr::V6(Ipv6Addr::new( - 0x2001, 0x0db8, 0x85a3, 0x0000, 0x8a2e, 0x0370, 0x7334, 0x1234, - )); - assert_eq!(normalize_ip(ipv6), "2001:db8:85a3:0::"); - } - - #[test] - fn test_normalize_ip_ipv6_different_suffix_same_prefix() { - // Two IPv6 addresses with same /64 prefix but different interface identifiers - // (simulating Privacy Extensions rotation) - let ipv6_a = IpAddr::V6(Ipv6Addr::new( - 0x2001, 0x0db8, 0xabcd, 0x0001, 0x1111, 0x2222, 0x3333, 0x4444, - )); - let ipv6_b = IpAddr::V6(Ipv6Addr::new( - 0x2001, 0x0db8, 0xabcd, 0x0001, 0xaaaa, 0xbbbb, 0xcccc, 0xdddd, - )); - // Both should normalize to the same /64 prefix - assert_eq!(normalize_ip(ipv6_a), normalize_ip(ipv6_b)); - assert_eq!(normalize_ip(ipv6_a), "2001:db8:abcd:1::"); - } - - fn create_test_request(headers: Vec<(HeaderName, &str)>) -> Request<EdgeBody> { - let mut req = Request::builder() - .method("GET") - .uri("http://example.com") - .body(EdgeBody::empty()) - .expect("should build test request"); - for (key, value) in headers { - req.headers_mut().insert( - key, - HeaderValue::from_str(value).expect("should create valid header value"), - ); - } - - req - } - - #[test] - fn test_generate_synthetic_id() { - let settings: Settings = create_test_settings(); - let req = create_test_request(vec![ - (header::USER_AGENT, "Mozilla/5.0"), - (header::ACCEPT_LANGUAGE, "en-US,en;q=0.9"), - (header::ACCEPT_ENCODING, "gzip, deflate, br"), - ]); - - let synthetic_id = generate_synthetic_id(&settings, &noop_services(), &req) - .expect("should generate synthetic ID"); - assert!( - is_valid_synthetic_id(&synthetic_id), - "should match
synthetic ID format" - ); - } - - #[test] - fn test_generate_synthetic_id_uses_client_ip() { - let settings = create_test_settings(); - let req = create_test_request(vec![(header::USER_AGENT, "Mozilla/5.0")]); - let ip = IpAddr::V4(Ipv4Addr::new(203, 0, 113, 1)); - - // Arrange: two service instances — one with a real IP, one without - let id_with_ip = generate_synthetic_id(&settings, &noop_services_with_client_ip(ip), &req) - .expect("should generate synthetic ID with client IP"); - let id_without_ip = generate_synthetic_id(&settings, &noop_services(), &req) - .expect("should generate synthetic ID without client IP"); - - // Assert: both produce valid format - assert!( - is_valid_synthetic_id(&id_with_ip), - "should produce valid format when client IP is present" - ); - assert!( - is_valid_synthetic_id(&id_without_ip), - "should produce valid format when client IP is absent" - ); - - // Assert: HMAC parts differ because client_ip changes the template input - let hmac_with_ip = id_with_ip.split_once('.').expect("should contain dot").0; - let hmac_without_ip = id_without_ip.split_once('.').expect("should contain dot").0; - assert_ne!( - hmac_with_ip, hmac_without_ip, - "should produce different HMAC when client IP differs" - ); - } - - #[test] - fn test_is_valid_synthetic_id_accepts_valid_value() { - assert!( - is_valid_synthetic_id(VALID_SYNTHETIC_ID), - "should accept a well-formed synthetic ID" - ); - } - - #[test] - fn test_is_valid_synthetic_id_rejects_invalid_values() { - let missing_suffix = "a".repeat(64); - assert!( - !is_valid_synthetic_id(&missing_suffix), - "should reject missing suffix" - ); - - let invalid_hex = format!("{}.{}", "a".repeat(63) + "g", "Ab12z9"); - assert!( - !is_valid_synthetic_id(&invalid_hex), - "should reject non-hex HMAC content" - ); - - let invalid_suffix = format!("{}.{}", "a".repeat(64), "ab-129"); - assert!( - !is_valid_synthetic_id(&invalid_suffix), - "should reject non-alphanumeric suffix" - ); - - // 74 bytes — caught by the length guard before any scan. - let extra_segment = format!("{}.{}.{}", "a".repeat(64), "Ab12z9", "zz"); - assert!( - !is_valid_synthetic_id(&extra_segment), - "should reject extra segments" - ); - - // 71 bytes, dot at position 64 (correct), but suffix contains a dot — caught by - // the suffix alphanumeric scan, not the length guard. 
- let dot_in_suffix = format!("{}.Ab12.z", "a".repeat(64)); - assert!( - !is_valid_synthetic_id(&dot_in_suffix), - "should reject dot within suffix" - ); - - let uppercase_hex = format!("{}.{}", "A".repeat(64), "Ab12z9"); - assert!( - !is_valid_synthetic_id(&uppercase_hex), - "should reject uppercase hex in HMAC part" - ); - - let oversized = "a".repeat(1000); - assert!( - !is_valid_synthetic_id(&oversized), - "should reject oversized input" - ); - - assert!(!is_valid_synthetic_id(""), "should reject empty string"); - } - - #[test] - fn test_get_synthetic_id_with_header() { - let settings = create_test_settings(); - let req = create_test_request(vec![(HEADER_X_SYNTHETIC_ID, VALID_SYNTHETIC_ID)]); - - let synthetic_id = get_synthetic_id(&req).expect("should get synthetic ID"); - assert_eq!( - synthetic_id, - Some(VALID_SYNTHETIC_ID.to_string()), - "should return the valid header ID" - ); - - let synthetic_id = get_or_generate_synthetic_id(&settings, &noop_services(), &req) - .expect("should reuse header synthetic ID"); - assert_eq!( - synthetic_id, VALID_SYNTHETIC_ID, - "should reuse the valid header ID" - ); - } - - #[test] - fn test_get_synthetic_id_with_cookie() { - let settings = create_test_settings(); - let req = create_test_request(vec![( - header::COOKIE, - &format!("{}={}", COOKIE_SYNTHETIC_ID, VALID_SYNTHETIC_ID), - )]); - - let synthetic_id = get_synthetic_id(&req).expect("should get synthetic ID"); - assert_eq!( - synthetic_id, - Some(VALID_SYNTHETIC_ID.to_string()), - "should return the valid cookie ID" - ); - - let synthetic_id = get_or_generate_synthetic_id(&settings, &noop_services(), &req) - .expect("should reuse cookie synthetic ID"); - assert_eq!( - synthetic_id, VALID_SYNTHETIC_ID, - "should reuse the valid cookie ID" - ); - } - - #[test] - fn test_get_synthetic_id_rejects_invalid_header() { - let req = create_test_request(vec![(HEADER_X_SYNTHETIC_ID, "not-a-valid-id")]); - - let synthetic_id = get_synthetic_id(&req).expect("should not error on invalid header ID"); - assert!( - synthetic_id.is_none(), - "should discard invalid synthetic ID from header" - ); - } - - #[test] - fn test_get_synthetic_id_rejects_invalid_cookie() { - let req = create_test_request(vec![( - header::COOKIE, - &format!("{}=not-a-valid-id", COOKIE_SYNTHETIC_ID), - )]); - - let synthetic_id = get_synthetic_id(&req).expect("should not error on invalid cookie ID"); - assert!( - synthetic_id.is_none(), - "should discard invalid synthetic ID from cookie" - ); - } - - #[test] - fn test_get_synthetic_id_invalid_header_falls_through_to_valid_cookie() { - let req = create_test_request(vec![ - (HEADER_X_SYNTHETIC_ID, "not-a-valid-id"), - ( - header::COOKIE, - &format!("{}={}", COOKIE_SYNTHETIC_ID, VALID_SYNTHETIC_ID), - ), - ]); - - let synthetic_id = get_synthetic_id(&req).expect("should not error when cookie is valid"); - assert_eq!( - synthetic_id, - Some(VALID_SYNTHETIC_ID.to_string()), - "should fall through to valid cookie when header ID is invalid" - ); - } - - #[test] - fn test_get_synthetic_id_header_takes_precedence_over_cookie() { - let cookie_id = "b2a1c3d4e5f6a7b8c9d0e1f2a3b4c5d6e7f8a9b0c1d2e3f4a5b6c7d8e9f0b1a2.Zx98y7"; - let req = create_test_request(vec![ - (HEADER_X_SYNTHETIC_ID, VALID_SYNTHETIC_ID), - ( - header::COOKIE, - &format!("{}={}", COOKIE_SYNTHETIC_ID, cookie_id), - ), - ]); - let result = get_synthetic_id(&req).expect("should succeed"); - assert_eq!( - result, - Some(VALID_SYNTHETIC_ID.to_string()), - "should prefer header over cookie" - ); - } - - #[test] - fn 
test_get_synthetic_id_rejects_invalid_header_and_falls_back_to_cookie() { - let settings = create_test_settings(); - let req = create_test_request(vec![ - (HEADER_X_SYNTHETIC_ID, "evil;injected"), - ( - header::COOKIE, - &format!("{}={}", COOKIE_SYNTHETIC_ID, VALID_SYNTHETIC_ID), - ), - ]); - - let synthetic_id = get_synthetic_id(&req).expect("should resolve synthetic ID"); - assert_eq!( - synthetic_id, - Some(VALID_SYNTHETIC_ID.to_string()), - "should ignore invalid header and reuse valid cookie" - ); - - let synthetic_id = get_or_generate_synthetic_id(&settings, &noop_services(), &req) - .expect("should reuse valid cookie synthetic ID"); - assert_eq!(synthetic_id, VALID_SYNTHETIC_ID); - } - - #[test] - fn test_get_synthetic_id_none() { - let req = create_test_request(vec![]); - let synthetic_id = get_synthetic_id(&req).expect("should handle missing ID"); - assert!( - synthetic_id.is_none(), - "should return None when no ID present" - ); - } - - #[test] - fn test_get_or_generate_synthetic_id_generates_when_invalid_header() { - let settings = create_test_settings(); - // A string that is clearly not a valid synthetic ID (wrong format, wrong length) - let req = create_test_request(vec![(HEADER_X_SYNTHETIC_ID, "totally-invalid-id-value")]); - - let synthetic_id = get_or_generate_synthetic_id(&settings, &noop_services(), &req) - .expect("should generate when header ID is invalid"); - assert!( - is_valid_synthetic_id(&synthetic_id), - "should generate a fresh valid ID when inbound ID is invalid" - ); - } - - #[test] - fn test_get_or_generate_synthetic_id_generate_new() { - let settings = create_test_settings(); - let req = create_test_request(vec![]); - - let synthetic_id = get_or_generate_synthetic_id(&settings, &noop_services(), &req) - .expect("should get or generate synthetic ID"); - assert!( - is_valid_synthetic_id(&synthetic_id), - "should generate a valid synthetic ID" - ); - } - - #[test] - fn test_get_or_generate_synthetic_id_replaces_invalid_header() { - let settings = create_test_settings(); - let req = create_test_request(vec![(HEADER_X_SYNTHETIC_ID, "evil;injected")]); - - let synthetic_id = get_or_generate_synthetic_id(&settings, &noop_services(), &req) - .expect("should replace invalid header synthetic ID"); - - assert!( - is_valid_synthetic_id(&synthetic_id), - "should generate a fresh synthetic ID when the header is invalid" - ); - assert_ne!( - synthetic_id, "evil;injected", - "should not reuse a tampered synthetic ID value" - ); - } -} diff --git a/crates/trusted-server-core/src/test_support.rs b/crates/trusted-server-core/src/test_support.rs index cc29a1afc..8fdfaa85d 100644 --- a/crates/trusted-server-core/src/test_support.rs +++ b/crates/trusted-server-core/src/test_support.rs @@ -34,15 +34,13 @@ pub mod tests { enabled = false rewrite_attributes = ["href", "link", "url"] - [synthetic] - counter_store = "test-counter-store" - opid_store = "test-opid-store" + [edge_cookie] secret_key = "test-secret-key" - template = "{{client_ip}}:{{user_agent}}:{{first_party_id}}:{{auth_user_id}}:{{publisher_domain}}:{{accept_language}}" [request_signing] config_store_id = "test-config-store-id" secret_store_id = "test-secret-store-id" - "#.to_string() + "# + .to_string() } #[must_use] diff --git a/docs/.vitepress/config.mts b/docs/.vitepress/config.mts index 6b7e9396d..d66a820c0 100644 --- a/docs/.vitepress/config.mts +++ b/docs/.vitepress/config.mts @@ -36,7 +36,7 @@ export default withMermaid( defineConfig({ title: 'Trusted Server', description: - 'Privacy-preserving edge computing for 
ad serving and synthetic ID generation', + 'Privacy-preserving edge computing for ad serving and edge cookie (EC) generation', base: '/trusted-server', // Replace version placeholders like {{NODEJS_VERSION}} with values from .tool-versions @@ -77,7 +77,7 @@ export default withMermaid( { text: 'Core Concepts', items: [ - { text: 'Synthetic IDs', link: '/guide/synthetic-ids' }, + { text: 'Edge Cookies', link: '/guide/edge-cookies' }, { text: 'GDPR Compliance', link: '/guide/gdpr-compliance' }, { text: 'Ad Serving', link: '/guide/ad-serving' }, { diff --git a/docs/business-use-cases.md b/docs/business-use-cases.md index 02f602048..67c7b51d3 100644 --- a/docs/business-use-cases.md +++ b/docs/business-use-cases.md @@ -10,7 +10,7 @@ Trusted Server delivers measurable business value across revenue generation, com **Problem**: Safari and Firefox block third-party cookies, fragmenting user identity and reducing addressable inventory CPMs by 30-50%. -**Solution**: Trusted Server's synthetic ID system maintains user recognition across cookieless browsers through first-party identifiers. +**Solution**: Trusted Server's Edge Cookie (EC) system maintains user recognition across cookieless browsers through first-party identifiers. **Business Impact**: @@ -258,7 +258,7 @@ Trusted Server delivers measurable business value across revenue generation, com **Vendor Consolidation**: -- Identity vendors: 3 → 1 (Lockr + synthetic IDs) +- Identity vendors: 3 → 1 (Lockr + EC IDs) - Ad tech vendors: 5 → 2 (Prebid + Trusted Server) - CMP vendors: Integrated (Didomi first-party) - Analytics: Built-in observability diff --git a/docs/epics/revenue-operations-dashboard.md b/docs/epics/revenue-operations-dashboard.md index ca934759b..c1907d25e 100644 --- a/docs/epics/revenue-operations-dashboard.md +++ b/docs/epics/revenue-operations-dashboard.md @@ -72,7 +72,7 @@ A real-time publisher transparency dashboard providing visibility into ad moneti #### Technical Tasks - [ ] Instrument proxy request handlers to emit domain call events -- [ ] Add session/page_id correlation (via synthetic ID or request header) +- [ ] Add session/page_id correlation (via EC ID or request header) - [ ] Create domain allowlist configuration in settings.toml - [ ] Emit `unauthorized_domain` alert event when unknown domain called - [ ] Track request timing (DNS, connect, TTFB, total) @@ -84,7 +84,7 @@ A real-time publisher transparency dashboard providing visibility into ad moneti "event_type": "domain_call", "timestamp": "2026-01-24T09:45:00Z", "page_id": "uuid", - "synthetic_id": "abc123", + "ec_id": "abc123", "domain": "ads.example.com", "full_url": "https://ads.example.com/bid?id=123", "request_type": "xhr", diff --git a/docs/guide/ad-serving.md b/docs/guide/ad-serving.md index 2999e1214..44449b128 100644 --- a/docs/guide/ad-serving.md +++ b/docs/guide/ad-serving.md @@ -30,7 +30,7 @@ Real-time bidding integration: 1. Request validation 2. GDPR consent check -3. Synthetic ID generation (if consented) +3. EC ID generation (if consented) 4. Ad server request 5. Response processing 6. 
Creative delivery @@ -77,7 +77,7 @@ Creatives served directly from ad server: // Placeholder example trustedServer.trackImpression({ adId: 'ad-123', - syntheticId: 'synthetic-xyz', + ecId: 'ec-xyz', consent: true, }) ``` @@ -87,7 +87,7 @@ trustedServer.trackImpression({ Click tracking with privacy preservation: - No PII in URLs -- Synthetic ID only (with consent) +- EC ID only (with consent) - Encrypted parameters ## Performance diff --git a/docs/guide/api-reference.md b/docs/guide/api-reference.md index 2ddcb71d3..880efc835 100644 --- a/docs/guide/api-reference.md +++ b/docs/guide/api-reference.md @@ -37,7 +37,7 @@ curl "https://edge.example.com/first-party/ad?slot=header-banner&w=728&h=90" **Response Headers:** -- `x-synthetic-id` - Synthetic ID (`64hex.6alnum` format) +- `x-ts-ec` - EC ID (`64hex.6alnum` format) **Use Cases:** @@ -127,7 +127,7 @@ Unified proxy for resources referenced by creatives (images, scripts, CSS, etc.) - Validates `tstoken` against reconstructed URL - Follows redirects (301/302/303/307/308, max 4 hops) -- Injects synthetic ID as `synthetic_id` query parameter +- Injects EC ID as `ts-ec` query parameter - Logs 1×1 pixel impressions **Example:** @@ -160,12 +160,12 @@ Click tracking redirect endpoint. **Response:** - **Status:** `302 Found` -- **Location:** Reconstructed target URL with synthetic ID injected +- **Location:** Reconstructed target URL with EC ID injected **Behavior:** - Validates `tstoken` against reconstructed URL -- Injects `synthetic_id` query parameter +- Injects `ts-ec` query parameter - Logs click metadata (tsurl, referer, user agent) - Does not proxy content (redirect only) @@ -173,7 +173,7 @@ Click tracking redirect endpoint. ```bash curl -I "https://edge.example.com/first-party/click?tsurl=https://advertiser.com/landing&campaign=123&tstoken=xyz..." -# → 302 Location: https://advertiser.com/landing?campaign=123&synthetic_id=abc123 +# → 302 Location: https://advertiser.com/landing?campaign=123&ts-ec=abc123 ``` --- @@ -529,7 +529,7 @@ Proxies to `cdn.permutive.com` for static assets. #### POST /integrations/testlight/auction -Testing auction endpoint with synthetic ID injection. +Testing auction endpoint with EC ID injection. **Request Body:** @@ -543,11 +543,11 @@ Testing auction endpoint with synthetic ID injection. ``` **Response:** -Proxies to configured endpoint with `user.id` populated with synthetic ID. +Proxies to configured endpoint with `user.id` populated with EC ID. **Response Headers:** -- `x-synthetic-id` - Synthetic ID (`64hex.6alnum` format) +- `x-ts-ec` - EC ID (`64hex.6alnum` format) --- diff --git a/docs/guide/architecture.md b/docs/guide/architecture.md index 769222483..de7d86335 100644 --- a/docs/guide/architecture.md +++ b/docs/guide/architecture.md @@ -4,7 +4,7 @@ Understanding the architecture of Trusted Server. ## High-Level Overview -Trusted Server is built as a Rust-based edge computing application that runs on Fastly Compute platform. +Trusted Server is built as a Rust-based edge computing application. The core logic lives in a platform-agnostic library; platform-specific adapters target different runtimes (Fastly Compute, native Axum). 
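As a sketch of what that split means in code (the trait and function names here are illustrative assumptions, not the crate's actual API), the core crate is written against a platform abstraction and each adapter supplies its own implementation:

```rust
use std::net::IpAddr;

// Core crate: handlers depend on an abstraction, not on a runtime SDK.
pub trait PlatformServices {
    fn kv_get(&self, store: &str, key: &str) -> Option<Vec<u8>>;
    fn geo_country(&self, ip: IpAddr) -> Option<String>;
}

// The same handler then runs under Fastly Compute or the native dev server;
// only the `PlatformServices` implementation differs per adapter.
pub fn country_for<P: PlatformServices>(services: &P, ip: IpAddr) -> String {
    services
        .geo_country(ip)
        .unwrap_or_else(|| "unknown".to_string())
}
```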
```mermaid flowchart TD @@ -14,7 +14,7 @@ flowchart TD subgraph edge["Trusted Server"] direction TB gdpr["GDPR Check"] - ids["Synthetic IDs"] + ids["EC IDs"] ads["Ad Serving"] gdpr --> ids --> ads end @@ -29,7 +29,7 @@ flowchart TD Core library containing shared functionality: -- Synthetic ID generation +- Edge Cookie (EC) ID generation - Cookie handling - HTTP abstractions - GDPR consent management @@ -37,12 +37,29 @@ Core library containing shared functionality: ### trusted-server-adapter-fastly -Fastly-specific implementation: +Fastly Compute adapter (WASM binary, `wasm32-wasip1` target): -- Main application entry point -- Fastly SDK integration -- Request/response handling -- KV store access +- Main application entry point for production Fastly deployment +- Fastly SDK integration (KV stores, secret stores, geo lookup) +- Compiled to WebAssembly and run via Viceroy locally or on Fastly's edge + +### trusted-server-adapter-axum + +Native Axum dev/test adapter (native binary): + +- Local development and integration-test adapter — not a production-equivalent runtime +- Platform implementations backed by environment variables instead of Fastly stores +- Listens on `http://localhost:8787` by default + +**Current limitations compared to the Fastly adapter:** + +| Feature | Axum dev server | +|---------|----------------| +| KV store | Unavailable — EC and consent routes degrade gracefully | +| Geo lookup | Always returns `None` | +| Config/secret-store writes | Return an error (read-only via env vars) | +| Admin key management (`/admin/keys/*`) | Returns 501 Not Implemented | +| Auction fan-out ordering | Requests run concurrently via `tokio::spawn`; `select` returns first-to-complete but does not replicate Fastly's priority-queue tie-breaking | ## Design Patterns @@ -71,7 +88,7 @@ All tracking operations require explicit GDPR consent checks before execution. 1. **Request Ingress** - Request arrives at Fastly edge 2. **Consent Validation** - GDPR consent checked -3. **ID Generation** - Synthetic ID generated (if consented) +3. **ID Generation** - EC ID generated (if consented) 4. **Ad Request** - Backend ad server called 5. **Response Processing** - Creative processed and modified 6. **Response Egress** - Response sent to browser @@ -85,7 +102,7 @@ Used for: - Counter storage - Domain mappings - Configuration cache -- Synthetic ID state +- EC ID state ### No User Data Persistence @@ -105,13 +122,14 @@ User data is not persisted in storage - only processed in-flight at the edge. - **Request Signing** - Optional request authentication - **Content Security** - Creative scanning and modification -## WebAssembly Target +## Runtime Targets -Compiled to `wasm32-wasip1` for Fastly Compute: +| Adapter | Target | Use case | +| ------------------------------- | --------------- | -------------------------------------- | +| `trusted-server-adapter-fastly` | `wasm32-wasip1` | Production on Fastly Compute | +| `trusted-server-adapter-axum` | native | Local development and integration testing (see limitations above) | -- Sandboxed execution -- Fast cold starts -- Efficient resource usage +The Fastly adapter compiles to WebAssembly for sandboxed, low-cold-start edge execution. The Axum adapter is a standard native binary — no WASM toolchain required for local development.
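To illustrate the two entry points (a sketch only; both snippets are assumptions based on the table above, not the adapters' actual code):

```rust
// trusted-server-adapter-fastly (wasm32-wasip1) would use Fastly's
// macro-based, one-instance-per-request entry point (assumed usage).
//
// trusted-server-adapter-axum (native) is a long-lived Tokio server,
// bound to the documented default port:
#[tokio::main]
async fn main() {
    let app = axum::Router::new().route("/", axum::routing::get(|| async { "ok" }));
    let listener = tokio::net::TcpListener::bind("127.0.0.1:8787").await.unwrap();
    axum::serve(listener, app).await.unwrap();
}
```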
## Next Steps diff --git a/docs/guide/auction-orchestration.md b/docs/guide/auction-orchestration.md index 666872d45..3c5fe62b7 100644 --- a/docs/guide/auction-orchestration.md +++ b/docs/guide/auction-orchestration.md @@ -176,7 +176,7 @@ The auction system processes requests through a pipeline of transformations: POST /auction (AdRequest in Prebid.js format) │ ├─ Parse body → AdRequest { adUnits[] } - ├─ Generate synthetic + fresh user IDs + ├─ Generate EC + fresh user IDs ├─ Convert adUnits → AdSlots with formats and bidder params ├─ Extract device info (User-Agent, geo) │ @@ -195,7 +195,7 @@ Convert OrchestrationResult → OpenRTB 2.x Response │ ├─ Rewrite creative HTML with first-party proxy URLs ├─ Add ext.orchestrator metadata - └─ Set synthetic ID response headers + └─ Set EC ID response headers ``` ### Key Components @@ -332,7 +332,7 @@ Transforms auction requests into OpenRTB 2.x format and sends them to a Prebid S - `AdSlot` → `Imp` with `Banner { format: [Format { w, h }] }` - Bidder params from slot config → `ext.prebid.bidder` map -- Synthetic and fresh user IDs injected into `User` object +- EC and fresh user IDs injected into `User` object - Device info, geo data, and GPC signals included - Optional Ed25519 request signing (see [Request Signing](/guide/request-signing)) @@ -429,7 +429,7 @@ pub struct AuctionRequest { pub id: String, // UUID pub slots: Vec, // Ad placements pub publisher: PublisherInfo, // Domain, page URL - pub user: UserInfo, // Synthetic ID, fresh ID, consent + pub user: UserInfo, // EC ID, fresh ID, consent pub device: Option, // UA, IP, geo pub site: Option, // Domain, page pub context: HashMap, // Additional metadata @@ -549,11 +549,10 @@ Auction results are returned in standard OpenRTB format with an `ext.orchestrato } ``` -The response also includes synthetic ID headers: +The response also includes EC ID headers: -- `X-Synthetic-ID` — The persistent synthetic user ID -- `X-Synthetic-Fresh` — A fresh ID generated for this session -- `X-Synthetic-Trusted-Server` — Trusted Server marker +- `X-TS-EC` — The persistent EC user ID +- `X-TS-EC-Fresh` — A fresh ID generated for this session ## Creative Processing diff --git a/docs/guide/collective-sync.md b/docs/guide/collective-sync.md index 25082da46..4028062cc 100644 --- a/docs/guide/collective-sync.md +++ b/docs/guide/collective-sync.md @@ -1,6 +1,6 @@ # Collective Sync Architecture -Trusted Server supports cross-publisher data sharing through a **Collective Sync** model. Publishers who share the same synthetic ID secret key can synchronize user data across their properties, enabling privacy-preserving audience insights without third-party cookies. +Trusted Server supports cross-publisher data sharing through a **Collective Sync** model. Publishers who share the same EC secret key can synchronize user data across their properties, enabling privacy-preserving audience insights without third-party cookies. 
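The merge semantics the sync endpoint applies per record (deduplication by ID, segment union, source tracking, version increment) are described in the sync sections below; here is a self-contained sketch of that merge, with all names invented for illustration:

```rust
use std::collections::BTreeSet;

// Hypothetical in-memory form of a collective record.
#[derive(Default)]
struct CollectiveRecord {
    segments: BTreeSet<String>, // union of all observed segments
    sources: BTreeSet<String>,  // which publishers contributed data
    version: u64,               // incremented for conflict resolution
}

impl CollectiveRecord {
    fn merge(&mut self, incoming_segments: &[String], source_domain: &str) {
        self.segments.extend(incoming_segments.iter().cloned());
        self.sources.insert(source_domain.to_string());
        self.version += 1;
    }
}
```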
## Overview @@ -13,7 +13,7 @@ sequenceDiagram participant Partner as Partner TS Instance Browser->>TS: Page request - TS->>KV: Lookup synthetic_id + TS->>KV: Lookup ts-ec alt Cache hit KV-->>TS: Return user data else Cache miss @@ -63,7 +63,7 @@ kv_store = "collective_store" ### KV Store Record -Each synthetic ID maps to a compact JSON record optimized for fast reads: +Each EC ID maps to a compact JSON record optimized for fast reads: ```json { @@ -74,12 +74,12 @@ Each synthetic ID maps to a compact JSON record optimized for fast reads: } ``` -| Field | Type | Description | -| ----- | -------- | ---------------------------------- | -| `sid` | string | Full synthetic ID (`64hex.6alnum`) | -| `seg` | string[] | Audience segments | -| `lst` | integer | Last seen timestamp (Unix epoch) | -| `src` | string[] | Contributing publisher domains | +| Field | Type | Description | +| ----- | -------- | -------------------------------- | +| `sid` | string | Full EC ID (`64hex.6alnum`) | +| `seg` | string[] | Audience segments | +| `lst` | integer | Last seen timestamp (Unix epoch) | +| `src` | string[] | Contributing publisher domains | ### Object Store Record (Source of Truth) @@ -87,7 +87,7 @@ The Object Store maintains a richer record with full history: ```json { - "synthetic_id": "0f99d7dc...a98e.45np22", + "ec_id": "0f99d7dc...a98e.45np22", "hmac_base": "0f99d7dc...a98e", "random_suffix": "45np22", "segments": ["auto-intender", "sports-fan"], @@ -115,8 +115,8 @@ Authorization: Bearer Response (NDJSON stream for large datasets): ```json -{"synthetic_id": "abc123.x1y2z3", "segments": [...], "last_seen": "..."} -{"synthetic_id": "def456.a1b2c3", "segments": [...], "last_seen": "..."} +{"ec_id": "abc123.x1y2z3", "segments": [...], "last_seen": "..."} +{"ec_id": "def456.a1b2c3", "segments": [...], "last_seen": "..."} ``` ### Incremental Updates @@ -133,7 +133,7 @@ Response includes only records modified after the `since` timestamp: ```json { "records": [ - {"synthetic_id": "abc123.x1y2z3", "segments": [...], "last_seen": "..."} + {"ec_id": "abc123.x1y2z3", "segments": [...], "last_seen": "..."} ], "next_cursor": "1706475000", "has_more": false @@ -152,7 +152,7 @@ Content-Type: application/json { "records": [ { - "synthetic_id": "abc123.x1y2z3", + "ec_id": "abc123.x1y2z3", "segments": ["new-segment"], "source_domain": "pub-a.com" } @@ -162,7 +162,7 @@ Content-Type: application/json The sync endpoint handles: -- Deduplication by synthetic_id +- Deduplication by ec_id - Segment merging (union of all observed segments) - Source tracking (which publishers contributed data) - Version increment for conflict resolution @@ -173,8 +173,8 @@ The sync endpoint handles: ``` 1. Browser request arrives at edge -2. Extract/generate synthetic_id -3. KV Store lookup by synthetic_id +2. Extract/generate EC ID +3. KV Store lookup by EC ID 4. If hit: return cached segments 5. 
If miss: fetch from Object Store, populate KV, return ``` @@ -191,7 +191,7 @@ The sync endpoint handles: ## Privacy Considerations -- **No PII**: Synthetic IDs contain no personally identifiable information +- **No PII**: EC IDs contain no personally identifiable information - **Consent-gated**: Only users with GDPR consent are included - **Publisher control**: Each publisher controls what segments they share - **Audit trail**: Object Store maintains full history of data sources @@ -208,6 +208,6 @@ The sync endpoint handles: ## Next Steps -- [Synthetic IDs](/guide/synthetic-ids) - Understand ID generation +- [Edge Cookies](/guide/edge-cookies) - Understand ID generation - [Configuration Reference](/guide/configuration) - Full config options - [GDPR Compliance](/guide/gdpr-compliance) - Privacy requirements diff --git a/docs/guide/configuration.md b/docs/guide/configuration.md index fb41eb8fe..029163bbf 100644 --- a/docs/guide/configuration.md +++ b/docs/guide/configuration.md @@ -23,11 +23,8 @@ cookie_domain = ".publisher.com" origin_url = "https://origin.publisher.com" proxy_secret = "your-secure-secret-here" -[synthetic] -counter_store = "counter_store" -opid_store = "opid_store" +[edge_cookie] secret_key = "your-hmac-secret" -template = "{{ client_ip }}:{{ user_agent }}:{{ accept_language }}:{{ accept_encoding }}" ``` ### Environment Variable Overrides @@ -40,7 +37,7 @@ at runtime. # Format: TRUSTED_SERVER__SECTION__FIELD export TRUSTED_SERVER__PUBLISHER__DOMAIN=publisher.com export TRUSTED_SERVER__PUBLISHER__ORIGIN_URL=https://origin.publisher.com -export TRUSTED_SERVER__SYNTHETIC__SECRET_KEY=your-secret +export TRUSTED_SERVER__EDGE_COOKIE__SECRET_KEY=your-secret ``` ### Generate Secure Secrets @@ -63,7 +60,7 @@ openssl rand -base64 32 | Section | Purpose | | ------------------- | -------------------------------------------- | | `[publisher]` | Domain, origin, proxy settings | -| `[synthetic]` | Synthetic ID generation | +| `[edge_cookie]` | Edge Cookie (EC) ID generation | | `[proxy]` | Proxy SSRF allowlist | | `[request_signing]` | Ed25519 request signing | | `[auction]` | Auction orchestration | @@ -78,11 +75,8 @@ cookie_domain = ".publisher.com" origin_url = "https://origin.publisher.com" proxy_secret = "change-me-to-secure-value" -[synthetic] -counter_store = "counter_store" -opid_store = "opid_store" +[edge_cookie] secret_key = "your-hmac-secret-key" -template = "{{ client_ip }}:{{ user_agent }}:{{ accept_language }}:{{ accept_encoding }}" [request_signing] enabled = true @@ -205,11 +199,11 @@ TRUSTED_SERVER__PUBLISHER__PROXY_SECRET=your-secret-here #### `cookie_domain` -**Purpose**: Domain scope for synthetic ID cookies. +**Purpose**: Domain scope for EC cookies. **Usage**: -- Set on `synthetic_id` cookie +- Set on `ts-ec` cookie - Controls cookie sharing across subdomains **Format**: Domain with optional leading dot @@ -267,95 +261,34 @@ openssl rand -base64 32 Changing `proxy_secret` invalidates all existing signed URLs. Plan rotations carefully and use graceful transition periods. ::: -## Synthetic ID Configuration +## EC Configuration -Settings for generating privacy-preserving synthetic identifiers. +Settings for generating privacy-preserving Edge Cookie identifiers. 
-### `[synthetic]` +### `[edge_cookie]` -| Field | Type | Required | Description | -| --------------- | ------ | -------- | ---------------------------------------------- | -| `counter_store` | String | Yes | Fastly KV store name for counters | -| `opid_store` | String | Yes | Fastly KV store name for publisher ID mappings | -| `secret_key` | String | Yes | HMAC secret for ID generation | -| `template` | String | Yes | Handlebars template for ID composition | +| Field | Type | Required | Description | +| ------------ | ------ | -------- | ----------------------------- | +| `secret_key` | String | Yes | HMAC secret for ID generation | **Example**: ```toml -[synthetic] -counter_store = "counter_store" -opid_store = "opid_store" +[edge_cookie] secret_key = "your-secure-hmac-secret" -template = "{{ client_ip }}:{{ user_agent }}:{{ accept_language }}:{{ accept_encoding }}" ``` **Environment Override**: ```bash -TRUSTED_SERVER__SYNTHETIC__COUNTER_STORE=counter_store -TRUSTED_SERVER__SYNTHETIC__OPID_STORE=opid_store -TRUSTED_SERVER__SYNTHETIC__SECRET_KEY=your-secret -TRUSTED_SERVER__SYNTHETIC__TEMPLATE="{{ client_ip }}:{{ user_agent }}:{{ accept_language }}:{{ accept_encoding }}" +TRUSTED_SERVER__EDGE_COOKIE__SECRET_KEY=your-secret ``` ### Field Details -#### `counter_store` - -**Purpose**: Fastly KV store for synthetic ID counters. - -**Usage**: - -- Stores incrementing counters per domain -- Ensures ID uniqueness -- Accessed via Fastly KV Store API - -**Setup**: - -```bash -# Create KV store -fastly kv-store create --name=counter_store -``` - -**Data Format**: - -```json -{ - "publisher.com": 12345, - "another.com": 67890 -} -``` - -#### `opid_store` - -**Purpose**: Fastly KV store for publisher-provided ID mappings. - -**Usage**: - -- Maps publisher IDs to synthetic IDs -- Enables first-party ID integration -- Optional (used if publisher provides IDs) - -**Setup**: - -```bash -# Create KV store -fastly kv-store create --name=opid_store -``` - -**Data Format**: - -```json -{ - "publisher-id-123": "synthetic-abc", - "publisher-id-456": "synthetic-def" -} -``` - #### `secret_key` -**Purpose**: HMAC secret for synthetic ID base generation. +**Purpose**: HMAC secret for EC ID base generation. **Security**: @@ -374,50 +307,6 @@ openssl rand -hex 32 - Empty string -#### `template` - -**Purpose**: Handlebars template defining ID composition. - -**Available Variables**: - -| Variable | Description | Example | -| ----------------- | ------------------------------------------ | -------------------------------------- | -| `client_ip` | Client IP address (IPv6 normalized to /64) | `192.168.1.1` | -| `user_agent` | User-Agent header | `Mozilla/5.0...` | -| `accept_language` | Accept-Language header (first token) | `en-US` | -| `accept_encoding` | Accept-Encoding header | `gzip, deflate` | -| `random_uuid` | Random UUID v4 per generation | `9b1d3b94-1e26-4a5f-bc39-1e6f2b6a3a0f` | - -**Template Examples**: - -**Simple (IP + UA)**: - -```toml -template = "{{ client_ip }}:{{ user_agent }}" -``` - -**With Locale + Encoding**: - -```toml -template = "{{ client_ip }}:{{ accept_language }}:{{ accept_encoding }}" -``` - -**With Randomized Suffix Input**: - -```toml -template = "{{ client_ip }}:{{ user_agent }}:{{ random_uuid }}" -``` - -**Validation**: Must be non-empty string. 
- -::: tip Template Design -Choose template variables based on your privacy and uniqueness requirements: - -- **More variables** = More unique IDs, less privacy -- **Fewer variables** = More privacy, potential collisions -- **Include `random_uuid`** only if you want a new ID for every generation - ::: - ## Response Headers Custom headers added to all responses. @@ -995,10 +884,10 @@ Configuration is validated at startup: - All fields non-empty - `origin_url` is valid URL -**Synthetic Validation**: +**EC Validation**: - `secret_key` ≥ 1 character -- `template` non-empty +- `secret_key` ≠ known placeholders (`"secret-key"`, `"secret_key"`, `"trusted-server"` — case-insensitive) **Handler Validation**: @@ -1055,7 +944,7 @@ TRUSTED_SERVER__PUBLISHER__PROXY_SECRET=$(cat /run/secrets/proxy_secret_staging) ```bash # All secrets from environment TRUSTED_SERVER__PUBLISHER__PROXY_SECRET=$(cat /run/secrets/proxy_secret) -TRUSTED_SERVER__SYNTHETIC__SECRET_KEY=$(cat /run/secrets/synthetic_secret) +TRUSTED_SERVER__EDGE_COOKIE__SECRET_KEY=$(cat /run/secrets/ec_secret) TRUSTED_SERVER__HANDLERS__0__PASSWORD=$(cat /run/secrets/admin_password) ``` @@ -1107,6 +996,13 @@ trusted-server.dev.toml # Development overrides - Verify all required fields present - Check environment variable format +**"Configuration field '...' is set to a known placeholder value"**: + +- `edge_cookie.secret_key` cannot be `"secret-key"`, `"secret_key"`, or `"trusted-server"` (case-insensitive) +- `publisher.proxy_secret` cannot be `"change-me-proxy-secret"` (case-insensitive) +- Must be non-empty +- Change to a secure random value (see generation commands above) + **"Invalid regex"**: - Handler `path` must be valid regex @@ -1156,5 +1052,5 @@ cat trusted-server.toml | npx toml-cli validate - Set up [Request Signing](/guide/request-signing) for secure API calls - Configure [First-Party Proxy](/guide/first-party-proxy) for URL proxying -- Learn about [Synthetic IDs](/guide/synthetic-ids) for privacy-preserving identification +- Learn about [Edge Cookies](/guide/edge-cookies) for privacy-preserving identification - Review [Integrations](/guide/integrations-overview) for partner support diff --git a/docs/guide/creative-processing.md b/docs/guide/creative-processing.md index 431f4771a..7a8977a99 100644 --- a/docs/guide/creative-processing.md +++ b/docs/guide/creative-processing.md @@ -8,7 +8,7 @@ Creative processing transforms third-party ad creatives by rewriting URLs to go - **Privacy Control** - All resources load through your domain - **First-Party Context** - Cookies and storage use your domain -- **Synthetic ID Integration** - Automatic ID forwarding to trackers +- **EC ID Integration** - Automatic ID forwarding to trackers - **Security** - Validated, signed URLs prevent tampering - **GDPR Compliance** - Controlled data sharing @@ -955,4 +955,4 @@ Track these metrics: - Learn about [First-Party Proxy](/guide/first-party-proxy) for URL handling - Review [Integration Guide](/guide/integration-guide) for custom rewriters - Set up [Configuration](/guide/configuration) for your creatives -- Explore [Synthetic IDs](/guide/synthetic-ids) for identity management +- Explore [Edge Cookies](/guide/edge-cookies) for identity management diff --git a/docs/guide/edge-cookies.md b/docs/guide/edge-cookies.md new file mode 100644 index 000000000..d0e31e2e0 --- /dev/null +++ b/docs/guide/edge-cookies.md @@ -0,0 +1,42 @@ +# Edge Cookies (EC) + +Trusted Server's EC module maintains user recognition across all browsers through first-party identifiers. 
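+
+As a rough illustration of the scheme described under "How They Work" below, the following sketch derives an EC ID. It assumes the `hmac`, `sha2`, and `rand` crates; the function name, suffix alphabet, and exact normalization string are illustrative, not the actual `trusted-server-core` implementation.
+
+```rust
+use hmac::{Hmac, Mac};
+use rand::Rng;
+use sha2::Sha256;
+use std::net::IpAddr;
+
+/// Illustrative EC ID derivation: a deterministic HMAC-SHA256 base over the
+/// (normalized) client IP, plus a short random suffix.
+fn generate_ec_id(secret_key: &[u8], client_ip: IpAddr) -> String {
+    // IPv6 addresses are reduced to their /64 prefix before hashing.
+    let hash_input = match client_ip {
+        IpAddr::V4(v4) => v4.to_string(),
+        IpAddr::V6(v6) => {
+            let s = v6.segments();
+            format!("{:x}:{:x}:{:x}:{:x}::", s[0], s[1], s[2], s[3])
+        }
+    };
+
+    // Deterministic base: 64 hex characters.
+    let mut mac =
+        Hmac::<Sha256>::new_from_slice(secret_key).expect("HMAC accepts any key length");
+    mac.update(hash_input.as_bytes());
+    let base: String = mac
+        .finalize()
+        .into_bytes()
+        .iter()
+        .map(|b| format!("{b:02x}"))
+        .collect();
+
+    // Short random suffix: 6 alphanumeric characters to reduce collision risk.
+    const ALPHABET: &[u8] = b"abcdefghijklmnopqrstuvwxyz0123456789";
+    let mut rng = rand::thread_rng();
+    let suffix: String = (0..6)
+        .map(|_| ALPHABET[rng.gen_range(0..ALPHABET.len())] as char)
+        .collect();
+
+    format!("{base}.{suffix}")
+}
+```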
+
+## What are Edge Cookies?
+
+Edge Cookies (EC) are privacy-safe identifiers generated on a user's first site visit using HMAC-based hashing. With user consent, they allow recognition across visits while protecting user privacy: Trusted Server derives a deterministic HMAC base from the client IP address and appends a short random suffix to reduce collision risk. The resulting ID accompanies requests on subsequent visits and activity.
+
+Trusted Server surfaces the current EC ID via response headers and a first-party cookie. For the exact header and cookie names, see the [API Reference](/guide/api-reference).
+
+## How They Work
+
+### HMAC-Based Generation
+
+EC IDs use HMAC (Hash-based Message Authentication Code) to generate a deterministic base from the client IP address, then append a short random suffix.
+
+**Format**: `64-hex-hmac`.`6-alphanumeric-suffix`
+
+**IP normalization**: IPv6 addresses are normalized to a /64 prefix before hashing.
+
+## Configuration
+
+Configure EC secrets in `trusted-server.toml`. See the full [Configuration Reference](/guide/configuration) for the `[edge_cookie]` section and environment variable overrides.
+
+## Privacy Considerations
+
+- EC IDs combine a deterministic HMAC base derived from the client IP with a random suffix for uniqueness
+- The cookie is only set when storage consent is present
+- No personally identifiable information (PII) is stored in the ID
+- The hash input is the client IP address only
+- IDs can be rotated by changing the secret key
+
+## Best Practices
+
+1. Always verify GDPR consent before generating IDs
+2. Rotate secret keys periodically
+3. Monitor ID collision rates
+
+## Next Steps
+
+- Learn about [GDPR Compliance](/guide/gdpr-compliance)
+- Configure [Ad Serving](/guide/ad-serving)
+- Learn about [Collective Sync](/guide/collective-sync) for cross-publisher data-sharing details and diagrams
diff --git a/docs/guide/error-reference.md b/docs/guide/error-reference.md
index 3117cb782..99f611aac 100644
--- a/docs/guide/error-reference.md
+++ b/docs/guide/error-reference.md
@@ -69,7 +69,7 @@ proxy_secret = "change-me-to-random-string"
- `publisher.domain`
- `publisher.origin_url`
- `publisher.proxy_secret`
-- `synthetic.secret_key`
+- `edge_cookie.secret_key`

---

@@ -129,35 +129,29 @@ See [Configuration Reference](./configuration.md) for complete patterns.

## Runtime Errors

-### Synthetic ID generation failed
+### EC ID generation failed

**Error Message:**

```
-Failed to generate synthetic ID: KV store not available
+Failed to generate EC ID: HMAC error
```

-**Cause:** KV store (counter_store or opid_store) not configured in Fastly
+**Cause:** HMAC secret key is missing or invalid in the Edge Cookie configuration.

**Solution:**

-1. Create KV stores in Fastly dashboard
-2. Link them to your Compute service
-3. Update `trusted-server.toml`:
+1. Ensure `secret_key` is set in `trusted-server.toml`:

```toml
-[synthetic]
-counter_store = "counter_store" # Must match Fastly KV store name
-opid_store = "opid_store"
+[edge_cookie]
+secret_key = "your-secure-hmac-secret"
```

-4. For local development, configure in `fastly.toml`:
+2. Or set via environment variable:

-```toml
-[local_server.kv_stores]
-  [[local_server.kv_stores.counter_store]]
-  key = "placeholder"
-  data = "placeholder"
+```bash
+TRUSTED_SERVER__EDGE_COOKIE__SECRET_KEY=your-secure-hmac-secret
```

---

diff --git a/docs/guide/first-party-proxy.md b/docs/guide/first-party-proxy.md
index 27e0ac428..b978e35f7 100644
--- a/docs/guide/first-party-proxy.md
+++ b/docs/guide/first-party-proxy.md
@@ -7,7 +7,7 @@ Learn how Trusted Server proxies third-party assets through first-party domains
The First-Party Proxy system rewrites third-party URLs in ad creatives to route through your domain, providing:

- **Privacy Protection** - No direct third-party cookies or tracking
-- **Synthetic ID Forwarding** - Controlled identity propagation
+- **EC ID Forwarding** - Controlled identity propagation
- **Creative Rewrites** - Automatic HTML/CSS URL transformation
- **Click Tracking** - First-party click redirects
- **Content Security** - Validated, signed URLs prevent tampering

@@ -18,7 +18,7 @@ The First-Party Proxy system rewrites third-party URLs in ad creatives to route
flowchart TD
original["`Creative (Original) <img src='tracker.com/pixel.gif' />`"]
rewritten["Creative (Rewritten)
<img src='/first-party/proxy?
tsurl=https://tracker.com/
pixel.gif&tstoken=abc123...' />"] - server["Trusted Server
1. Validate tstoken
2. Append synthetic_id
3. Proxy to tracker.com
4. Return response"] + server["Trusted Server
1. Validate tstoken
2. Append ts-ec
3. Proxy to tracker.com
4. Return response"] original -->|Rewrite| rewritten -->|Browser Request| server ``` @@ -46,7 +46,7 @@ GET /first-party/proxy?tsurl=https://example.com/ad.html&tstoken=signature **Behavior**: 1. **Validates** the `tstoken` signature against reconstructed URL -2. **Appends** `synthetic_id` query parameter (if available) +2. **Appends** `ts-ec` query parameter (if available) 3. **Proxies** request to target URL with forwarded headers: - `User-Agent` - `Accept` @@ -81,7 +81,7 @@ Signed proxy URL: Final proxied request: ``` -https://tracker.com/pixel.gif?campaign=123&uid=abc&synthetic_id=xyz +https://tracker.com/pixel.gif?campaign=123&uid=abc&ts-ec=xyz ``` ### `/first-party/click` - Click Redirects @@ -99,14 +99,14 @@ GET /first-party/click?tsurl=https://advertiser.com/landing&tstoken=signature **Behavior**: 1. **Validates** the `tstoken` signature -2. **Appends** `synthetic_id` parameter to target URL +2. **Appends** `ts-ec` parameter to target URL 3. **Issues** 302 redirect to target (browser navigates directly) 4. **Logs** click metadata: - Target URL base (`tsurl`) - Whether parameters were present - Full reconstructed URL - Referer, User-Agent - - Synthetic ID (if available) + - EC ID (if available) **Example**: @@ -127,7 +127,7 @@ User clicks → Server responds: ``` HTTP/1.1 302 Found -Location: https://advertiser.com/buy?product=widget&synthetic_id=xyz +Location: https://advertiser.com/buy?product=widget&ts-ec=xyz ``` ::: tip Click vs Proxy @@ -335,7 +335,7 @@ The proxy automatically follows HTTP redirects: **Behavior**: 1. Follow up to **4 redirect hops** -2. Re-apply `synthetic_id` on each hop +2. Re-apply `ts-ec` on each hop 3. Switch to `GET` after `303` response 4. Log when redirect limit reached 5. Preserve request headers across hops @@ -349,16 +349,16 @@ Request: /first-party/proxy?tsurl=https://short.link&tstoken=sig → Rewrite HTML and return ``` -## Synthetic ID Propagation +## EC ID Propagation ### Automatic Forwarding -When proxying, Trusted Server automatically appends the `synthetic_id` parameter: +When proxying, Trusted Server automatically appends the `ts-ec` parameter: **Source Priority**: -1. `x-synthetic-id` request header -2. `synthetic_id` cookie +1. `x-ts-ec` request header +2. `ts-ec` cookie 3. 
Generate new ID if missing **Example**: @@ -366,23 +366,23 @@ When proxying, Trusted Server automatically appends the `synthetic_id` parameter ``` Original request to proxy: /first-party/proxy?tsurl=https://tracker.com/pixel.gif&tstoken=sig - Cookie: synthetic_id=user123 + Cookie: ts-ec=user123 Proxied backend request: - https://tracker.com/pixel.gif?synthetic_id=user123 + https://tracker.com/pixel.gif?ts-ec=user123 ``` ### Redirect Propagation -Synthetic IDs are re-applied on **every redirect hop**: +EC IDs are re-applied on **every redirect hop**: ``` /first-party/proxy?tsurl=https://redirect1.com&tstoken=sig - → https://redirect1.com?synthetic_id=user123 + → https://redirect1.com?ts-ec=user123 → 302 to https://redirect2.com - → https://redirect2.com?synthetic_id=user123 + → https://redirect2.com?ts-ec=user123 → 302 to https://final.com - → https://final.com?synthetic_id=user123 + → https://final.com?ts-ec=user123 → 200 response ``` @@ -390,7 +390,7 @@ This ensures downstream trackers receive consistent IDs even through redirect ch ### Click ID Forwarding -Click redirects also forward synthetic IDs: +Click redirects also forward EC IDs: ```html @@ -400,15 +400,15 @@ User clicks → redirect includes ID: ``` 302 Found -Location: https://advertiser.com?synthetic_id=user123 +Location: https://advertiser.com?ts-ec=user123 ``` ::: tip Privacy Control -Synthetic IDs are only forwarded when: +EC IDs are only forwarded when: 1. User has given GDPR consent (if required) 2. ID exists in request (header/cookie) -3. Integration hasn't disabled forwarding (`forward_synthetic_id: false`) +3. Integration hasn't disabled forwarding (`forward_ec_id: false`) ::: ## Configuration @@ -422,7 +422,7 @@ Configure proxy behavior in `trusted-server.toml`: domain = "publisher.com" origin_url = "https://origin.publisher.com" proxy_secret = "your-secure-random-secret" -cookie_domain = ".publisher.com" # For synthetic_id cookies +cookie_domain = ".publisher.com" # For ts-ec cookies ``` ### Proxy Allowlist @@ -557,7 +557,7 @@ Only essential headers are forwarded to reduce overhead: **Not Forwarded**: - Authentication headers (unless explicitly added) -- Cookies (except synthetic ID appended as query param) +- Cookies (except EC ID appended as query param) - Custom headers (unless added via `ProxyRequestConfig`) ## Error Handling @@ -698,6 +698,6 @@ X-TS-Version = "1.0" ## Next Steps - Learn about [Creative Processing](/guide/creative-processing) for HTML rewriting details -- Review [Synthetic IDs](/guide/synthetic-ids) for identity management +- Review [Edge Cookies](/guide/edge-cookies) for identity management - Set up [Configuration](/guide/configuration) for your deployment - Explore [Integration Guide](/guide/integration-guide) for custom integrations diff --git a/docs/guide/gdpr-compliance.md b/docs/guide/gdpr-compliance.md index 0b17f3f34..5897066f9 100644 --- a/docs/guide/gdpr-compliance.md +++ b/docs/guide/gdpr-compliance.md @@ -62,7 +62,7 @@ Trusted Server supports: Only essential data is collected: -- Synthetic IDs (with consent) +- EC IDs (with consent) - Minimal request metadata - No PII storage diff --git a/docs/guide/getting-started.md b/docs/guide/getting-started.md index 47eb250df..8acfcf106 100644 --- a/docs/guide/getting-started.md +++ b/docs/guide/getting-started.md @@ -7,9 +7,12 @@ Get up and running with Trusted Server quickly. 
Before you begin, ensure you have:

- Rust 1.91.1 (see `.tool-versions`)
+- Basic familiarity with Rust and WebAssembly
+
+**For Fastly deployment** (optional for local dev):
+
- Fastly CLI installed
- A Fastly account and API key
-- Basic familiarity with WebAssembly

## Installation

@@ -20,37 +23,77 @@
git clone https://github.com/IABTechLab/trusted-server.git
cd trusted-server
```

-### Fastly CLI Setup
+## Local Development
+
+Trusted Server supports two local development modes:

-Install and configure the Fastly CLI using the [Fastly setup guide](/guide/fastly).
+### Option A — Fastly Compute via Viceroy

-### Install Viceroy (Test Runtime)
+
+Simulates the full Fastly production environment locally.
+
+Install and configure the Fastly CLI using the [Fastly setup guide](/guide/fastly), then install Viceroy:

```bash
# Match the Viceroy version pinned in `.tool-versions`; newer releases may require a newer rustc.
cargo install viceroy
```

-## Local Development
-
-### Build the Project
+Start the local Fastly simulator:

```bash
-cargo build
+fastly compute serve
```

-### Run Tests
+The server will be available at `http://localhost:7676`.
+
+### Option B — Axum dev server
+
+No Fastly account, CLI, or Viceroy needed. Runs natively on your machine.
+
+The Axum adapter reads configuration from environment variables — it does **not**
+auto-load `.env` files. You must export the variables into your shell before starting
+the server.

```bash
-cargo test
+# Copy and edit the environment file
+cp .env.dev .env
+
+# Export the variables into your current shell session
+set -a && source .env && set +a
+
+# Build and start the dev server
+cargo run -p trusted-server-adapter-axum
```

-### Start Local Server
+The server will be available at `http://localhost:8787`.
+
+**Environment variable conventions used by the Axum adapter:**
+
+| Purpose | Pattern | Example |
+|---------|---------|---------|
+| Config store value | `TRUSTED_SERVER_CONFIG_{STORE}_{KEY}` | `TRUSTED_SERVER_CONFIG_SETTINGS_AD_SERVER_URL=https://…` |
+| Secret store value | `TRUSTED_SERVER_SECRET_{STORE}_{KEY}` | `TRUSTED_SERVER_SECRET_KEYS_SIGNING_KEY=abc123` |
+
+Store names and key names are uppercased with hyphens and dots replaced by underscores.
+
+> **Dev server limitations:** The Axum adapter does not support KV store,
+> geo lookup, config/secret-store writes, or admin key-management routes.
+> See [Architecture](/guide/architecture) for the full list.
+
+### Build the Project

```bash
-fastly compute serve
+cargo build
```

-The server will be available at `http://localhost:7676`.
+### Run Tests
+
+```bash
+# Fastly/WASM crates (requires Viceroy)
+cargo test-fastly
+
+# Axum native adapter
+cargo test-axum
+```

## Configuration

@@ -58,7 +101,7 @@ Edit `trusted-server.toml` to configure:

- Ad server integrations
- KV store mappings
-- Synthetic ID templates
+- EC configuration
- GDPR settings

See [Configuration](/guide/configuration) for details.

@@ -71,6 +114,6 @@ fastly compute publish

## Next Steps

-- Learn about [Synthetic IDs](/guide/synthetic-ids)
+- Learn about [Edge Cookies](/guide/edge-cookies)
- Understand [GDPR Compliance](/guide/gdpr-compliance)
- Configure [Ad Serving](/guide/ad-serving)
diff --git a/docs/guide/integration-guide.md b/docs/guide/integration-guide.md
index b1bfc43d5..fb8c99daa 100644
--- a/docs/guide/integration-guide.md
+++ b/docs/guide/integration-guide.md
@@ -112,7 +112,7 @@ impl IntegrationProxy for MyIntegration {
settings: &Settings,
req: Request,
) -> Result> {
- // Parse/generate synthetic IDs, forward upstream, and return the response.
+ // Parse/generate EC IDs, forward upstream, and return the response. } } ``` @@ -127,7 +127,7 @@ The shared context already injects Trusted Server logging, headers, and error ha #### Proxying Upstream Requests -Use the shared helper in `crates/trusted-server-core/src/proxy.rs` to forward requests so you automatically get the same header copying, redirect handling, HTML/CSS rewrite behavior, and synthetic ID handling the first-party proxy uses: +Use the shared helper in `crates/trusted-server-core/src/proxy.rs` to forward requests so you automatically get the same header copying, redirect handling, HTML/CSS rewrite behavior, and EC ID handling the first-party proxy uses: ```rust use crate::proxy::{proxy_request, ProxyRequestConfig}; @@ -145,7 +145,7 @@ let response = proxy_request( .await?; ``` -Set `forward_synthetic_id` to `false` if the upstream should not receive the caller's synthetic ID (`Testlight` does this), and disable `follow_redirects` if you need to surface redirects directly to the caller. +Set `forward_ec_id` to `false` if the upstream should not receive the caller's EC ID (`Testlight` does this), and disable `follow_redirects` if you need to surface redirects directly to the caller. **Streaming passthrough example:** @@ -261,7 +261,7 @@ Integrations that ship additional JS (such as Testlight) typically expose a `shi 4. Use `fastly compute serve` (with Viceroy installed) to hit `/integrations//…` and fetch HTML from your origin to confirm rewrites are applied. ::: tip Testing Strategy -For unit tests, prefer exposing helper constructors that accept a synthetic `shim_src` so your tests can point rewriters at a deterministic URL without touching the Tsjs build artifacts. +For unit tests, prefer exposing helper constructors that accept a stub `shim_src` so your tests can point rewriters at a deterministic URL without touching the Tsjs build artifacts. ::: By following these steps you can ship independent integration modules that plug into the Trusted Server runtime without modifying the Fastly entrypoint or HTML processor each time. @@ -290,7 +290,7 @@ Integrations are loaded in one of two ways: **Loading**: Deferred (`