From d7577f1ed4da3ea5905b98ffe7b9983f97c4fb86 Mon Sep 17 00:00:00 2001 From: Taylor Mutch Date: Wed, 6 May 2026 18:13:19 -0700 Subject: [PATCH 1/2] test(helm): Add kube gateway e2e tests Signed-off-by: Taylor Mutch --- e2e/rust/e2e-helm.sh | 20 ++ e2e/rust/src/harness/driver.rs | 20 ++ e2e/rust/src/harness/mod.rs | 1 + e2e/rust/tests/forward_proxy_graphql_l7.rs | 4 + e2e/rust/tests/forward_proxy_l7_bypass.rs | 7 + e2e/rust/tests/host_gateway_alias.rs | 10 + e2e/with-kube-gateway.sh | 220 +++++++++++++++++++++ tasks/test.toml | 4 + 8 files changed, 286 insertions(+) create mode 100755 e2e/rust/e2e-helm.sh create mode 100644 e2e/rust/src/harness/driver.rs create mode 100755 e2e/with-kube-gateway.sh diff --git a/e2e/rust/e2e-helm.sh b/e2e/rust/e2e-helm.sh new file mode 100755 index 000000000..7d7042c47 --- /dev/null +++ b/e2e/rust/e2e-helm.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Run a Rust e2e test against a Helm-deployed OpenShell gateway. Set +# OPENSHELL_E2E_KUBE_CONTEXT to target an existing cluster; otherwise an +# ephemeral k3d cluster is created and torn down by with-kube-gateway.sh. + +set -euo pipefail + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" +E2E_TEST="${OPENSHELL_E2E_KUBE_TEST:-smoke}" + +cargo build -p openshell-cli --features openshell-core/dev-settings + +exec "${ROOT}/e2e/with-kube-gateway.sh" \ + cargo test --manifest-path "${ROOT}/e2e/rust/Cargo.toml" \ + --features e2e \ + --test "${E2E_TEST}" \ + -- --nocapture diff --git a/e2e/rust/src/harness/driver.rs b/e2e/rust/src/harness/driver.rs new file mode 100644 index 000000000..07921e461 --- /dev/null +++ b/e2e/rust/src/harness/driver.rs @@ -0,0 +1,20 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! 
Active compute-driver detection for tests with driver-specific assumptions. + +/// Returns true and prints a skip notice when running against the kube driver. +/// +/// Tests that depend on docker/podman host-network features (e.g. +/// `host.openshell.internal` reachability, sibling-container test servers) +/// can early-return when this is true. +pub fn skip_if_kube(reason: &str) -> bool { + if matches!( + std::env::var("OPENSHELL_E2E_DRIVER").as_deref(), + Ok("kubernetes") + ) { + eprintln!("skipping on kubernetes driver: {reason}"); + return true; + } + false +} diff --git a/e2e/rust/src/harness/mod.rs b/e2e/rust/src/harness/mod.rs index 5feb21c70..89a095548 100644 --- a/e2e/rust/src/harness/mod.rs +++ b/e2e/rust/src/harness/mod.rs @@ -5,6 +5,7 @@ pub mod binary; pub mod container; +pub mod driver; pub mod gateway; pub mod output; pub mod port; diff --git a/e2e/rust/tests/forward_proxy_graphql_l7.rs b/e2e/rust/tests/forward_proxy_graphql_l7.rs index aeb3648b0..bfc561a20 100644 --- a/e2e/rust/tests/forward_proxy_graphql_l7.rs +++ b/e2e/rust/tests/forward_proxy_graphql_l7.rs @@ -13,6 +13,7 @@ use std::io::Write; use openshell_e2e::harness::container::ContainerHttpServer; +use openshell_e2e::harness::driver::skip_if_kube; use openshell_e2e::harness::sandbox::SandboxGuard; use tempfile::NamedTempFile; @@ -131,6 +132,9 @@ network_policies: #[tokio::test] #[allow(clippy::too_many_lines)] async fn graphql_l7_enforces_allow_and_deny_rules_on_forward_and_connect_paths() { + if skip_if_kube("uses host.openshell.internal to reach a sibling container") { + return; + } let server = start_test_server().await.expect("start test server"); let policy = write_graphql_policy(&server.host, server.port).expect("write custom policy"); let policy_path = policy diff --git a/e2e/rust/tests/forward_proxy_l7_bypass.rs b/e2e/rust/tests/forward_proxy_l7_bypass.rs index 6cbaca1eb..1d3f872d0 100644 --- a/e2e/rust/tests/forward_proxy_l7_bypass.rs +++ 
b/e2e/rust/tests/forward_proxy_l7_bypass.rs @@ -11,6 +11,7 @@ use std::io::Write; use openshell_e2e::harness::container::ContainerHttpServer; +use openshell_e2e::harness::driver::skip_if_kube; use openshell_e2e::harness::sandbox::SandboxGuard; use tempfile::NamedTempFile; @@ -98,6 +99,9 @@ network_policies: /// GET /allowed should succeed — the L7 policy explicitly allows it. #[tokio::test] async fn forward_proxy_allows_l7_permitted_request() { + if skip_if_kube("uses host.openshell.internal to reach a sibling container") { + return; + } let server = start_test_server().await.expect("start test server"); let policy = write_policy_with_l7_rules(&server.host, server.port).expect("write custom policy"); @@ -138,6 +142,9 @@ except Exception as e: /// POST /allowed should be denied — the L7 policy only allows GET. #[tokio::test] async fn forward_proxy_denies_l7_blocked_request() { + if skip_if_kube("uses host.openshell.internal to reach a sibling container") { + return; + } let server = start_test_server().await.expect("start test server"); let policy = write_policy_with_l7_rules(&server.host, server.port).expect("write custom policy"); diff --git a/e2e/rust/tests/host_gateway_alias.rs b/e2e/rust/tests/host_gateway_alias.rs index 2dbdbf1dc..8e58a3de1 100644 --- a/e2e/rust/tests/host_gateway_alias.rs +++ b/e2e/rust/tests/host_gateway_alias.rs @@ -8,6 +8,7 @@ use std::process::Stdio; use std::sync::Mutex; use openshell_e2e::harness::binary::openshell_cmd; +use openshell_e2e::harness::driver::skip_if_kube; use openshell_e2e::harness::sandbox::SandboxGuard; use tempfile::NamedTempFile; use tokio::io::AsyncReadExt; @@ -190,6 +191,9 @@ network_policies: #[tokio::test] async fn sandbox_reaches_host_openshell_internal_via_host_gateway_alias() { + if skip_if_kube("requires host.openshell.internal alias") { + return; + } let server = HostServer::start(r#"{"message":"hello-from-host"}"#) .await .expect("start host echo server"); @@ -225,6 +229,9 @@ async fn 
sandbox_reaches_host_openshell_internal_via_host_gateway_alias() { #[tokio::test] async fn sandbox_inference_local_routes_to_host_openshell_internal() { + if skip_if_kube("requires host.openshell.internal alias") { + return; + } let _inference_lock = INFERENCE_ROUTE_LOCK .lock() .unwrap_or_else(std::sync::PoisonError::into_inner); @@ -301,6 +308,9 @@ async fn sandbox_inference_local_routes_to_host_openshell_internal() { #[tokio::test] async fn inference_set_supports_no_verify_for_unreachable_endpoint() { + if skip_if_kube("uses host.openshell.internal as the unreachable target") { + return; + } let _inference_lock = INFERENCE_ROUTE_LOCK .lock() .unwrap_or_else(std::sync::PoisonError::into_inner); diff --git a/e2e/with-kube-gateway.sh b/e2e/with-kube-gateway.sh new file mode 100755 index 000000000..d316876e6 --- /dev/null +++ b/e2e/with-kube-gateway.sh @@ -0,0 +1,220 @@ +#!/usr/bin/env bash +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Run an e2e command against a Helm-deployed OpenShell gateway in Kubernetes. +# +# Modes: +# - OPENSHELL_E2E_KUBE_CONTEXT set: +# Target the named kubectl context, install the chart into an ephemeral +# namespace, and port-forward the gateway. Cluster lifecycle is the +# caller's responsibility (e.g. CI provisions kind via helm/kind-action). +# - OPENSHELL_E2E_KUBE_CONTEXT unset: +# Create a local k3d cluster via tasks/scripts/helm-k3s-local.sh, install +# the chart, port-forward, and tear the cluster down on exit. +# +# Helm e2e currently uses plaintext gateway traffic (ci/values-tls-disabled.yaml). +# +# Image source: helm install pulls from ${OPENSHELL_REGISTRY}/{gateway,supervisor}:${IMAGE_TAG} +# (defaults: ghcr.io/nvidia/openshell, latest). 
CI sets IMAGE_TAG to the commit SHA; +# local devs should set it to a tag pulled from a registry the cluster can reach, +# or build and import images via a separate bootstrap step before running this script. + +set -euo pipefail + +if [ "$#" -eq 0 ]; then + echo "Usage: e2e/with-kube-gateway.sh <command> [args...]" >&2 + exit 2 +fi + +ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)" +# shellcheck source=e2e/support/gateway-common.sh +source "${ROOT}/e2e/support/gateway-common.sh" + +WORKDIR_PARENT="${TMPDIR:-/tmp}" +WORKDIR_PARENT="${WORKDIR_PARENT%/}" +WORKDIR="$(mktemp -d "${WORKDIR_PARENT}/openshell-e2e-kube.XXXXXX")" + +CLUSTER_CREATED_BY_US=0 +CLUSTER_NAME="" +KUBE_CONTEXT="" +NAMESPACE="openshell" +RELEASE_NAME="openshell" +PORTFORWARD_PID="" +PORTFORWARD_LOG="${WORKDIR}/portforward.log" +HELM_INSTALLED=0 + +# Isolate CLI/SDK gateway metadata from the developer's real config. +export XDG_CONFIG_HOME="${WORKDIR}/config" +export XDG_DATA_HOME="${WORKDIR}/data" + +kctl() { + kubectl --context "${KUBE_CONTEXT}" "$@" +} + +helmctl() { + helm --kube-context "${KUBE_CONTEXT}" "$@" +} + +cleanup() { + local exit_code=$?
+ + if [ -n "${PORTFORWARD_PID}" ]; then + kill "${PORTFORWARD_PID}" >/dev/null 2>&1 || true + wait "${PORTFORWARD_PID}" >/dev/null 2>&1 || true + fi + + if [ "${exit_code}" -ne 0 ] && [ -n "${KUBE_CONTEXT}" ] && [ -n "${NAMESPACE}" ]; then + if command -v kubectl >/dev/null 2>&1 \ + && kctl get namespace "${NAMESPACE}" >/dev/null 2>&1; then + echo "=== gateway pod state (preserved for debugging) ===" + kctl -n "${NAMESPACE}" get pods -o wide 2>&1 || true + echo "=== gateway events ===" + kctl -n "${NAMESPACE}" get events --sort-by=.lastTimestamp 2>&1 \ + | tail -n 80 || true + echo "=== gateway logs (last 200 lines) ===" + kctl -n "${NAMESPACE}" logs \ + -l "app.kubernetes.io/instance=${RELEASE_NAME}" --tail=200 \ + --all-containers --prefix 2>&1 || true + echo "=== end gateway debug output ===" + fi + if [ -f "${PORTFORWARD_LOG}" ]; then + echo "=== port-forward log ===" + cat "${PORTFORWARD_LOG}" || true + echo "=== end port-forward log ===" + fi + fi + + if [ "${HELM_INSTALLED}" = "1" ] && [ -n "${KUBE_CONTEXT}" ] && [ -n "${NAMESPACE}" ]; then + if command -v helm >/dev/null 2>&1; then + helmctl uninstall "${RELEASE_NAME}" --namespace "${NAMESPACE}" --wait \ + --timeout 60s >/dev/null 2>&1 || true + fi + if command -v kubectl >/dev/null 2>&1; then + kctl delete namespace "${NAMESPACE}" --wait=false \ + --ignore-not-found >/dev/null 2>&1 || true + fi + fi + + if [ "${CLUSTER_CREATED_BY_US}" = "1" ] && [ -n "${CLUSTER_NAME}" ]; then + if command -v k3d >/dev/null 2>&1 && k3d cluster list "${CLUSTER_NAME}" \ + >/dev/null 2>&1; then + echo "Deleting ephemeral k3d cluster ${CLUSTER_NAME}..." + k3d cluster delete "${CLUSTER_NAME}" >/dev/null 2>&1 || true + fi + fi + + rm -rf "${WORKDIR}" 2>/dev/null || true +} +trap cleanup EXIT + +require_cmd() { + if ! 
command -v "$1" >/dev/null 2>&1; then + echo "ERROR: $1 is required to run Helm-backed e2e tests" >&2 + exit 2 + fi +} + +require_cmd helm +require_cmd kubectl +require_cmd curl + +if [ -n "${OPENSHELL_E2E_KUBE_CONTEXT:-}" ]; then + KUBE_CONTEXT="${OPENSHELL_E2E_KUBE_CONTEXT}" + echo "Using existing kubectl context: ${KUBE_CONTEXT}" + if ! kctl cluster-info >/dev/null 2>&1; then + echo "ERROR: kubectl context '${KUBE_CONTEXT}' is not reachable." >&2 + exit 2 + fi +else + require_cmd k3d + CLUSTER_NAME="oshe2e-$$-$(date +%s | tail -c 8)" + echo "Creating ephemeral k3d cluster ${CLUSTER_NAME}..." + HELM_K3S_CLUSTER_NAME="${CLUSTER_NAME}" \ + HELM_K3S_KUBECONFIG="${WORKDIR}/kubeconfig" \ + bash "${ROOT}/tasks/scripts/helm-k3s-local.sh" create + CLUSTER_CREATED_BY_US=1 + export KUBECONFIG="${WORKDIR}/kubeconfig" + KUBE_CONTEXT="k3d-${CLUSTER_NAME}" +fi + +IMAGE_TAG_VALUE="${IMAGE_TAG:-latest}" +REGISTRY_VALUE="${OPENSHELL_REGISTRY:-ghcr.io/nvidia/openshell}" +REGISTRY_VALUE="${REGISTRY_VALUE%/}" + +# When this script created the cluster, import locally-available gateway and +# supervisor images so devs without a registry login can iterate. Best-effort: +# missing images fall through to the cluster's pull behavior at install time. +if [ "${CLUSTER_CREATED_BY_US}" = "1" ]; then + for image in \ + "${REGISTRY_VALUE}/gateway:${IMAGE_TAG_VALUE}" \ + "${REGISTRY_VALUE}/supervisor:${IMAGE_TAG_VALUE}"; do + if docker image inspect "${image}" >/dev/null 2>&1; then + echo "Importing ${image} into k3d cluster ${CLUSTER_NAME}..." + k3d image import "${image}" --cluster "${CLUSTER_NAME}" \ + --mode direct >/dev/null + fi + done +fi + +# The Kubernetes compute driver creates and watches Sandbox CRs reconciled +# by the upstream agent-sandbox-controller. Without the CRD + controller, +# every gateway K8s call 404s and CreateSandbox never produces a Pod. +echo "Installing agent-sandbox CRDs and controller..." 
+kctl apply -f "${ROOT}/deploy/kube/manifests/agent-sandbox.yaml" +kctl wait --for=condition=Established crd/sandboxes.agents.x-k8s.io --timeout=120s +kctl -n agent-sandbox-system rollout status statefulset/agent-sandbox-controller --timeout=300s + +echo "Installing Helm chart (release=${RELEASE_NAME}, namespace=${NAMESPACE}, tag=${IMAGE_TAG_VALUE})..." +helmctl install "${RELEASE_NAME}" "${ROOT}/deploy/helm/openshell" \ + --namespace "${NAMESPACE}" --create-namespace \ + --values "${ROOT}/deploy/helm/openshell/ci/values-tls-disabled.yaml" \ + --set "fullnameOverride=openshell" \ + --set "image.repository=${REGISTRY_VALUE}/gateway" \ + --set "image.tag=${IMAGE_TAG_VALUE}" \ + --set "supervisor.image.repository=${REGISTRY_VALUE}/supervisor" \ + --set "supervisor.image.tag=${IMAGE_TAG_VALUE}" \ + --wait --timeout 5m +HELM_INSTALLED=1 + +LOCAL_PORT="$(e2e_pick_port)" +echo "Starting kubectl port-forward svc/openshell ${LOCAL_PORT}:8080..." +kctl -n "${NAMESPACE}" port-forward "svc/openshell" \ + "${LOCAL_PORT}:8080" >"${PORTFORWARD_LOG}" 2>&1 & +PORTFORWARD_PID=$! + +elapsed=0 +timeout=30 +while [ "${elapsed}" -lt "${timeout}" ]; do + if ! 
kill -0 "${PORTFORWARD_PID}" 2>/dev/null; then + echo "ERROR: kubectl port-forward exited before becoming reachable" >&2 + cat "${PORTFORWARD_LOG}" >&2 || true + exit 1 + fi + if curl -s -o /dev/null --connect-timeout 1 "http://127.0.0.1:${LOCAL_PORT}"; then + break + fi + sleep 1 + elapsed=$((elapsed + 1)) +done +if [ "${elapsed}" -ge "${timeout}" ]; then + echo "ERROR: port-forward did not accept TCP within ${timeout}s" >&2 + cat "${PORTFORWARD_LOG}" >&2 || true + exit 1 +fi + +GATEWAY_NAME="openshell-e2e-kube-${LOCAL_PORT}" +GATEWAY_ENDPOINT="http://127.0.0.1:${LOCAL_PORT}" +e2e_register_plaintext_gateway \ + "${XDG_CONFIG_HOME}" \ + "${GATEWAY_NAME}" \ + "${GATEWAY_ENDPOINT}" \ + "${LOCAL_PORT}" + +export OPENSHELL_GATEWAY="${GATEWAY_NAME}" +export OPENSHELL_E2E_DRIVER="kubernetes" +export OPENSHELL_E2E_SANDBOX_NAMESPACE="${NAMESPACE}" +export OPENSHELL_PROVISION_TIMEOUT="${OPENSHELL_PROVISION_TIMEOUT:-300}" + +echo "Running e2e command against ${GATEWAY_ENDPOINT}: $*" +"$@" diff --git a/tasks/test.toml b/tasks/test.toml index bf5741c72..c9e1dc817 100644 --- a/tasks/test.toml +++ b/tasks/test.toml @@ -50,6 +50,10 @@ run = "e2e/with-docker-gateway.sh uv run pytest -o python_files='test_*.py' -m g description = "Run Rust CLI e2e tests against a Podman-backed gateway" run = "e2e/rust/e2e-podman.sh" +["e2e:helm"] +description = "Run smoke e2e against a Helm-deployed gateway (set OPENSHELL_E2E_KUBE_CONTEXT to reuse a cluster, otherwise creates a local k3d cluster)" +run = "e2e/rust/e2e-helm.sh" + ["e2e:vm"] description = "Start openshell-gateway with the VM compute driver and run the cluster-agnostic smoke e2e" run = "e2e/rust/e2e-vm.sh" From d55f22497493ab0c0eb028e5ade4849ca06205a6 Mon Sep 17 00:00:00 2001 From: Taylor Mutch Date: Thu, 7 May 2026 16:50:02 -0700 Subject: [PATCH 2/2] ci(helm): add Branch Helm E2E workflow gated on test:e2e-helm Adds a label-gated GitHub Actions workflow that exercises the Helm chart end-to-end against the Rust e2e suite via `mise run 
e2e:helm`. Pipeline: - pr_metadata gates on the `test:e2e-helm` label via the pr-gate action. - build-gateway / build-supervisor build and push Docker images using the reusable docker-build.yml workflow. - helm-e2e (bare runner): apt-installs z3 build deps so cargo can compile the openshell-policy crate's z3-sys backend, creates a kind cluster via helm/kind-action, materializes the kind kubeconfig at the path mise's [env] block expects, side-loads the freshly built gateway/supervisor images, applies deploy/kube/manifests/agent-sandbox.yaml so the sandboxes.agents.x-k8s.io CRD and reconciling StatefulSet are in place, and finally runs `mise run e2e:helm`. Also expands the `e2e:helm` task to run the full Rust e2e suite (matching `e2e:podman`) instead of only the smoke test, with OPENSHELL_E2E_KUBE_TEST as an opt-in single-test override for local debugging. Extends the e2e-label-help workflow so applying `test:e2e-helm` posts the next-step hint pointing at this workflow. Signed-off-by: Taylor Mutch --- .github/workflows/branch-helm-e2e.yml | 126 ++++++++++++++++++++++++++ .github/workflows/e2e-label-help.yml | 3 +- e2e/rust/e2e-helm.sh | 13 ++- tasks/test.toml | 2 +- 4 files changed, 139 insertions(+), 5 deletions(-) create mode 100644 .github/workflows/branch-helm-e2e.yml diff --git a/.github/workflows/branch-helm-e2e.yml b/.github/workflows/branch-helm-e2e.yml new file mode 100644 index 000000000..926874a08 --- /dev/null +++ b/.github/workflows/branch-helm-e2e.yml @@ -0,0 +1,126 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +name: Branch Helm E2E + +on: + push: + branches: + - "pull-request/[0-9]+" + workflow_dispatch: {} + +permissions: {} + +jobs: + pr_metadata: + name: Resolve PR metadata + runs-on: ubuntu-latest + permissions: + contents: read + pull-requests: read + outputs: + should_run: ${{ steps.gate.outputs.should_run }} + steps: + - uses: actions/checkout@v6 + + - id: gate + uses: ./.github/actions/pr-gate + with: + required_label: test:e2e-helm + + build-gateway: + needs: [pr_metadata] + if: needs.pr_metadata.outputs.should_run == 'true' + permissions: + contents: read + packages: write + uses: ./.github/workflows/docker-build.yml + with: + component: gateway + platform: linux/amd64 + + build-supervisor: + needs: [pr_metadata] + if: needs.pr_metadata.outputs.should_run == 'true' + permissions: + contents: read + packages: write + uses: ./.github/workflows/docker-build.yml + with: + component: supervisor + platform: linux/amd64 + + helm-e2e: + name: Helm E2E (Rust suite) + needs: [pr_metadata, build-gateway, build-supervisor] + if: needs.pr_metadata.outputs.should_run == 'true' + # Bare runner: running kind-in-container hits nested-Docker / kubeconfig + # complications. The runner has Docker; mise installs helm, kubectl, and + # the Rust toolchain. + runs-on: linux-amd64-cpu8 + timeout-minutes: 60 + permissions: + contents: read + packages: read + env: + MISE_GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + KIND_CLUSTER_NAME: helm-e2e-${{ github.run_id }} + steps: + - uses: actions/checkout@v6 + + - name: Install mise + run: | + curl https://mise.run | sh + echo "$HOME/.local/bin" >> "$GITHUB_PATH" + echo "$HOME/.local/share/mise/shims" >> "$GITHUB_PATH" + + - name: Install tools + run: mise install --locked + + # The openshell-policy crate transitively pulls in z3-sys, whose + # build script needs the z3 C/C++ headers and clang/bindgen to + # compile.
The bare runner doesn't ship them; the CI container + # image used by other Rust e2e jobs does, but we can't run helm-e2e + # there (the runner's container handler injects its own --network + # bridge, which conflicts with the --network host we need so kind's + # API server is reachable from the test process). + - name: Install z3 build deps + run: sudo apt-get update && sudo apt-get install -y --no-install-recommends libz3-dev clang + + - name: Log in to GHCR + run: echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin + + - name: Create kind cluster + uses: helm/kind-action@v1 + with: + cluster_name: ${{ env.KIND_CLUSTER_NAME }} + wait: 120s + + # mise.toml sets KUBECONFIG="{{config_root}}/kubeconfig"; helm/kind-action + # writes to ~/.kube/config. Materialize the kind context at the mise path + # so `mise run e2e:helm` (and the wrapper's `kubectl --context=…`) finds + # the kind cluster. + - name: Export kind kubeconfig to mise path + run: | + set -euo pipefail + kind get kubeconfig --name "$KIND_CLUSTER_NAME" > "$GITHUB_WORKSPACE/kubeconfig" + chmod 600 "$GITHUB_WORKSPACE/kubeconfig" + + # Pre-pull and side-load: kind nodes don't have ghcr credentials, and + # tagging IMAGE_TAG to a SHA means the chart's IfNotPresent pull policy + # is satisfied once the image is loaded into the node's containerd. 
+ - name: Load gateway and supervisor images into kind + run: | + set -euo pipefail + for component in gateway supervisor; do + image="ghcr.io/nvidia/openshell/${component}:${{ github.sha }}" + docker pull "$image" + kind load docker-image "$image" --name "$KIND_CLUSTER_NAME" + done + + - name: Run Helm E2E (Rust suite) + env: + OPENSHELL_E2E_KUBE_CONTEXT: kind-${{ env.KIND_CLUSTER_NAME }} + IMAGE_TAG: ${{ github.sha }} + OPENSHELL_REGISTRY: ghcr.io/nvidia/openshell + run: mise run --no-deps --skip-deps e2e:helm diff --git a/.github/workflows/e2e-label-help.yml b/.github/workflows/e2e-label-help.yml index 2a61660d2..a5463f986 100644 --- a/.github/workflows/e2e-label-help.yml +++ b/.github/workflows/e2e-label-help.yml @@ -19,7 +19,7 @@ permissions: {} jobs: hint: name: Post next-step hint for E2E label - if: github.event.label.name == 'test:e2e' || github.event.label.name == 'test:e2e-gpu' + if: github.event.label.name == 'test:e2e' || github.event.label.name == 'test:e2e-gpu' || github.event.label.name == 'test:e2e-helm' runs-on: ubuntu-latest permissions: pull-requests: write @@ -40,6 +40,7 @@ jobs: case "$LABEL_NAME" in test:e2e) workflow_file=branch-e2e.yml; workflow_name="Branch E2E Checks" ;; test:e2e-gpu) workflow_file=test-gpu.yml; workflow_name="GPU Test" ;; + test:e2e-helm) workflow_file=branch-helm-e2e.yml; workflow_name="Branch Helm E2E" ;; *) echo "Unrecognized label $LABEL_NAME"; exit 1 ;; esac diff --git a/e2e/rust/e2e-helm.sh b/e2e/rust/e2e-helm.sh index 7d7042c47..6b161f344 100755 --- a/e2e/rust/e2e-helm.sh +++ b/e2e/rust/e2e-helm.sh @@ -2,19 +2,26 @@ # SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # SPDX-License-Identifier: Apache-2.0 -# Run a Rust e2e test against a Helm-deployed OpenShell gateway. Set +# Run the Rust e2e suite against a Helm-deployed OpenShell gateway.
Set # OPENSHELL_E2E_KUBE_CONTEXT to target an existing cluster; otherwise an # ephemeral k3d cluster is created and torn down by with-kube-gateway.sh. +# Set OPENSHELL_E2E_KUBE_TEST to scope to a single integration test +# (e.g. smoke) for local debugging. set -euo pipefail ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" -E2E_TEST="${OPENSHELL_E2E_KUBE_TEST:-smoke}" cargo build -p openshell-cli --features openshell-core/dev-settings +test_filter=() +if [ -n "${OPENSHELL_E2E_KUBE_TEST:-}" ]; then + test_filter+=(--test "${OPENSHELL_E2E_KUBE_TEST}") +fi + exec "${ROOT}/e2e/with-kube-gateway.sh" \ cargo test --manifest-path "${ROOT}/e2e/rust/Cargo.toml" \ --features e2e \ - --test "${E2E_TEST}" \ + --no-fail-fast \ + ${test_filter[@]+"${test_filter[@]}"} \ -- --nocapture diff --git a/tasks/test.toml b/tasks/test.toml index c9e1dc817..00a6823b2 100644 --- a/tasks/test.toml +++ b/tasks/test.toml @@ -51,7 +51,7 @@ description = "Run Rust CLI e2e tests against a Podman-backed gateway" run = "e2e/rust/e2e-podman.sh" ["e2e:helm"] -description = "Run smoke e2e against a Helm-deployed gateway (set OPENSHELL_E2E_KUBE_CONTEXT to reuse a cluster, otherwise creates a local k3d cluster)" +description = "Run Rust CLI e2e tests against a Helm-deployed gateway (set OPENSHELL_E2E_KUBE_CONTEXT to reuse a cluster, otherwise creates a local k3d cluster; set OPENSHELL_E2E_KUBE_TEST=<name> to scope to one test)" run = "e2e/rust/e2e-helm.sh" ["e2e:vm"]