diff --git a/Cargo.lock b/Cargo.lock index 2bf308fd2..788006116 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3366,6 +3366,7 @@ dependencies = [ "openshell-policy", "openshell-prover", "openshell-providers", + "openshell-server", "openshell-tui", "owo-colors", "prost-types", @@ -3384,6 +3385,7 @@ dependencies = [ "tokio-stream", "tokio-tungstenite 0.26.2", "tonic", + "tonic-health", "tracing", "tracing-subscriber", "url", @@ -3662,6 +3664,8 @@ dependencies = [ "tokio-stream", "tokio-tungstenite 0.26.2", "tonic", + "tonic-health", + "tonic-reflection", "tower 0.5.3", "tower-http 0.6.8", "tracing", @@ -6133,6 +6137,32 @@ dependencies = [ "syn 2.0.117", ] +[[package]] +name = "tonic-health" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1eaf34ddb812120f5c601162d5429933c9b527d901ab0e7f930d3147e33a09b2" +dependencies = [ + "async-stream", + "prost", + "tokio", + "tokio-stream", + "tonic", +] + +[[package]] +name = "tonic-reflection" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "878d81f52e7fcfd80026b7fdb6a9b578b3c3653ba987f87f0dce4b64043cba27" +dependencies = [ + "prost", + "prost-types", + "tokio", + "tokio-stream", + "tonic", +] + [[package]] name = "tower" version = "0.4.13" diff --git a/Cargo.toml b/Cargo.toml index 9bc3f9ea2..67af6ca35 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,6 +19,8 @@ tokio = { version = "1.43", features = ["full"] } # gRPC/Protobuf tonic = "0.12" tonic-build = "0.12" +tonic-health = "0.12" +tonic-reflection = "0.12" prost = "0.13" prost-types = "0.13" diff --git a/architecture/gateway.md b/architecture/gateway.md index c85f77ef6..6798438b8 100644 --- a/architecture/gateway.md +++ b/architecture/gateway.md @@ -327,6 +327,17 @@ These complement the unit tests inside `supervisor_session.rs` (registry-only be ## gRPC Services +### Standard health and reflection (infrastructure) + +The gateway multiplexes these alongside application services on the same TLS port (`crates/openshell-server/src/multiplex.rs`): + +| Service | Path prefix | Notes | +|---------|-------------|--------| +| `grpc.health.v1.Health` | `/grpc.health.v1.Health/` | `Check` reports **process liveness** only (same semantics as legacy `OpenShell/Health` today—not database, driver, or inference readiness). Registered names include `openshell.v1.OpenShell`, `openshell.inference.v1.Inference`, and aggregate probe `service: ""`. `Watch` returns `UNIMPLEMENTED`. | +| `grpc.reflection.v1.ServerReflection` | `/grpc.reflection.v1.ServerReflection/` | Reflection aggregates multiple encoded `FileDescriptorSet`s: OpenShell protos from `openshell-core` (`crates/openshell-core/build.rs`), `grpc.health.v1` from `tonic_health::pb::FILE_DESCRIPTOR_SET`, and `grpc.reflection.v1` from the set `tonic_reflection::server::Builder::build_v1()` registers by default ([reflection proto](https://github.com/grpc/grpc/blob/master/src/proto/grpc/reflection/v1/reflection.proto)). When OIDC is enabled, `/grpc.reflection.*` remains **unauthenticated** by design (see `UNAUTHENTICATED_PREFIXES` in `crates/openshell-server/src/auth/oidc.rs`). | + +Prefer **`grpc.health.v1.Health/Check`** for probes and tooling; `openshell.v1.OpenShell/Health` is **deprecated** in `proto/openshell.proto` but retained for backward compatibility. + ### OpenShell Service Defined in `proto/openshell.proto`, implemented in `crates/openshell-server/src/grpc/mod.rs` as `OpenShellService`. Per-concern handlers live in `crates/openshell-server/src/grpc/` submodules. @@ -335,7 +346,7 @@ Defined in `proto/openshell.proto`, implemented in `crates/openshell-server/src/ | RPC | Description | Key behavior | |-----|-------------|--------------| -| `Health` | Returns service status and version | Always returns `HEALTHY` with `CARGO_PKG_VERSION` | +| `Health` | Returns service status and version | **Deprecated** — use `grpc.health.v1.Health/Check`. When called, reflects process liveness (`HEALTHY` / `UNHEALTHY`) and `CARGO_PKG_VERSION` (or git-derived version). | | `CreateSandbox` | Create a new sandbox | Validates spec and policy, validates provider names exist (fail-fast), persists to store, creates the compute-driver sandbox. On driver failure, rolls back the store record and index entry. | | `GetSandbox` | Fetch sandbox by name | Looks up by name via `store.get_message_by_name()` | | `ListSandboxes` | List sandboxes | Paginated (default limit 100), decodes protobuf payloads from store records | diff --git a/architecture/oidc-auth.md b/architecture/oidc-auth.md index 746a6a519..16e86141b 100644 --- a/architecture/oidc-auth.md +++ b/architecture/oidc-auth.md @@ -158,10 +158,11 @@ These methods require no authentication at all — health probes and infrastruct | Method / Prefix | Reason | |---|---| -| `OpenShell/Health` | Kubernetes liveness/readiness probes | -| `Inference/Health` | Inference service health probes | +| `OpenShell/Health` | Deprecated custom health RPC on `OpenShell` for legacy clients/tooling. Kubernetes **gRPC probes** call `grpc.health.v1.Health/Check` (see `/grpc.health.*` below), not this method | | `/grpc.reflection.*` | gRPC server reflection (debugging tools) | -| `/grpc.health.*` | gRPC health check protocol | +| `/grpc.health.*` | Standard gRPC health check protocol (covers logical services by name, e.g. `openshell.inference.v1.Inference`; there is no separate `Inference/Health` RPC on `Inference`) | + +Note: The `Inference` service in `proto/inference.proto` never defined a `Health` method — only bundle/cluster inference RPCs. Any historical OIDC allowlist entry for `Inference/Health` pointed at a non-existent method. ### Sandbox-Secret Authenticated diff --git a/crates/openshell-cli/Cargo.toml b/crates/openshell-cli/Cargo.toml index 1507cecef..9da63c3e5 100644 --- a/crates/openshell-cli/Cargo.toml +++ b/crates/openshell-cli/Cargo.toml @@ -84,6 +84,8 @@ bundled-z3 = ["openshell-prover/bundled-z3"] dev-settings = ["openshell-core/dev-settings"] [dev-dependencies] +openshell-server = { path = "../openshell-server" } +tonic-health = { workspace = true } futures = { workspace = true } rcgen = { version = "0.13", features = ["crypto", "pem"] } reqwest = { workspace = true } diff --git a/crates/openshell-cli/src/run.rs b/crates/openshell-cli/src/run.rs index c4de827ab..2e9802348 100644 --- a/crates/openshell-cli/src/run.rs +++ b/crates/openshell-cli/src/run.rs @@ -696,7 +696,10 @@ fn is_progress_status(status: &str) -> bool { } /// Show gateway status. -#[allow(clippy::branches_sharing_code)] +/// +/// Still calls legacy `OpenShell/Health` for version text; migrate to +/// `grpc.health.v1.Health/Check` when a generated client is wired here. +#[allow(clippy::branches_sharing_code, deprecated)] pub async fn gateway_status(gateway_name: &str, server: &str, tls: &TlsOptions) -> Result<()> { println!("{}", "Server Status".cyan().bold()); println!(); diff --git a/crates/openshell-cli/tests/mtls_integration.rs b/crates/openshell-cli/tests/mtls_integration.rs index 69d7b7354..47cffb5fb 100644 --- a/crates/openshell-cli/tests/mtls_integration.rs +++ b/crates/openshell-cli/tests/mtls_integration.rs @@ -1,7 +1,7 @@ // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 -use openshell_cli::tls::{TlsOptions, grpc_client}; +use openshell_cli::tls::{TlsOptions, build_channel}; use openshell_core::proto::{ CreateProviderRequest, CreateSshSessionRequest, CreateSshSessionResponse, DeleteProviderRequest, DeleteProviderResponse, ExecSandboxEvent, ExecSandboxRequest, @@ -10,6 +10,7 @@ use openshell_core::proto::{ UpdateProviderRequest, open_shell_server::{OpenShell, OpenShellServer}, }; +use openshell_server::{GatewayStandardHealth, MAX_GRPC_DECODE_SIZE, OPENSHELL_SERVICE_NAME}; use rcgen::{ BasicConstraints, Certificate, CertificateParams, ExtendedKeyUsagePurpose, IsCa, KeyPair, }; @@ -21,6 +22,9 @@ use tonic::{ Response, Status, transport::{Certificate as TlsCertificate, Identity, Server, ServerTlsConfig}, }; +use tonic_health::pb::{ + HealthCheckRequest, health_check_response::ServingStatus, health_client::HealthClient, +}; struct EnvVarGuard { key: &'static str, @@ -407,7 +411,10 @@ async fn run_server( Server::builder() .tls_config(tls) .unwrap() - .add_service(OpenShellServer::new(TestOpenShell)) + .add_service(GatewayStandardHealth::server(MAX_GRPC_DECODE_SIZE)) + .add_service( + OpenShellServer::new(TestOpenShell).max_decoding_message_size(MAX_GRPC_DECODE_SIZE), + ) .serve_with_incoming(incoming) .await .unwrap(); @@ -437,9 +444,16 @@ async fn cli_connects_with_client_cert() { let tls = TlsOptions::new(Some(ca_path), Some(cert_path), Some(key_path)); let endpoint = format!("https://localhost:{}", addr.port()); - let mut client = grpc_client(&endpoint, &tls).await.unwrap(); - let response = client.health(HealthRequest {}).await.unwrap(); - assert_eq!(response.get_ref().status, ServiceStatus::Healthy as i32); + let channel = build_channel(&endpoint, &tls).await.unwrap(); + let mut health = HealthClient::new(channel); + let response = health + .check(HealthCheckRequest { + service: OPENSHELL_SERVICE_NAME.to_string(), + }) + .await + .unwrap() + .into_inner(); + assert_eq!(response.status, ServingStatus::Serving as i32); } #[tokio::test] @@ -461,6 +475,6 @@ async fn cli_requires_client_cert_for_https() { let tls = TlsOptions::new(Some(ca_path), None, None); let endpoint = format!("https://localhost:{}", addr.port()); - let result = grpc_client(&endpoint, &tls).await; + let result = build_channel(&endpoint, &tls).await; assert!(result.is_err()); } diff --git a/crates/openshell-core/build.rs b/crates/openshell-core/build.rs index 7613c8754..1d710200c 100644 --- a/crates/openshell-core/build.rs +++ b/crates/openshell-core/build.rs @@ -40,10 +40,15 @@ fn main() -> Result<(), Box> { collect_proto_files(&proto_root, &mut proto_files)?; proto_files.sort(); + // Serialized FileDescriptorSet for gRPC server reflection on the gateway. + let out_dir = PathBuf::from(env::var("OUT_DIR")?); + let descriptor_path = out_dir.join("openshell_file_descriptor_set.bin"); + // Configure tonic-build tonic_build::configure() .build_server(true) .build_client(true) + .file_descriptor_set_path(&descriptor_path) .compile_protos(&proto_files, &[proto_root.as_path()])?; Ok(()) diff --git a/crates/openshell-core/src/proto/descriptor_set.rs b/crates/openshell-core/src/proto/descriptor_set.rs new file mode 100644 index 000000000..a3a8705b3 --- /dev/null +++ b/crates/openshell-core/src/proto/descriptor_set.rs @@ -0,0 +1,15 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Embedded protobuf `FileDescriptorSet` for gRPC server reflection. +//! +//! This blob covers **`OpenShell`** protos only. The gateway reflection service also registers +//! `grpc.health.v1` and `grpc.reflection.v1` using the embedded sets exported by the +//! `tonic-health` and `tonic-reflection` crates (`tonic_health::pb::FILE_DESCRIPTOR_SET` and the +//! set `tonic_reflection::server::Builder::build_v1()` adds by default). + +/// Serialized `FileDescriptorSet` covering `OpenShell` gateway protos (see `build.rs`). +pub const FILE_DESCRIPTOR_SET: &[u8] = include_bytes!(concat!( + env!("OUT_DIR"), + "/openshell_file_descriptor_set.bin" +)); diff --git a/crates/openshell-core/src/proto/mod.rs b/crates/openshell-core/src/proto/mod.rs index 08b062d2e..672f4f582 100644 --- a/crates/openshell-core/src/proto/mod.rs +++ b/crates/openshell-core/src/proto/mod.rs @@ -5,6 +5,10 @@ //! //! This module re-exports the generated protobuf types and service definitions. +mod descriptor_set; + +pub use descriptor_set::FILE_DESCRIPTOR_SET; + #[allow( clippy::all, clippy::pedantic, diff --git a/crates/openshell-server/Cargo.toml b/crates/openshell-server/Cargo.toml index cb6561f3e..f02bb5d6c 100644 --- a/crates/openshell-server/Cargo.toml +++ b/crates/openshell-server/Cargo.toml @@ -29,6 +29,8 @@ tokio = { workspace = true } # gRPC tonic = { workspace = true, features = ["channel", "tls"] } +tonic-health = { workspace = true } +tonic-reflection = { workspace = true } prost = { workspace = true } prost-types = { workspace = true } diff --git a/crates/openshell-server/src/auth/oidc.rs b/crates/openshell-server/src/auth/oidc.rs index d3b74aa81..39d35f1e7 100644 --- a/crates/openshell-server/src/auth/oidc.rs +++ b/crates/openshell-server/src/auth/oidc.rs @@ -31,10 +31,7 @@ pub const INTERNAL_AUTH_SOURCE_HEADER: &str = "x-openshell-auth-source"; pub const AUTH_SOURCE_SANDBOX_SECRET: &str = "sandbox-secret"; /// Truly unauthenticated methods — health probes and infrastructure. -const UNAUTHENTICATED_METHODS: &[&str] = &[ - "/openshell.v1.OpenShell/Health", - "/openshell.inference.v1.Inference/Health", -]; +const UNAUTHENTICATED_METHODS: &[&str] = &["/openshell.v1.OpenShell/Health"]; /// Path prefixes that bypass OIDC validation (gRPC reflection, health probes). const UNAUTHENTICATED_PREFIXES: &[&str] = &["/grpc.reflection.", "/grpc.health."]; diff --git a/crates/openshell-server/src/grpc/gateway_health.rs b/crates/openshell-server/src/grpc/gateway_health.rs new file mode 100644 index 000000000..e14aca2d7 --- /dev/null +++ b/crates/openshell-server/src/grpc/gateway_health.rs @@ -0,0 +1,158 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Standard gRPC health checking ([`grpc.health.v1.Health`]) for the `OpenShell` gateway. +//! +//! # What this means today (process liveness only) +//! +//! Both the legacy `openshell.v1.OpenShell/Health` RPC and +//! `grpc.health.v1.Health/Check` answer **process +//! liveness**: the gateway process is running and accepting gRPC. They do **not** prove database +//! connectivity, compute-driver readiness, inference route configuration, or per-subsystem +//! degradation. +//! +//! # `Watch` +//! +//! `Health::watch` is intentionally [`tonic::Code::Unimplemented`]. +//! A future implementation backed by [`tonic_health::server::HealthReporter`] would still be +//! **compatibility-only** until real readiness signals exist (it would emit `SERVING` once and +//! idle, or mirror `Check` without meaningful transitions). +//! +//! # Toward “real” health +//! +//! When the product needs accurate readiness: define criteria (e.g. store ping, driver health), +//! decide per-service vs global status (`openshell.v1.OpenShell` vs `openshell.inference.v1.Inference`), +//! centralize transitions (e.g. `HealthReporter`), then consider implementing `Watch`. + +use std::pin::Pin; + +use tokio_stream::Stream; +use tonic::{Request, Response, Status}; +use tonic_health::pb::health_check_response::ServingStatus; +use tonic_health::pb::health_server::{Health, HealthServer}; +use tonic_health::pb::{HealthCheckRequest, HealthCheckResponse}; + +/// Fully-qualified gRPC service name for the main `OpenShell` API (matches generated tonic paths). +pub const OPENSHELL_SERVICE_NAME: &str = "openshell.v1.OpenShell"; + +/// Fully-qualified gRPC service name for the Inference API multiplexed on the same port. +pub const INFERENCE_SERVICE_NAME: &str = "openshell.inference.v1.Inference"; + +/// Empty `CheckRequest.service` value: aggregate / process-level probe (not a separate proto service). +const AGGREGATE_SERVICE_NAME: &str = ""; + +/// Services reported as [`ServingStatus::Serving`] while [`gateway_process_accepting_rpc`] is true. +const KNOWN_SERVICES: &[&str] = &[ + AGGREGATE_SERVICE_NAME, + OPENSHELL_SERVICE_NAME, + INFERENCE_SERVICE_NAME, +]; + +fn is_registered_service(name: &str) -> bool { + KNOWN_SERVICES.contains(&name) +} + +/// Shared process-level gate used by legacy `OpenShell/Health` and standard `grpc.health.v1` `Check`. +/// +/// Currently always `true` whenever the handler runs; keep this as the single hook if the gateway +/// ever needs to report not ready while still listening (e.g. draining). +#[must_use] +pub fn gateway_process_accepting_rpc() -> bool { + true +} + +/// Minimal [`Health`] implementation: `Check` mirrors legacy liveness; `Watch` is unimplemented. +#[derive(Clone, Copy, Debug, Default)] +pub struct GatewayStandardHealth; + +impl GatewayStandardHealth { + /// Build a [`HealthServer`] with the same decoding cap as other gateway gRPC services. + #[must_use] + pub fn server(max_decoding_message_size: usize) -> HealthServer { + HealthServer::new(Self).max_decoding_message_size(max_decoding_message_size) + } +} + +#[tonic::async_trait] +impl Health for GatewayStandardHealth { + async fn check( + &self, + request: Request, + ) -> Result, Status> { + let service = request.into_inner().service; + if !is_registered_service(&service) { + // Match `tonic_health::server::HealthService`: unknown service name → `NOT_FOUND`. + return Err(Status::not_found("service not registered")); + } + if !gateway_process_accepting_rpc() { + return Ok(Response::new(HealthCheckResponse { + status: ServingStatus::NotServing as i32, + })); + } + Ok(Response::new(HealthCheckResponse { + status: ServingStatus::Serving as i32, + })) + } + + type WatchStream = + Pin> + Send + 'static>>; + + async fn watch( + &self, + _request: Request, + ) -> Result, Status> { + Err(Status::unimplemented( + "grpc.health.v1.Health.Watch is not yet supported in OpenShell; use Check", + )) + } +} + +#[cfg(test)] +mod tests { + use super::*; + use tonic::Code; + + #[tokio::test] + async fn check_serving_for_openshell_and_aggregate() { + let h = GatewayStandardHealth; + let r = h + .check(Request::new(HealthCheckRequest { + service: OPENSHELL_SERVICE_NAME.to_string(), + })) + .await + .unwrap() + .into_inner(); + assert_eq!(r.status, ServingStatus::Serving as i32); + + let r = h + .check(Request::new(HealthCheckRequest { + service: String::new(), + })) + .await + .unwrap() + .into_inner(); + assert_eq!(r.status, ServingStatus::Serving as i32); + } + + #[tokio::test] + async fn check_not_found_for_unknown_service() { + let h = GatewayStandardHealth; + let e = h + .check(Request::new(HealthCheckRequest { + service: "no.such.Service".to_string(), + })) + .await + .expect_err("unknown service"); + assert_eq!(e.code(), Code::NotFound); + } + + #[tokio::test] + async fn watch_unimplemented() { + let h = GatewayStandardHealth; + let res = h.watch(Request::new(HealthCheckRequest::default())).await; + let Err(e) = res else { + panic!("expected Watch to return an error"); + }; + assert_eq!(e.code(), Code::Unimplemented); + } +} diff --git a/crates/openshell-server/src/grpc/mod.rs b/crates/openshell-server/src/grpc/mod.rs index 31970e9c5..02852cb86 100644 --- a/crates/openshell-server/src/grpc/mod.rs +++ b/crates/openshell-server/src/grpc/mod.rs @@ -3,6 +3,7 @@ //! gRPC service implementation. +pub mod gateway_health; pub mod policy; mod provider; mod sandbox; @@ -159,8 +160,13 @@ impl OpenShell for OpenShellService { &self, _request: Request, ) -> Result, Status> { + let status = if gateway_health::gateway_process_accepting_rpc() { + ServiceStatus::Healthy + } else { + ServiceStatus::Unhealthy + }; Ok(Response::new(HealthResponse { - status: ServiceStatus::Healthy.into(), + status: status.into(), version: openshell_core::VERSION.to_string(), })) } diff --git a/crates/openshell-server/src/lib.rs b/crates/openshell-server/src/lib.rs index c024310b2..2119ad925 100644 --- a/crates/openshell-server/src/lib.rs +++ b/crates/openshell-server/src/lib.rs @@ -49,8 +49,11 @@ use tracing::{debug, error, info, warn}; use compute::{ComputeRuntime, DockerComputeConfig, VmComputeConfig}; pub use grpc::OpenShellService; +pub use grpc::gateway_health::{GatewayStandardHealth, OPENSHELL_SERVICE_NAME}; pub use http::{health_router, http_router, metrics_router}; -pub use multiplex::{MultiplexService, MultiplexedService}; +pub use multiplex::{ + GatewayGrpcRouter, MAX_GRPC_DECODE_SIZE, MultiplexService, MultiplexedService, +}; use openshell_driver_kubernetes::KubernetesComputeConfig; use persistence::Store; use sandbox_index::SandboxIndex; diff --git a/crates/openshell-server/src/multiplex.rs b/crates/openshell-server/src/multiplex.rs index 93e58d202..e1242d36b 100644 --- a/crates/openshell-server/src/multiplex.rs +++ b/crates/openshell-server/src/multiplex.rs @@ -30,8 +30,8 @@ use tower_http::request_id::{MakeRequestId, RequestId}; use tracing::Span; use crate::{ - OpenShellService, ServerState, auth::authz::AuthzPolicy, auth::oidc, http_router, - inference::InferenceService, + OpenShellService, ServerState, auth::authz::AuthzPolicy, auth::oidc, grpc::gateway_health, + http_router, inference::InferenceService, }; /// Request-ID generator that produces a UUID v4 for each inbound request. @@ -110,7 +110,7 @@ macro_rules! request_id_middleware { /// Replaces tonic's implicit 4 MB default with a conservative limit to /// bound memory allocation from a single request. Sandbox creation is /// the largest payload and well within this cap under normal use. -const MAX_GRPC_DECODE_SIZE: usize = 1_048_576; +pub const MAX_GRPC_DECODE_SIZE: usize = 1_048_576; /// Multiplexed gRPC/HTTP service. #[derive(Clone)] @@ -131,6 +131,21 @@ impl MultiplexService { where S: AsyncRead + AsyncWrite + Unpin + Send + 'static, { + let standard_health = gateway_health::GatewayStandardHealth::server(MAX_GRPC_DECODE_SIZE); + + // gRPC server reflection: OIDC treats `/grpc.reflection.*` as unauthenticated by design. + // A future hardening pass could require authentication here and/or register a reduced + // file descriptor set so only a subset of services is discoverable. + // + // Descriptor sets (see crates `tonic-health`, `tonic-reflection`): `build_v1()` registers + // `grpc.reflection.v1` by default; we add OpenShell protos from `openshell-core` and + // `grpc.health.v1` via `tonic_health::pb::FILE_DESCRIPTOR_SET`. + let reflection = tonic_reflection::server::Builder::configure() + .register_encoded_file_descriptor_set(openshell_core::proto::FILE_DESCRIPTOR_SET) + .register_encoded_file_descriptor_set(tonic_health::pb::FILE_DESCRIPTOR_SET) + .build_v1() + .map_err(|e| -> Box { Box::new(e) })?; + let openshell = OpenShellServer::new(OpenShellService::new(self.state.clone())) .max_decoding_message_size(MAX_GRPC_DECODE_SIZE); let inference = InferenceServer::new(InferenceService::new(self.state.clone())) @@ -141,7 +156,7 @@ impl MultiplexService { scopes_enabled: !oidc.scopes_claim.is_empty(), }); let grpc_service = AuthGrpcRouter::new( - GrpcRouter::new(openshell, inference), + GatewayGrpcRouter::new(standard_health, reflection, openshell, inference), self.state.oidc_cache.clone(), authz_policy, self.state.config.ssh_handshake_secret.clone(), @@ -171,40 +186,57 @@ impl MultiplexService { } } -/// Combined gRPC service that routes between `OpenShell` and Inference services -/// based on the request path prefix. +/// Combined gRPC service: standard health, reflection, `Inference`, then `OpenShell` (default). #[derive(Clone)] -pub struct GrpcRouter { - openshell: N, +pub struct GatewayGrpcRouter { + health: H, + reflection: R, + openshell: O, inference: I, } -impl GrpcRouter { - fn new(openshell: N, inference: I) -> Self { +impl GatewayGrpcRouter { + /// Combine standard health, reflection, `OpenShell`, and `Inference` gRPC services. + #[must_use] + pub fn new(health: H, reflection: R, openshell: O, inference: I) -> Self { Self { + health, + reflection, openshell, inference, } } } +const GRPC_HEALTH_V1_PREFIX: &str = "/grpc.health.v1.Health/"; +const GRPC_REFLECTION_V1_PREFIX: &str = "/grpc.reflection.v1.ServerReflection/"; const INFERENCE_PATH_PREFIX: &str = "/openshell.inference.v1.Inference/"; -impl tower::Service> for GrpcRouter +impl tower::Service> for GatewayGrpcRouter where - N: tower::Service> + Clone + Send + 'static, - N::Response: Send, - N::Future: Send, - N::Error: Send, - I: tower::Service, Response = N::Response, Error = N::Error> + H: tower::Service> + Clone + Send + 'static, + H::Response: Send, + H::Future: Send, + H::Error: Send, + R: tower::Service, Response = H::Response, Error = H::Error> + + Clone + + Send + + 'static, + R::Future: Send, + O: tower::Service, Response = H::Response, Error = H::Error> + + Clone + + Send + + 'static, + O::Future: Send, + I: tower::Service, Response = H::Response, Error = H::Error> + Clone + Send + 'static, I::Future: Send, B: Send + 'static, { - type Response = N::Response; - type Error = N::Error; + type Response = H::Response; + type Error = H::Error; type Future = Pin> + Send>>; fn poll_ready(&mut self, _cx: &mut Context<'_>) -> Poll> { @@ -212,9 +244,14 @@ where } fn call(&mut self, req: Request) -> Self::Future { - let is_inference = req.uri().path().starts_with(INFERENCE_PATH_PREFIX); - - if is_inference { + let path = req.uri().path(); + if path.starts_with(GRPC_HEALTH_V1_PREFIX) { + let mut svc = self.health.clone(); + Box::pin(async move { svc.ready().await?.call(req).await }) + } else if path.starts_with(GRPC_REFLECTION_V1_PREFIX) { + let mut svc = self.reflection.clone(); + Box::pin(async move { svc.ready().await?.call(req).await }) + } else if path.starts_with(INFERENCE_PATH_PREFIX) { let mut svc = self.inference.clone(); Box::pin(async move { svc.ready().await?.call(req).await }) } else { @@ -709,6 +746,22 @@ mod tests { ); } + #[test] + fn grpc_method_extracts_standard_health_check() { + assert_eq!( + grpc_method_from_path("/grpc.health.v1.Health/Check"), + "Check" + ); + } + + #[test] + fn grpc_method_extracts_reflection_info() { + assert_eq!( + grpc_method_from_path("/grpc.reflection.v1.ServerReflection/ServerReflectionInfo"), + "ServerReflectionInfo" + ); + } + #[test] fn grpc_method_handles_bare_path() { assert_eq!(grpc_method_from_path("Health"), "Health"); diff --git a/crates/openshell-server/tests/auth_endpoint_integration.rs b/crates/openshell-server/tests/auth_endpoint_integration.rs index e5f9dc4e9..3d7cde368 100644 --- a/crates/openshell-server/tests/auth_endpoint_integration.rs +++ b/crates/openshell-server/tests/auth_endpoint_integration.rs @@ -26,6 +26,9 @@ use serde::Deserialize; use std::net::SocketAddr; use tokio::net::TcpListener; +#[macro_use] +mod common; + // --------------------------------------------------------------------------- // Test auth handler (mirrors auth.rs but no ServerState) // --------------------------------------------------------------------------- @@ -710,16 +713,16 @@ impl openshell_core::proto::open_shell_server::OpenShell for TestOpenShell { /// Uses `serve_connection_with_upgrades` to also support WebSocket upgrades. #[tokio::test] async fn plaintext_server_accepts_grpc_and_http() { - use openshell_core::proto::{ - HealthRequest, ServiceStatus, open_shell_client::OpenShellClient, - open_shell_server::OpenShellServer, + use openshell_server::{MultiplexedService, OPENSHELL_SERVICE_NAME, health_router}; + use tonic_health::pb::{ + HealthCheckRequest, health_check_response::ServingStatus, health_client::HealthClient, }; - use openshell_server::{MultiplexedService, health_router}; let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); let addr = listener.local_addr().unwrap(); - let grpc_service = OpenShellServer::new(TestOpenShell); + let grpc_service = + gateway_test_grpc_router!(common::openshell_max_decode_server(TestOpenShell)); let http_service = health_router(); let service = MultiplexedService::new(grpc_service, http_service); @@ -739,17 +742,23 @@ async fn plaintext_server_accepts_grpc_and_http() { } }); - // gRPC over plaintext HTTP - let mut client = OpenShellClient::connect(format!("http://{addr}")) + // gRPC over plaintext HTTP (standard health) + let channel = tonic::transport::Endpoint::from_shared(format!("http://{addr}")) + .unwrap() + .connect() .await .expect("plaintext gRPC connect failed"); - let resp = client - .health(HealthRequest {}) + let mut health = HealthClient::new(channel); + let resp = health + .check(HealthCheckRequest { + service: OPENSHELL_SERVICE_NAME.to_string(), + }) .await - .expect("plaintext health RPC failed"); + .expect("plaintext health RPC failed") + .into_inner(); assert_eq!( - resp.get_ref().status, - ServiceStatus::Healthy as i32, + resp.status, + ServingStatus::Serving as i32, "gRPC health check should succeed over plaintext" ); diff --git a/crates/openshell-server/tests/common/mod.rs b/crates/openshell-server/tests/common/mod.rs new file mode 100644 index 000000000..10df7a482 --- /dev/null +++ b/crates/openshell-server/tests/common/mod.rs @@ -0,0 +1,72 @@ +// SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// SPDX-License-Identifier: Apache-2.0 + +//! Shared types for `openshell-server` integration tests. + +use openshell_core::proto::{ + GetClusterInferenceRequest, GetClusterInferenceResponse, GetInferenceBundleRequest, + GetInferenceBundleResponse, SetClusterInferenceRequest, SetClusterInferenceResponse, + inference_server::Inference, open_shell_server::OpenShellServer, +}; +use tonic::{Response, Status}; + +/// Re-export the production gateway gRPC decode cap for test stacks. +pub use openshell_server::MAX_GRPC_DECODE_SIZE as MAX_GRPC_DECODE; + +/// Wrap an `OpenShell` impl with the same gRPC decode cap as production. +#[must_use] +pub fn openshell_max_decode_server(inner: S) -> OpenShellServer { + OpenShellServer::new(inner).max_decoding_message_size(MAX_GRPC_DECODE) +} + +/// Build a [`GatewayGrpcRouter`](::openshell_server::GatewayGrpcRouter) matching production multiplex +/// wiring: `grpc.health.v1`, reflection (`OpenShell` + `grpc.health` file descriptor sets), the given +/// [`OpenShellServer`], and [`TestInference`]. +/// +/// Bring into scope with `#[macro_use] mod common;` in the integration test crate root. +macro_rules! gateway_test_grpc_router { + ($openshell:expr) => {{ + ::openshell_server::GatewayGrpcRouter::new( + ::openshell_server::GatewayStandardHealth::server( + ::openshell_server::MAX_GRPC_DECODE_SIZE, + ), + ::tonic_reflection::server::Builder::configure() + .register_encoded_file_descriptor_set(::openshell_core::proto::FILE_DESCRIPTOR_SET) + .register_encoded_file_descriptor_set(::tonic_health::pb::FILE_DESCRIPTOR_SET) + .build_v1() + .expect("OpenShell + grpc.health reflection descriptors"), + $openshell, + ::openshell_core::proto::inference_server::InferenceServer::new( + $crate::common::TestInference, + ) + .max_decoding_message_size(::openshell_server::MAX_GRPC_DECODE_SIZE), + ) + }}; +} + +#[derive(Clone, Default)] +pub struct TestInference; + +#[tonic::async_trait] +impl Inference for TestInference { + async fn get_inference_bundle( + &self, + _request: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("test")) + } + + async fn set_cluster_inference( + &self, + _request: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("test")) + } + + async fn get_cluster_inference( + &self, + _request: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("test")) + } +} diff --git a/crates/openshell-server/tests/edge_tunnel_auth.rs b/crates/openshell-server/tests/edge_tunnel_auth.rs index ed6ed398f..d2e8719eb 100644 --- a/crates/openshell-server/tests/edge_tunnel_auth.rs +++ b/crates/openshell-server/tests/edge_tunnel_auth.rs @@ -44,11 +44,15 @@ use openshell_core::proto::{ ListProvidersRequest, ListProvidersResponse, ListSandboxesRequest, ListSandboxesResponse, ProviderResponse, RevokeSshSessionRequest, RevokeSshSessionResponse, SandboxResponse, SandboxStreamEvent, ServiceStatus, SupervisorMessage, UpdateProviderRequest, - WatchSandboxRequest, - open_shell_client::OpenShellClient, - open_shell_server::{OpenShell, OpenShellServer}, + WatchSandboxRequest, open_shell_server::OpenShell, }; -use openshell_server::{MultiplexedService, TlsAcceptor, health_router}; +use openshell_server::{MultiplexedService, OPENSHELL_SERVICE_NAME, TlsAcceptor, health_router}; +use tonic_health::pb::{ + HealthCheckRequest, health_check_response::ServingStatus, health_client::HealthClient, +}; + +#[macro_use] +mod common; use rcgen::{CertificateParams, IsCa, KeyPair}; use rustls::RootCertStore; use rustls::pki_types::CertificateDer; @@ -418,7 +422,8 @@ async fn start_test_server( let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); let addr = listener.local_addr().unwrap(); - let grpc_service = OpenShellServer::new(TestOpenShell); + let grpc_service = + gateway_test_grpc_router!(common::openshell_max_decode_server(TestOpenShell)); let http_service = health_router(); let service = MultiplexedService::new(grpc_service, http_service); @@ -443,13 +448,13 @@ async fn start_test_server( (addr, handle) } -/// Build a gRPC client with mTLS (CA + client cert). -async fn grpc_client_mtls( +/// Build a gRPC channel with mTLS (CA + client cert). +async fn grpc_channel_mtls( addr: std::net::SocketAddr, ca_pem: Vec, client_cert_pem: Vec, client_key_pem: Vec, -) -> OpenShellClient { +) -> Channel { let ca_cert = tonic::transport::Certificate::from_pem(ca_pem); let identity = tonic::transport::Identity::from_pem(client_cert_pem, client_key_pem); let tls = ClientTlsConfig::new() @@ -460,15 +465,11 @@ async fn grpc_client_mtls( .expect("invalid endpoint") .tls_config(tls) .expect("failed to set tls"); - let channel = endpoint.connect().await.expect("failed to connect"); - OpenShellClient::new(channel) + endpoint.connect().await.expect("failed to connect") } -/// Build a gRPC client *without* a client cert (simulates Cloudflare tunnel). -async fn grpc_client_no_cert( - addr: std::net::SocketAddr, - ca_pem: Vec, -) -> OpenShellClient { +/// Build a gRPC channel *without* a client cert (simulates Cloudflare tunnel). +async fn grpc_channel_no_cert(addr: std::net::SocketAddr, ca_pem: Vec) -> Channel { let ca_cert = tonic::transport::Certificate::from_pem(ca_pem); let tls = ClientTlsConfig::new() .ca_certificate(ca_cert) @@ -477,17 +478,16 @@ async fn grpc_client_no_cert( .expect("invalid endpoint") .tls_config(tls) .expect("failed to set tls"); - let channel = endpoint.connect().await.expect("failed to connect"); - OpenShellClient::new(channel) + endpoint.connect().await.expect("failed to connect") } -/// Build a gRPC client without a client cert, adding a `cf-authorization` +/// Build a standard-health client without a client cert, adding a `cf-authorization` /// metadata header to every request (simulates the steady-state tunnel flow). -async fn grpc_client_with_cf_header( +async fn grpc_health_client_with_cf_header( addr: std::net::SocketAddr, ca_pem: Vec, token: &str, -) -> OpenShellClient> { +) -> HealthClient> { let ca_cert = tonic::transport::Certificate::from_pem(ca_pem); let tls = ClientTlsConfig::new() .ca_certificate(ca_cert) @@ -497,7 +497,7 @@ async fn grpc_client_with_cf_header( .tls_config(tls) .expect("failed to set tls"); let channel = endpoint.connect().await.expect("failed to connect"); - OpenShellClient::with_interceptor( + HealthClient::with_interceptor( channel, CfInterceptor { token: token.to_string(), @@ -603,16 +603,23 @@ async fn baseline_mtls_works_with_mandatory_client_certs() { let (addr, server) = start_test_server(tls_acceptor).await; - // gRPC - let mut grpc = grpc_client_mtls( + // gRPC (standard health) + let channel = grpc_channel_mtls( addr, pki.ca_cert_pem.clone(), pki.client_cert_pem.clone(), pki.client_key_pem.clone(), ) .await; - let resp = grpc.health(HealthRequest {}).await.unwrap(); - assert_eq!(resp.get_ref().status, ServiceStatus::Healthy as i32); + let mut health = HealthClient::new(channel); + let resp = health + .check(HealthCheckRequest { + service: OPENSHELL_SERVICE_NAME.to_string(), + }) + .await + .unwrap() + .into_inner(); + assert_eq!(resp.status, ServingStatus::Serving as i32); // HTTP let client = https_client_mtls(&pki); @@ -655,8 +662,12 @@ async fn baseline_no_cert_rejected_with_mandatory_mtls() { let result = endpoint.connect().await; if let Ok(channel) = result { - let mut client = OpenShellClient::new(channel); - let rpc_result = client.health(HealthRequest {}).await; + let mut health = HealthClient::new(channel); + let rpc_result = health + .check(HealthCheckRequest { + service: OPENSHELL_SERVICE_NAME.to_string(), + }) + .await; assert!( rpc_result.is_err(), "expected RPC to fail without client cert when mTLS is mandatory" @@ -684,15 +695,22 @@ async fn dual_auth_mtls_still_accepted() { let (addr, server) = start_test_server(tls_acceptor).await; // gRPC with mTLS should still work - let mut grpc = grpc_client_mtls( + let channel = grpc_channel_mtls( addr, pki.ca_cert_pem.clone(), pki.client_cert_pem.clone(), pki.client_key_pem.clone(), ) .await; - let resp = grpc.health(HealthRequest {}).await.unwrap(); - assert_eq!(resp.get_ref().status, ServiceStatus::Healthy as i32); + let mut health = HealthClient::new(channel); + let resp = health + .check(HealthCheckRequest { + service: OPENSHELL_SERVICE_NAME.to_string(), + }) + .await + .unwrap() + .into_inner(); + assert_eq!(resp.status, ServingStatus::Serving as i32); // HTTP with mTLS should still work let client = https_client_mtls(&pki); @@ -729,11 +747,18 @@ async fn tunnel_mode_no_cert_passes_tls_handshake() { let (addr, server) = start_test_server(tls_acceptor).await; // gRPC without client cert — should pass TLS handshake - let mut grpc = grpc_client_no_cert(addr, pki.ca_cert_pem.clone()).await; - let resp = grpc.health(HealthRequest {}).await.unwrap(); + let channel = grpc_channel_no_cert(addr, pki.ca_cert_pem.clone()).await; + let mut health = HealthClient::new(channel); + let resp = health + .check(HealthCheckRequest { + service: OPENSHELL_SERVICE_NAME.to_string(), + }) + .await + .unwrap() + .into_inner(); assert_eq!( - resp.get_ref().status, - ServiceStatus::Healthy as i32, + resp.status, + ServingStatus::Serving as i32, "gRPC health check should succeed without client cert in tunnel mode" ); @@ -757,9 +782,6 @@ async fn tunnel_mode_no_cert_passes_tls_handshake() { /// Simulate the steady-state Cloudflare tunnel flow: no client cert, but the /// `cf-authorization` header carries a token. At the TLS level this must /// succeed; the header is passed through to the gRPC handler. -/// -/// Note: We use a dummy token value here. When real JWT verification middleware -/// is added, this test should use a properly-signed test JWT. #[tokio::test] async fn tunnel_mode_cf_authorization_header_reaches_server() { install_rustls_provider(); @@ -776,13 +798,18 @@ async fn tunnel_mode_cf_authorization_header_reaches_server() { let (addr, server) = start_test_server(tls_acceptor).await; // gRPC without client cert but with cf-authorization header - let mut grpc = - grpc_client_with_cf_header(addr, pki.ca_cert_pem.clone(), "eyJhbGciOiJSUzI1NiJ9.test") - .await; - let resp = grpc.health(HealthRequest {}).await.unwrap(); + let token = "eyJhbGciOiJSUzI1NiJ9.test"; + let mut health = grpc_health_client_with_cf_header(addr, pki.ca_cert_pem.clone(), token).await; + let resp = health + .check(HealthCheckRequest { + service: OPENSHELL_SERVICE_NAME.to_string(), + }) + .await + .unwrap() + .into_inner(); assert_eq!( - resp.get_ref().status, - ServiceStatus::Healthy as i32, + resp.status, + ServingStatus::Serving as i32, "gRPC with cf-authorization header should succeed in tunnel mode" ); @@ -844,8 +871,12 @@ async fn tunnel_mode_rogue_cert_still_rejected() { let result = endpoint.connect().await; if let Ok(channel) = result { - let mut client = OpenShellClient::new(channel); - let rpc_result = client.health(HealthRequest {}).await; + let mut health = HealthClient::new(channel); + let rpc_result = health + .check(HealthCheckRequest { + service: OPENSHELL_SERVICE_NAME.to_string(), + }) + .await; assert!( rpc_result.is_err(), "expected RPC to fail with rogue client cert even in tunnel mode" diff --git a/crates/openshell-server/tests/multiplex_integration.rs b/crates/openshell-server/tests/multiplex_integration.rs index c91dd6061..c9a864ec1 100644 --- a/crates/openshell-server/tests/multiplex_integration.rs +++ b/crates/openshell-server/tests/multiplex_integration.rs @@ -18,15 +18,24 @@ use openshell_core::proto::{ ListProvidersRequest, ListProvidersResponse, ListSandboxesRequest, ListSandboxesResponse, ProviderResponse, RevokeSshSessionRequest, RevokeSshSessionResponse, SandboxResponse, SandboxStreamEvent, ServiceStatus, SupervisorMessage, UpdateProviderRequest, - WatchSandboxRequest, - open_shell_client::OpenShellClient, - open_shell_server::{OpenShell, OpenShellServer}, + WatchSandboxRequest, open_shell_client::OpenShellClient, open_shell_server::OpenShell, }; -use openshell_server::{MultiplexedService, health_router}; +use openshell_server::{MultiplexedService, OPENSHELL_SERVICE_NAME, health_router}; + +#[macro_use] +mod common; use tokio::net::TcpListener; use tokio::sync::mpsc; -use tokio_stream::wrappers::ReceiverStream; +use tokio_stream::{StreamExt, wrappers::ReceiverStream}; +use tonic::Code; use tonic::{Response, Status}; +use tonic_health::pb::{ + HealthCheckRequest, health_check_response::ServingStatus, health_client::HealthClient, +}; +use tonic_reflection::pb::v1::{ + ServerReflectionRequest, server_reflection_client::ServerReflectionClient, + server_reflection_request::MessageRequest, server_reflection_response::MessageResponse, +}; #[derive(Clone, Default)] struct TestOpenShell; @@ -314,9 +323,9 @@ async fn serves_grpc_and_http_on_same_port() { let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); let addr = listener.local_addr().unwrap(); - let grpc_service = OpenShellServer::new(TestOpenShell); + let grpc_router = gateway_test_grpc_router!(common::openshell_max_decode_server(TestOpenShell)); let http_service = health_router(); - let service = MultiplexedService::new(grpc_service, http_service); + let service = MultiplexedService::new(grpc_router, http_service); let server = tokio::spawn(async move { loop { @@ -332,11 +341,20 @@ async fn serves_grpc_and_http_on_same_port() { } }); - let mut client = OpenShellClient::connect(format!("http://{addr}")) + let channel = tonic::transport::Endpoint::from_shared(format!("http://{addr}")) + .unwrap() + .connect() .await .unwrap(); - let response = client.health(HealthRequest {}).await.unwrap(); - assert_eq!(response.get_ref().status, ServiceStatus::Healthy as i32); + let mut health = HealthClient::new(channel); + let response = health + .check(HealthCheckRequest { + service: OPENSHELL_SERVICE_NAME.to_string(), + }) + .await + .unwrap() + .into_inner(); + assert_eq!(response.status, ServingStatus::Serving as i32); let stream = tokio::net::TcpStream::connect(addr).await.unwrap(); let (mut sender, conn) = hyper::client::conn::http1::Builder::new() @@ -365,6 +383,7 @@ async fn serves_grpc_and_http_on_same_port() { /// (which is crate-private). Production middleware composition and /// layer ordering are covered by the unit tests in `multiplex::tests`. #[tokio::test] +#[allow(deprecated)] // Legacy `OpenShell/Health` still used here to exercise response metadata paths. async fn grpc_response_propagates_request_id() { use tower::ServiceBuilder; use tower_http::request_id::{ @@ -384,6 +403,8 @@ async fn grpc_response_propagates_request_id() { let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); let addr = listener.local_addr().unwrap(); + let grpc_router = gateway_test_grpc_router!(common::openshell_max_decode_server(TestOpenShell)); + let x_request_id = http::HeaderName::from_static("x-request-id"); let grpc_service = ServiceBuilder::new() .layer(SetRequestIdLayer::new( @@ -391,11 +412,11 @@ async fn grpc_response_propagates_request_id() { TestUuidRequestId, )) .layer(PropagateRequestIdLayer::new(x_request_id)) - .service(OpenShellServer::new(TestOpenShell)); + .service(grpc_router); let http_service = health_router(); let service = MultiplexedService::new(grpc_service, http_service); - tokio::spawn(async move { + let server = tokio::spawn(async move { loop { let Ok((stream, _)) = listener.accept().await else { continue; @@ -429,4 +450,105 @@ async fn grpc_response_propagates_request_id() { let response = client.health(request).await.unwrap(); let echoed = response.metadata().get("x-request-id").unwrap(); assert_eq!(echoed.to_str().unwrap(), "grpc-corr-id"); + + server.abort(); +} + +#[tokio::test] +async fn standard_grpc_health_reflection_multiplexed() { + let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); + let addr = listener.local_addr().unwrap(); + + let grpc_router = gateway_test_grpc_router!(common::openshell_max_decode_server(TestOpenShell)); + let http_service = health_router(); + let service = MultiplexedService::new(grpc_router, http_service); + + let server = tokio::spawn(async move { + loop { + let Ok((stream, _)) = listener.accept().await else { + continue; + }; + let svc = service.clone(); + tokio::spawn(async move { + let _ = Builder::new(TokioExecutor::new()) + .serve_connection(TokioIo::new(stream), svc) + .await; + }); + } + }); + + let endpoint = format!("http://{addr}"); + let channel = tonic::transport::Endpoint::from_shared(endpoint.clone()) + .unwrap() + .connect() + .await + .unwrap(); + let mut health = HealthClient::new(channel); + + let check = health + .check(HealthCheckRequest { + service: OPENSHELL_SERVICE_NAME.to_string(), + }) + .await + .expect("health check") + .into_inner(); + assert_eq!(check.status, ServingStatus::Serving as i32); + + let check_agg = health + .check(HealthCheckRequest { + service: String::new(), + }) + .await + .expect("aggregate health check") + .into_inner(); + assert_eq!(check_agg.status, ServingStatus::Serving as i32); + + let watch_res = health + .watch(HealthCheckRequest { + service: OPENSHELL_SERVICE_NAME.to_string(), + }) + .await; + let Err(watch_err) = watch_res else { + panic!("watch should fail with UNIMPLEMENTED"); + }; + assert_eq!(watch_err.code(), Code::Unimplemented); + + let channel2 = tonic::transport::Endpoint::from_shared(endpoint) + .unwrap() + .connect() + .await + .unwrap(); + let mut refl = ServerReflectionClient::new(channel2); + + let req = tonic::Request::new(tokio_stream::once(ServerReflectionRequest { + host: String::new(), + message_request: Some(MessageRequest::ListServices(String::new())), + })); + let mut inbound = refl + .server_reflection_info(req) + .await + .expect("reflection") + .into_inner(); + let msg = inbound + .next() + .await + .expect("stream item") + .expect("reflection response"); + let response = msg.message_response.expect("message_response"); + match response { + MessageResponse::ListServicesResponse(list) => { + let names: Vec<&str> = list.service.iter().map(|s| s.name.as_str()).collect(); + assert!( + names.contains(&"openshell.v1.OpenShell"), + "expected openshell.v1.OpenShell in reflection list, got {names:?}" + ); + assert!( + names.contains(&"grpc.health.v1.Health"), + "expected grpc.health.v1.Health in reflection list, got {names:?}" + ); + } + other => panic!("unexpected reflection response: {other:?}"), + } + + server.abort(); } diff --git a/crates/openshell-server/tests/multiplex_tls_integration.rs b/crates/openshell-server/tests/multiplex_tls_integration.rs index 6942d66f7..c7221505d 100644 --- a/crates/openshell-server/tests/multiplex_tls_integration.rs +++ b/crates/openshell-server/tests/multiplex_tls_integration.rs @@ -20,11 +20,15 @@ use openshell_core::proto::{ ListProvidersRequest, ListProvidersResponse, ListSandboxesRequest, ListSandboxesResponse, ProviderResponse, RevokeSshSessionRequest, RevokeSshSessionResponse, SandboxResponse, SandboxStreamEvent, ServiceStatus, SupervisorMessage, UpdateProviderRequest, - WatchSandboxRequest, - open_shell_client::OpenShellClient, - open_shell_server::{OpenShell, OpenShellServer}, + WatchSandboxRequest, open_shell_server::OpenShell, }; -use openshell_server::{MultiplexedService, TlsAcceptor, health_router}; +use openshell_server::{MultiplexedService, OPENSHELL_SERVICE_NAME, TlsAcceptor, health_router}; +use tonic_health::pb::{ + HealthCheckRequest, health_check_response::ServingStatus, health_client::HealthClient, +}; + +#[macro_use] +mod common; use rcgen::{CertificateParams, IsCa, KeyPair}; use rustls::RootCertStore; use rustls::pki_types::CertificateDer; @@ -398,7 +402,8 @@ async fn start_test_server( let listener = TcpListener::bind("127.0.0.1:0").await.unwrap(); let addr = listener.local_addr().unwrap(); - let grpc_service = OpenShellServer::new(TestOpenShell); + let grpc_service = + gateway_test_grpc_router!(common::openshell_max_decode_server(TestOpenShell)); let http_service = health_router(); let service = MultiplexedService::new(grpc_service, http_service); @@ -423,13 +428,13 @@ async fn start_test_server( (addr, handle) } -/// Build a gRPC client with mTLS (CA + client cert). -async fn grpc_client_mtls( +/// Build a gRPC channel with mTLS (CA + client cert). +async fn grpc_channel_mtls( addr: std::net::SocketAddr, ca_pem: Vec, client_cert_pem: Vec, client_key_pem: Vec, -) -> OpenShellClient { +) -> Channel { let ca_cert = tonic::transport::Certificate::from_pem(ca_pem); let identity = tonic::transport::Identity::from_pem(client_cert_pem, client_key_pem); let tls = ClientTlsConfig::new() @@ -440,8 +445,7 @@ async fn grpc_client_mtls( .expect("invalid endpoint") .tls_config(tls) .expect("failed to set tls"); - let channel = endpoint.connect().await.expect("failed to connect"); - OpenShellClient::new(channel) + endpoint.connect().await.expect("failed to connect") } fn build_tls_root(cert_pem: &[u8]) -> RootCertStore { @@ -505,16 +509,23 @@ async fn serves_grpc_and_http_over_tls_on_same_port() { let (addr, server) = start_test_server(tls_acceptor).await; - // gRPC with mTLS - let mut grpc = grpc_client_mtls( + // gRPC with mTLS (standard health) + let channel = grpc_channel_mtls( addr, pki.ca_cert_pem.clone(), pki.client_cert_pem.clone(), pki.client_key_pem.clone(), ) .await; - let response = grpc.health(HealthRequest {}).await.unwrap(); - assert_eq!(response.get_ref().status, ServiceStatus::Healthy as i32); + let mut health = HealthClient::new(channel); + let response = health + .check(HealthCheckRequest { + service: OPENSHELL_SERVICE_NAME.to_string(), + }) + .await + .unwrap() + .into_inner(); + assert_eq!(response.status, ServingStatus::Serving as i32); // HTTP with mTLS let client = https_client_mtls(&pki); @@ -544,15 +555,22 @@ async fn mtls_valid_client_cert_accepted() { let (addr, server) = start_test_server(tls_acceptor).await; - let mut grpc = grpc_client_mtls( + let channel = grpc_channel_mtls( addr, pki.ca_cert_pem.clone(), pki.client_cert_pem.clone(), pki.client_key_pem.clone(), ) .await; - let response = grpc.health(HealthRequest {}).await.unwrap(); - assert_eq!(response.get_ref().status, ServiceStatus::Healthy as i32); + let mut health = HealthClient::new(channel); + let response = health + .check(HealthCheckRequest { + service: OPENSHELL_SERVICE_NAME.to_string(), + }) + .await + .unwrap() + .into_inner(); + assert_eq!(response.status, ServingStatus::Serving as i32); server.abort(); } @@ -586,8 +604,12 @@ async fn mtls_no_client_cert_rejected() { // Connection should fail at the TLS handshake level or shortly after. // The exact error depends on timing -- it may fail on connect or on first RPC. if let Ok(channel) = result { - let mut client = OpenShellClient::new(channel); - let rpc_result = client.health(HealthRequest {}).await; + let mut health = HealthClient::new(channel); + let rpc_result = health + .check(HealthCheckRequest { + service: OPENSHELL_SERVICE_NAME.to_string(), + }) + .await; assert!( rpc_result.is_err(), "expected RPC to fail without client cert" @@ -651,8 +673,12 @@ async fn mtls_wrong_ca_client_cert_rejected() { let result = endpoint.connect().await; if let Ok(channel) = result { - let mut client = OpenShellClient::new(channel); - let rpc_result = client.health(HealthRequest {}).await; + let mut health = HealthClient::new(channel); + let rpc_result = health + .check(HealthCheckRequest { + service: OPENSHELL_SERVICE_NAME.to_string(), + }) + .await; assert!( rpc_result.is_err(), "expected RPC to fail with rogue client cert" diff --git a/crates/openshell-server/tests/ws_tunnel_integration.rs b/crates/openshell-server/tests/ws_tunnel_integration.rs index f196edb07..eca5fb241 100644 --- a/crates/openshell-server/tests/ws_tunnel_integration.rs +++ b/crates/openshell-server/tests/ws_tunnel_integration.rs @@ -1,6 +1,9 @@ // SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. // SPDX-License-Identifier: Apache-2.0 +#![allow(deprecated)] +// Transport-focused tests still call legacy `openshell.v1.OpenShell/Health` over the tunnel. + //! Integration tests for the WebSocket tunnel (`/_ws_tunnel`). //! //! These tests verify that gRPC traffic can be tunneled through a WebSocket diff --git a/crates/openshell-tui/src/lib.rs b/crates/openshell-tui/src/lib.rs index 8571ebbe1..ff6aa62e0 100644 --- a/crates/openshell-tui/src/lib.rs +++ b/crates/openshell-tui/src/lib.rs @@ -2156,6 +2156,8 @@ fn spawn_delete_sandbox_setting(app: &App, tx: mpsc::UnboundedSender) { }); } +/// Polls legacy `OpenShell/Health` until the TUI moves to `grpc.health.v1.Health/Check`. +#[allow(deprecated)] async fn refresh_health(app: &mut App) { let req = openshell_core::proto::HealthRequest {}; let result = tokio::time::timeout(Duration::from_secs(5), app.client.health(req)).await; diff --git a/crates/openshell-vm/src/health.rs b/crates/openshell-vm/src/health.rs index c24015bf1..ad61fb37a 100644 --- a/crates/openshell-vm/src/health.rs +++ b/crates/openshell-vm/src/health.rs @@ -48,6 +48,10 @@ fn build_tls_config(ca: Vec, cert: Vec, key: Vec) -> ClientTlsConfig /// /// Returns `Ok(())` if the health check succeeds (service reports healthy), /// or an error describing why the check failed. +/// +/// Uses legacy `openshell.v1.OpenShell/Health` until this crate switches to +/// `grpc.health.v1.Health/Check`. +#[allow(deprecated)] async fn grpc_health_check(gateway_port: u16, gateway_name: &str) -> Result<(), String> { // Load mTLS materials let (ca, cert, key) = load_mtls_materials(gateway_name)?; diff --git a/proto/openshell.proto b/proto/openshell.proto index 529ee0629..8f5aa72cf 100644 --- a/proto/openshell.proto +++ b/proto/openshell.proto @@ -19,7 +19,12 @@ import "sandbox.proto"; // resource messages before persisting or returning them to clients. service OpenShell { // Check the health of the service. - rpc Health(HealthRequest) returns (HealthResponse); + // + // Deprecated: prefer the standard `grpc.health.v1.Health/Check` RPC for interoperability + // (e.g. `grpcurl`, Kubernetes gRPC probes). This RPC remains for backward compatibility. + rpc Health(HealthRequest) returns (HealthResponse) { + option deprecated = true; + } // Create a new sandbox. rpc CreateSandbox(CreateSandboxRequest) returns (SandboxResponse); @@ -166,10 +171,18 @@ service OpenShell { } // Health check request. -message HealthRequest {} +// +// Deprecated: use standard gRPC health (`grpc.health.v1.Health/Check`) instead. +message HealthRequest { + option deprecated = true; +} // Health check response. +// +// Deprecated: use standard gRPC health (`grpc.health.v1.Health/Check`) instead. message HealthResponse { + option deprecated = true; + // Service status. ServiceStatus status = 1;