diff --git a/.jules/bolt.md b/.jules/bolt.md index 581a930..6da0456 100644 --- a/.jules/bolt.md +++ b/.jules/bolt.md @@ -224,3 +224,9 @@ In Express applications handling cross-origin traffic, placing the `cors()` midd Action: Moved the `cors()` middleware before `helmet()` in the global middleware stack. This allows `OPTIONS` preflight requests to be intercepted and resolved immediately by `cors`, bypassing unnecessary security header processing and improving baseline latency. Consolidated the `res.setHeader` calls in the JSON error handler into a single global setter. + +## 2026-05-12 — Compression Middleware Overhead +Learning: +Global `compression()` middleware introduces significant CPU and memory allocation overhead on unhandled routes (404s) and lightweight responses. +Action: +Always apply `compression()` as a route-specific middleware only to endpoints that return large payloads. diff --git a/.jules/warden.md b/.jules/warden.md index 799a948..e93c169 100644 --- a/.jules/warden.md +++ b/.jules/warden.md @@ -202,3 +202,9 @@ Observation / Pruned: Assessed repository state following previous optimizations. Since no new functional or architectural changes were introduced by the prior agent run, no new release cut or version bump is warranted. Maintained semantic integrity by preserving the existing v1.1.31 state. Zero dead code identified and pruned. Alignment / Deferred: Release deferred. Repository state verified and stable. + +2026-05-12 — Assessment & Lifecycle +Observation / Pruned: +Assessed BOLT's optimization converting `compression()` to a route-specific middleware. This prevents unhandled routes and simple endpoints from undergoing redundant compression overhead. Tests verified. Checked for unused dependencies and dead code. Zero dead code or unused files found. +Alignment / Deferred: +Appended release notes. Version bumped to 1.1.32. 
diff --git a/CHANGELOG.md b/CHANGELOG.md index 42005e3..05f2194 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## [1.1.32] - 2026-05-12 +### Changed +* **[Performance]:** Converted `compression()` from a global middleware to a route-specific middleware on the `/v1/chat/completions` endpoint. This prevents unhandled routes (404s) and lightweight responses from incurring unnecessary CPU overhead and memory allocation for compression. + ## [1.1.31] - 2026-05-04 ### Changed * **[Performance]:** Moved the `cors()` middleware to be above `helmet()` in the global middleware stack. This allows `OPTIONS` preflight requests to be intercepted and resolved immediately by `cors`, bypassing unnecessary security header processing. Also, consolidated the `res.setHeader` calls in the JSON error handler. diff --git a/package-lock.json b/package-lock.json index 835a72e..ba19c59 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "one-api", - "version": "1.1.31", + "version": "1.1.32", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "one-api", - "version": "1.1.31", + "version": "1.1.32", "license": "MIT", "dependencies": { "compression": "^1.8.1", diff --git a/package.json b/package.json index 96ec14d..8af0472 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "one-api", - "version": "1.1.31", + "version": "1.1.32", "description": "One API to rule them all. Unified gateway for 20+ LLM providers. 
OpenAI-compatible, single binary, zero config.", "main": "src/index.js", "scripts": { diff --git a/src/index.js b/src/index.js index fe4d27b..c094020 100644 --- a/src/index.js +++ b/src/index.js @@ -50,7 +50,7 @@ app.use((req, res, next) => { -// Compress all responses to reduce bandwidth and latency -app.use(compression()); +// Shared compression middleware; attached per-route (see /v1/chat/completions) instead of globally +const compressMiddleware = compression(); const ERROR_INVALID_JSON = Buffer.from(JSON.stringify({ error: 'Invalid JSON payload' })); const ERROR_PAYLOAD_TOO_LARGE = Buffer.from(JSON.stringify({ error: 'Payload too large' })); @@ -96,7 +96,7 @@ const ERROR_MALFORMED_MESSAGE = Buffer.from(JSON.stringify({ error: 'Malformed m // Set a larger JSON limit since LLM contexts can be quite large const jsonParser = express.json({ limit: '10mb' }); -app.post('/v1/chat/completions', jsonParser, (req, res) => { +app.post('/v1/chat/completions', jsonParser, compressMiddleware, (req, res) => { const body = req.body; const model = body ? body.model : undefined; const messages = body ? body.messages : undefined;