Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .server-changes/nodejs-heap-metrics.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
area: webapp
type: improvement
---

Add per-worker Node.js heap and process memory metrics to the OTel meter — `nodejs.memory.heap.used`, `nodejs.memory.heap.total`, `nodejs.memory.heap.limit`, `nodejs.memory.external`, `nodejs.memory.array_buffers`, `nodejs.memory.rss`. Host-metrics only publishes RSS, which overstates the V8 heap by the external + native footprint; the new gauges give direct heap visibility per cluster worker so `NODE_MAX_OLD_SPACE_SIZE` can be sized against observed heap peaks rather than RSS.
63 changes: 63 additions & 0 deletions apps/webapp/app/v3/tracer.server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ import { ATTR_SERVICE_NAME } from "@opentelemetry/semantic-conventions";
import { PrismaInstrumentation } from "@prisma/instrumentation";
import { HostMetrics } from "@opentelemetry/host-metrics";
import { AwsInstrumentation as AwsSdkInstrumentation } from "@opentelemetry/instrumentation-aws-sdk";
import v8 from "node:v8";
import { awsEcsDetector, awsEc2Detector } from "@opentelemetry/resource-detector-aws";
import {
detectResources,
Expand Down Expand Up @@ -630,6 +631,39 @@ function configureNodejsMetrics({ meter }: { meter: Meter }) {
unit: "1", // OpenTelemetry convention for ratios
});

// V8 heap + process memory. `NODE_MAX_OLD_SPACE_SIZE` caps V8 old space
// (reflected in `heap.limit`), but doesn't cap external/arrayBuffers/native
// memory — which is why RSS can exceed the heap total. Tracking all of these
// per-worker lets us size `NODE_MAX_OLD_SPACE_SIZE` against observed heap
// peaks rather than RSS (which overstates heap by the external + native
// footprint). `host-metrics` already publishes `process.memory.usage`
// (RSS), but we duplicate it under `nodejs.memory.rss` so all the memory
// numbers land in the same scope and are queryable together.
const heapUsedGauge = meter.createObservableGauge("nodejs.memory.heap.used", {
description: "V8 heap actively in use after the last GC",
unit: "By",
});
const heapTotalGauge = meter.createObservableGauge("nodejs.memory.heap.total", {
description: "V8 heap reserved (young + old generations)",
unit: "By",
});
const heapLimitGauge = meter.createObservableGauge("nodejs.memory.heap.limit", {
description: "V8 heap size limit (configured via --max-old-space-size)",
unit: "By",
});
const externalMemoryGauge = meter.createObservableGauge("nodejs.memory.external", {
description: "Memory used by C++ objects bound to JS (Buffer, etc.)",
unit: "By",
});
const arrayBuffersGauge = meter.createObservableGauge("nodejs.memory.array_buffers", {
description: "Memory allocated for ArrayBuffers and SharedArrayBuffers",
unit: "By",
});
const rssGauge = meter.createObservableGauge("nodejs.memory.rss", {
description: "Resident set size — total physical memory held by the process",
unit: "By",
});

// Get UV threadpool size (defaults to 4 if not set)
const uvThreadpoolSize = parseInt(process.env.UV_THREADPOOL_SIZE || "4", 10);

Expand Down Expand Up @@ -683,10 +717,16 @@ function configureNodejsMetrics({ meter }: { meter: Meter }) {
currentEventLoopUtilization,
lastEventLoopUtilization
);
// Rotate the baseline so the next collection reports per-interval
// utilization rather than the cumulative average from process start.
lastEventLoopUtilization = currentEventLoopUtilization;

// diff.utilization is between 0 and 1 (fraction of time "active")
const utilization = Number.isFinite(diff.utilization) ? diff.utilization : 0;
Comment thread
ericallam marked this conversation as resolved.

const mem = process.memoryUsage();
const heapStats = v8.getHeapStatistics();

return {
threadpoolSize: uvThreadpoolSize,
handlesByType,
Expand All @@ -702,6 +742,14 @@ function configureNodejsMetrics({ meter }: { meter: Meter }) {
p99: eventLoopLagP99?.values?.[0]?.value ?? 0,
utilization,
},
memory: {
heapUsed: mem.heapUsed,
heapTotal: mem.heapTotal,
heapLimit: heapStats.heap_size_limit,
external: mem.external,
arrayBuffers: mem.arrayBuffers,
rss: mem.rss,
},
};
}

Expand All @@ -714,6 +762,7 @@ function configureNodejsMetrics({ meter }: { meter: Meter }) {
requestsByType,
requestsTotal,
eventLoop,
memory,
} = await readNodeMetrics();

// Observe UV threadpool size
Expand All @@ -739,6 +788,14 @@ function configureNodejsMetrics({ meter }: { meter: Meter }) {
res.observe(eventLoopLagP90Gauge, eventLoop.p90);
res.observe(eventLoopLagP99Gauge, eventLoop.p99);
res.observe(eluGauge, eventLoop.utilization);

// Observe memory metrics (bytes)
res.observe(heapUsedGauge, memory.heapUsed);
res.observe(heapTotalGauge, memory.heapTotal);
res.observe(heapLimitGauge, memory.heapLimit);
res.observe(externalMemoryGauge, memory.external);
res.observe(arrayBuffersGauge, memory.arrayBuffers);
res.observe(rssGauge, memory.rss);
},
[
uvThreadpoolSizeGauge,
Expand All @@ -753,6 +810,12 @@ function configureNodejsMetrics({ meter }: { meter: Meter }) {
eventLoopLagP90Gauge,
eventLoopLagP99Gauge,
eluGauge,
heapUsedGauge,
heapTotalGauge,
heapLimitGauge,
externalMemoryGauge,
arrayBuffersGauge,
rssGauge,
]
);
}
Expand Down
Loading