diff --git a/.gitlab/scripts/poll_e2e.sh b/.gitlab/scripts/poll_e2e.sh index 938fd06e3..bd40442df 100755 --- a/.gitlab/scripts/poll_e2e.sh +++ b/.gitlab/scripts/poll_e2e.sh @@ -21,6 +21,8 @@ URL="${CI_API_V4_URL}/projects/${CI_PROJECT_ID}/pipelines/${CI_PIPELINE_ID}/brid echo "Fetching E2E job status from: $URL" +PRINTED_PIPELINE_URL=0 + while true; do HTTP_STATUS=$(curl -s -o /tmp/e2e_response.json -w "%{http_code}" --header "PRIVATE-TOKEN: ${GITLAB_TOKEN}" "$URL") if [ "$HTTP_STATUS" != "200" ]; then @@ -30,6 +32,13 @@ while true; do fi RESPONSE=$(cat /tmp/e2e_response.json) E2E_JOB_STATUS=$(echo "$RESPONSE" | jq -r --arg name "$E2E_JOB_NAME" '.[] | select(.name==$name) | .downstream_pipeline.status') + if [ "$PRINTED_PIPELINE_URL" -eq 0 ]; then + E2E_PIPELINE_URL=$(echo "$RESPONSE" | jq -r --arg name "$E2E_JOB_NAME" '.[] | select(.name==$name) | .downstream_pipeline.web_url // empty') + if [ -n "$E2E_PIPELINE_URL" ]; then + echo "Polling downstream pipeline for \"$E2E_JOB_NAME\": $E2E_PIPELINE_URL" + PRINTED_PIPELINE_URL=1 + fi + fi echo -n "E2E job status: $E2E_JOB_STATUS, " if [ "$E2E_JOB_STATUS" == "success" ]; then echo "E2E tests completed successfully" diff --git a/bottlecap/src/lifecycle/invocation/processor.rs b/bottlecap/src/lifecycle/invocation/processor.rs index ec1af99ab..16e28bdde 100644 --- a/bottlecap/src/lifecycle/invocation/processor.rs +++ b/bottlecap/src/lifecycle/invocation/processor.rs @@ -548,18 +548,25 @@ impl Processor { } } - self.process_on_platform_runtime_done(request_id, status, tags_provider, trace_sender) - .await; + self.process_on_platform_runtime_done( + request_id, + status, + error_type.as_deref(), + tags_provider, + trace_sender, + ) + .await; } async fn process_on_platform_runtime_done( &mut self, request_id: &String, status: Status, + error_type: Option<&str>, tags_provider: Arc, trace_sender: Arc, ) { - let context = self.enrich_ctx_at_platform_done(request_id, status); + let context = self.enrich_ctx_at_platform_done(request_id, status, error_type); if self.tracer_detected { if let Some(ctx) = context @@ -579,6 +586,7 @@ impl Processor { &mut self, request_id: &String, status: Status, + error_type: Option<&str>, ) -> Option { let Some(context) = self.context_buffer.get_mut(request_id) else { debug!( @@ -607,6 +615,29 @@ impl Processor { .insert("error.type".to_string(), "Timeout".to_string()); } + // Handle OOM error case. When Lambda SIGKILLs the function process for hitting + // the memory limit, the tracer dies before flushing end-of-invocation headers, + // leaving trace_id/span_id at 0 and dropping the span at the send guard in + // process_on_platform_runtime_done. Mirror the Timeout path so the invocation + // still gets a span. + if status == Status::Error && error_type == Some("Runtime.OutOfMemory") { + if context.invocation_span.trace_id == 0 { + context.invocation_span.trace_id = generate_span_id(); + } + if context.invocation_span.span_id == 0 { + context.invocation_span.span_id = generate_span_id(); + } + context.invocation_span.error = 1; + context.invocation_span.meta.insert( + "error.msg".to_string(), + "Datadog detected an Out of Memory error".to_string(), + ); + context + .invocation_span + .meta + .insert("error.type".to_string(), "OutOfMemory".to_string()); + } + // Process enhanced metrics if available if let Some(offsets) = &context.enhanced_metric_data { self.enhanced_metrics.set_cpu_utilization_enhanced_metrics( @@ -2391,7 +2422,7 @@ mod tests { p.on_platform_start(request_id.clone(), chrono::Utc::now()); let ctx = p - .enrich_ctx_at_platform_done(&request_id, Status::Success) + .enrich_ctx_at_platform_done(&request_id, Status::Success, None) .expect("context must be present"); assert_eq!( @@ -2409,7 +2440,7 @@ mod tests { p.on_platform_start(request_id.clone(), chrono::Utc::now()); let ctx = p - .enrich_ctx_at_platform_done(&request_id, Status::Success) + .enrich_ctx_at_platform_done(&request_id, Status::Success, None) .expect("context must be present"); assert!( @@ -2436,7 +2467,7 @@ mod tests { .insert("_dd.appsec.enabled".to_string(), 0.0); let ctx = p - .enrich_ctx_at_platform_done(&request_id, Status::Success) + .enrich_ctx_at_platform_done(&request_id, Status::Success, None) .expect("context must be present"); assert_eq!( @@ -2445,4 +2476,69 @@ mod tests { "pre-existing _dd.appsec.enabled value must not be overwritten" ); } + + #[tokio::test] + async fn enrich_ctx_synthesizes_span_on_runtime_oom() { + let mut p = setup(); + let request_id = String::from("req-oom"); + p.on_invoke_event(request_id.clone()); + p.on_platform_start(request_id.clone(), chrono::Utc::now()); + + // Simulate the tracer never having flushed: trace_id/span_id remain 0 + // because the function process was SIGKILL'd before UniversalInstrumentationEnd. + let ctx = p + .enrich_ctx_at_platform_done(&request_id, Status::Error, Some("Runtime.OutOfMemory")) + .expect("context must be present"); + + assert_ne!( + ctx.invocation_span.trace_id, 0, + "trace_id must be synthesized for OOM" + ); + assert_ne!( + ctx.invocation_span.span_id, 0, + "span_id must be synthesized for OOM" + ); + assert_eq!(ctx.invocation_span.error, 1, "span must be marked error"); + assert_eq!( + ctx.invocation_span + .meta + .get("error.type") + .map(String::as_str), + Some("OutOfMemory"), + ); + assert_eq!( + ctx.invocation_span + .meta + .get("error.msg") + .map(String::as_str), + Some("Datadog detected an Out of Memory error"), + ); + } + + #[tokio::test] + async fn enrich_ctx_does_not_synthesize_span_for_non_oom_error() { + let mut p = setup(); + let request_id = String::from("req-other-error"); + p.on_invoke_event(request_id.clone()); + p.on_platform_start(request_id.clone(), chrono::Utc::now()); + + let ctx = p + .enrich_ctx_at_platform_done(&request_id, Status::Error, Some("Runtime.UnknownReason")) + .expect("context must be present"); + + assert_eq!( + ctx.invocation_span.trace_id, 0, + "trace_id must not be synthesized for non-OOM error" + ); + assert_eq!( + ctx.invocation_span.span_id, 0, + "span_id must not be synthesized for non-OOM error" + ); + assert_eq!( + ctx.invocation_span.error, 0, + "non-OOM error must not be marked error here" + ); + assert!(!ctx.invocation_span.meta.contains_key("error.type")); + assert!(!ctx.invocation_span.meta.contains_key("error.msg")); + } }