coder · ibetitsmike · Apr 25, 2026
diff --git a/providers/openai/openai_test.go b/providers/openai/openai_test.go
@@ -4016,20 +4016,69 @@ func TestResponsesToPrompt_ReasoningWithStore(t *testing.T) {
 		},
 	}
 
-	t.Run("store true skips reasoning", func(t *testing.T) {
+	t.Run("store true emits item_reference for reasoning", func(t *testing.T) {
 		t.Parallel()
 
 		input, warnings := toResponsesPrompt(prompt, "system", true)
 		require.Empty(t, warnings)
 
-		// With store=true: user, assistant text (reasoning
-		// skipped), follow-up user.
-		require.Len(t, input, 3)
+		// With store=true the reasoning item is replayed as an
+		// item_reference so any following provider-executed
+		// item (e.g. web_search_call) pairs correctly. Order:
+		// user, item_reference(rs_*), assistant text, user.
+		require.Len(t, input, 4)
 
-		// Verify no reasoning item leaked through.
+		// No inline OfReasoning items should ever appear; the
+		// API rejects them when store is enabled.
 		for _, item := range input {
 			require.Nil(t, item.OfReasoning,
-				"reasoning items must not appear when store=true")
+				"reasoning items must not appear inline when store=true")
+		}
+
+		require.NotNil(t, input[1].OfItemReference,
+			"expected reasoning replayed via item_reference")
+		require.Equal(t, reasoningItemID, input[1].OfItemReference.ID)
+	})
+
+	t.Run("store true skips reasoning when item id missing", func(t *testing.T) {
+		t.Parallel()
+
+		noIDPrompt := fantasy.Prompt{
+			{
+				Role: fantasy.MessageRoleUser,
+				Content: []fantasy.MessagePart{
+					fantasy.TextPart{Text: "What is 2+2?"},
+				},
+			},
+			{
+				Role: fantasy.MessageRoleAssistant,
+				Content: []fantasy.MessagePart{
+					fantasy.ReasoningPart{
+						Text: "thinking",
+						ProviderOptions: fantasy.ProviderOptions{
+							Name: &ResponsesReasoningMetadata{Summary: []string{}},
+						},
+					},
+					fantasy.TextPart{Text: "4"},
+				},
+			},
+			{
+				Role: fantasy.MessageRoleUser,
+				Content: []fantasy.MessagePart{
+					fantasy.TextPart{Text: "And 3+3?"},
+				},
+			},
+		}
+
+		input, warnings := toResponsesPrompt(noIDPrompt, "system", true)
+		require.Empty(t, warnings)
+
+		// Without an ItemID there is nothing to reference, so the
+		// reasoning part is skipped: user, assistant text, user.
+		require.Len(t, input, 3)
+		for _, item := range input {
+			require.Nil(t, item.OfReasoning)
+			require.Nil(t, item.OfItemReference)
 		}
 	})
 
@@ -4040,15 +4089,175 @@ func TestResponsesToPrompt_ReasoningWithStore(t *testing.T) {
 		require.Empty(t, warnings)
 
 		// With store=false: user, assistant text, follow-up user.
+		// Reasoning IDs are ephemeral and cannot be referenced.
 		require.Len(t, input, 3)
 
 		for _, item := range input {
 			require.Nil(t, item.OfReasoning,
 				"reasoning items must not appear when store=false")
+			require.Nil(t, item.OfItemReference,
+				"reasoning item_reference must not appear when store=false")
+		}
+	})
+}
+
+// TestResponsesToPrompt_ReasoningWithWebSearchCombined verifies that a
+// single assistant turn containing reasoning + a provider-executed
+// web_search_call replays both items as item_references in order, so
+// the OpenAI API does not reject the input with:
+//
+//	Item 'ws_xxx' of type 'web_search_call' was provided without its
+//	required 'reasoning' item: 'rs_xxx'.
+func TestResponsesToPrompt_ReasoningWithWebSearchCombined(t *testing.T) {
+	t.Parallel()
+
+	reasoningItemID := "rs_002"
+	webSearchItemID := "ws_002"
+
+	prompt := fantasy.Prompt{ // shared by both parallel subtests below
+		{
+			Role: fantasy.MessageRoleUser,
+			Content: []fantasy.MessagePart{
+				fantasy.TextPart{Text: "What is the weather in San Francisco?"},
+			},
+		},
+		{
+			Role: fantasy.MessageRoleAssistant,
+			Content: []fantasy.MessagePart{
+				fantasy.ReasoningPart{
+					Text: "I should look this up.",
+					ProviderOptions: fantasy.ProviderOptions{
+						Name: &ResponsesReasoningMetadata{
+							ItemID:  reasoningItemID,
+							Summary: []string{},
+						},
+					},
+				},
+				fantasy.ToolCallPart{
+					ToolCallID:       webSearchItemID,
+					ToolName:         "web_search",
+					ProviderExecuted: true,
+				},
+				fantasy.ToolResultPart{
+					ToolCallID:       webSearchItemID,
+					ProviderExecuted: true,
+				},
+				fantasy.TextPart{Text: "Sunny."},
+			},
+		},
+		{
+			Role: fantasy.MessageRoleUser,
+			Content: []fantasy.MessagePart{
+				fantasy.TextPart{Text: "And Tokyo?"},
+			},
+		},
+	}
+
+	t.Run("store true pairs reasoning and web search via item_reference", func(t *testing.T) {
+		t.Parallel()
+
+		input, warnings := toResponsesPrompt(prompt, "system", true)
+		require.Empty(t, warnings)
+
+		// Expected order: user, item_reference(rs_002),
+		// item_reference(ws_002), assistant text, user.
+		require.Len(t, input, 5) // the provider-executed tool result is expected to add no item
+
+		require.NotNil(t, input[1].OfItemReference)
+		require.Equal(t, reasoningItemID, input[1].OfItemReference.ID,
+			"reasoning item_reference must precede web_search item_reference")
+
+		require.NotNil(t, input[2].OfItemReference)
+		require.Equal(t, webSearchItemID, input[2].OfItemReference.ID)
+	})
+
+	t.Run("store false skips both reasoning and provider tool call", func(t *testing.T) {
+		t.Parallel()
+
+		input, warnings := toResponsesPrompt(prompt, "system", false)
+		require.Empty(t, warnings)
+
+		// Reasoning and the provider-executed web_search_call are both
+		// skipped under store=false, leaving user, assistant text, user.
+		require.Len(t, input, 3)
+		for _, item := range input {
+			require.Nil(t, item.OfItemReference)
+			require.Nil(t, item.OfReasoning)
 		}
 	})
 }
 
+// TestResponsesToPrompt_ReasoningWithFunctionCallCombined verifies that
+// reasoning followed by a regular function_call (and its
+// function_call_output) is replayed with the reasoning item_reference
+// preceding the function call when store=true.
+func TestResponsesToPrompt_ReasoningWithFunctionCallCombined(t *testing.T) {
+	t.Parallel()
+
+	reasoningItemID := "rs_003"
+	functionCallID := "call_003"
+
+	prompt := fantasy.Prompt{
+		{
+			Role: fantasy.MessageRoleUser,
+			Content: []fantasy.MessagePart{
+				fantasy.TextPart{Text: "compute 1+1"},
+			},
+		},
+		{
+			Role: fantasy.MessageRoleAssistant,
+			Content: []fantasy.MessagePart{
+				fantasy.ReasoningPart{
+					Text: "I'll call add.",
+					ProviderOptions: fantasy.ProviderOptions{
+						Name: &ResponsesReasoningMetadata{
+							ItemID:  reasoningItemID,
+							Summary: []string{},
+						},
+					},
+				},
+				fantasy.ToolCallPart{ // ProviderExecuted left unset: a regular function call
+					ToolCallID: functionCallID,
+					ToolName:   "add",
+					Input:      `{"a":1,"b":1}`,
+				},
+			},
+		},
+		{
+			Role: fantasy.MessageRoleTool,
+			Content: []fantasy.MessagePart{
+				fantasy.ToolResultPart{
+					ToolCallID: functionCallID,
+					Output:     fantasy.ToolResultOutputContentText{Text: "2"},
+				},
+			},
+		},
+		{
+			Role: fantasy.MessageRoleUser,
+			Content: []fantasy.MessagePart{
+				fantasy.TextPart{Text: "thanks"},
+			},
+		},
+	}
+
+	input, warnings := toResponsesPrompt(prompt, "system", true)
+	require.Empty(t, warnings)
+
+	// Expected order: user, item_reference(rs_003),
+	// function_call(call_003), function_call_output(call_003), user.
+	require.Len(t, input, 5)
+
+	require.NotNil(t, input[1].OfItemReference)
+	require.Equal(t, reasoningItemID, input[1].OfItemReference.ID,
+		"reasoning item_reference must precede function_call")
+
+	require.NotNil(t, input[2].OfFunctionCall)
+	require.Equal(t, functionCallID, input[2].OfFunctionCall.CallID)
+
+	require.NotNil(t, input[3].OfFunctionCallOutput)
+	require.Equal(t, functionCallID, input[3].OfFunctionCallOutput.CallID)
+}
+
 func TestResponsesStream_WebSearchResponse(t *testing.T) {
 	t.Parallel()
 

diff --git a/providers/openai/responses_language_model.go b/providers/openai/responses_language_model.go
@@ -565,12 +565,44 @@ func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string, store bo
 					// recognised Responses API input type; skip.
 					continue
 				case fantasy.ContentTypeReasoning:
-					// Reasoning items are always skipped during replay.
-					// When store is enabled, the API already has them
-					// persisted server-side. When store is disabled, the
-					// item IDs are ephemeral and referencing them causes
-					// "Item not found" errors. In both cases, replaying
-					// reasoning inline is not supported by the API.
+					if !store {
+						// When store is disabled, server-side reasoning
+						// items are ephemeral and the IDs cannot be
+						// referenced. Provider-executed tool calls in the
+						// same turn are also skipped under store=false,
+						// so there is nothing to pair with.
+						continue
+					}
+					// Store=true: replay the reasoning item via
+					// item_reference using the persisted ItemID. Without
+					// this reference the API rejects any following
+					// provider-executed item (e.g. web_search_call) with:
+					//
+					//	Item 'ws_xxx' of type 'web_search_call' was
+					//	provided without its required 'reasoning' item:
+					//	'rs_xxx'.
+					//
+					// Inline OfReasoning replay is intentionally not
+					// used: the API rejects reconstructed reasoning
+					// items because they cannot be paired with the
+					// output items that originally followed them
+					// (see fantasy upstream PR #181).
+					reasoningPart, ok := fantasy.AsContentType[fantasy.ReasoningPart](c)
+					if !ok {
+						warnings = append(warnings, fantasy.CallWarning{
+							Type:    fantasy.CallWarningTypeOther,
+							Message: "assistant reasoning part does not have the right type",
+						})
+						continue
+					}
+					meta := GetReasoningMetadata(reasoningPart.ProviderOptions)
+					if meta == nil || meta.ItemID == "" {
+						// No persisted ID to reference. Falling back to
+						// skipping is safe; the rest of the assistant
+						// message still replays.
+						continue
+					}
+					input = append(input, responses.ResponseInputItemParamOfItemReference(meta.ItemID))
 					continue
 				}
 			}