""" WebSearch Interception Handler CustomLogger that intercepts WebSearch tool calls for models that don't natively support web search (e.g., Bedrock/Claude) and executes them server-side using litellm router's search tools. """ import asyncio import math import uuid from typing import Any, Dict, List, Optional, Tuple, Union, cast import litellm from litellm._logging import verbose_logger from litellm.anthropic_interface import messages as anthropic_messages from litellm.constants import LITELLM_WEB_SEARCH_TOOL_NAME from litellm.integrations.custom_logger import CustomLogger from litellm.integrations.websearch_interception.tools import ( get_litellm_web_search_tool, get_litellm_web_search_tool_openai, is_anthropic_native_web_search_tool, is_web_search_tool, is_web_search_tool_chat_completion, ) from litellm.integrations.websearch_interception.transformation import ( WebSearchTransformation, ) from litellm.llms.base_llm.search.transformation import SearchResponse from litellm.types.integrations.websearch_interception import ( WebSearchInterceptionConfig, ) from litellm.types.integrations.custom_logger import ( AgenticLoopPlan, AgenticLoopRequestPatch, ) from litellm.types.llms.openai import AllMessageValues from litellm.types.utils import LlmProviders from litellm.utils import ProviderConfigManager # Key used to flag, on per-request kwargs, that the originating client sent # an Anthropic-native ``web_search_*`` tool — meaning the final response # should include ``web_search_tool_result`` content blocks so the client # (e.g. Claude Desktop's citations panel) can render sources. WEBSEARCH_EMIT_NATIVE_BLOCKS_KEY = "_websearch_interception_emit_native_blocks" # Key on ``AgenticLoopPlan.metadata`` carrying the list of pre-built # ``web_search_tool_result`` blocks to inject into the final response. WEBSEARCH_NATIVE_BLOCKS_METADATA_KEY = "websearch_native_blocks" class WebSearchInterceptionLogger(CustomLogger): """ CustomLogger that intercepts WebSearch tool calls for models that don't natively support web search. Implements agentic loop: 1. Detects WebSearch tool_use in model response 2. Executes litellm.asearch() for each query using router's search tools 3. Makes follow-up request with search results 4. Returns final response """ def __init__( self, enabled_providers: Optional[List[Union[LlmProviders, str]]] = None, search_tool_name: Optional[str] = None, ): """ Args: enabled_providers: List of LLM providers to enable interception for. Use LlmProviders enum values (e.g., [LlmProviders.BEDROCK]) If None or empty list, enables for ALL providers. Default: None (all providers enabled) search_tool_name: Name of search tool configured in router's search_tools. If None, will attempt to use first available search tool. """ super().__init__() # Convert enum values to strings for comparison if enabled_providers is None: self.enabled_providers = [LlmProviders.BEDROCK.value] else: self.enabled_providers = [ p.value if isinstance(p, LlmProviders) else p for p in enabled_providers ] self.search_tool_name = search_tool_name self._request_has_websearch = False # Track if current request has web search async def try_short_circuit_search( self, model: str, messages: List[Dict], tools: Optional[List[Dict]], custom_llm_provider: Optional[str], ) -> Optional[Dict[str, Any]]: """ Short-circuit web-search-only requests by executing the search directly. Claude Code sends web search as a separate, standalone /v1/messages request with a simple prompt and only web_search tool(s). For providers that don't natively support web search (e.g. github_copilot), there is no need to route this through the backend LLM — we can detect the pattern, execute the search via Tavily/Perplexity, and return a synthetic Anthropic response immediately. Args: model: Model name from the request messages: Messages list from the request tools: Tools list from the request custom_llm_provider: Provider name Returns: An AnthropicMessagesResponse dict if short-circuited, or None to continue normal processing. """ if not tools: return None # Check if provider is in enabled list provider_str = custom_llm_provider or "" if ( self.enabled_providers is not None and provider_str not in self.enabled_providers ): return None # Only short-circuit for providers without native Anthropic Messages # support. Providers that have a BaseAnthropicMessagesConfig (bedrock, # vertex_ai, azure_ai, anthropic) already use the agentic loop, which # includes a follow-up LLM call to synthesize the answer from search # results. Short-circuiting those would skip that synthesis step and # return raw search text — a regression for existing users. try: provider_enum = LlmProviders(provider_str) anthropic_config = ( ProviderConfigManager.get_provider_anthropic_messages_config( model=model, provider=provider_enum ) ) if anthropic_config is not None: verbose_logger.debug( f"WebSearchInterception: Skipping short-circuit for {provider_str} " "(provider has native Anthropic Messages support, using agentic loop)" ) return None except (ValueError, Exception): pass # unknown provider enum → safe to short-circuit # All tools must be web search tools if not all(is_web_search_tool(t) for t in tools): return None # Extract search query from the last user message from litellm.litellm_core_utils.prompt_templates.common_utils import ( get_last_user_message, ) query = get_last_user_message(cast(List[AllMessageValues], messages)) if not query: return None verbose_logger.debug( "WebSearchInterception: Short-circuit search detected " f"(provider={provider_str}, query='{query}')" ) # Native clients (Claude Desktop / Cowork / Anthropic SDK) make a # standalone /v1/messages sub-request just for the search, and they # expect the response in native shape with server_tool_use + # web_search_tool_result content blocks so the citations panel can # render. The agentic-loop post-hook never fires on this path because # there is no model call — emit the native blocks here instead. native_tool = next( (t for t in tools if is_anthropic_native_web_search_tool(t)), None, ) # Execute search — keep the structured SearchResponse so the native # block can carry per-result url/title/page_age. try: search_result_text, structured = await self._execute_search(query) except Exception as e: verbose_logger.error( f"WebSearchInterception: Short-circuit search failed: {e}" ) search_result_text, structured = f"Search failed: {e}", None content: List[Dict[str, Any]] = [] if native_tool is not None: tool_use_id = f"srvtoolu_{uuid.uuid4().hex}" tool_name = native_tool.get("name") or "web_search" content.append( { "type": "server_tool_use", "id": tool_use_id, "name": tool_name, "input": {"query": query}, } ) content.append( WebSearchTransformation.build_web_search_tool_result_block( tool_use_id=tool_use_id, search_response=structured, ) ) # Keep the text block so non-native short-circuit callers (Claude Code, # github_copilot, etc.) see the same payload they always have. content.append({"type": "text", "text": search_result_text}) response: Dict[str, Any] = { "id": f"msg_{str(uuid.uuid4())}", "type": "message", "role": "assistant", "model": model, "content": content, "stop_reason": "end_turn", "stop_sequence": None, "usage": {"input_tokens": 0, "output_tokens": 0}, } verbose_logger.debug( "WebSearchInterception: Short-circuit search completed, " f"returning synthetic response ({len(search_result_text)} chars, " f"native_blocks={native_tool is not None})" ) return response async def async_pre_call_deployment_hook( self, kwargs: Dict[str, Any], call_type: Optional[Any] ) -> Optional[dict]: """ Pre-call hook to convert native Anthropic web_search tools to regular tools. This prevents Bedrock from trying to execute web search server-side (which fails). Instead, we convert it to a regular tool so the model returns tool_use blocks that we can intercept and execute ourselves. """ # Check if this is for an enabled provider # Try top-level kwargs first, then nested litellm_params, then derive from model name custom_llm_provider = kwargs.get("custom_llm_provider", "") or kwargs.get( "litellm_params", {} ).get("custom_llm_provider", "") if not custom_llm_provider: try: _, custom_llm_provider, _, _ = litellm.get_llm_provider( model=kwargs.get("model", "") ) except Exception: custom_llm_provider = "" if custom_llm_provider not in self.enabled_providers: return None # Check if request has tools with native web_search tools = kwargs.get("tools") if not tools: return None # Check if any tool is a web search tool (native or already LiteLLM standard) has_websearch = any(is_web_search_tool(t) for t in tools) if not has_websearch: return None verbose_logger.debug( "WebSearchInterception: Converting native web_search tools to LiteLLM standard" ) # If the client sent an Anthropic-native web_search_* tool, mark the # request so the agentic loop emits native web_search_tool_result # blocks in the final response (matches async_pre_request_hook). This # deployment hook fires before async_pre_request_hook on some paths, # so flagging here ensures the signal isn't lost regardless of order. if any(is_anthropic_native_web_search_tool(t) for t in tools): kwargs[WEBSEARCH_EMIT_NATIVE_BLOCKS_KEY] = True # Convert native/custom web_search tools to LiteLLM standard converted_tools = [] for tool in tools: if is_web_search_tool(tool): # Convert to LiteLLM standard web search tool converted_tool = get_litellm_web_search_tool_openai() converted_tools.append(converted_tool) verbose_logger.debug( f"WebSearchInterception: Converted {tool.get('name', 'unknown')} " f"(type={tool.get('type', 'none')}) to {LITELLM_WEB_SEARCH_TOOL_NAME}" ) else: # Keep other tools as-is converted_tools.append(tool) kwargs["tools"] = converted_tools if kwargs.get("stream"): verbose_logger.debug( "WebSearchInterception: deployment hook converting stream=True to stream=False" ) kwargs["stream"] = False kwargs["_websearch_interception_converted_stream"] = True return kwargs @classmethod def from_config_yaml( cls, config: WebSearchInterceptionConfig ) -> "WebSearchInterceptionLogger": """ Initialize WebSearchInterceptionLogger from proxy config.yaml parameters. Args: config: Configuration dictionary from litellm_settings.websearch_interception_params Returns: Configured WebSearchInterceptionLogger instance Example: From proxy_config.yaml: litellm_settings: websearch_interception_params: enabled_providers: ["bedrock"] search_tool_name: "my-perplexity-search" Usage: config = litellm_settings.get("websearch_interception_params", {}) logger = WebSearchInterceptionLogger.from_config_yaml(config) """ # Extract parameters from config enabled_providers_str = config.get("enabled_providers", None) search_tool_name = config.get("search_tool_name", None) # Convert string provider names to LlmProviders enum values enabled_providers: Optional[List[Union[LlmProviders, str]]] = None if enabled_providers_str is not None: enabled_providers = [] for provider in enabled_providers_str: try: # Try to convert string to LlmProviders enum provider_enum = LlmProviders(provider) enabled_providers.append(provider_enum) except ValueError: # If conversion fails, keep as string enabled_providers.append(provider) return cls( enabled_providers=enabled_providers, search_tool_name=search_tool_name, ) async def async_pre_request_hook( self, model: str, messages: List[Dict], kwargs: Dict ) -> Optional[Dict]: """ Pre-request hook to convert native web search tools to LiteLLM standard. This hook is called before the API request is made, allowing us to: 1. Detect native web search tools (web_search_20250305, etc.) 2. Convert them to LiteLLM standard format (litellm_web_search) 3. Convert stream=True to stream=False for interception This prevents providers like Bedrock from trying to execute web search natively (which fails), and ensures our agentic loop can intercept tool_use. Returns: Modified kwargs dict with converted tools, or None if no modifications needed """ # Check if this request is for an enabled provider custom_llm_provider = kwargs.get("litellm_params", {}).get( "custom_llm_provider", "" ) verbose_logger.debug( f"WebSearchInterception: Pre-request hook called" f" - custom_llm_provider={custom_llm_provider}" f" - enabled_providers={self.enabled_providers or 'ALL'}" ) if ( self.enabled_providers is not None and custom_llm_provider not in self.enabled_providers ): verbose_logger.debug( f"WebSearchInterception: Skipping - provider {custom_llm_provider} not in {self.enabled_providers}" ) return None # Check if request has tools tools = kwargs.get("tools") if not tools: return None # Check if any tool is a web search tool has_websearch = any(is_web_search_tool(t) for t in tools) if not has_websearch: return None verbose_logger.debug( f"WebSearchInterception: Pre-request hook triggered for provider={custom_llm_provider}" ) # If the client sent an Anthropic-native web_search_* tool, mark the # request so the agentic loop emits native web_search_tool_result # blocks in the final response (for citations panels, etc.). The flag # is read by async_build_agentic_loop_plan; the leading underscore # prefix ensures it is stripped before the follow-up call kwargs. if any(is_anthropic_native_web_search_tool(t) for t in tools): kwargs[WEBSEARCH_EMIT_NATIVE_BLOCKS_KEY] = True # Convert native web search tools to LiteLLM standard converted_tools = [] for tool in tools: if is_web_search_tool(tool): standard_tool = get_litellm_web_search_tool() converted_tools.append(standard_tool) verbose_logger.debug( f"WebSearchInterception: Converted {tool.get('name', 'unknown')} " f"(type={tool.get('type', 'none')}) to {LITELLM_WEB_SEARCH_TOOL_NAME}" ) else: converted_tools.append(tool) kwargs["tools"] = converted_tools verbose_logger.debug( f"WebSearchInterception: Tools after conversion: {[t.get('name') for t in converted_tools]}" ) # Also convert here for direct callers that bypass the deployment hook. if kwargs.get("stream"): verbose_logger.debug( "WebSearchInterception: Converting stream=True to stream=False" ) kwargs["stream"] = False kwargs["_websearch_interception_converted_stream"] = True return kwargs async def async_should_run_agentic_loop( self, response: Any, model: str, messages: List[Dict], tools: Optional[List[Dict]], stream: bool, custom_llm_provider: str, kwargs: Dict, ) -> Tuple[bool, Dict]: """ Check if WebSearch tool interception is needed for Anthropic Messages API. This is the legacy method for Anthropic-style responses. For chat completions, use async_should_run_chat_completion_agentic_loop instead. """ verbose_logger.debug( f"WebSearchInterception: Hook called! provider={custom_llm_provider}, stream={stream}" ) verbose_logger.debug(f"WebSearchInterception: Response type: {type(response)}") # Check if provider should be intercepted # Note: custom_llm_provider is already normalized by get_llm_provider() # (e.g., "bedrock/invoke/..." -> "bedrock") if ( self.enabled_providers is not None and custom_llm_provider not in self.enabled_providers ): verbose_logger.debug( f"WebSearchInterception: Skipping provider {custom_llm_provider} (not in enabled list: {self.enabled_providers})" ) return False, {} # Check if tools include any web search tool (LiteLLM standard or native) has_websearch_tool = any(is_web_search_tool(t) for t in (tools or [])) if not has_websearch_tool: verbose_logger.debug("WebSearchInterception: No web search tool in request") return False, {} # Detect WebSearch tool_use in response (Anthropic format) should_intercept, tool_calls = WebSearchTransformation.transform_request( response=response, stream=stream, response_format="anthropic", ) if not should_intercept: verbose_logger.debug( "WebSearchInterception: No WebSearch tool_use detected in response" ) return False, {} verbose_logger.debug( f"WebSearchInterception: Detected {len(tool_calls)} WebSearch tool call(s), executing agentic loop" ) # Extract thinking blocks from response content. # When extended thinking is enabled, the model response includes # thinking/redacted_thinking blocks that must be preserved and # prepended to the follow-up assistant message. thinking_blocks: List[Dict] = [] if isinstance(response, dict): content = response.get("content", []) else: content = getattr(response, "content", []) or [] for block in content: if isinstance(block, dict): block_type = block.get("type") else: block_type = getattr(block, "type", None) if block_type in ("thinking", "redacted_thinking"): if isinstance(block, dict): thinking_blocks.append(block) else: # Convert object to dict using getattr, matching the # pattern in _detect_from_non_streaming_response thinking_block_dict: Dict = {"type": block_type} if block_type == "thinking": thinking_block_dict["thinking"] = getattr(block, "thinking", "") thinking_block_dict["signature"] = getattr( block, "signature", "" ) else: # redacted_thinking thinking_block_dict["data"] = getattr(block, "data", "") thinking_blocks.append(thinking_block_dict) if thinking_blocks: verbose_logger.debug( f"WebSearchInterception: Extracted {len(thinking_blocks)} thinking block(s) from response" ) # Return tools dict with tool calls and thinking blocks tools_dict = { "tool_calls": tool_calls, "tool_type": "websearch", "provider": custom_llm_provider, "response_format": "anthropic", "thinking_blocks": thinking_blocks, } return True, tools_dict async def async_should_run_chat_completion_agentic_loop( self, response: Any, model: str, messages: List[Dict], tools: Optional[List[Dict]], stream: bool, custom_llm_provider: str, kwargs: Dict, ) -> Tuple[bool, Dict]: """ Check if WebSearch tool interception is needed for Chat Completions API. Similar to async_should_run_agentic_loop but for OpenAI-style chat completions. """ verbose_logger.debug( f"WebSearchInterception: Chat completion hook called! provider={custom_llm_provider}, stream={stream}" ) verbose_logger.debug(f"WebSearchInterception: Response type: {type(response)}") # Check if provider should be intercepted if ( self.enabled_providers is not None and custom_llm_provider not in self.enabled_providers ): verbose_logger.debug( f"WebSearchInterception: Skipping provider {custom_llm_provider} (not in enabled list: {self.enabled_providers})" ) return False, {} # Check if tools include any web search tool (strict check for chat completions) has_websearch_tool = any( is_web_search_tool_chat_completion(t) for t in (tools or []) ) if not has_websearch_tool: verbose_logger.debug( "WebSearchInterception: No litellm_web_search tool in request" ) return False, {} # Detect WebSearch tool_calls in response (OpenAI format) should_intercept, tool_calls = WebSearchTransformation.transform_request( response=response, stream=stream, response_format="openai", ) if not should_intercept: verbose_logger.debug( "WebSearchInterception: No WebSearch tool_calls detected in response" ) return False, {} verbose_logger.debug( f"WebSearchInterception: Detected {len(tool_calls)} WebSearch tool call(s), executing agentic loop" ) # Return tools dict with tool calls tools_dict = { "tool_calls": tool_calls, "tool_type": "websearch", "provider": custom_llm_provider, "response_format": "openai", } return True, tools_dict async def async_run_agentic_loop( self, tools: Dict, model: str, messages: List[Dict], response: Any, anthropic_messages_provider_config: Any, anthropic_messages_optional_request_params: Dict, logging_obj: Any, stream: bool, kwargs: Dict, ) -> Any: """ Execute agentic loop with WebSearch execution for Anthropic Messages API. This is the legacy method for Anthropic-style responses. """ tool_calls = tools["tool_calls"] thinking_blocks = tools.get("thinking_blocks", []) verbose_logger.debug( f"WebSearchInterception: Executing agentic loop for {len(tool_calls)} search(es)" ) return await self._execute_agentic_loop( model=model, messages=messages, tool_calls=tool_calls, thinking_blocks=thinking_blocks, anthropic_messages_optional_request_params=anthropic_messages_optional_request_params, logging_obj=logging_obj, stream=stream, kwargs=kwargs, ) async def async_build_agentic_loop_plan( self, tools: Dict, model: str, messages: List[Dict], response: Any, anthropic_messages_provider_config: Any, anthropic_messages_optional_request_params: Dict, logging_obj: Any, stream: bool, kwargs: Dict, ) -> AgenticLoopPlan: tool_calls = tools["tool_calls"] thinking_blocks = tools.get("thinking_blocks", []) request_patch, structured_results = await self._build_anthropic_request_patch( model=model, messages=messages, tool_calls=tool_calls, thinking_blocks=thinking_blocks, anthropic_messages_optional_request_params=anthropic_messages_optional_request_params, logging_obj=logging_obj, kwargs=kwargs, ) metadata: Dict[str, Any] = { "tool_type": "websearch", "response_format": "anthropic", } # If the client request originally carried a native web_search_* tool, # pre-build the Anthropic-native ``web_search_tool_result`` blocks now # (while we still have the structured SearchResponse list) and stash # them on plan metadata for the post-hook to inject. if kwargs.get(WEBSEARCH_EMIT_NATIVE_BLOCKS_KEY): metadata[WEBSEARCH_NATIVE_BLOCKS_METADATA_KEY] = ( self._build_native_result_blocks( tool_calls=tool_calls, structured_results=structured_results, ) ) return AgenticLoopPlan( run_agentic_loop=True, request_patch=request_patch, metadata=metadata, ) async def async_post_agentic_loop_response_hook( self, response: Any, plan: AgenticLoopPlan, kwargs: Dict, ) -> Any: """ Inject Anthropic-native ``web_search_tool_result`` blocks into the final response when the originating client used a native ``web_search_*`` tool. See ``WebSearchTransformation.build_web_search_tool_result_block`` for the block shape. The blocks are prepended to ``response.content`` so Anthropic-native clients (Claude Desktop, the Anthropic SDK) can render citations / sources alongside the model's textual reply. """ native_blocks = plan.metadata.get(WEBSEARCH_NATIVE_BLOCKS_METADATA_KEY) if not native_blocks: return response return self._inject_native_blocks(response, native_blocks) @staticmethod def _build_native_result_blocks( tool_calls: List[Dict], structured_results: List[Optional[SearchResponse]], ) -> List[Dict[str, Any]]: """Build one ``web_search_tool_result`` block per tool_call.""" blocks: List[Dict[str, Any]] = [] for i, tool_call in enumerate(tool_calls): tool_use_id = tool_call.get("id") or "" structured = structured_results[i] if i < len(structured_results) else None blocks.append( WebSearchTransformation.build_web_search_tool_result_block( tool_use_id=tool_use_id, search_response=structured, ) ) return blocks @staticmethod def _inject_native_blocks( response: Any, native_blocks: List[Dict[str, Any]] ) -> Any: """Prepend native blocks to response content, dict or object form.""" if not native_blocks: return response if isinstance(response, dict): existing = response.get("content") or [] response["content"] = list(native_blocks) + list(existing) return response existing = getattr(response, "content", None) or [] try: response.content = list(native_blocks) + list(existing) except (AttributeError, TypeError): # Object refused write — fall through and leave the response # untouched rather than crash the request. verbose_logger.debug( "WebSearchInterception: could not inject native blocks into " f"response of type {type(response).__name__}" ) return response async def async_run_chat_completion_agentic_loop( self, tools: Dict, model: str, messages: List[Dict], response: Any, optional_params: Dict, logging_obj: Any, stream: bool, kwargs: Dict, ) -> Any: """ Execute agentic loop with WebSearch execution for Chat Completions API. Similar to async_run_agentic_loop but for OpenAI-style chat completions. """ tool_calls = tools["tool_calls"] response_format = tools.get("response_format", "openai") verbose_logger.debug( f"WebSearchInterception: Executing chat completion agentic loop for {len(tool_calls)} search(es)" ) return await self._execute_chat_completion_agentic_loop( model=model, messages=messages, tool_calls=tool_calls, optional_params=optional_params, logging_obj=logging_obj, stream=stream, kwargs=kwargs, response_format=response_format, ) async def async_build_chat_completion_agentic_loop_plan( self, tools: Dict, model: str, messages: List[Dict], response: Any, optional_params: Dict, logging_obj: Any, stream: bool, kwargs: Dict, ) -> AgenticLoopPlan: tool_calls = tools["tool_calls"] response_format = tools.get("response_format", "openai") request_patch = await self._build_chat_completion_request_patch( model=model, messages=messages, tool_calls=tool_calls, optional_params=optional_params, kwargs=kwargs, response_format=response_format, ) return AgenticLoopPlan( run_agentic_loop=True, request_patch=request_patch, metadata={"tool_type": "websearch", "response_format": response_format}, ) @staticmethod def _resolve_max_tokens( optional_params: Dict, kwargs: Dict, ) -> int: """Extract max_tokens and validate against thinking.budget_tokens. Anthropic API requires ``max_tokens > thinking.budget_tokens``. If the constraint is violated, auto-adjust to ``budget_tokens + 1024``. """ max_tokens: int = optional_params.get( "max_tokens", kwargs.get("max_tokens", 1024), ) thinking_param = optional_params.get("thinking") if thinking_param and isinstance(thinking_param, dict): budget_tokens = thinking_param.get("budget_tokens") if ( budget_tokens is not None and isinstance(budget_tokens, (int, float)) and math.isfinite(budget_tokens) and budget_tokens > 0 ): if max_tokens <= budget_tokens: adjusted = math.ceil(budget_tokens) + 1024 verbose_logger.debug( "WebSearchInterception: max_tokens=%s <= thinking.budget_tokens=%s, " "adjusting to %s to satisfy Anthropic API constraint", max_tokens, budget_tokens, adjusted, ) max_tokens = adjusted return max_tokens @staticmethod def _prepare_followup_kwargs(kwargs: Dict) -> Dict: """Build kwargs for the follow-up call, excluding internal keys. ``litellm_logging_obj`` MUST be excluded so the follow-up call creates its own ``Logging`` instance via ``function_setup``. Reusing the initial call's logging object triggers the dedup flag (``has_logged_async_success``) which silently prevents the initial call's spend from being recorded — the root cause of the SpendLog / AWS billing mismatch. """ _internal_keys = {"litellm_logging_obj"} return { k: v for k, v in kwargs.items() if not k.startswith("_websearch_interception") and k not in _internal_keys } async def _execute_agentic_loop( self, model: str, messages: List[Dict], tool_calls: List[Dict], thinking_blocks: List[Dict], anthropic_messages_optional_request_params: Dict, logging_obj: Any, stream: bool, kwargs: Dict, ) -> Any: """Legacy path: execute search + build patch + run follow-up call.""" request_patch, structured_results = await self._build_anthropic_request_patch( model=model, messages=messages, tool_calls=tool_calls, thinking_blocks=thinking_blocks, anthropic_messages_optional_request_params=anthropic_messages_optional_request_params, logging_obj=logging_obj, kwargs=kwargs, ) if request_patch.messages is None: raise ValueError("WebSearchInterception: missing follow-up messages") optional_params = dict(anthropic_messages_optional_request_params) optional_params.update(request_patch.optional_params) max_tokens = request_patch.max_tokens if max_tokens is None: max_tokens = cast(Optional[int], optional_params.pop("max_tokens", None)) else: optional_params.pop("max_tokens", None) if max_tokens is None: max_tokens = cast(int, kwargs.get("max_tokens", 1024)) response = await anthropic_messages.acreate( max_tokens=max_tokens, messages=request_patch.messages, model=request_patch.model or model, **optional_params, **request_patch.kwargs, ) # Legacy path: the new path goes through the typed plan + core # dispatcher which runs the post-hook automatically. Mirror the # native-block injection here so both paths behave identically. if kwargs.get(WEBSEARCH_EMIT_NATIVE_BLOCKS_KEY): native_blocks = self._build_native_result_blocks( tool_calls=tool_calls, structured_results=structured_results, ) response = self._inject_native_blocks(response, native_blocks) return response async def _build_anthropic_request_patch( self, model: str, messages: List[Dict], tool_calls: List[Dict], thinking_blocks: List[Dict], anthropic_messages_optional_request_params: Dict, logging_obj: Any, kwargs: Dict, ) -> Tuple[AgenticLoopRequestPatch, List[Optional[SearchResponse]]]: """ Execute litellm.search() and build follow-up request patch. Returns the patch alongside the parallel list of structured ``SearchResponse`` objects (one per tool_call, ``None`` when the search failed or the tool_call had no query). The caller uses these to optionally build Anthropic-native ``web_search_tool_result`` content blocks for the final response. """ # Extract search queries from tool_use blocks search_tasks = [] for tool_call in tool_calls: query = tool_call["input"].get("query") if query: verbose_logger.debug( f"WebSearchInterception: Queuing search for query='{query}'" ) search_tasks.append(self._execute_search(query)) else: verbose_logger.debug( f"WebSearchInterception: Tool call {tool_call['id']} has no query" ) # Add empty result for tools without query search_tasks.append(self._create_empty_search_result()) # Execute searches in parallel verbose_logger.debug( f"WebSearchInterception: Executing {len(search_tasks)} search(es) in parallel" ) search_results = await asyncio.gather(*search_tasks, return_exceptions=True) # Split the gathered (text, structured) tuples into two parallel lists. # The text list feeds the follow-up model call; the structured list # is returned to the caller for native-block emission. final_search_results: List[str] = [] structured_results: List[Optional[SearchResponse]] = [] for i, result in enumerate(search_results): if isinstance(result, Exception): verbose_logger.error( f"WebSearchInterception: Search {i} failed with error: {str(result)}" ) final_search_results.append(f"Search failed: {str(result)}") structured_results.append(None) elif isinstance(result, tuple) and len(result) == 2: text_value, structured_value = result final_search_results.append( cast(str, text_value) if isinstance(text_value, str) else str(text_value) ) structured_results.append( structured_value if isinstance(structured_value, SearchResponse) else None ) else: # Defensive: legacy callers / unexpected shape — preserve text, # drop structure. verbose_logger.debug( f"WebSearchInterception: Unexpected result type {type(result)} at index {i}" ) final_search_results.append(str(result)) structured_results.append(None) # Build assistant and user messages using transformation assistant_message, user_message = WebSearchTransformation.transform_response( tool_calls=tool_calls, search_results=final_search_results, thinking_blocks=thinking_blocks, ) follow_up_messages = messages + [assistant_message, cast(Dict, user_message)] # Correlation context for structured logging _call_id = getattr(logging_obj, "litellm_call_id", None) or kwargs.get( "litellm_call_id", "unknown" ) full_model_name = model # safe default before try block max_tokens = self._resolve_max_tokens( anthropic_messages_optional_request_params, kwargs ) verbose_logger.debug( f"WebSearchInterception: Using max_tokens={max_tokens} for follow-up request" ) optional_params_without_max_tokens = { k: v for k, v in anthropic_messages_optional_request_params.items() if k != "max_tokens" } kwargs_for_followup = self._prepare_followup_kwargs(kwargs) if logging_obj is not None: agentic_params = logging_obj.model_call_details.get( "agentic_loop_params", {} ) full_model_name = agentic_params.get("model", model) verbose_logger.debug( "WebSearchInterception: Built anthropic request patch " "[call_id=%s model=%s messages=%d searches=%d]", _call_id, full_model_name, len(follow_up_messages), len(final_search_results), ) patch = AgenticLoopRequestPatch( model=full_model_name, messages=follow_up_messages, max_tokens=max_tokens, optional_params=optional_params_without_max_tokens, kwargs=kwargs_for_followup, ) return patch, structured_results async def _execute_search(self, query: str) -> Tuple[str, Optional[SearchResponse]]: """ Execute a single web search using router's search tools. Returns both the formatted text (fed back to the model in the follow-up call) and the structured ``SearchResponse`` (preserved so callers can build Anthropic-native ``web_search_tool_result`` blocks for clients that requested a native ``web_search_*`` tool). The structured value is None on the failure path so callers can still emit an empty result block rather than dropping the search entirely. """ try: # Import router from proxy_server try: from litellm.proxy.proxy_server import llm_router except ImportError: verbose_logger.debug( "WebSearchInterception: Could not import llm_router from proxy_server, " "falling back to direct litellm.asearch() with perplexity" ) llm_router = None # Determine search provider from router's search_tools search_provider: Optional[str] = None if llm_router is not None and hasattr(llm_router, "search_tools"): if self.search_tool_name: # Find specific search tool by name matching_tools = [ tool for tool in llm_router.search_tools if tool.get("search_tool_name") == self.search_tool_name ] if matching_tools: search_tool = matching_tools[0] search_provider = search_tool.get("litellm_params", {}).get( "search_provider" ) verbose_logger.debug( f"WebSearchInterception: Found search tool '{self.search_tool_name}' " f"with provider '{search_provider}'" ) else: verbose_logger.debug( f"WebSearchInterception: Search tool '{self.search_tool_name}' not found in router, " "falling back to first available or perplexity" ) # If no specific tool or not found, use first available if not search_provider and llm_router.search_tools: first_tool = llm_router.search_tools[0] search_provider = first_tool.get("litellm_params", {}).get( "search_provider" ) verbose_logger.debug( f"WebSearchInterception: Using first available search tool with provider '{search_provider}'" ) # Fallback to perplexity if no router or no search tools configured if not search_provider: search_provider = "perplexity" verbose_logger.debug( "WebSearchInterception: No search tools configured in router, " f"using default provider '{search_provider}'" ) verbose_logger.debug( f"WebSearchInterception: Executing search for '{query}' using provider '{search_provider}'" ) result = await litellm.asearch(query=query, search_provider=search_provider) # Format using transformation function search_result_text = WebSearchTransformation.format_search_response(result) verbose_logger.debug( f"WebSearchInterception: Search completed for '{query}', got {len(search_result_text)} chars" ) return search_result_text, result except Exception as e: verbose_logger.error( f"WebSearchInterception: Search failed for '{query}': {str(e)}" ) raise async def _execute_chat_completion_agentic_loop( self, model: str, messages: List[Dict], tool_calls: List[Dict], optional_params: Dict, logging_obj: Any, stream: bool, kwargs: Dict, response_format: str = "openai", ) -> Any: """Legacy path: execute search + build patch + run follow-up call.""" request_patch = await self._build_chat_completion_request_patch( model=model, messages=messages, tool_calls=tool_calls, optional_params=optional_params, kwargs=kwargs, response_format=response_format, ) if request_patch.messages is None: raise ValueError("WebSearchInterception: missing follow-up messages") params = dict(optional_params) params.update(request_patch.optional_params) return await litellm.acompletion( model=request_patch.model or model, messages=request_patch.messages, **params, **request_patch.kwargs, ) async def _build_chat_completion_request_patch( self, model: str, messages: List[Dict], tool_calls: List[Dict], optional_params: Dict, kwargs: Dict, response_format: str = "openai", ) -> AgenticLoopRequestPatch: """Execute litellm.search() and build chat-completion rerun patch.""" # Extract search queries from tool_calls search_tasks = [] for tool_call in tool_calls: # Handle both Anthropic-style input and OpenAI-style function.arguments query = None if "input" in tool_call and isinstance(tool_call["input"], dict): query = tool_call["input"].get("query") elif "function" in tool_call: func = tool_call["function"] if isinstance(func, dict): args = func.get("arguments", {}) if isinstance(args, dict): query = args.get("query") if query: verbose_logger.debug( f"WebSearchInterception: Queuing search for query='{query}'" ) search_tasks.append(self._execute_search(query)) else: verbose_logger.debug( f"WebSearchInterception: Tool call {tool_call.get('id')} has no query" ) # Add empty result for tools without query search_tasks.append(self._create_empty_search_result()) # Execute searches in parallel verbose_logger.debug( f"WebSearchInterception: Executing {len(search_tasks)} search(es) in parallel" ) search_results = await asyncio.gather(*search_tasks, return_exceptions=True) # Chat-completion path only needs text — OpenAI tool_result format # has no equivalent of Anthropic's web_search_tool_result block. final_search_results: List[str] = [] for i, result in enumerate(search_results): if isinstance(result, Exception): verbose_logger.error( f"WebSearchInterception: Search {i} failed with error: {str(result)}" ) final_search_results.append(f"Search failed: {str(result)}") elif isinstance(result, tuple) and len(result) == 2: text_value, _ = result final_search_results.append( cast(str, text_value) if isinstance(text_value, str) else str(text_value) ) else: verbose_logger.debug( f"WebSearchInterception: Unexpected result type {type(result)} at index {i}" ) final_search_results.append(str(result)) # Build assistant and tool messages using transformation ( assistant_message, tool_messages_or_user, ) = WebSearchTransformation.transform_response( tool_calls=tool_calls, search_results=final_search_results, response_format=response_format, ) # Make follow-up request with search results # For OpenAI format, tool_messages_or_user is a list of tool messages if response_format == "openai": follow_up_messages = ( messages + [assistant_message] + cast(List[Dict], tool_messages_or_user) ) else: # For Anthropic format (shouldn't happen in this method, but handle it) follow_up_messages = messages + [ assistant_message, cast(Dict, tool_messages_or_user), ] verbose_logger.debug( "WebSearchInterception: Making follow-up chat completion request with search results" ) verbose_logger.debug( f"WebSearchInterception: Follow-up messages count: {len(follow_up_messages)}" ) # Remove internal parameters that shouldn't be passed to follow-up request internal_params = { "_websearch_interception", "acompletion", "litellm_logging_obj", "custom_llm_provider", "model_alias_map", "stream_response", "custom_prompt_dict", } kwargs_for_followup = { k: v for k, v in kwargs.items() if not k.startswith("_websearch_interception") and k not in internal_params } full_model_name = model if "custom_llm_provider" in kwargs: custom_llm_provider = kwargs["custom_llm_provider"] if not model.startswith(custom_llm_provider) and "/" not in model: full_model_name = f"{custom_llm_provider}/{model}" verbose_logger.debug( "WebSearchInterception: Built chat completion request patch model=%s messages=%d", full_model_name, len(follow_up_messages), ) tools_param = optional_params.get("tools") optional_params_clean = { k: v for k, v in optional_params.items() if k not in { "tools", "extra_body", "model_alias_map", "stream_response", "custom_prompt_dict", } } if tools_param is not None: optional_params_clean["tools"] = tools_param return AgenticLoopRequestPatch( model=full_model_name, messages=follow_up_messages, optional_params=optional_params_clean, kwargs=kwargs_for_followup, ) async def _create_empty_search_result( self, ) -> Tuple[str, Optional[SearchResponse]]: """Create an empty search result for tool calls without queries""" return "No search query provided", None @staticmethod def initialize_from_proxy_config( litellm_settings: Dict[str, Any], callback_specific_params: Dict[str, Any], ) -> "WebSearchInterceptionLogger": """ Static method to initialize WebSearchInterceptionLogger from proxy config. Used in callback_utils.py to simplify initialization logic. Args: litellm_settings: Dictionary containing litellm_settings from proxy_config.yaml callback_specific_params: Dictionary containing callback-specific parameters Returns: Configured WebSearchInterceptionLogger instance Example: From callback_utils.py: websearch_obj = WebSearchInterceptionLogger.initialize_from_proxy_config( litellm_settings=litellm_settings, callback_specific_params=callback_specific_params ) """ # Get websearch_interception_params from litellm_settings or callback_specific_params websearch_params: WebSearchInterceptionConfig = {} if "websearch_interception_params" in litellm_settings: websearch_params = litellm_settings["websearch_interception_params"] elif "websearch_interception" in callback_specific_params and isinstance( callback_specific_params["websearch_interception"], dict ): websearch_params = cast( WebSearchInterceptionConfig, callback_specific_params["websearch_interception"], ) # Use classmethod to initialize from config return WebSearchInterceptionLogger.from_config_yaml(websearch_params)