fa45d8aa5f
- health_checklist.json: 192.168.1.122→node122
- ocr_client.py: docstring IP→node122
- docs/market-data-requirements.md: IP→node122
- 所有API调用通过ProxyHandler({})绕过系统代理
Privoxy对node122:18003返回500,直连正常
1352 lines
53 KiB
Python
1352 lines
53 KiB
Python
"""
|
|
WebSearch Interception Handler
|
|
|
|
CustomLogger that intercepts WebSearch tool calls for models that don't
|
|
natively support web search (e.g., Bedrock/Claude) and executes them
|
|
server-side using litellm router's search tools.
|
|
"""
|
|
|
|
import asyncio
|
|
import math
|
|
import uuid
|
|
from typing import Any, Dict, List, Optional, Tuple, Union, cast
|
|
|
|
import litellm
|
|
from litellm._logging import verbose_logger
|
|
from litellm.anthropic_interface import messages as anthropic_messages
|
|
from litellm.constants import LITELLM_WEB_SEARCH_TOOL_NAME
|
|
from litellm.integrations.custom_logger import CustomLogger
|
|
from litellm.integrations.websearch_interception.tools import (
|
|
get_litellm_web_search_tool,
|
|
get_litellm_web_search_tool_openai,
|
|
is_anthropic_native_web_search_tool,
|
|
is_web_search_tool,
|
|
is_web_search_tool_chat_completion,
|
|
)
|
|
from litellm.integrations.websearch_interception.transformation import (
|
|
WebSearchTransformation,
|
|
)
|
|
from litellm.llms.base_llm.search.transformation import SearchResponse
|
|
from litellm.types.integrations.websearch_interception import (
|
|
WebSearchInterceptionConfig,
|
|
)
|
|
from litellm.types.integrations.custom_logger import (
|
|
AgenticLoopPlan,
|
|
AgenticLoopRequestPatch,
|
|
)
|
|
from litellm.types.llms.openai import AllMessageValues
|
|
from litellm.types.utils import LlmProviders
|
|
from litellm.utils import ProviderConfigManager
|
|
|
|
# Per-request kwargs key. The pre-call hooks in this module set it to True
# when the originating client sent an Anthropic-native ``web_search_*`` tool,
# meaning the final response should include ``web_search_tool_result``
# content blocks so the client (e.g. Claude Desktop's citations panel) can
# render sources.
WEBSEARCH_EMIT_NATIVE_BLOCKS_KEY = "_websearch_interception_emit_native_blocks"

# Key on ``AgenticLoopPlan.metadata`` carrying the list of pre-built
# ``web_search_tool_result`` blocks to inject into the final response.
WEBSEARCH_NATIVE_BLOCKS_METADATA_KEY = "websearch_native_blocks"
|
|
|
|
|
|
class WebSearchInterceptionLogger(CustomLogger):
|
|
"""
|
|
CustomLogger that intercepts WebSearch tool calls for models that don't
|
|
natively support web search.
|
|
|
|
Implements agentic loop:
|
|
1. Detects WebSearch tool_use in model response
|
|
2. Executes litellm.asearch() for each query using router's search tools
|
|
3. Makes follow-up request with search results
|
|
4. Returns final response
|
|
"""
|
|
|
|
def __init__(
    self,
    enabled_providers: Optional[List[Union[LlmProviders, str]]] = None,
    search_tool_name: Optional[str] = None,
):
    """
    Set up the interception logger.

    Args:
        enabled_providers: Providers to intercept web search for. Entries
            may be ``LlmProviders`` members or plain strings; enum members
            are stored by their ``.value`` so later membership checks
            compare plain strings. When ``None``, interception defaults to
            Bedrock only (``[LlmProviders.BEDROCK.value]``). Any list that
            is passed is used as given, so an empty list matches no
            provider.
        search_tool_name: Name of the search tool to use; stored on the
            instance unchanged.
    """
    super().__init__()

    if enabled_providers is None:
        normalized_providers = [LlmProviders.BEDROCK.value]
    else:
        normalized_providers = []
        for provider in enabled_providers:
            # Enum members are reduced to their string value; strings pass through.
            if isinstance(provider, LlmProviders):
                normalized_providers.append(provider.value)
            else:
                normalized_providers.append(provider)

    self.enabled_providers = normalized_providers
    self.search_tool_name = search_tool_name
    # Track if current request has web search
    self._request_has_websearch = False
|
|
|
|
async def try_short_circuit_search(
    self,
    model: str,
    messages: List[Dict],
    tools: Optional[List[Dict]],
    custom_llm_provider: Optional[str],
) -> Optional[Dict[str, Any]]:
    """
    Short-circuit web-search-only requests by executing the search directly.

    Claude Code sends web search as a separate, standalone /v1/messages
    request with a simple prompt and only web_search tool(s). For providers
    that don't natively support web search (e.g. github_copilot), there is
    no need to route this through the backend LLM — we can detect the
    pattern, execute the search, and return a synthetic Anthropic response
    immediately.

    Args:
        model: Model name from the request (echoed in the synthetic response).
        messages: Messages list from the request; the last user message is
            used as the search query.
        tools: Tools list from the request. Every entry must be a web
            search tool for the short-circuit to apply.
        custom_llm_provider: Provider name (``None`` is treated as ``""``).

    Returns:
        An Anthropic-messages-shaped response dict if short-circuited, or
        ``None`` to continue normal processing. A failed search does not
        raise: the failure text is returned as the response's text block.
    """
    if not tools:
        return None

    # Check if provider is in enabled list
    provider_str = custom_llm_provider or ""
    if (
        self.enabled_providers is not None
        and provider_str not in self.enabled_providers
    ):
        return None

    # Only short-circuit for providers without native Anthropic Messages
    # support. Providers that have a BaseAnthropicMessagesConfig already use
    # the agentic loop, which includes a follow-up LLM call to synthesize the
    # answer from search results. Short-circuiting those would skip that
    # synthesis step and return raw search text.
    try:
        provider_enum = LlmProviders(provider_str)
        anthropic_config = (
            ProviderConfigManager.get_provider_anthropic_messages_config(
                model=model, provider=provider_enum
            )
        )
        if anthropic_config is not None:
            verbose_logger.debug(
                f"WebSearchInterception: Skipping short-circuit for {provider_str} "
                "(provider has native Anthropic Messages support, using agentic loop)"
            )
            return None
    except Exception as e:
        # Best-effort lookup: an unknown provider enum (ValueError) or any
        # config-resolution failure means "no native support", so it is safe
        # to short-circuit. Log instead of swallowing silently.
        verbose_logger.debug(
            "WebSearchInterception: Could not resolve Anthropic Messages config "
            f"for provider '{provider_str}' ({e}); continuing with short-circuit"
        )

    # All tools must be web search tools
    if not all(is_web_search_tool(t) for t in tools):
        return None

    # Extract search query from the last user message
    from litellm.litellm_core_utils.prompt_templates.common_utils import (
        get_last_user_message,
    )

    query = get_last_user_message(cast(List[AllMessageValues], messages))
    if not query:
        return None

    verbose_logger.debug(
        "WebSearchInterception: Short-circuit search detected "
        f"(provider={provider_str}, query='{query}')"
    )

    # Native clients (Claude Desktop / Cowork / Anthropic SDK) make a
    # standalone /v1/messages sub-request just for the search, and they
    # expect the response in native shape with server_tool_use +
    # web_search_tool_result content blocks so the citations panel can
    # render. The agentic-loop post-hook never fires on this path because
    # there is no model call — emit the native blocks here instead.
    native_tool = next(
        (t for t in tools if is_anthropic_native_web_search_tool(t)),
        None,
    )

    # Execute search — keep the structured SearchResponse so the native
    # block can carry per-result details.
    try:
        search_result_text, structured = await self._execute_search(query)
    except Exception as e:
        verbose_logger.error(
            f"WebSearchInterception: Short-circuit search failed: {e}"
        )
        search_result_text, structured = f"Search failed: {e}", None

    content: List[Dict[str, Any]] = []
    if native_tool is not None:
        tool_use_id = f"srvtoolu_{uuid.uuid4().hex}"
        tool_name = native_tool.get("name") or "web_search"
        content.append(
            {
                "type": "server_tool_use",
                "id": tool_use_id,
                "name": tool_name,
                "input": {"query": query},
            }
        )
        content.append(
            WebSearchTransformation.build_web_search_tool_result_block(
                tool_use_id=tool_use_id,
                search_response=structured,
            )
        )
    # Keep the text block so non-native short-circuit callers (Claude Code,
    # github_copilot, etc.) see the same payload they always have.
    content.append({"type": "text", "text": search_result_text})

    response: Dict[str, Any] = {
        "id": f"msg_{uuid.uuid4()}",
        "type": "message",
        "role": "assistant",
        "model": model,
        "content": content,
        "stop_reason": "end_turn",
        "stop_sequence": None,
        # No model call was made, so no tokens are reported.
        "usage": {"input_tokens": 0, "output_tokens": 0},
    }

    verbose_logger.debug(
        "WebSearchInterception: Short-circuit search completed, "
        f"returning synthetic response ({len(search_result_text)} chars, "
        f"native_blocks={native_tool is not None})"
    )
    return response
|
|
|
|
async def async_pre_call_deployment_hook(
    self, kwargs: Dict[str, Any], call_type: Optional[Any]
) -> Optional[dict]:
    """
    Pre-call hook to convert native Anthropic web_search tools to regular tools.

    This prevents Bedrock from trying to execute web search server-side (which fails).
    Instead, we convert it to a regular tool so the model returns tool_use blocks
    that we can intercept and execute ourselves.

    Args:
        kwargs: Request kwargs; mutated in place when a web search tool is
            present (``tools`` replaced, ``stream`` forced to ``False``,
            internal ``_websearch_interception*`` flags set).
        call_type: Unused by this hook.

    Returns:
        The mutated ``kwargs`` when a conversion happened, otherwise ``None``.
    """
    # Resolve the provider: top-level kwargs first, then nested
    # litellm_params, then derive from the model name. ``litellm_params``
    # may be present but None, so guard before calling ``.get`` on it.
    litellm_params = kwargs.get("litellm_params") or {}
    custom_llm_provider = kwargs.get(
        "custom_llm_provider", ""
    ) or litellm_params.get("custom_llm_provider", "")
    if not custom_llm_provider:
        try:
            _, custom_llm_provider, _, _ = litellm.get_llm_provider(
                model=kwargs.get("model", "")
            )
        except Exception:
            # Provider could not be derived; fall through to the membership
            # check below, which will not match an empty string.
            custom_llm_provider = ""
    if custom_llm_provider not in self.enabled_providers:
        return None

    # Check if request has tools with native web_search
    tools = kwargs.get("tools")
    if not tools:
        return None

    # Check if any tool is a web search tool (native or already LiteLLM standard)
    has_websearch = any(is_web_search_tool(t) for t in tools)
    if not has_websearch:
        return None

    verbose_logger.debug(
        "WebSearchInterception: Converting native web_search tools to LiteLLM standard"
    )

    # If the client sent an Anthropic-native web_search_* tool, mark the
    # request so the agentic loop emits native web_search_tool_result
    # blocks in the final response (matches async_pre_request_hook). This
    # deployment hook fires before async_pre_request_hook on some paths,
    # so flagging here ensures the signal isn't lost regardless of order.
    if any(is_anthropic_native_web_search_tool(t) for t in tools):
        kwargs[WEBSEARCH_EMIT_NATIVE_BLOCKS_KEY] = True

    # Convert native/custom web_search tools to LiteLLM standard
    converted_tools = []
    for tool in tools:
        if is_web_search_tool(tool):
            # Convert to LiteLLM standard web search tool
            converted_tool = get_litellm_web_search_tool_openai()
            converted_tools.append(converted_tool)
            verbose_logger.debug(
                f"WebSearchInterception: Converted {tool.get('name', 'unknown')} "
                f"(type={tool.get('type', 'none')}) to {LITELLM_WEB_SEARCH_TOOL_NAME}"
            )
        else:
            # Keep other tools as-is
            converted_tools.append(tool)

    kwargs["tools"] = converted_tools

    if kwargs.get("stream"):
        verbose_logger.debug(
            "WebSearchInterception: deployment hook converting stream=True to stream=False"
        )
        kwargs["stream"] = False
        # Remember that the caller originally asked for a stream.
        kwargs["_websearch_interception_converted_stream"] = True

    return kwargs
|
|
|
|
@classmethod
|
|
def from_config_yaml(
    cls, config: WebSearchInterceptionConfig
) -> "WebSearchInterceptionLogger":
    """
    Build a WebSearchInterceptionLogger from proxy config.yaml parameters.

    Args:
        config: Mapping read from
            ``litellm_settings.websearch_interception_params``. The keys
            ``enabled_providers`` and ``search_tool_name`` are consulted;
            both are optional.

    Returns:
        A configured WebSearchInterceptionLogger instance.

    Example:
        From proxy_config.yaml:
            litellm_settings:
              websearch_interception_params:
                enabled_providers: ["bedrock"]
                search_tool_name: "my-perplexity-search"

        Usage:
            config = litellm_settings.get("websearch_interception_params", {})
            logger = WebSearchInterceptionLogger.from_config_yaml(config)
    """
    raw_providers = config.get("enabled_providers", None)
    tool_name = config.get("search_tool_name", None)

    def _as_enum_or_str(name):
        # Known provider names become LlmProviders members; anything the
        # enum rejects is kept as the original string.
        try:
            return LlmProviders(name)
        except ValueError:
            return name

    providers: Optional[List[Union[LlmProviders, str]]] = None
    if raw_providers is not None:
        providers = [_as_enum_or_str(name) for name in raw_providers]

    return cls(enabled_providers=providers, search_tool_name=tool_name)
|
|
|
|
async def async_pre_request_hook(
    self, model: str, messages: List[Dict], kwargs: Dict
) -> Optional[Dict]:
    """
    Pre-request hook to convert native web search tools to LiteLLM standard.

    This hook is called before the API request is made, allowing us to:
    1. Detect native web search tools (web_search_20250305, etc.)
    2. Convert them to LiteLLM standard format (litellm_web_search)
    3. Convert stream=True to stream=False for interception

    This prevents providers like Bedrock from trying to execute web search
    natively (which fails), and ensures our agentic loop can intercept tool_use.

    Args:
        model: Model name (not consulted by this hook).
        messages: Request messages (not consulted by this hook).
        kwargs: Request kwargs; mutated in place when a conversion happens.

    Returns:
        Modified kwargs dict with converted tools, or None if no modifications needed
    """
    # Check if this request is for an enabled provider. ``litellm_params``
    # may be present but None, so guard before calling ``.get`` on it.
    custom_llm_provider = (kwargs.get("litellm_params") or {}).get(
        "custom_llm_provider", ""
    )

    verbose_logger.debug(
        f"WebSearchInterception: Pre-request hook called"
        f" - custom_llm_provider={custom_llm_provider}"
        f" - enabled_providers={self.enabled_providers or 'ALL'}"
    )

    if (
        self.enabled_providers is not None
        and custom_llm_provider not in self.enabled_providers
    ):
        verbose_logger.debug(
            f"WebSearchInterception: Skipping - provider {custom_llm_provider} not in {self.enabled_providers}"
        )
        return None

    # Check if request has tools
    tools = kwargs.get("tools")
    if not tools:
        return None

    # Check if any tool is a web search tool
    has_websearch = any(is_web_search_tool(t) for t in tools)
    if not has_websearch:
        return None

    verbose_logger.debug(
        f"WebSearchInterception: Pre-request hook triggered for provider={custom_llm_provider}"
    )

    # If the client sent an Anthropic-native web_search_* tool, mark the
    # request so the agentic loop emits native web_search_tool_result
    # blocks in the final response (for citations panels, etc.). The flag
    # is read by async_build_agentic_loop_plan; its
    # ``_websearch_interception`` prefix is what _prepare_followup_kwargs
    # filters on.
    if any(is_anthropic_native_web_search_tool(t) for t in tools):
        kwargs[WEBSEARCH_EMIT_NATIVE_BLOCKS_KEY] = True

    # Convert native web search tools to LiteLLM standard
    converted_tools = []
    for tool in tools:
        if is_web_search_tool(tool):
            standard_tool = get_litellm_web_search_tool()
            converted_tools.append(standard_tool)
            verbose_logger.debug(
                f"WebSearchInterception: Converted {tool.get('name', 'unknown')} "
                f"(type={tool.get('type', 'none')}) to {LITELLM_WEB_SEARCH_TOOL_NAME}"
            )
        else:
            converted_tools.append(tool)

    kwargs["tools"] = converted_tools
    verbose_logger.debug(
        f"WebSearchInterception: Tools after conversion: {[t.get('name') for t in converted_tools]}"
    )

    # Also convert here for direct callers that bypass the deployment hook.
    if kwargs.get("stream"):
        verbose_logger.debug(
            "WebSearchInterception: Converting stream=True to stream=False"
        )
        kwargs["stream"] = False
        kwargs["_websearch_interception_converted_stream"] = True

    return kwargs
|
|
|
|
async def async_should_run_agentic_loop(
    self,
    response: Any,
    model: str,
    messages: List[Dict],
    tools: Optional[List[Dict]],
    stream: bool,
    custom_llm_provider: str,
    kwargs: Dict,
) -> Tuple[bool, Dict]:
    """
    Check if WebSearch tool interception is needed for Anthropic Messages API.

    This is the legacy method for Anthropic-style responses.
    For chat completions, use async_should_run_chat_completion_agentic_loop instead.

    Returns:
        ``(False, {})`` when the provider is not enabled, the request has no
        web search tool, or the response contains no web search tool_use.
        Otherwise ``(True, info)`` where ``info`` carries ``tool_calls``,
        ``tool_type``, ``provider``, ``response_format`` and any
        ``thinking_blocks`` copied from the response content.
    """

    verbose_logger.debug(
        f"WebSearchInterception: Hook called! provider={custom_llm_provider}, stream={stream}"
    )
    verbose_logger.debug(f"WebSearchInterception: Response type: {type(response)}")

    # Check if provider should be intercepted
    # Note: custom_llm_provider is already normalized by get_llm_provider()
    # (e.g., "bedrock/invoke/..." -> "bedrock")
    if (
        self.enabled_providers is not None
        and custom_llm_provider not in self.enabled_providers
    ):
        verbose_logger.debug(
            f"WebSearchInterception: Skipping provider {custom_llm_provider} (not in enabled list: {self.enabled_providers})"
        )
        return False, {}

    # Check if tools include any web search tool (LiteLLM standard or native)
    has_websearch_tool = any(is_web_search_tool(t) for t in (tools or []))
    if not has_websearch_tool:
        verbose_logger.debug("WebSearchInterception: No web search tool in request")
        return False, {}

    # Detect WebSearch tool_use in response (Anthropic format)
    should_intercept, tool_calls = WebSearchTransformation.transform_request(
        response=response,
        stream=stream,
        response_format="anthropic",
    )

    if not should_intercept:
        verbose_logger.debug(
            "WebSearchInterception: No WebSearch tool_use detected in response"
        )
        return False, {}

    verbose_logger.debug(
        f"WebSearchInterception: Detected {len(tool_calls)} WebSearch tool call(s), executing agentic loop"
    )

    # Extract thinking blocks from response content.
    # When extended thinking is enabled, the model response includes
    # thinking/redacted_thinking blocks that must be preserved and
    # prepended to the follow-up assistant message.
    thinking_blocks: List[Dict] = []
    if isinstance(response, dict):
        # ``or []`` guards an explicit ``"content": None``, matching the
        # object branch below.
        content = response.get("content") or []
    else:
        content = getattr(response, "content", []) or []

    for block in content:
        if isinstance(block, dict):
            block_type = block.get("type")
        else:
            block_type = getattr(block, "type", None)

        if block_type in ("thinking", "redacted_thinking"):
            if isinstance(block, dict):
                thinking_blocks.append(block)
            else:
                # Convert object to dict using getattr so downstream code
                # only ever sees plain dicts.
                thinking_block_dict: Dict = {"type": block_type}
                if block_type == "thinking":
                    thinking_block_dict["thinking"] = getattr(block, "thinking", "")
                    thinking_block_dict["signature"] = getattr(
                        block, "signature", ""
                    )
                else:  # redacted_thinking
                    thinking_block_dict["data"] = getattr(block, "data", "")
                thinking_blocks.append(thinking_block_dict)

    if thinking_blocks:
        verbose_logger.debug(
            f"WebSearchInterception: Extracted {len(thinking_blocks)} thinking block(s) from response"
        )

    # Return tools dict with tool calls and thinking blocks
    tools_dict = {
        "tool_calls": tool_calls,
        "tool_type": "websearch",
        "provider": custom_llm_provider,
        "response_format": "anthropic",
        "thinking_blocks": thinking_blocks,
    }
    return True, tools_dict
|
|
|
|
async def async_should_run_chat_completion_agentic_loop(
    self,
    response: Any,
    model: str,
    messages: List[Dict],
    tools: Optional[List[Dict]],
    stream: bool,
    custom_llm_provider: str,
    kwargs: Dict,
) -> Tuple[bool, Dict]:
    """
    Decide whether the Chat Completions response needs WebSearch interception.

    OpenAI-format counterpart of async_should_run_agentic_loop.

    Returns:
        ``(False, {})`` when the provider is not enabled, no tool passes the
        chat-completion web search check, or the response carries no web
        search tool_calls; otherwise ``(True, info)`` with ``tool_calls``,
        ``tool_type``, ``provider`` and ``response_format``.
    """
    verbose_logger.debug(
        f"WebSearchInterception: Chat completion hook called! provider={custom_llm_provider}, stream={stream}"
    )
    verbose_logger.debug(f"WebSearchInterception: Response type: {type(response)}")

    # Gate 1: provider must be in the enabled list.
    provider_enabled = (
        self.enabled_providers is None
        or custom_llm_provider in self.enabled_providers
    )
    if not provider_enabled:
        verbose_logger.debug(
            f"WebSearchInterception: Skipping provider {custom_llm_provider} (not in enabled list: {self.enabled_providers})"
        )
        return False, {}

    # Gate 2: the request must carry a web search tool (strict check for
    # chat completions).
    request_tools = tools or []
    if not any(is_web_search_tool_chat_completion(t) for t in request_tools):
        verbose_logger.debug(
            "WebSearchInterception: No litellm_web_search tool in request"
        )
        return False, {}

    # Gate 3: the response must actually contain web search tool_calls
    # (OpenAI format).
    intercept, detected_calls = WebSearchTransformation.transform_request(
        response=response,
        stream=stream,
        response_format="openai",
    )
    if not intercept:
        verbose_logger.debug(
            "WebSearchInterception: No WebSearch tool_calls detected in response"
        )
        return False, {}

    verbose_logger.debug(
        f"WebSearchInterception: Detected {len(detected_calls)} WebSearch tool call(s), executing agentic loop"
    )

    return True, {
        "tool_calls": detected_calls,
        "tool_type": "websearch",
        "provider": custom_llm_provider,
        "response_format": "openai",
    }
|
|
|
|
async def async_run_agentic_loop(
    self,
    tools: Dict,
    model: str,
    messages: List[Dict],
    response: Any,
    anthropic_messages_provider_config: Any,
    anthropic_messages_optional_request_params: Dict,
    logging_obj: Any,
    stream: bool,
    kwargs: Dict,
) -> Any:
    """
    Run the WebSearch agentic loop for the Anthropic Messages API.

    Legacy entry point for Anthropic-style responses: unpacks the dict
    produced by async_should_run_agentic_loop and delegates to
    ``_execute_agentic_loop``. ``response`` and
    ``anthropic_messages_provider_config`` are accepted but not forwarded.
    """
    pending_calls = tools["tool_calls"]
    preserved_thinking = tools.get("thinking_blocks", [])

    verbose_logger.debug(
        f"WebSearchInterception: Executing agentic loop for {len(pending_calls)} search(es)"
    )

    final_response = await self._execute_agentic_loop(
        model=model,
        messages=messages,
        tool_calls=pending_calls,
        thinking_blocks=preserved_thinking,
        anthropic_messages_optional_request_params=anthropic_messages_optional_request_params,
        logging_obj=logging_obj,
        stream=stream,
        kwargs=kwargs,
    )
    return final_response
|
|
|
|
async def async_build_agentic_loop_plan(
    self,
    tools: Dict,
    model: str,
    messages: List[Dict],
    response: Any,
    anthropic_messages_provider_config: Any,
    anthropic_messages_optional_request_params: Dict,
    logging_obj: Any,
    stream: bool,
    kwargs: Dict,
) -> AgenticLoopPlan:
    """
    Build the typed agentic-loop plan for an Anthropic Messages request.

    Builds the follow-up request patch via
    ``_build_anthropic_request_patch`` and wraps it in an
    ``AgenticLoopPlan``. When the request was flagged with
    ``WEBSEARCH_EMIT_NATIVE_BLOCKS_KEY``, the native
    ``web_search_tool_result`` blocks are pre-built here and stored on the
    plan metadata for the post-hook to inject.
    """
    pending_calls = tools["tool_calls"]
    patch, search_responses = await self._build_anthropic_request_patch(
        model=model,
        messages=messages,
        tool_calls=pending_calls,
        thinking_blocks=tools.get("thinking_blocks", []),
        anthropic_messages_optional_request_params=anthropic_messages_optional_request_params,
        logging_obj=logging_obj,
        kwargs=kwargs,
    )

    plan_metadata: Dict[str, Any] = {
        "tool_type": "websearch",
        "response_format": "anthropic",
    }

    # Native blocks are built now, while the structured search responses
    # are still at hand.
    wants_native_blocks = kwargs.get(WEBSEARCH_EMIT_NATIVE_BLOCKS_KEY)
    if wants_native_blocks:
        plan_metadata[WEBSEARCH_NATIVE_BLOCKS_METADATA_KEY] = (
            self._build_native_result_blocks(
                tool_calls=pending_calls,
                structured_results=search_responses,
            )
        )

    return AgenticLoopPlan(
        run_agentic_loop=True,
        request_patch=patch,
        metadata=plan_metadata,
    )
|
|
|
|
async def async_post_agentic_loop_response_hook(
    self,
    response: Any,
    plan: AgenticLoopPlan,
    kwargs: Dict,
) -> Any:
    """
    Add Anthropic-native ``web_search_tool_result`` blocks to the final
    response when the plan metadata carries them.

    The blocks stored under ``WEBSEARCH_NATIVE_BLOCKS_METADATA_KEY`` are
    prepended to the response content via ``_inject_native_blocks``. When
    the key is absent or its value is empty, the response is returned
    untouched.
    """
    prebuilt_blocks = plan.metadata.get(WEBSEARCH_NATIVE_BLOCKS_METADATA_KEY)
    if prebuilt_blocks:
        return self._inject_native_blocks(response, prebuilt_blocks)
    return response
|
|
|
|
@staticmethod
|
|
def _build_native_result_blocks(
    tool_calls: List[Dict],
    structured_results: List[Optional[SearchResponse]],
) -> List[Dict[str, Any]]:
    """Return one ``web_search_tool_result`` block for each tool_call.

    Tool calls and structured results are matched by position. A tool_call
    with no result at its index is built with ``search_response=None``, and
    a missing or empty ``id`` becomes ``""``.
    """
    result_count = len(structured_results)
    return [
        WebSearchTransformation.build_web_search_tool_result_block(
            tool_use_id=call.get("id") or "",
            search_response=(
                structured_results[position] if position < result_count else None
            ),
        )
        for position, call in enumerate(tool_calls)
    ]
|
|
|
|
@staticmethod
|
|
def _inject_native_blocks(
|
|
response: Any, native_blocks: List[Dict[str, Any]]
|
|
) -> Any:
|
|
"""Prepend native blocks to response content, dict or object form."""
|
|
if not native_blocks:
|
|
return response
|
|
if isinstance(response, dict):
|
|
existing = response.get("content") or []
|
|
response["content"] = list(native_blocks) + list(existing)
|
|
return response
|
|
existing = getattr(response, "content", None) or []
|
|
try:
|
|
response.content = list(native_blocks) + list(existing)
|
|
except (AttributeError, TypeError):
|
|
# Object refused write — fall through and leave the response
|
|
# untouched rather than crash the request.
|
|
verbose_logger.debug(
|
|
"WebSearchInterception: could not inject native blocks into "
|
|
f"response of type {type(response).__name__}"
|
|
)
|
|
return response
|
|
|
|
async def async_run_chat_completion_agentic_loop(
    self,
    tools: Dict,
    model: str,
    messages: List[Dict],
    response: Any,
    optional_params: Dict,
    logging_obj: Any,
    stream: bool,
    kwargs: Dict,
) -> Any:
    """
    Run the WebSearch agentic loop for the Chat Completions API.

    OpenAI-format counterpart of async_run_agentic_loop: unpacks the dict
    produced by async_should_run_chat_completion_agentic_loop and delegates
    to ``_execute_chat_completion_agentic_loop``. ``response`` is accepted
    but not forwarded.
    """
    pending_calls = tools["tool_calls"]
    fmt = tools.get("response_format", "openai")

    verbose_logger.debug(
        f"WebSearchInterception: Executing chat completion agentic loop for {len(pending_calls)} search(es)"
    )

    final_response = await self._execute_chat_completion_agentic_loop(
        model=model,
        messages=messages,
        tool_calls=pending_calls,
        optional_params=optional_params,
        logging_obj=logging_obj,
        stream=stream,
        kwargs=kwargs,
        response_format=fmt,
    )
    return final_response
|
|
|
|
async def async_build_chat_completion_agentic_loop_plan(
    self,
    tools: Dict,
    model: str,
    messages: List[Dict],
    response: Any,
    optional_params: Dict,
    logging_obj: Any,
    stream: bool,
    kwargs: Dict,
) -> AgenticLoopPlan:
    """
    Build the typed agentic-loop plan for a Chat Completions request.

    Builds the follow-up request patch via
    ``_build_chat_completion_request_patch`` and wraps it in an
    ``AgenticLoopPlan`` whose metadata records the tool type and the
    response format (``"openai"`` unless ``tools`` says otherwise).
    """
    fmt = tools.get("response_format", "openai")
    patch = await self._build_chat_completion_request_patch(
        model=model,
        messages=messages,
        tool_calls=tools["tool_calls"],
        optional_params=optional_params,
        kwargs=kwargs,
        response_format=fmt,
    )
    plan_metadata = {"tool_type": "websearch", "response_format": fmt}
    return AgenticLoopPlan(
        run_agentic_loop=True,
        request_patch=patch,
        metadata=plan_metadata,
    )
|
|
|
|
@staticmethod
|
|
def _resolve_max_tokens(
|
|
optional_params: Dict,
|
|
kwargs: Dict,
|
|
) -> int:
|
|
"""Extract max_tokens and validate against thinking.budget_tokens.
|
|
|
|
Anthropic API requires ``max_tokens > thinking.budget_tokens``.
|
|
If the constraint is violated, auto-adjust to ``budget_tokens + 1024``.
|
|
"""
|
|
max_tokens: int = optional_params.get(
|
|
"max_tokens",
|
|
kwargs.get("max_tokens", 1024),
|
|
)
|
|
thinking_param = optional_params.get("thinking")
|
|
if thinking_param and isinstance(thinking_param, dict):
|
|
budget_tokens = thinking_param.get("budget_tokens")
|
|
if (
|
|
budget_tokens is not None
|
|
and isinstance(budget_tokens, (int, float))
|
|
and math.isfinite(budget_tokens)
|
|
and budget_tokens > 0
|
|
):
|
|
if max_tokens <= budget_tokens:
|
|
adjusted = math.ceil(budget_tokens) + 1024
|
|
verbose_logger.debug(
|
|
"WebSearchInterception: max_tokens=%s <= thinking.budget_tokens=%s, "
|
|
"adjusting to %s to satisfy Anthropic API constraint",
|
|
max_tokens,
|
|
budget_tokens,
|
|
adjusted,
|
|
)
|
|
max_tokens = adjusted
|
|
return max_tokens
|
|
|
|
@staticmethod
|
|
def _prepare_followup_kwargs(kwargs: Dict) -> Dict:
|
|
"""Build kwargs for the follow-up call, excluding internal keys.
|
|
|
|
``litellm_logging_obj`` MUST be excluded so the follow-up call creates
|
|
its own ``Logging`` instance via ``function_setup``. Reusing the
|
|
initial call's logging object triggers the dedup flag
|
|
(``has_logged_async_success``) which silently prevents the initial
|
|
call's spend from being recorded — the root cause of the
|
|
SpendLog / AWS billing mismatch.
|
|
"""
|
|
_internal_keys = {"litellm_logging_obj"}
|
|
return {
|
|
k: v
|
|
for k, v in kwargs.items()
|
|
if not k.startswith("_websearch_interception") and k not in _internal_keys
|
|
}
|
|
|
|
async def _execute_agentic_loop(
    self,
    model: str,
    messages: List[Dict],
    tool_calls: List[Dict],
    thinking_blocks: List[Dict],
    anthropic_messages_optional_request_params: Dict,
    logging_obj: Any,
    stream: bool,
    kwargs: Dict,
) -> Any:
    """Legacy path: run the searches, build the patch, make the follow-up call.

    ``max_tokens`` for the follow-up is taken from the request patch, then
    from the merged optional params, then from ``kwargs`` (default 1024).
    It is always removed from the optional params so it is passed exactly
    once.

    Raises:
        ValueError: if the request patch carries no follow-up messages.
    """
    patch, search_responses = await self._build_anthropic_request_patch(
        model=model,
        messages=messages,
        tool_calls=tool_calls,
        thinking_blocks=thinking_blocks,
        anthropic_messages_optional_request_params=anthropic_messages_optional_request_params,
        logging_obj=logging_obj,
        kwargs=kwargs,
    )
    if patch.messages is None:
        raise ValueError("WebSearchInterception: missing follow-up messages")

    # Patch values override the original optional params.
    followup_params = dict(anthropic_messages_optional_request_params)
    followup_params.update(patch.optional_params)

    # Pop unconditionally so max_tokens never appears twice in the call.
    params_max_tokens = followup_params.pop("max_tokens", None)
    max_tokens = patch.max_tokens
    if max_tokens is None:
        max_tokens = cast(Optional[int], params_max_tokens)
    if max_tokens is None:
        max_tokens = cast(int, kwargs.get("max_tokens", 1024))

    followup_response = await anthropic_messages.acreate(
        max_tokens=max_tokens,
        messages=patch.messages,
        model=patch.model or model,
        **followup_params,
        **patch.kwargs,
    )

    # The plan-based path injects native blocks in its post-hook; this
    # legacy path does the same injection inline.
    if not kwargs.get(WEBSEARCH_EMIT_NATIVE_BLOCKS_KEY):
        return followup_response

    prebuilt_blocks = self._build_native_result_blocks(
        tool_calls=tool_calls,
        structured_results=search_responses,
    )
    return self._inject_native_blocks(followup_response, prebuilt_blocks)
|
|
|
|
async def _build_anthropic_request_patch(
    self,
    model: str,
    messages: List[Dict],
    tool_calls: List[Dict],
    thinking_blocks: List[Dict],
    anthropic_messages_optional_request_params: Dict,
    logging_obj: Any,
    kwargs: Dict,
) -> Tuple[AgenticLoopRequestPatch, List[Optional[SearchResponse]]]:
    """
    Execute litellm.search() and build follow-up request patch.

    Returns the patch alongside the parallel list of structured
    ``SearchResponse`` objects (one per tool_call, ``None`` when the
    search failed or the tool_call had no query). The caller uses these
    to optionally build Anthropic-native ``web_search_tool_result``
    content blocks for the final response.

    Args:
        model: Fallback model name for the patch.
        messages: Conversation so far; the follow-up messages are appended
            to a copy of this list.
        tool_calls: Tool_use blocks; each block's ``input["query"]`` is
            searched. A block without a usable query gets a placeholder.
        thinking_blocks: Forwarded to ``WebSearchTransformation``.
        anthropic_messages_optional_request_params: Optional request params;
            copied into the patch without ``max_tokens``.
        logging_obj: When not ``None``, its
            ``model_call_details["agentic_loop_params"]["model"]`` overrides
            ``model`` in the patch.
        kwargs: Per-request kwargs; filtered by ``_prepare_followup_kwargs``.
    """

    # Extract search queries from tool_use blocks. Access is defensive: an
    # exception raised mid-loop would abandon the coroutines already queued
    # without awaiting them.
    search_tasks = []
    for tool_call in tool_calls:
        tool_input = tool_call.get("input")
        query = tool_input.get("query") if isinstance(tool_input, dict) else None
        if query:
            verbose_logger.debug(
                f"WebSearchInterception: Queuing search for query='{query}'"
            )
            search_tasks.append(self._execute_search(query))
        else:
            verbose_logger.debug(
                f"WebSearchInterception: Tool call {tool_call.get('id')} has no query"
            )
            # Add empty result for tools without query
            search_tasks.append(self._create_empty_search_result())

    # Execute searches in parallel
    verbose_logger.debug(
        f"WebSearchInterception: Executing {len(search_tasks)} search(es) in parallel"
    )
    search_results = await asyncio.gather(*search_tasks, return_exceptions=True)

    # Split the gathered (text, structured) tuples into two parallel lists.
    # The text list feeds the follow-up model call; the structured list
    # is returned to the caller for native-block emission.
    final_search_results: List[str] = []
    structured_results: List[Optional[SearchResponse]] = []
    for i, result in enumerate(search_results):
        # BaseException, not Exception: gather(return_exceptions=True) can
        # also hand back asyncio.CancelledError, which is not an Exception.
        if isinstance(result, BaseException):
            verbose_logger.error(
                f"WebSearchInterception: Search {i} failed with error: {str(result)}"
            )
            final_search_results.append(f"Search failed: {str(result)}")
            structured_results.append(None)
        elif isinstance(result, tuple) and len(result) == 2:
            text_value, structured_value = result
            final_search_results.append(
                text_value if isinstance(text_value, str) else str(text_value)
            )
            structured_results.append(
                structured_value
                if isinstance(structured_value, SearchResponse)
                else None
            )
        else:
            # Defensive: legacy callers / unexpected shape — preserve text,
            # drop structure.
            verbose_logger.debug(
                f"WebSearchInterception: Unexpected result type {type(result)} at index {i}"
            )
            final_search_results.append(str(result))
            structured_results.append(None)

    # Build assistant and user messages using transformation
    assistant_message, user_message = WebSearchTransformation.transform_response(
        tool_calls=tool_calls,
        search_results=final_search_results,
        thinking_blocks=thinking_blocks,
    )

    follow_up_messages = messages + [assistant_message, cast(Dict, user_message)]

    # Correlation context for structured logging
    _call_id = getattr(logging_obj, "litellm_call_id", None) or kwargs.get(
        "litellm_call_id", "unknown"
    )

    max_tokens = self._resolve_max_tokens(
        anthropic_messages_optional_request_params, kwargs
    )

    verbose_logger.debug(
        f"WebSearchInterception: Using max_tokens={max_tokens} for follow-up request"
    )

    optional_params_without_max_tokens = {
        k: v
        for k, v in anthropic_messages_optional_request_params.items()
        if k != "max_tokens"
    }
    kwargs_for_followup = self._prepare_followup_kwargs(kwargs)

    # Default to the caller's model; the logging object's agentic-loop
    # params take precedence when available.
    full_model_name = model
    if logging_obj is not None:
        agentic_params = (
            logging_obj.model_call_details.get("agentic_loop_params") or {}
        )
        full_model_name = agentic_params.get("model", model)

    verbose_logger.debug(
        "WebSearchInterception: Built anthropic request patch "
        "[call_id=%s model=%s messages=%d searches=%d]",
        _call_id,
        full_model_name,
        len(follow_up_messages),
        len(final_search_results),
    )
    patch = AgenticLoopRequestPatch(
        model=full_model_name,
        messages=follow_up_messages,
        max_tokens=max_tokens,
        optional_params=optional_params_without_max_tokens,
        kwargs=kwargs_for_followup,
    )
    return patch, structured_results
|
|
|
|
async def _execute_search(self, query: str) -> Tuple[str, Optional[SearchResponse]]:
    """
    Execute a single web search using router's search tools.

    The search provider is resolved in this order:

    1. the router search tool whose ``search_tool_name`` equals
       ``self.search_tool_name`` (when that attribute is set),
    2. the router's first configured search tool,
    3. ``"perplexity"`` when there is no router, no search tools, or the
       selected tool declares no ``search_provider``.

    Args:
        query: The search query string.

    Returns:
        A ``(text, structured)`` tuple: the text formatted by
        ``WebSearchTransformation.format_search_response`` (fed back to the
        model in the follow-up call) and the value returned by
        ``litellm.asearch`` (preserved so callers can build Anthropic-native
        ``web_search_tool_result`` blocks).

    Raises:
        Exception: Any failure is logged and re-raised. This method never
            returns ``None`` for the structured value on failure; callers
            that gather with ``return_exceptions=True`` substitute it.
    """
    try:
        # Import router from proxy_server
        try:
            from litellm.proxy.proxy_server import llm_router
        except ImportError:
            verbose_logger.debug(
                "WebSearchInterception: Could not import llm_router from proxy_server, "
                "falling back to direct litellm.asearch() with perplexity"
            )
            llm_router = None

        # Determine search provider from router's search_tools
        search_provider: Optional[str] = None
        if llm_router is not None and hasattr(llm_router, "search_tools"):
            # `or []`: a router whose search_tools is None behaves like one
            # with no tools, so the perplexity fallback below still applies.
            router_search_tools = llm_router.search_tools or []

            if self.search_tool_name:
                # Find specific search tool by name (first match wins)
                search_tool = next(
                    (
                        tool
                        for tool in router_search_tools
                        if tool.get("search_tool_name") == self.search_tool_name
                    ),
                    None,
                )
                if search_tool is not None:
                    # `or {}`: tolerate an explicit litellm_params=None
                    search_provider = (
                        search_tool.get("litellm_params") or {}
                    ).get("search_provider")
                    verbose_logger.debug(
                        f"WebSearchInterception: Found search tool '{self.search_tool_name}' "
                        f"with provider '{search_provider}'"
                    )
                else:
                    verbose_logger.debug(
                        f"WebSearchInterception: Search tool '{self.search_tool_name}' not found in router, "
                        "falling back to first available or perplexity"
                    )

            # If no specific tool or not found, use first available
            if not search_provider and router_search_tools:
                first_tool = router_search_tools[0]
                search_provider = (first_tool.get("litellm_params") or {}).get(
                    "search_provider"
                )
                verbose_logger.debug(
                    f"WebSearchInterception: Using first available search tool with provider '{search_provider}'"
                )

        # Fallback to perplexity if no router or no search tools configured
        if not search_provider:
            search_provider = "perplexity"
            verbose_logger.debug(
                "WebSearchInterception: No search tools configured in router, "
                f"using default provider '{search_provider}'"
            )

        verbose_logger.debug(
            f"WebSearchInterception: Executing search for '{query}' using provider '{search_provider}'"
        )
        result = await litellm.asearch(query=query, search_provider=search_provider)

        # Format using transformation function
        search_result_text = WebSearchTransformation.format_search_response(result)

        verbose_logger.debug(
            f"WebSearchInterception: Search completed for '{query}', got {len(search_result_text)} chars"
        )
        return search_result_text, result
    except Exception as e:
        verbose_logger.error(
            f"WebSearchInterception: Search failed for '{query}': {str(e)}"
        )
        raise
|
|
|
|
async def _execute_chat_completion_agentic_loop(
    self,
    model: str,
    messages: List[Dict],
    tool_calls: List[Dict],
    optional_params: Dict,
    logging_obj: Any,
    stream: bool,
    kwargs: Dict,
    response_format: str = "openai",
) -> Any:
    """
    Legacy path: execute search + build patch + run follow-up call.

    Builds the follow-up request via ``_build_chat_completion_request_patch``
    and issues it through ``litellm.acompletion``.

    Args:
        model: Model name used when the patch does not carry its own.
        messages: Conversation so far; forwarded to the patch builder.
        tool_calls: Web-search tool calls; forwarded to the patch builder.
        optional_params: Optional params of the original call; forwarded to
            the patch builder, which returns the cleaned set actually sent.
        logging_obj: Not read by this method.
        stream: Not read by this method.
        kwargs: Per-request kwargs; forwarded to the patch builder.
        response_format: Message format passed to the patch builder.

    Returns:
        The response of the follow-up ``litellm.acompletion`` call.

    Raises:
        ValueError: If the built patch carries no follow-up messages.
    """
    request_patch = await self._build_chat_completion_request_patch(
        model=model,
        messages=messages,
        tool_calls=tool_calls,
        optional_params=optional_params,
        kwargs=kwargs,
        response_format=response_format,
    )
    if request_patch.messages is None:
        raise ValueError("WebSearchInterception: missing follow-up messages")

    # Send exactly the patch's optional params. The builder derives them
    # from `optional_params` and deliberately drops some keys (extra_body,
    # model_alias_map, stream_response, custom_prompt_dict, a None `tools`).
    # Overlaying the patch on top of the raw `optional_params` would put
    # every dropped key straight back into the follow-up request.
    params = dict(request_patch.optional_params)

    return await litellm.acompletion(
        model=request_patch.model or model,
        messages=request_patch.messages,
        **params,
        **request_patch.kwargs,
    )
|
|
|
|
async def _build_chat_completion_request_patch(
    self,
    model: str,
    messages: List[Dict],
    tool_calls: List[Dict],
    optional_params: Dict,
    kwargs: Dict,
    response_format: str = "openai",
) -> AgenticLoopRequestPatch:
    """
    Execute litellm.search() and build chat-completion rerun patch.

    Args:
        model: Model name; prefixed with ``kwargs["custom_llm_provider"]``
            when that is set and ``model`` carries no provider prefix.
        messages: Conversation so far; the follow-up messages are appended
            to a copy of this list.
        tool_calls: Tool calls to search for. The query is read from
            Anthropic-style ``input["query"]`` or from OpenAI-style
            ``function["arguments"]`` (a dict or a JSON-encoded string).
        optional_params: Optional params of the original call; copied into
            the patch minus internal keys.
        kwargs: Per-request kwargs; copied into the patch minus internal keys.
        response_format: ``"openai"`` yields a list of tool messages; any
            other value is treated as the single-user-message format.

    Returns:
        The patch carrying model, follow-up messages, cleaned optional
        params and cleaned kwargs.
    """
    # Local import: only needed to decode string-encoded tool arguments.
    import json

    # Extract search queries from tool_calls
    search_tasks = []
    for tool_call in tool_calls:
        # Handle both Anthropic-style input and OpenAI-style function.arguments
        query = None
        tool_input = tool_call.get("input")
        if isinstance(tool_input, dict):
            query = tool_input.get("query")
        elif "function" in tool_call:
            func = tool_call["function"]
            if isinstance(func, dict):
                args = func.get("arguments", {})
                if isinstance(args, str):
                    # OpenAI tool calls carry arguments as a JSON string;
                    # decode it so the query is not silently dropped.
                    try:
                        args = json.loads(args)
                    except ValueError:
                        verbose_logger.debug(
                            f"WebSearchInterception: Tool call {tool_call.get('id')} has non-JSON arguments"
                        )
                        args = {}
                if isinstance(args, dict):
                    query = args.get("query")

        if query:
            verbose_logger.debug(
                f"WebSearchInterception: Queuing search for query='{query}'"
            )
            search_tasks.append(self._execute_search(query))
        else:
            verbose_logger.debug(
                f"WebSearchInterception: Tool call {tool_call.get('id')} has no query"
            )
            # Add empty result for tools without query
            search_tasks.append(self._create_empty_search_result())

    # Execute searches in parallel
    verbose_logger.debug(
        f"WebSearchInterception: Executing {len(search_tasks)} search(es) in parallel"
    )
    search_results = await asyncio.gather(*search_tasks, return_exceptions=True)

    # Chat-completion path only needs text — OpenAI tool_result format
    # has no equivalent of Anthropic's web_search_tool_result block.
    final_search_results: List[str] = []
    for i, result in enumerate(search_results):
        # BaseException, not Exception: gather(return_exceptions=True) can
        # also hand back asyncio.CancelledError, which is not an Exception.
        if isinstance(result, BaseException):
            verbose_logger.error(
                f"WebSearchInterception: Search {i} failed with error: {str(result)}"
            )
            final_search_results.append(f"Search failed: {str(result)}")
        elif isinstance(result, tuple) and len(result) == 2:
            text_value, _ = result
            final_search_results.append(
                text_value if isinstance(text_value, str) else str(text_value)
            )
        else:
            verbose_logger.debug(
                f"WebSearchInterception: Unexpected result type {type(result)} at index {i}"
            )
            final_search_results.append(str(result))

    # Build assistant and tool messages using transformation
    (
        assistant_message,
        tool_messages_or_user,
    ) = WebSearchTransformation.transform_response(
        tool_calls=tool_calls,
        search_results=final_search_results,
        response_format=response_format,
    )

    # Make follow-up request with search results
    # For OpenAI format, tool_messages_or_user is a list of tool messages
    if response_format == "openai":
        follow_up_messages = (
            messages + [assistant_message] + cast(List[Dict], tool_messages_or_user)
        )
    else:
        # For Anthropic format (shouldn't happen in this method, but handle it)
        follow_up_messages = messages + [
            assistant_message,
            cast(Dict, tool_messages_or_user),
        ]

    verbose_logger.debug(
        "WebSearchInterception: Making follow-up chat completion request with search results"
    )
    verbose_logger.debug(
        f"WebSearchInterception: Follow-up messages count: {len(follow_up_messages)}"
    )

    # Remove internal parameters that shouldn't be passed to follow-up request
    internal_params = {
        "_websearch_interception",
        "acompletion",
        "litellm_logging_obj",
        "custom_llm_provider",
        "model_alias_map",
        "stream_response",
        "custom_prompt_dict",
    }
    kwargs_for_followup = {
        k: v
        for k, v in kwargs.items()
        if not k.startswith("_websearch_interception") and k not in internal_params
    }

    # Prefix the model with its provider only when a provider is actually
    # set; a present-but-None value must not reach str.startswith().
    full_model_name = model
    custom_llm_provider = kwargs.get("custom_llm_provider")
    if (
        custom_llm_provider
        and "/" not in model
        and not model.startswith(custom_llm_provider)
    ):
        full_model_name = f"{custom_llm_provider}/{model}"

    verbose_logger.debug(
        "WebSearchInterception: Built chat completion request patch model=%s messages=%d",
        full_model_name,
        len(follow_up_messages),
    )

    # Drop keys that must not be forwarded; `tools` is kept only when it
    # holds an actual value.
    excluded_optional_keys = {
        "tools",
        "extra_body",
        "model_alias_map",
        "stream_response",
        "custom_prompt_dict",
    }
    tools_param = optional_params.get("tools")
    optional_params_clean = {
        k: v for k, v in optional_params.items() if k not in excluded_optional_keys
    }
    if tools_param is not None:
        optional_params_clean["tools"] = tools_param

    return AgenticLoopRequestPatch(
        model=full_model_name,
        messages=follow_up_messages,
        optional_params=optional_params_clean,
        kwargs=kwargs_for_followup,
    )
|
|
|
|
async def _create_empty_search_result(
    self,
) -> Tuple[str, Optional[SearchResponse]]:
    """
    Produce the placeholder result for a tool call that carried no query.

    Returns:
        A ``(text, structured)`` pair shaped like a search result: a fixed
        explanatory message and ``None`` in place of the structured response.
    """
    placeholder_text = "No search query provided"
    structured_response = None
    return placeholder_text, structured_response
|
|
|
|
@staticmethod
def initialize_from_proxy_config(
    litellm_settings: Dict[str, Any],
    callback_specific_params: Dict[str, Any],
) -> "WebSearchInterceptionLogger":
    """
    Build a WebSearchInterceptionLogger from proxy configuration.

    The configuration is looked up in two places, in this order:

    1. ``litellm_settings["websearch_interception_params"]``, used as-is
       whenever the key is present;
    2. ``callback_specific_params["websearch_interception"]``, used only
       when it is a dict.

    When neither applies, an empty configuration is used.

    Args:
        litellm_settings: Dictionary containing litellm_settings from proxy_config.yaml
        callback_specific_params: Dictionary containing callback-specific parameters

    Returns:
        The instance produced by ``WebSearchInterceptionLogger.from_config_yaml``
        for the resolved configuration.

    Example:
        From callback_utils.py:
            websearch_obj = WebSearchInterceptionLogger.initialize_from_proxy_config(
                litellm_settings=litellm_settings,
                callback_specific_params=callback_specific_params
            )
    """
    resolved_config: WebSearchInterceptionConfig = {}

    if "websearch_interception_params" in litellm_settings:
        # The settings entry has priority and is taken without type checks.
        resolved_config = litellm_settings["websearch_interception_params"]
    else:
        callback_entry = callback_specific_params.get("websearch_interception")
        if isinstance(callback_entry, dict):
            resolved_config = cast(WebSearchInterceptionConfig, callback_entry)

    # Delegate construction to the config-based classmethod.
    return WebSearchInterceptionLogger.from_config_yaml(resolved_config)
|