feat: add timeout configuration for outgoing HTTP requests and update documentation

This commit is contained in:
Hans Aschauer 2026-05-18 07:33:07 +02:00
parent 3ebd7c5e4a
commit 8288787b4e
4 changed files with 99 additions and 20 deletions

View file

@ -3,6 +3,10 @@
# URL of the SearxNG instance to query. # URL of the SearxNG instance to query.
SEARXNG_BASE_URL=http://localhost:8080 SEARXNG_BASE_URL=http://localhost:8080
# Default network timeout (seconds) for outgoing HTTP requests.
# Tools may override this per call via timeout_seconds.
#SEARXNG_REQUEST_TIMEOUT_SECONDS=30
# Set to true to run fetched pages through the prompt-guard honeypot before # Set to true to run fetched pages through the prompt-guard honeypot before
# returning them to the agent. Requires PROMPT_GUARD_* settings below. # returning them to the agent. Requires PROMPT_GUARD_* settings below.
#SEARXNG_GUARD_ENABLED=false #SEARXNG_GUARD_ENABLED=false

View file

@ -8,8 +8,8 @@ Built with [FastMCP](https://github.com/prefecthq/fastmcp).
| Name | Type | Description | | Name | Type | Description |
|------|------|-------------| |------|------|-------------|
| `search` | Tool | Query the web via SearxNG. Supports categories, engines, language, time range, safe search, and pagination. | | `search` | Tool | Query the web via SearxNG. Supports categories, engines, language, time range, safe search, pagination, and per-call timeout overrides. |
| `fetch` | Tool | Fetch a URL and extract its main content (strips ads, navigation, boilerplate). Returns a preview with `total_chars` and `truncated` metadata. Supports `start`/`end` slicing, `max_chars`, multiple output formats, and in-memory caching. | | `fetch` | Tool | Fetch a URL and extract its main content (strips ads, navigation, boilerplate). Returns a preview with `total_chars` and `truncated` metadata. Supports `start`/`end` slicing, `max_chars`, multiple output formats, in-memory caching, and per-call timeout overrides. |
| `web://fetch{?url,...}` | Resource | Read an arbitrary character slice of a fetched page without going through the tool call. Useful for paging through large documents. | | `web://fetch{?url,...}` | Resource | Read an arbitrary character slice of a fetched page without going through the tool call. Useful for paging through large documents. |
## Requirements ## Requirements
@ -37,14 +37,20 @@ cp .env.example .env
`.env`: `.env`:
```dotenv ```dotenv
SEARXNG_BASE_URL=http://localhost:8080 SEARXNG_BASE_URL=http://localhost:8080
SEARXNG_REQUEST_TIMEOUT_SECONDS=30
``` ```
All settings can also be provided as environment variables with the `SEARXNG_` prefix: All settings can also be provided as environment variables with the `SEARXNG_` prefix:
```bash ```bash
export SEARXNG_BASE_URL=http://localhost:8080 export SEARXNG_BASE_URL=http://localhost:8080
export SEARXNG_REQUEST_TIMEOUT_SECONDS=30
``` ```
Timeout behavior:
- `SEARXNG_REQUEST_TIMEOUT_SECONDS` sets the default timeout, in seconds, for outgoing HTTP calls (default: 30).
- `search`, `fetch`, `fetch_raw`, and `web://fetch` accept an optional `timeout_seconds` parameter that overrides the default for a single call.
## Starting the server ## Starting the server
### stdio (default — for use with MCP clients like Claude Desktop) ### stdio (default — for use with MCP clients like Claude Desktop)

View file

@ -13,6 +13,7 @@ async def search(
pageno: int = 1, pageno: int = 1,
time_range: str | None = None, time_range: str | None = None,
safesearch: int = 0, safesearch: int = 0,
timeout_seconds: float = 30.0,
) -> dict[str, Any]: ) -> dict[str, Any]:
"""Send a search request to a SearxNG instance and return parsed JSON.""" """Send a search request to a SearxNG instance and return parsed JSON."""
params: dict[str, Any] = { params: dict[str, Any] = {
@ -30,7 +31,12 @@ async def search(
if time_range: if time_range:
params["time_range"] = time_range params["time_range"] = time_range
async with httpx.AsyncClient() as client: try:
response = await client.get(f"{base_url.rstrip('/')}/search", params=params) async with httpx.AsyncClient(timeout=timeout_seconds) as client:
response.raise_for_status() response = await client.get(f"{base_url.rstrip('/')}/search", params=params)
return response.json() response.raise_for_status()
return response.json()
except httpx.TimeoutException as exc:
raise ValueError(
f"SearxNG request timed out after {timeout_seconds:.2f}s"
) from exc

View file

@ -20,6 +20,8 @@ class Settings(BaseSettings):
model_config = SettingsConfigDict(env_prefix="SEARXNG_", env_file=".env", env_file_encoding="utf-8", extra="ignore") model_config = SettingsConfigDict(env_prefix="SEARXNG_", env_file=".env", env_file_encoding="utf-8", extra="ignore")
base_url: str = "http://localhost:8080" base_url: str = "http://localhost:8080"
request_timeout_seconds: float = 30.0
"""Default network timeout (seconds) for outgoing HTTP requests."""
guard_enabled: bool = False guard_enabled: bool = False
"""Run fetched content through the prompt-guard honeypot before returning it. """Run fetched content through the prompt-guard honeypot before returning it.
Requires PROMPT_GUARD_* settings to be configured.""" Requires PROMPT_GUARD_* settings to be configured."""
@ -49,6 +51,7 @@ async def _fetch_and_extract(
include_images: bool = False, include_images: bool = False,
include_links: bool = False, include_links: bool = False,
use_cache: bool = True, use_cache: bool = True,
timeout_seconds: float | None = None,
) -> str: ) -> str:
"""Shared fetch+extract logic used by both the tool and resource.""" """Shared fetch+extract logic used by both the tool and resource."""
cache_key = (url, output_format, include_tables, include_images, include_links) cache_key = (url, output_format, include_tables, include_images, include_links)
@ -56,10 +59,20 @@ async def _fetch_and_extract(
if use_cache and cache_key in _cache: if use_cache and cache_key in _cache:
return _cache[cache_key] return _cache[cache_key]
effective_timeout = timeout_seconds if timeout_seconds is not None else settings.request_timeout_seconds
try:
async with httpx.AsyncClient(
headers={"User-Agent": "searxng-mcp/1.0", "Accept": "*/*"},
follow_redirects=True,
timeout=effective_timeout,
) as client:
response = await client.get(url)
response.raise_for_status()
downloaded = response.text
except httpx.TimeoutException as exc:
raise ValueError(f"Request timed out after {effective_timeout:.2f}s for URL: {url}") from exc
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
downloaded = await loop.run_in_executor(None, trafilatura.fetch_url, url)
if not downloaded:
raise ValueError(f"Failed to fetch URL: {url}")
result = await loop.run_in_executor( result = await loop.run_in_executor(
None, None,
lambda: trafilatura.extract( lambda: trafilatura.extract(
@ -120,6 +133,16 @@ async def search(
Literal[0, 1, 2], Literal[0, 1, 2],
Field(description="Safe search level: 0=off, 1=moderate, 2=strict."), Field(description="Safe search level: 0=off, 1=moderate, 2=strict."),
] = 0, ] = 0,
timeout_seconds: Annotated[
float | None,
Field(
description=(
"Request timeout in seconds for this call. "
"If omitted, uses SEARXNG_REQUEST_TIMEOUT_SECONDS."
),
gt=0,
),
] = None,
) -> list[dict]: ) -> list[dict]:
"""Search the web via SearxNG and return a list of results. """Search the web via SearxNG and return a list of results.
@ -135,6 +158,7 @@ async def search(
pageno=pageno, pageno=pageno,
time_range=time_range, time_range=time_range,
safesearch=safesearch, safesearch=safesearch,
timeout_seconds=timeout_seconds if timeout_seconds is not None else settings.request_timeout_seconds,
) )
results = data.get("results", []) results = data.get("results", [])
return [ return [
@ -184,6 +208,16 @@ async def fetch(
bool, bool,
Field(description="Return cached content if available. Set to false to force a fresh download."), Field(description="Return cached content if available. Set to false to force a fresh download."),
] = True, ] = True,
timeout_seconds: Annotated[
float | None,
Field(
description=(
"Request timeout in seconds for downloading this URL. "
"If omitted, uses SEARXNG_REQUEST_TIMEOUT_SECONDS."
),
gt=0,
),
] = None,
) -> dict: ) -> dict:
"""Fetch a URL and extract its main content, stripping navigation, ads, and boilerplate. """Fetch a URL and extract its main content, stripping navigation, ads, and boilerplate.
@ -191,7 +225,15 @@ async def fetch(
If truncated, use start/end to page through the full content, or read the resource If truncated, use start/end to page through the full content, or read the resource
web://fetch?url=<url>&start=<n>&end=<m> for specific slices. web://fetch?url=<url>&start=<n>&end=<m> for specific slices.
""" """
content = await _fetch_and_extract(url, output_format, include_tables, include_images, include_links, use_cache) content = await _fetch_and_extract(
url,
output_format,
include_tables,
include_images,
include_links,
use_cache,
timeout_seconds,
)
total_chars = len(content) total_chars = len(content)
# Apply explicit start/end slice first (takes priority over max_chars windowing) # Apply explicit start/end slice first (takes priority over max_chars windowing)
@ -218,7 +260,7 @@ async def fetch(
@mcp.resource( @mcp.resource(
"web://fetch{?url,start,end,output_format,include_links,include_tables,include_images,use_cache}", "web://fetch{?url,start,end,output_format,include_links,include_tables,include_images,use_cache,timeout_seconds}",
mime_type="text/markdown", mime_type="text/markdown",
) )
async def fetch_slice( async def fetch_slice(
@ -230,6 +272,7 @@ async def fetch_slice(
include_tables: bool = True, include_tables: bool = True,
include_images: bool = False, include_images: bool = False,
use_cache: bool = True, use_cache: bool = True,
timeout_seconds: float | None = None,
) -> str: ) -> str:
"""Fetch a URL and return a character slice of the extracted content. """Fetch a URL and return a character slice of the extracted content.
@ -238,7 +281,15 @@ async def fetch_slice(
""" """
if not url: if not url:
raise ValueError("url parameter is required") raise ValueError("url parameter is required")
content = await _fetch_and_extract(url, output_format, include_tables, include_images, include_links, use_cache) content = await _fetch_and_extract(
url,
output_format,
include_tables,
include_images,
include_links,
use_cache,
timeout_seconds,
)
if end > 0: if end > 0:
return content[start:end] return content[start:end]
return content[start:] return content[start:]
@ -249,6 +300,16 @@ async def fetch_raw(
url: Annotated[str, Field(description="URL to fetch. Returns raw text content without HTML extraction.")], url: Annotated[str, Field(description="URL to fetch. Returns raw text content without HTML extraction.")],
encoding: Annotated[str, Field(description="Text encoding to decode the response bytes. Default 'utf-8'.")] = "utf-8", encoding: Annotated[str, Field(description="Text encoding to decode the response bytes. Default 'utf-8'.")] = "utf-8",
max_bytes: Annotated[int, Field(description="Maximum bytes to return. 0 = no limit.", ge=0)] = 0, max_bytes: Annotated[int, Field(description="Maximum bytes to return. 0 = no limit.", ge=0)] = 0,
timeout_seconds: Annotated[
float | None,
Field(
description=(
"Request timeout in seconds for this call. "
"If omitted, uses SEARXNG_REQUEST_TIMEOUT_SECONDS."
),
gt=0,
),
] = None,
) -> dict: ) -> dict:
"""Fetch a URL and return the raw response body as text, bypassing trafilatura extraction. """Fetch a URL and return the raw response body as text, bypassing trafilatura extraction.
@ -259,17 +320,19 @@ async def fetch_raw(
Returns a dict with: content (str), status_code (int), content_type (str), total_bytes (int). Returns a dict with: content (str), status_code (int), content_type (str), total_bytes (int).
""" """
def _do_fetch() -> tuple[bytes, int, str]: effective_timeout = timeout_seconds if timeout_seconds is not None else settings.request_timeout_seconds
with httpx.Client( try:
async with httpx.AsyncClient(
headers={"User-Agent": "searxng-mcp/1.0", "Accept": "*/*"}, headers={"User-Agent": "searxng-mcp/1.0", "Accept": "*/*"},
follow_redirects=True, follow_redirects=True,
timeout=30.0, timeout=effective_timeout,
) as client: ) as client:
response = client.get(url) response = await client.get(url)
return response.content, response.status_code, response.headers.get("content-type", "") raw = response.content
status_code = response.status_code
loop = asyncio.get_event_loop() content_type = response.headers.get("content-type", "")
raw, status_code, content_type = await loop.run_in_executor(None, _do_fetch) except httpx.TimeoutException as exc:
raise ValueError(f"Request timed out after {effective_timeout:.2f}s for URL: {url}") from exc
total_bytes = len(raw) total_bytes = len(raw)
if max_bytes > 0: if max_bytes > 0: