feat: add timeout configuration for outgoing HTTP requests and update documentation

2026-05-18 07:33:07 +02:00 · 2026-05-18 07:33:07 +02:00 · 8288787b4e
commit 8288787b4e
parent 3ebd7c5e4a
4 changed files with 99 additions and 20 deletions
--- a/.env.example
+++ b/.env.example
@ -3,6 +3,10 @@
 # URL of the SearxNG instance to query.
 SEARXNG_BASE_URL=http://localhost:8080

+# Default network timeout (seconds) for outgoing HTTP requests.
+# Individual tool calls may override this via their timeout_seconds parameter.
+#SEARXNG_REQUEST_TIMEOUT_SECONDS=30
+
 # Set to true to run fetched pages through the prompt-guard honeypot before
 # returning them to the agent. Requires PROMPT_GUARD_* settings below.
 #SEARXNG_GUARD_ENABLED=false
--- a/README.md
+++ b/README.md
@ -8,8 +8,8 @@ Built with [FastMCP](https://github.com/prefecthq/fastmcp).

 | Name | Type | Description |
 |------|------|-------------|
-| `search` | Tool | Query the web via SearxNG. Supports categories, engines, language, time range, safe search, and pagination. |
-| `fetch` | Tool | Fetch a URL and extract its main content (strips ads, navigation, boilerplate). Returns a preview with `total_chars` and `truncated` metadata. Supports `start`/`end` slicing, `max_chars`, multiple output formats, and in-memory caching. |
+| `search` | Tool | Query the web via SearxNG. Supports categories, engines, language, time range, safe search, pagination, and per-call timeout overrides. |
+| `fetch` | Tool | Fetch a URL and extract its main content (strips ads, navigation, boilerplate). Returns a preview with `total_chars` and `truncated` metadata. Supports `start`/`end` slicing, `max_chars`, multiple output formats, in-memory caching, and per-call timeout overrides. |
 | `web://fetch{?url,...}` | Resource | Read an arbitrary character slice of a fetched page without going through the tool call. Useful for paging through large documents. |

 ## Requirements
@ -37,14 +37,20 @@ cp .env.example .env
 `.env`:
 ```dotenv
 SEARXNG_BASE_URL=http://localhost:8080
+SEARXNG_REQUEST_TIMEOUT_SECONDS=30
 ```

 All settings can also be provided as environment variables with the `SEARXNG_` prefix:

 ```bash
 export SEARXNG_BASE_URL=http://localhost:8080
+export SEARXNG_REQUEST_TIMEOUT_SECONDS=30
 ```

+Timeout behavior:
+- `SEARXNG_REQUEST_TIMEOUT_SECONDS` sets the default timeout, in seconds, for outgoing HTTP requests.
+- `search`, `fetch`, `fetch_raw`, and `web://fetch` accept an optional `timeout_seconds` parameter that overrides the default for a single call.
+
 ## Starting the server

 ### stdio (default — for use with MCP clients like Claude Desktop)
--- a/src/searxng_mcp/searxng.py
+++ b/src/searxng_mcp/searxng.py
@ -13,6 +13,7 @@ async def search(
    pageno: int = 1,
    time_range: str | None = None,
    safesearch: int = 0,
+    timeout_seconds: float = 30.0,
 ) -> dict[str, Any]:
    """Send a search request to a SearxNG instance and return parsed JSON."""
    params: dict[str, Any] = {
@ -30,7 +31,12 @@ async def search(
    if time_range:
        params["time_range"] = time_range

-    async with httpx.AsyncClient() as client:
+    try:
+        async with httpx.AsyncClient(timeout=timeout_seconds) as client:
            response = await client.get(f"{base_url.rstrip('/')}/search", params=params)
            response.raise_for_status()
            return response.json()
+    except httpx.TimeoutException as exc:
+        raise ValueError(
+            f"SearxNG request timed out after {timeout_seconds:.2f}s"
+        ) from exc
--- a/src/searxng_mcp/server.py
+++ b/src/searxng_mcp/server.py
@ -20,6 +20,8 @@ class Settings(BaseSettings):
    model_config = SettingsConfigDict(env_prefix="SEARXNG_", env_file=".env", env_file_encoding="utf-8", extra="ignore")

    base_url: str = "http://localhost:8080"
+    request_timeout_seconds: float = 30.0
+    """Default network timeout (seconds) for outgoing HTTP requests."""
    guard_enabled: bool = False
    """Run fetched content through the prompt-guard honeypot before returning it.
    Requires PROMPT_GUARD_* settings to be configured."""
@ -49,6 +51,7 @@ async def _fetch_and_extract(
    include_images: bool = False,
    include_links: bool = False,
    use_cache: bool = True,
+    timeout_seconds: float | None = None,
 ) -> str:
    """Shared fetch+extract logic used by both the tool and resource."""
    cache_key = (url, output_format, include_tables, include_images, include_links)
@ -56,10 +59,20 @@ async def _fetch_and_extract(
    if use_cache and cache_key in _cache:
        return _cache[cache_key]

+    effective_timeout = timeout_seconds if timeout_seconds is not None else settings.request_timeout_seconds
+    try:
+        async with httpx.AsyncClient(
+            headers={"User-Agent": "searxng-mcp/1.0", "Accept": "*/*"},
+            follow_redirects=True,
+            timeout=effective_timeout,
+        ) as client:
+            response = await client.get(url)
+            response.raise_for_status()
+            downloaded = response.text
+    except httpx.TimeoutException as exc:
+        raise ValueError(f"Request timed out after {effective_timeout:.2f}s for URL: {url}") from exc
+
    loop = asyncio.get_event_loop()
-    downloaded = await loop.run_in_executor(None, trafilatura.fetch_url, url)
-    if not downloaded:
-        raise ValueError(f"Failed to fetch URL: {url}")
    result = await loop.run_in_executor(
        None,
        lambda: trafilatura.extract(
@ -120,6 +133,16 @@ async def search(
        Literal[0, 1, 2],
        Field(description="Safe search level: 0=off, 1=moderate, 2=strict."),
    ] = 0,
+    timeout_seconds: Annotated[
+        float | None,
+        Field(
+            description=(
+                "Request timeout in seconds for this call. "
+                "If omitted, uses SEARXNG_REQUEST_TIMEOUT_SECONDS."
+            ),
+            gt=0,
+        ),
+    ] = None,
 ) -> list[dict]:
    """Search the web via SearxNG and return a list of results.

@ -135,6 +158,7 @@ async def search(
        pageno=pageno,
        time_range=time_range,
        safesearch=safesearch,
+        timeout_seconds=timeout_seconds if timeout_seconds is not None else settings.request_timeout_seconds,
    )
    results = data.get("results", [])
    return [
@ -184,6 +208,16 @@ async def fetch(
        bool,
        Field(description="Return cached content if available. Set to false to force a fresh download."),
    ] = True,
+    timeout_seconds: Annotated[
+        float | None,
+        Field(
+            description=(
+                "Request timeout in seconds for downloading this URL. "
+                "If omitted, uses SEARXNG_REQUEST_TIMEOUT_SECONDS."
+            ),
+            gt=0,
+        ),
+    ] = None,
 ) -> dict:
    """Fetch a URL and extract its main content, stripping navigation, ads, and boilerplate.

@ -191,7 +225,15 @@ async def fetch(
    If truncated, use start/end to page through the full content, or read the resource
    web://fetch?url=<url>&start=<n>&end=<m> for specific slices.
    """
-    content = await _fetch_and_extract(url, output_format, include_tables, include_images, include_links, use_cache)
+    content = await _fetch_and_extract(
+        url,
+        output_format,
+        include_tables,
+        include_images,
+        include_links,
+        use_cache,
+        timeout_seconds,
+    )
    total_chars = len(content)

    # Apply explicit start/end slice first (takes priority over max_chars windowing)
@ -218,7 +260,7 @@ async def fetch(


@mcp.resource(
-    "web://fetch{?url,start,end,output_format,include_links,include_tables,include_images,use_cache}",
+    "web://fetch{?url,start,end,output_format,include_links,include_tables,include_images,use_cache,timeout_seconds}",
    mime_type="text/markdown",
 )
 async def fetch_slice(
@ -230,6 +272,7 @@ async def fetch_slice(
    include_tables: bool = True,
    include_images: bool = False,
    use_cache: bool = True,
+    timeout_seconds: float | None = None,
 ) -> str:
    """Fetch a URL and return a character slice of the extracted content.

@ -238,7 +281,15 @@ async def fetch_slice(
    """
    if not url:
        raise ValueError("url parameter is required")
-    content = await _fetch_and_extract(url, output_format, include_tables, include_images, include_links, use_cache)
+    content = await _fetch_and_extract(
+        url,
+        output_format,
+        include_tables,
+        include_images,
+        include_links,
+        use_cache,
+        timeout_seconds,
+    )
    if end > 0:
        return content[start:end]
    return content[start:]
@ -249,6 +300,16 @@ async def fetch_raw(
    url: Annotated[str, Field(description="URL to fetch. Returns raw text content without HTML extraction.")],
    encoding: Annotated[str, Field(description="Text encoding to decode the response bytes. Default 'utf-8'.")] = "utf-8",
    max_bytes: Annotated[int, Field(description="Maximum bytes to return. 0 = no limit.", ge=0)] = 0,
+    timeout_seconds: Annotated[
+        float | None,
+        Field(
+            description=(
+                "Request timeout in seconds for this call. "
+                "If omitted, uses SEARXNG_REQUEST_TIMEOUT_SECONDS."
+            ),
+            gt=0,
+        ),
+    ] = None,
 ) -> dict:
    """Fetch a URL and return the raw response body as text, bypassing trafilatura extraction.

@ -259,17 +320,19 @@ async def fetch_raw(

    Returns a dict with: content (str), status_code (int), content_type (str), total_bytes (int).
    """
-    def _do_fetch() -> tuple[bytes, int, str]:
-        with httpx.Client(
+    effective_timeout = timeout_seconds if timeout_seconds is not None else settings.request_timeout_seconds
+    try:
+        async with httpx.AsyncClient(
            headers={"User-Agent": "searxng-mcp/1.0", "Accept": "*/*"},
            follow_redirects=True,
-            timeout=30.0,
+            timeout=effective_timeout,
        ) as client:
-            response = client.get(url)
-            return response.content, response.status_code, response.headers.get("content-type", "")
-
-    loop = asyncio.get_event_loop()
-    raw, status_code, content_type = await loop.run_in_executor(None, _do_fetch)
+            response = await client.get(url)
+            raw = response.content
+            status_code = response.status_code
+            content_type = response.headers.get("content-type", "")
+    except httpx.TimeoutException as exc:
+        raise ValueError(f"Request timed out after {effective_timeout:.2f}s for URL: {url}") from exc

    total_bytes = len(raw)
    if max_bytes > 0: