diff --git a/.env.example b/.env.example index 5f21045..f70a9d9 100644 --- a/.env.example +++ b/.env.example @@ -3,6 +3,10 @@ # URL of the SearxNG instance to query. SEARXNG_BASE_URL=http://localhost:8080 +# Default network timeout (seconds) for outgoing HTTP requests. +# Tools may override this per call via timeout_seconds. +#SEARXNG_REQUEST_TIMEOUT_SECONDS=30 + # Set to true to run fetched pages through the prompt-guard honeypot before # returning them to the agent. Requires PROMPT_GUARD_* settings below. #SEARXNG_GUARD_ENABLED=false diff --git a/README.md b/README.md index b9ca58c..32dc126 100644 --- a/README.md +++ b/README.md @@ -8,8 +8,8 @@ Built with [FastMCP](https://github.com/prefecthq/fastmcp). | Name | Type | Description | |------|------|-------------| -| `search` | Tool | Query the web via SearxNG. Supports categories, engines, language, time range, safe search, and pagination. | -| `fetch` | Tool | Fetch a URL and extract its main content (strips ads, navigation, boilerplate). Returns a preview with `total_chars` and `truncated` metadata. Supports `start`/`end` slicing, `max_chars`, multiple output formats, and in-memory caching. | +| `search` | Tool | Query the web via SearxNG. Supports categories, engines, language, time range, safe search, pagination, and per-call timeout overrides. | +| `fetch` | Tool | Fetch a URL and extract its main content (strips ads, navigation, boilerplate). Returns a preview with `total_chars` and `truncated` metadata. Supports `start`/`end` slicing, `max_chars`, multiple output formats, in-memory caching, and per-call timeout overrides. | | `web://fetch{?url,...}` | Resource | Read an arbitrary character slice of a fetched page without going through the tool call. Useful for paging through large documents. | ## Requirements @@ -37,14 +37,20 @@ cp .env.example .env `.env`: ```dotenv SEARXNG_BASE_URL=http://localhost:8080 +SEARXNG_REQUEST_TIMEOUT_SECONDS=30 ``` All settings can also be provided as environment variables with the `SEARXNG_` prefix: ```bash export SEARXNG_BASE_URL=http://localhost:8080 +export SEARXNG_REQUEST_TIMEOUT_SECONDS=30 ``` +Timeout behavior: +- `SEARXNG_REQUEST_TIMEOUT_SECONDS` sets the default timeout for outgoing HTTP calls. +- `search`, `fetch`, `fetch_raw`, and `web://fetch` accept optional `timeout_seconds` to override the default for a single call. + ## Starting the server ### stdio (default — for use with MCP clients like Claude Desktop) diff --git a/src/searxng_mcp/searxng.py b/src/searxng_mcp/searxng.py index 44010ee..74cb538 100644 --- a/src/searxng_mcp/searxng.py +++ b/src/searxng_mcp/searxng.py @@ -13,6 +13,7 @@ async def search( pageno: int = 1, time_range: str | None = None, safesearch: int = 0, + timeout_seconds: float = 30.0, ) -> dict[str, Any]: """Send a search request to a SearxNG instance and return parsed JSON.""" params: dict[str, Any] = { @@ -30,7 +31,12 @@ async def search( if time_range: params["time_range"] = time_range - async with httpx.AsyncClient() as client: - response = await client.get(f"{base_url.rstrip('/')}/search", params=params) - response.raise_for_status() - return response.json() + try: + async with httpx.AsyncClient(timeout=timeout_seconds) as client: + response = await client.get(f"{base_url.rstrip('/')}/search", params=params) + response.raise_for_status() + return response.json() + except httpx.TimeoutException as exc: + raise ValueError( + f"SearxNG request timed out after {timeout_seconds:.2f}s" + ) from exc diff --git a/src/searxng_mcp/server.py b/src/searxng_mcp/server.py index 2ece0e4..220663f 100644 --- a/src/searxng_mcp/server.py +++ b/src/searxng_mcp/server.py @@ -20,6 +20,8 @@ class Settings(BaseSettings): model_config = SettingsConfigDict(env_prefix="SEARXNG_", env_file=".env", env_file_encoding="utf-8", extra="ignore") base_url: str = "http://localhost:8080" + request_timeout_seconds: float = 30.0 + """Default network timeout (seconds) for outgoing HTTP requests.""" guard_enabled: bool = False """Run fetched content through the prompt-guard honeypot before returning it. Requires PROMPT_GUARD_* settings to be configured.""" @@ -49,6 +51,7 @@ async def _fetch_and_extract( include_images: bool = False, include_links: bool = False, use_cache: bool = True, + timeout_seconds: float | None = None, ) -> str: """Shared fetch+extract logic used by both the tool and resource.""" cache_key = (url, output_format, include_tables, include_images, include_links) @@ -56,10 +59,20 @@ async def _fetch_and_extract( if use_cache and cache_key in _cache: return _cache[cache_key] + effective_timeout = timeout_seconds if timeout_seconds is not None else settings.request_timeout_seconds + try: + async with httpx.AsyncClient( + headers={"User-Agent": "searxng-mcp/1.0", "Accept": "*/*"}, + follow_redirects=True, + timeout=effective_timeout, + ) as client: + response = await client.get(url) + response.raise_for_status() + downloaded = response.text + except httpx.TimeoutException as exc: + raise ValueError(f"Request timed out after {effective_timeout:.2f}s for URL: {url}") from exc + loop = asyncio.get_event_loop() - downloaded = await loop.run_in_executor(None, trafilatura.fetch_url, url) - if not downloaded: - raise ValueError(f"Failed to fetch URL: {url}") result = await loop.run_in_executor( None, lambda: trafilatura.extract( @@ -120,6 +133,16 @@ async def search( Literal[0, 1, 2], Field(description="Safe search level: 0=off, 1=moderate, 2=strict."), ] = 0, + timeout_seconds: Annotated[ + float | None, + Field( + description=( + "Request timeout in seconds for this call. " + "If omitted, uses SEARXNG_REQUEST_TIMEOUT_SECONDS." + ), + gt=0, + ), + ] = None, ) -> list[dict]: """Search the web via SearxNG and return a list of results. @@ -135,6 +158,7 @@ async def search( pageno=pageno, time_range=time_range, safesearch=safesearch, + timeout_seconds=timeout_seconds if timeout_seconds is not None else settings.request_timeout_seconds, ) results = data.get("results", []) return [ @@ -184,6 +208,16 @@ async def fetch( bool, Field(description="Return cached content if available. Set to false to force a fresh download."), ] = True, + timeout_seconds: Annotated[ + float | None, + Field( + description=( + "Request timeout in seconds for downloading this URL. " + "If omitted, uses SEARXNG_REQUEST_TIMEOUT_SECONDS." + ), + gt=0, + ), + ] = None, ) -> dict: """Fetch a URL and extract its main content, stripping navigation, ads, and boilerplate. @@ -191,7 +225,15 @@ async def fetch( If truncated, use start/end to page through the full content, or read the resource web://fetch?url=&start=&end= for specific slices. """ - content = await _fetch_and_extract(url, output_format, include_tables, include_images, include_links, use_cache) + content = await _fetch_and_extract( + url, + output_format, + include_tables, + include_images, + include_links, + use_cache, + timeout_seconds, + ) total_chars = len(content) # Apply explicit start/end slice first (takes priority over max_chars windowing) @@ -218,7 +260,7 @@ async def fetch( @mcp.resource( - "web://fetch{?url,start,end,output_format,include_links,include_tables,include_images,use_cache}", + "web://fetch{?url,start,end,output_format,include_links,include_tables,include_images,use_cache,timeout_seconds}", mime_type="text/markdown", ) async def fetch_slice( @@ -230,6 +272,7 @@ async def fetch_slice( include_tables: bool = True, include_images: bool = False, use_cache: bool = True, + timeout_seconds: float | None = None, ) -> str: """Fetch a URL and return a character slice of the extracted content. @@ -238,7 +281,15 @@ async def fetch_slice( """ if not url: raise ValueError("url parameter is required") - content = await _fetch_and_extract(url, output_format, include_tables, include_images, include_links, use_cache) + content = await _fetch_and_extract( + url, + output_format, + include_tables, + include_images, + include_links, + use_cache, + timeout_seconds, + ) if end > 0: return content[start:end] return content[start:] @@ -249,6 +300,16 @@ async def fetch_raw( url: Annotated[str, Field(description="URL to fetch. Returns raw text content without HTML extraction.")], encoding: Annotated[str, Field(description="Text encoding to decode the response bytes. Default 'utf-8'.")] = "utf-8", max_bytes: Annotated[int, Field(description="Maximum bytes to return. 0 = no limit.", ge=0)] = 0, + timeout_seconds: Annotated[ + float | None, + Field( + description=( + "Request timeout in seconds for this call. " + "If omitted, uses SEARXNG_REQUEST_TIMEOUT_SECONDS." + ), + gt=0, + ), + ] = None, ) -> dict: """Fetch a URL and return the raw response body as text, bypassing trafilatura extraction. @@ -259,17 +320,19 @@ async def fetch_raw( Returns a dict with: content (str), status_code (int), content_type (str), total_bytes (int). """ - def _do_fetch() -> tuple[bytes, int, str]: - with httpx.Client( + effective_timeout = timeout_seconds if timeout_seconds is not None else settings.request_timeout_seconds + try: + async with httpx.AsyncClient( headers={"User-Agent": "searxng-mcp/1.0", "Accept": "*/*"}, follow_redirects=True, - timeout=30.0, + timeout=effective_timeout, ) as client: - response = client.get(url) - return response.content, response.status_code, response.headers.get("content-type", "") - - loop = asyncio.get_event_loop() - raw, status_code, content_type = await loop.run_in_executor(None, _do_fetch) + response = await client.get(url) + raw = response.content + status_code = response.status_code + content_type = response.headers.get("content-type", "") + except httpx.TimeoutException as exc: + raise ValueError(f"Request timed out after {effective_timeout:.2f}s for URL: {url}") from exc total_bytes = len(raw) if max_bytes > 0: