feat: add timeout configuration for outgoing HTTP requests and update documentation
This commit is contained in:
parent
3ebd7c5e4a
commit
8288787b4e
4 changed files with 99 additions and 20 deletions
|
|
@ -3,6 +3,10 @@
|
||||||
# URL of the SearxNG instance to query.
|
# URL of the SearxNG instance to query.
|
||||||
SEARXNG_BASE_URL=http://localhost:8080
|
SEARXNG_BASE_URL=http://localhost:8080
|
||||||
|
|
||||||
|
# Default network timeout (seconds) for outgoing HTTP requests.
|
||||||
|
# Tools may override this per call via timeout_seconds.
|
||||||
|
#SEARXNG_REQUEST_TIMEOUT_SECONDS=30
|
||||||
|
|
||||||
# Set to true to run fetched pages through the prompt-guard honeypot before
|
# Set to true to run fetched pages through the prompt-guard honeypot before
|
||||||
# returning them to the agent. Requires PROMPT_GUARD_* settings below.
|
# returning them to the agent. Requires PROMPT_GUARD_* settings below.
|
||||||
#SEARXNG_GUARD_ENABLED=false
|
#SEARXNG_GUARD_ENABLED=false
|
||||||
|
|
|
||||||
10
README.md
10
README.md
|
|
@ -8,8 +8,8 @@ Built with [FastMCP](https://github.com/prefecthq/fastmcp).
|
||||||
|
|
||||||
| Name | Type | Description |
|
| Name | Type | Description |
|
||||||
|------|------|-------------|
|
|------|------|-------------|
|
||||||
| `search` | Tool | Query the web via SearxNG. Supports categories, engines, language, time range, safe search, and pagination. |
|
| `search` | Tool | Query the web via SearxNG. Supports categories, engines, language, time range, safe search, pagination, and per-call timeout overrides. |
|
||||||
| `fetch` | Tool | Fetch a URL and extract its main content (strips ads, navigation, boilerplate). Returns a preview with `total_chars` and `truncated` metadata. Supports `start`/`end` slicing, `max_chars`, multiple output formats, and in-memory caching. |
|
| `fetch` | Tool | Fetch a URL and extract its main content (strips ads, navigation, boilerplate). Returns a preview with `total_chars` and `truncated` metadata. Supports `start`/`end` slicing, `max_chars`, multiple output formats, in-memory caching, and per-call timeout overrides. |
|
||||||
| `web://fetch{?url,...}` | Resource | Read an arbitrary character slice of a fetched page without going through the tool call. Useful for paging through large documents. |
|
| `web://fetch{?url,...}` | Resource | Read an arbitrary character slice of a fetched page without going through the tool call. Useful for paging through large documents. |
|
||||||
|
|
||||||
## Requirements
|
## Requirements
|
||||||
|
|
@ -37,14 +37,20 @@ cp .env.example .env
|
||||||
`.env`:
|
`.env`:
|
||||||
```dotenv
|
```dotenv
|
||||||
SEARXNG_BASE_URL=http://localhost:8080
|
SEARXNG_BASE_URL=http://localhost:8080
|
||||||
|
SEARXNG_REQUEST_TIMEOUT_SECONDS=30
|
||||||
```
|
```
|
||||||
|
|
||||||
All settings can also be provided as environment variables with the `SEARXNG_` prefix:
|
All settings can also be provided as environment variables with the `SEARXNG_` prefix:
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
export SEARXNG_BASE_URL=http://localhost:8080
|
export SEARXNG_BASE_URL=http://localhost:8080
|
||||||
|
export SEARXNG_REQUEST_TIMEOUT_SECONDS=30
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Timeout behavior:
|
||||||
|
- `SEARXNG_REQUEST_TIMEOUT_SECONDS` sets the default timeout, in seconds, for outgoing HTTP calls (30 if unset).
|
||||||
|
- `search`, `fetch`, `fetch_raw`, and `web://fetch` each accept an optional `timeout_seconds` argument that overrides the default for a single call.
|
||||||
|
|
||||||
## Starting the server
|
## Starting the server
|
||||||
|
|
||||||
### stdio (default — for use with MCP clients like Claude Desktop)
|
### stdio (default — for use with MCP clients like Claude Desktop)
|
||||||
|
|
|
||||||
|
|
@ -13,6 +13,7 @@ async def search(
|
||||||
pageno: int = 1,
|
pageno: int = 1,
|
||||||
time_range: str | None = None,
|
time_range: str | None = None,
|
||||||
safesearch: int = 0,
|
safesearch: int = 0,
|
||||||
|
timeout_seconds: float = 30.0,
|
||||||
) -> dict[str, Any]:
|
) -> dict[str, Any]:
|
||||||
"""Send a search request to a SearxNG instance and return parsed JSON."""
|
"""Send a search request to a SearxNG instance and return parsed JSON."""
|
||||||
params: dict[str, Any] = {
|
params: dict[str, Any] = {
|
||||||
|
|
@ -30,7 +31,12 @@ async def search(
|
||||||
if time_range:
|
if time_range:
|
||||||
params["time_range"] = time_range
|
params["time_range"] = time_range
|
||||||
|
|
||||||
async with httpx.AsyncClient() as client:
|
try:
|
||||||
response = await client.get(f"{base_url.rstrip('/')}/search", params=params)
|
async with httpx.AsyncClient(timeout=timeout_seconds) as client:
|
||||||
response.raise_for_status()
|
response = await client.get(f"{base_url.rstrip('/')}/search", params=params)
|
||||||
return response.json()
|
response.raise_for_status()
|
||||||
|
return response.json()
|
||||||
|
except httpx.TimeoutException as exc:
|
||||||
|
raise ValueError(
|
||||||
|
f"SearxNG request timed out after {timeout_seconds:.2f}s"
|
||||||
|
) from exc
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,8 @@ class Settings(BaseSettings):
|
||||||
model_config = SettingsConfigDict(env_prefix="SEARXNG_", env_file=".env", env_file_encoding="utf-8", extra="ignore")
|
model_config = SettingsConfigDict(env_prefix="SEARXNG_", env_file=".env", env_file_encoding="utf-8", extra="ignore")
|
||||||
|
|
||||||
base_url: str = "http://localhost:8080"
|
base_url: str = "http://localhost:8080"
|
||||||
|
request_timeout_seconds: float = 30.0
|
||||||
|
"""Default network timeout (seconds) for outgoing HTTP requests."""
|
||||||
guard_enabled: bool = False
|
guard_enabled: bool = False
|
||||||
"""Run fetched content through the prompt-guard honeypot before returning it.
|
"""Run fetched content through the prompt-guard honeypot before returning it.
|
||||||
Requires PROMPT_GUARD_* settings to be configured."""
|
Requires PROMPT_GUARD_* settings to be configured."""
|
||||||
|
|
@ -49,6 +51,7 @@ async def _fetch_and_extract(
|
||||||
include_images: bool = False,
|
include_images: bool = False,
|
||||||
include_links: bool = False,
|
include_links: bool = False,
|
||||||
use_cache: bool = True,
|
use_cache: bool = True,
|
||||||
|
timeout_seconds: float | None = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Shared fetch+extract logic used by both the tool and resource."""
|
"""Shared fetch+extract logic used by both the tool and resource."""
|
||||||
cache_key = (url, output_format, include_tables, include_images, include_links)
|
cache_key = (url, output_format, include_tables, include_images, include_links)
|
||||||
|
|
@ -56,10 +59,20 @@ async def _fetch_and_extract(
|
||||||
if use_cache and cache_key in _cache:
|
if use_cache and cache_key in _cache:
|
||||||
return _cache[cache_key]
|
return _cache[cache_key]
|
||||||
|
|
||||||
|
effective_timeout = timeout_seconds if timeout_seconds is not None else settings.request_timeout_seconds
|
||||||
|
try:
|
||||||
|
async with httpx.AsyncClient(
|
||||||
|
headers={"User-Agent": "searxng-mcp/1.0", "Accept": "*/*"},
|
||||||
|
follow_redirects=True,
|
||||||
|
timeout=effective_timeout,
|
||||||
|
) as client:
|
||||||
|
response = await client.get(url)
|
||||||
|
response.raise_for_status()
|
||||||
|
downloaded = response.text
|
||||||
|
except httpx.TimeoutException as exc:
|
||||||
|
raise ValueError(f"Request timed out after {effective_timeout:.2f}s for URL: {url}") from exc
|
||||||
|
|
||||||
loop = asyncio.get_event_loop()
|
loop = asyncio.get_event_loop()
|
||||||
downloaded = await loop.run_in_executor(None, trafilatura.fetch_url, url)
|
|
||||||
if not downloaded:
|
|
||||||
raise ValueError(f"Failed to fetch URL: {url}")
|
|
||||||
result = await loop.run_in_executor(
|
result = await loop.run_in_executor(
|
||||||
None,
|
None,
|
||||||
lambda: trafilatura.extract(
|
lambda: trafilatura.extract(
|
||||||
|
|
@ -120,6 +133,16 @@ async def search(
|
||||||
Literal[0, 1, 2],
|
Literal[0, 1, 2],
|
||||||
Field(description="Safe search level: 0=off, 1=moderate, 2=strict."),
|
Field(description="Safe search level: 0=off, 1=moderate, 2=strict."),
|
||||||
] = 0,
|
] = 0,
|
||||||
|
timeout_seconds: Annotated[
|
||||||
|
float | None,
|
||||||
|
Field(
|
||||||
|
description=(
|
||||||
|
"Request timeout in seconds for this call. "
|
||||||
|
"If omitted, uses SEARXNG_REQUEST_TIMEOUT_SECONDS."
|
||||||
|
),
|
||||||
|
gt=0,
|
||||||
|
),
|
||||||
|
] = None,
|
||||||
) -> list[dict]:
|
) -> list[dict]:
|
||||||
"""Search the web via SearxNG and return a list of results.
|
"""Search the web via SearxNG and return a list of results.
|
||||||
|
|
||||||
|
|
@ -135,6 +158,7 @@ async def search(
|
||||||
pageno=pageno,
|
pageno=pageno,
|
||||||
time_range=time_range,
|
time_range=time_range,
|
||||||
safesearch=safesearch,
|
safesearch=safesearch,
|
||||||
|
timeout_seconds=timeout_seconds if timeout_seconds is not None else settings.request_timeout_seconds,
|
||||||
)
|
)
|
||||||
results = data.get("results", [])
|
results = data.get("results", [])
|
||||||
return [
|
return [
|
||||||
|
|
@ -184,6 +208,16 @@ async def fetch(
|
||||||
bool,
|
bool,
|
||||||
Field(description="Return cached content if available. Set to false to force a fresh download."),
|
Field(description="Return cached content if available. Set to false to force a fresh download."),
|
||||||
] = True,
|
] = True,
|
||||||
|
timeout_seconds: Annotated[
|
||||||
|
float | None,
|
||||||
|
Field(
|
||||||
|
description=(
|
||||||
|
"Request timeout in seconds for downloading this URL. "
|
||||||
|
"If omitted, uses SEARXNG_REQUEST_TIMEOUT_SECONDS."
|
||||||
|
),
|
||||||
|
gt=0,
|
||||||
|
),
|
||||||
|
] = None,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""Fetch a URL and extract its main content, stripping navigation, ads, and boilerplate.
|
"""Fetch a URL and extract its main content, stripping navigation, ads, and boilerplate.
|
||||||
|
|
||||||
|
|
@ -191,7 +225,15 @@ async def fetch(
|
||||||
If truncated, use start/end to page through the full content, or read the resource
|
If truncated, use start/end to page through the full content, or read the resource
|
||||||
web://fetch?url=<url>&start=<n>&end=<m> for specific slices.
|
web://fetch?url=<url>&start=<n>&end=<m> for specific slices.
|
||||||
"""
|
"""
|
||||||
content = await _fetch_and_extract(url, output_format, include_tables, include_images, include_links, use_cache)
|
content = await _fetch_and_extract(
|
||||||
|
url,
|
||||||
|
output_format,
|
||||||
|
include_tables,
|
||||||
|
include_images,
|
||||||
|
include_links,
|
||||||
|
use_cache,
|
||||||
|
timeout_seconds,
|
||||||
|
)
|
||||||
total_chars = len(content)
|
total_chars = len(content)
|
||||||
|
|
||||||
# Apply explicit start/end slice first (takes priority over max_chars windowing)
|
# Apply explicit start/end slice first (takes priority over max_chars windowing)
|
||||||
|
|
@ -218,7 +260,7 @@ async def fetch(
|
||||||
|
|
||||||
|
|
||||||
@mcp.resource(
|
@mcp.resource(
|
||||||
"web://fetch{?url,start,end,output_format,include_links,include_tables,include_images,use_cache}",
|
"web://fetch{?url,start,end,output_format,include_links,include_tables,include_images,use_cache,timeout_seconds}",
|
||||||
mime_type="text/markdown",
|
mime_type="text/markdown",
|
||||||
)
|
)
|
||||||
async def fetch_slice(
|
async def fetch_slice(
|
||||||
|
|
@ -230,6 +272,7 @@ async def fetch_slice(
|
||||||
include_tables: bool = True,
|
include_tables: bool = True,
|
||||||
include_images: bool = False,
|
include_images: bool = False,
|
||||||
use_cache: bool = True,
|
use_cache: bool = True,
|
||||||
|
timeout_seconds: float | None = None,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""Fetch a URL and return a character slice of the extracted content.
|
"""Fetch a URL and return a character slice of the extracted content.
|
||||||
|
|
||||||
|
|
@ -238,7 +281,15 @@ async def fetch_slice(
|
||||||
"""
|
"""
|
||||||
if not url:
|
if not url:
|
||||||
raise ValueError("url parameter is required")
|
raise ValueError("url parameter is required")
|
||||||
content = await _fetch_and_extract(url, output_format, include_tables, include_images, include_links, use_cache)
|
content = await _fetch_and_extract(
|
||||||
|
url,
|
||||||
|
output_format,
|
||||||
|
include_tables,
|
||||||
|
include_images,
|
||||||
|
include_links,
|
||||||
|
use_cache,
|
||||||
|
timeout_seconds,
|
||||||
|
)
|
||||||
if end > 0:
|
if end > 0:
|
||||||
return content[start:end]
|
return content[start:end]
|
||||||
return content[start:]
|
return content[start:]
|
||||||
|
|
@ -249,6 +300,16 @@ async def fetch_raw(
|
||||||
url: Annotated[str, Field(description="URL to fetch. Returns raw text content without HTML extraction.")],
|
url: Annotated[str, Field(description="URL to fetch. Returns raw text content without HTML extraction.")],
|
||||||
encoding: Annotated[str, Field(description="Text encoding to decode the response bytes. Default 'utf-8'.")] = "utf-8",
|
encoding: Annotated[str, Field(description="Text encoding to decode the response bytes. Default 'utf-8'.")] = "utf-8",
|
||||||
max_bytes: Annotated[int, Field(description="Maximum bytes to return. 0 = no limit.", ge=0)] = 0,
|
max_bytes: Annotated[int, Field(description="Maximum bytes to return. 0 = no limit.", ge=0)] = 0,
|
||||||
|
timeout_seconds: Annotated[
|
||||||
|
float | None,
|
||||||
|
Field(
|
||||||
|
description=(
|
||||||
|
"Request timeout in seconds for this call. "
|
||||||
|
"If omitted, uses SEARXNG_REQUEST_TIMEOUT_SECONDS."
|
||||||
|
),
|
||||||
|
gt=0,
|
||||||
|
),
|
||||||
|
] = None,
|
||||||
) -> dict:
|
) -> dict:
|
||||||
"""Fetch a URL and return the raw response body as text, bypassing trafilatura extraction.
|
"""Fetch a URL and return the raw response body as text, bypassing trafilatura extraction.
|
||||||
|
|
||||||
|
|
@ -259,17 +320,19 @@ async def fetch_raw(
|
||||||
|
|
||||||
Returns a dict with: content (str), status_code (int), content_type (str), total_bytes (int).
|
Returns a dict with: content (str), status_code (int), content_type (str), total_bytes (int).
|
||||||
"""
|
"""
|
||||||
def _do_fetch() -> tuple[bytes, int, str]:
|
effective_timeout = timeout_seconds if timeout_seconds is not None else settings.request_timeout_seconds
|
||||||
with httpx.Client(
|
try:
|
||||||
|
async with httpx.AsyncClient(
|
||||||
headers={"User-Agent": "searxng-mcp/1.0", "Accept": "*/*"},
|
headers={"User-Agent": "searxng-mcp/1.0", "Accept": "*/*"},
|
||||||
follow_redirects=True,
|
follow_redirects=True,
|
||||||
timeout=30.0,
|
timeout=effective_timeout,
|
||||||
) as client:
|
) as client:
|
||||||
response = client.get(url)
|
response = await client.get(url)
|
||||||
return response.content, response.status_code, response.headers.get("content-type", "")
|
raw = response.content
|
||||||
|
status_code = response.status_code
|
||||||
loop = asyncio.get_event_loop()
|
content_type = response.headers.get("content-type", "")
|
||||||
raw, status_code, content_type = await loop.run_in_executor(None, _do_fetch)
|
except httpx.TimeoutException as exc:
|
||||||
|
raise ValueError(f"Request timed out after {effective_timeout:.2f}s for URL: {url}") from exc
|
||||||
|
|
||||||
total_bytes = len(raw)
|
total_bytes = len(raw)
|
||||||
if max_bytes > 0:
|
if max_bytes > 0:
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue