Initial commit
This commit is contained in:
commit
8885c1872f
14 changed files with 1990 additions and 0 deletions
1
.env.example
Normal file
1
.env.example
Normal file
|
|
@ -0,0 +1 @@
|
|||
SEARXNG_BASE_URL=http://localhost:8080
|
||||
13
.gitignore
vendored
Normal file
13
.gitignore
vendored
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
# Python-generated files
|
||||
__pycache__/
|
||||
*.py[oc]
|
||||
build/
|
||||
dist/
|
||||
wheels/
|
||||
*.egg-info
|
||||
|
||||
# Virtual environments
|
||||
.venv
|
||||
|
||||
# Environment variables
|
||||
.env
|
||||
76
.opencode/skills/mcp-forge-conventions/SKILL.md
Normal file
76
.opencode/skills/mcp-forge-conventions/SKILL.md
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
---
|
||||
name: mcp-forge-conventions
|
||||
description: How to call MCP tools from within mcp-forge execute_python scripts, including tool naming and injection syntax
|
||||
---
|
||||
|
||||
# mcp-forge Conventions
|
||||
|
||||
## Tool naming
|
||||
|
||||
mcp-forge injects tools using their **bare function name**, not the namespaced name visible to the agent.
|
||||
|
||||
| Agent-side name | mcp-forge `mcp_tools` value | In-script call |
|
||||
|---|---|---|
|
||||
| `searxng_search` | `"search"` | `search(...)` |
|
||||
| `searxng_fetch` | `"fetch"` | `fetch(...)` |
|
||||
| `rag-mcp_browse_documents` | `"browse_documents"` | `browse_documents(...)` |
|
||||
| `rag-mcp_search_records` | `"search_records"` | `search_records(...)` |
|
||||
|
||||
The pattern: strip any server prefix (e.g. `searxng_`, `rag-mcp_`) and use only the function name.
|
||||
|
||||
## Injection syntax
|
||||
|
||||
Pass a JSON array of bare tool names to `mcp_tools`:
|
||||
|
||||
```python
|
||||
mcp-forge_execute_python(
|
||||
code='results = search(query="foo"); print(results)',
|
||||
mcp_tools=["search", "fetch"]
|
||||
)
|
||||
```
|
||||
|
||||
## Listing all available tools
|
||||
|
||||
Use the agent-side `mcp-forge_list_injectable_tools` tool to get the full catalogue before writing scripts:
|
||||
|
||||
```
|
||||
mcp-forge_list_injectable_tools(include_schemas=false)
|
||||
```
|
||||
|
||||
Returns each tool's `tool_name` (injected name), `qualified_name` (`provider.tool`), and provider metadata (name, transport, url). Only tools whose providers are registered in mcp-forge's own config appear here — tools available to the OpenCode agent from other MCP servers (e.g. GitHub) are NOT automatically available inside mcp-forge.
|
||||
|
||||
## Verifying a single tool name
|
||||
|
||||
To confirm a specific tool name resolves before using it, pass it in `mcp_tools` and check the `available_tools` list in the response. Only successfully resolved tools appear there.
|
||||
|
||||
```python
|
||||
mcp-forge_execute_python(
|
||||
code='print("ok")',
|
||||
mcp_tools=["search"]
|
||||
)
|
||||
# response includes: "available_tools": ["search"]
|
||||
# if the name is wrong, the whole call errors with "Tool '<name>' not found"
|
||||
```
|
||||
|
||||
## Return values
|
||||
|
||||
Injected tools return plain Python objects (lists, dicts). Depending on the provider, the payload may arrive directly or wrapped in a dict under the `result` key, so handle both shapes:
|
||||
|
||||
```python
|
||||
data = search(query="foo")
|
||||
records = data.get("result", []) if isinstance(data, dict) else data
|
||||
```
|
||||
|
||||
## Combining searxng + mcp-forge
|
||||
|
||||
```python
|
||||
mcp-forge_execute_python(
|
||||
code='''
|
||||
results = search(query="uv python", language="en")
|
||||
top = results[0]
|
||||
page = fetch(url=top["url"], max_chars=2000)
|
||||
print(page["content"])
|
||||
''',
|
||||
mcp_tools=["search", "fetch"]
|
||||
)
|
||||
```
|
||||
1
.python-version
Normal file
1
.python-version
Normal file
|
|
@ -0,0 +1 @@
|
|||
3.14
|
||||
0
README.md
Normal file
0
README.md
Normal file
1
fast_mcp_docs
Symbolic link
1
fast_mcp_docs
Symbolic link
|
|
@ -0,0 +1 @@
|
|||
/home/hans/software/fastmcp/docs/
|
||||
22
pyproject.toml
Normal file
22
pyproject.toml
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
[project]
|
||||
name = "searxng-mcp"
|
||||
version = "0.1.0"
|
||||
description = "MCP server exposing SearxNG web search as a tool"
|
||||
readme = "README.md"
|
||||
authors = [
|
||||
{ name = "Hans Aschauer", email = "hans.git@ch23.de" }
|
||||
]
|
||||
requires-python = ">=3.14"
|
||||
dependencies = [
|
||||
"fastmcp>=3.2.4",
|
||||
"httpx>=0.28.1",
|
||||
"pydantic-settings>=2.13.1",
|
||||
"trafilatura>=2.0.0",
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
searxng-mcp = "searxng_mcp.__main__:main"
|
||||
|
||||
[build-system]
|
||||
requires = ["uv_build>=0.10.8,<0.11.0"]
|
||||
build-backend = "uv_build"
|
||||
221
scripts/ingest_fastmcp_docs.py
Normal file
221
scripts/ingest_fastmcp_docs.py
Normal file
|
|
@ -0,0 +1,221 @@
|
|||
"""
|
||||
Ingest FastMCP documentation into rag-mcp.
|
||||
|
||||
Walks fast_mcp_docs/, reads each .mdx/.md file, and adds it as a record
|
||||
in a rag-mcp document. Runs directly against the rag-mcp HTTP MCP endpoint.
|
||||
|
||||
Usage:
|
||||
uv run scripts/ingest_fastmcp_docs.py [--dry-run] [--rag-url URL]
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
from pathlib import Path
|
||||
|
||||
import httpx
|
||||
|
||||
# Directory holding this script (scripts/); the docs tree lives one level up.
_SCRIPT_DIR = Path(__file__).parent
DOCS_DIR = _SCRIPT_DIR.parent / "fast_mcp_docs"

# Endpoint used when --rag-url is not given on the command line.
DEFAULT_RAG_URL = "http://localhost:8006/mcp"

# Identity of the rag-mcp document that every ingested record is attached to.
DOC_SOURCE = "fastmcp-docs"
DOC_DESCRIPTION = "FastMCP Python library documentation (prefecthq/fastmcp)"
DOC_TAGS = [
    "fastmcp",
    "mcp",
    "python",
    "docs",
]
|
||||
|
||||
|
||||
class RagMcpClient:
|
||||
"""Minimal synchronous client for rag-mcp HTTP MCP endpoint."""
|
||||
|
||||
def __init__(self, url: str):
|
||||
self.url = url
|
||||
self.session_id: str | None = None
|
||||
self._id = 0
|
||||
self.client = httpx.Client(timeout=60.0)
|
||||
|
||||
def _next_id(self) -> int:
|
||||
self._id += 1
|
||||
return self._id
|
||||
|
||||
def _headers(self) -> dict:
|
||||
h = {
|
||||
"Content-Type": "application/json",
|
||||
"Accept": "application/json, text/event-stream",
|
||||
}
|
||||
if self.session_id:
|
||||
h["Mcp-Session-Id"] = self.session_id
|
||||
return h
|
||||
|
||||
def _parse_sse(self, text: str) -> dict:
|
||||
"""Extract the JSON payload from an SSE response."""
|
||||
for line in text.splitlines():
|
||||
if line.startswith("data: "):
|
||||
return json.loads(line[6:])
|
||||
raise ValueError(f"No data line in SSE response: {text[:200]}")
|
||||
|
||||
def initialize(self) -> None:
|
||||
payload = {
|
||||
"jsonrpc": "2.0",
|
||||
"method": "initialize",
|
||||
"params": {
|
||||
"protocolVersion": "2024-11-05",
|
||||
"capabilities": {},
|
||||
"clientInfo": {"name": "ingest-fastmcp-docs", "version": "1.0"},
|
||||
},
|
||||
"id": self._next_id(),
|
||||
}
|
||||
resp = self.client.post(self.url, json=payload, headers=self._headers())
|
||||
resp.raise_for_status()
|
||||
self.session_id = resp.headers.get("mcp-session-id")
|
||||
result = self._parse_sse(resp.text)
|
||||
if "error" in result:
|
||||
raise RuntimeError(f"initialize failed: {result['error']}")
|
||||
print(f"[rag-mcp] Session: {self.session_id}")
|
||||
|
||||
def call_tool(self, name: str, arguments: dict) -> dict:
|
||||
payload = {
|
||||
"jsonrpc": "2.0",
|
||||
"method": "tools/call",
|
||||
"params": {"name": name, "arguments": arguments},
|
||||
"id": self._next_id(),
|
||||
}
|
||||
resp = self.client.post(self.url, json=payload, headers=self._headers())
|
||||
resp.raise_for_status()
|
||||
result = self._parse_sse(resp.text)
|
||||
if "error" in result:
|
||||
raise RuntimeError(f"tools/call {name} failed: {result['error']}")
|
||||
# Unwrap MCP content envelope
|
||||
content = result.get("result", {}).get("content", [])
|
||||
if content and content[0].get("type") == "text":
|
||||
return json.loads(content[0]["text"])
|
||||
return result.get("result", {})
|
||||
|
||||
def close(self) -> None:
|
||||
self.client.close()
|
||||
|
||||
|
||||
def find_doc_files(docs_dir: Path) -> list[Path]:
    """Return every ``.md`` / ``.mdx`` file below *docs_dir*, sorted by path."""
    doc_suffixes = (".mdx", ".md")
    matches = [
        Path(folder, entry)
        for folder, _subdirs, entries in os.walk(docs_dir)
        for entry in entries
        if entry.endswith(doc_suffixes)
    ]
    # One final sort gives a stable order regardless of walk order.
    matches.sort()
    return matches
|
||||
|
||||
|
||||
def derive_title(rel_path: Path, content: str) -> str:
    """Derive a human-readable title for a documentation file.

    Lookup order:

    1. A non-empty ``title:`` entry in the leading ``---`` front-matter block.
    2. The first ``# `` heading in the body that is outside a fenced code block
       (so shell/Python comments inside code samples are not mistaken for it).
    3. The file stem with ``-`` / ``_`` replaced by spaces, title-cased.

    Args:
        rel_path: Path of the file; only its stem is used for the fallback.
        content: Full text of the file.

    Returns:
        The derived title; never an empty string for a non-empty file stem.
    """
    lines = content.splitlines()

    # Front matter only counts when it opens on the first line AND is closed;
    # a lone leading "---" is treated as ordinary body text.
    frontmatter_end = 0
    if lines and lines[0].strip() == "---":
        for index in range(1, len(lines)):
            if lines[index].strip() == "---":
                frontmatter_end = index
                break

    for raw in lines[1:frontmatter_end]:
        entry = raw.strip()
        if entry.startswith("title:"):
            title = entry[6:].strip().strip('"').strip("'")
            if title:
                return title

    body = lines[frontmatter_end + 1:] if frontmatter_end else lines
    in_fence = False
    for raw in body:
        line = raw.strip()
        if line.startswith(("```", "~~~")):
            in_fence = not in_fence
            continue
        if not in_fence and line.startswith("# "):
            return line[2:].strip()

    return rel_path.stem.replace("-", " ").replace("_", " ").title()
|
||||
|
||||
|
||||
def derive_section(rel_path: Path) -> str:
    """Return the first path component of *rel_path*, or ``"root"`` when the path has no parent directory."""
    segments = rel_path.parts
    if len(segments) <= 1:
        return "root"
    return segments[0]
|
||||
|
||||
|
||||
def _find_or_create_document(client: "RagMcpClient"):
    """Return the id of the rag-mcp document for DOC_SOURCE, creating it if absent."""
    print("Looking for existing fastmcp-docs document...")
    docs_list = client.call_tool("browse_documents", {"page": 1, "page_size": 50})
    # The listing may be a bare list or wrapped in a dict; iterating a dict
    # directly would walk its keys and never match, creating a duplicate.
    # NOTE(review): the "result" wrapper key follows the project's SKILL.md
    # convention - confirm against the rag-mcp response schema.
    candidates = docs_list.get("result", []) if isinstance(docs_list, dict) else docs_list
    # NOTE(review): only the first page (50 documents) is inspected.
    for doc in candidates:
        if isinstance(doc, dict) and doc.get("source") == DOC_SOURCE:
            print(f"Using existing document id={doc['id']}")
            return doc["id"]

    print("Creating new document...")
    new_doc = client.call_tool(
        "add_document",
        {
            "source": DOC_SOURCE,
            "tags": DOC_TAGS,
            "description": DOC_DESCRIPTION,
            "meta": {"repo": "prefecthq/fastmcp", "local_path": str(DOCS_DIR)},
        },
    )
    print(f"Created document id={new_doc['id']}")
    return new_doc["id"]


def main() -> None:
    """Ingest every doc file under DOCS_DIR into rag-mcp as one record each.

    Command-line options:
        --dry-run   List the discovered files and exit without contacting rag-mcp.
        --rag-url   rag-mcp MCP endpoint (defaults to DEFAULT_RAG_URL).
        --limit     Ingest at most this many files; 0 means all.

    Per-file failures are reported and counted but do not abort the run. The
    HTTP client is closed even when the run is interrupted by an exception.
    """
    parser = argparse.ArgumentParser(description="Ingest FastMCP docs into rag-mcp")
    parser.add_argument("--dry-run", action="store_true", help="List files only, no ingestion")
    parser.add_argument("--rag-url", default=DEFAULT_RAG_URL, help="rag-mcp MCP endpoint")
    parser.add_argument("--limit", type=int, default=0, help="Max files to ingest (0=all)")
    args = parser.parse_args()
    if args.limit < 0:
        # A negative slice bound would silently drop files from the END.
        parser.error("--limit must be >= 0")

    files = find_doc_files(DOCS_DIR)
    print(f"Found {len(files)} doc files in {DOCS_DIR}")

    if args.dry_run:
        for f in files:
            print(f" {f.relative_to(DOCS_DIR)}")
        return

    if not files:
        # Avoid creating an empty document when there is nothing to attach.
        print("Nothing to ingest.")
        return

    if args.limit:
        files = files[: args.limit]
        print(f"Limiting to {args.limit} files")

    client = RagMcpClient(args.rag_url)
    try:
        client.initialize()
        doc_id = _find_or_create_document(client)

        # Ingest each file
        ok = 0
        errors = 0
        for position, fpath in enumerate(files, start=1):
            rel = fpath.relative_to(DOCS_DIR)
            try:
                content = fpath.read_text(encoding="utf-8")
            except (OSError, UnicodeDecodeError) as e:
                print(f" [SKIP] {rel}: read error: {e}")
                errors += 1
                continue

            title = derive_title(rel, content)
            section = derive_section(rel)

            try:
                result = client.call_tool(
                    "add_record_fields",
                    {
                        "document_id": doc_id,
                        "fields": {
                            "title": title,
                            "path": str(rel),
                            "content": content,
                        },
                        "metadata": {
                            "section": section,
                            "path": str(rel),
                            "title": title,
                        },
                        "config": {
                            "chunk_size": 800,
                            "overlap": 80,
                            "embed_full_field": True,
                            "generate_snippets": True,
                        },
                    },
                )
            except Exception as e:  # best-effort: one bad file must not stop the run
                print(f" [ERROR] {rel}: {e}")
                errors += 1
                time.sleep(1)  # back off on error
                continue

            ok += 1
            if position % 10 == 0:
                # Kept outside the try so a surprising result shape cannot
                # count the same file as both ingested and failed.
                record_id = result.get("record_id") if isinstance(result, dict) else None
                print(f" [{position}/{len(files)}] {rel} -> record_id={record_id}")
    finally:
        client.close()

    print(f"\nDone: {ok} ingested, {errors} errors (document id={doc_id})")


if __name__ == "__main__":
    main()
|
||||
5
src/searxng_mcp/__init__.py
Normal file
5
src/searxng_mcp/__init__.py
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
"""SearxNG MCP — package entry point."""
|
||||
|
||||
from searxng_mcp.server import mcp
|
||||
|
||||
__all__ = ["mcp"]
|
||||
41
src/searxng_mcp/__main__.py
Normal file
41
src/searxng_mcp/__main__.py
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
"""CLI entry point for the SearxNG MCP server."""
|
||||
|
||||
import argparse
|
||||
from searxng_mcp.server import mcp
|
||||
|
||||
|
||||
def main() -> None:
    """Parse command-line options and start the MCP server on the chosen transport."""
    cli = argparse.ArgumentParser(prog="searxng-mcp", description="SearxNG MCP server")
    cli.add_argument(
        "--transport",
        default="stdio",
        choices=["stdio", "http", "sse"],
        help="Transport protocol (default: stdio)",
    )
    cli.add_argument(
        "--host",
        default="127.0.0.1",
        help="Host to bind when using http/sse transport (default: 127.0.0.1)",
    )
    cli.add_argument(
        "--port",
        default=8000,
        type=int,
        help="Port to bind when using http/sse transport (default: 8000)",
    )
    opts = cli.parse_args()

    # Bind options are only forwarded for the network transports.
    run_options = {"transport": opts.transport}
    if opts.transport in {"http", "sse"}:
        run_options |= {"host": opts.host, "port": opts.port}

    mcp.run(**run_options)


if __name__ == "__main__":
    main()
|
||||
0
src/searxng_mcp/py.typed
Normal file
0
src/searxng_mcp/py.typed
Normal file
36
src/searxng_mcp/searxng.py
Normal file
36
src/searxng_mcp/searxng.py
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
"""HTTP client for the SearxNG search API."""
|
||||
|
||||
from typing import Any
|
||||
import httpx
|
||||
|
||||
|
||||
async def search(
    base_url: str,
    query: str,
    categories: str | None = None,
    engines: str | None = None,
    language: str | None = None,
    pageno: int = 1,
    time_range: str | None = None,
    safesearch: int = 0,
) -> dict[str, Any]:
    """Query the ``/search`` endpoint of a SearxNG instance and return the decoded JSON body.

    Optional filters are only sent when they have a truthy value. A non-2xx
    response raises via ``response.raise_for_status()``.
    """
    optional_filters = {
        "categories": categories,
        "engines": engines,
        "language": language,
        "time_range": time_range,
    }
    params: dict[str, Any] = {
        "q": query,
        "format": "json",
        "pageno": pageno,
        "safesearch": safesearch,
        **{key: value for key, value in optional_filters.items() if value},
    }
    endpoint = f"{base_url.rstrip('/')}/search"

    async with httpx.AsyncClient() as client:
        response = await client.get(endpoint, params=params)
        response.raise_for_status()
        return response.json()
|
||||
222
src/searxng_mcp/server.py
Normal file
222
src/searxng_mcp/server.py
Normal file
|
|
@ -0,0 +1,222 @@
|
|||
"""SearxNG MCP server."""
|
||||
|
||||
from typing import Annotated, Literal
|
||||
from fastmcp import FastMCP
|
||||
from pydantic import Field
|
||||
from pydantic_settings import BaseSettings, SettingsConfigDict
|
||||
|
||||
import asyncio
|
||||
import trafilatura
|
||||
|
||||
from searxng_mcp.searxng import search as _search
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Server configuration loaded from ``SEARXNG_``-prefixed environment variables or a ``.env`` file."""

    model_config = SettingsConfigDict(
        env_prefix="SEARXNG_",
        env_file=".env",
        env_file_encoding="utf-8",
    )

    # Base URL of the SearxNG instance to query.
    base_url: str = "http://localhost:8080"
|
||||
|
||||
|
||||
# Configuration is resolved once, when this module is imported.
settings = Settings()

# Guidance text handed to FastMCP as the server's instructions.
_SERVER_INSTRUCTIONS = (
    "Use the search tool to query the web via a SearxNG instance. "
    "Prefer specific queries and use categories/time_range to narrow results. "
    "Use the fetch tool to retrieve a page preview (first N chars). "
    "If the page is truncated and you need more, read the resource "
    "web://fetch?url=<url>&start=<n>&end=<m> to get a specific character slice. "
    "Pages are cached after the first fetch; pass use_cache=false to force a refresh."
)

mcp = FastMCP("SearxNG Search", instructions=_SERVER_INSTRUCTIONS)

# In-memory cache: (url, output_format, include_tables, include_images, include_links) -> content
_cache: dict[tuple, str] = {}
|
||||
|
||||
|
||||
# Upper bound on cached pages so a long-running server cannot grow without limit.
_CACHE_MAX_ENTRIES = 256


async def _fetch_and_extract(
    url: str,
    output_format: str = "markdown",
    include_tables: bool = True,
    include_images: bool = False,
    include_links: bool = False,
    use_cache: bool = True,
) -> str:
    """Shared fetch+extract logic used by both the tool and resource.

    Downloads ``url`` with trafilatura and extracts its content. Successful
    extractions are cached under the key
    ``(url, output_format, include_tables, include_images, include_links)``.

    Args:
        url: Page to download.
        output_format: Passed through to ``trafilatura.extract``; metadata is
            requested only when this is ``"json"``.
        include_tables: Passed through to ``trafilatura.extract``.
        include_images: Passed through to ``trafilatura.extract``.
        include_links: Passed through to ``trafilatura.extract``.
        use_cache: When false, skip the cache lookup and overwrite any cached
            entry with the freshly extracted content.

    Returns:
        The extracted content as a string.

    Raises:
        ValueError: If the download or the extraction yields nothing.
    """
    cache_key = (url, output_format, include_tables, include_images, include_links)

    if use_cache and cache_key in _cache:
        return _cache[cache_key]

    # Both trafilatura calls are passed to asyncio.to_thread so they run in
    # worker threads rather than on the event loop.
    downloaded = await asyncio.to_thread(trafilatura.fetch_url, url)
    if not downloaded:
        raise ValueError(f"Failed to fetch URL: {url}")

    result = await asyncio.to_thread(
        trafilatura.extract,
        downloaded,
        url=url,
        output_format=output_format,
        include_tables=include_tables,
        include_images=include_images,
        include_links=include_links,
        with_metadata=output_format == "json",
    )
    if not result:
        raise ValueError(f"Failed to extract content from URL: {url}")

    # Dicts keep insertion order: drop any stale entry so the refreshed one
    # becomes the newest, then evict oldest-first until there is room.
    _cache.pop(cache_key, None)
    while len(_cache) >= _CACHE_MAX_ENTRIES:
        _cache.pop(next(iter(_cache)))
    _cache[cache_key] = result
    return result
|
||||
|
||||
|
||||
@mcp.tool
async def search(
    query: Annotated[str, Field(description="Search query string.")],
    categories: Annotated[
        str | None,
        Field(description="Comma-separated categories: general, images, news, science, files, social_media, it, map."),
    ] = None,
    engines: Annotated[
        str | None,
        Field(description="Comma-separated engines to use, e.g. 'google,bing'. Overrides categories."),
    ] = None,
    language: Annotated[
        str | None,
        Field(description="BCP 47 language code for results, e.g. 'en', 'de'."),
    ] = None,
    pageno: Annotated[
        int,
        Field(description="Result page number (1-based).", ge=1),
    ] = 1,
    time_range: Annotated[
        Literal["day", "week", "month", "year"] | None,
        Field(description="Restrict results to a time range."),
    ] = None,
    safesearch: Annotated[
        Literal[0, 1, 2],
        Field(description="Safe search level: 0=off, 1=moderate, 2=strict."),
    ] = 0,
) -> list[dict]:
    """Search the web via SearxNG and return a list of results.

    Each result contains: title, url, content (snippet), engine, category.
    Returns at most the results provided by the SearxNG instance (typically 10 per page).
    """
    # NOTE: the docstring above is kept verbatim; FastMCP presumably exposes
    # it to clients as the tool description - confirm before rewording.
    response = await _search(
        base_url=settings.base_url,
        query=query,
        categories=categories,
        engines=engines,
        language=language,
        pageno=pageno,
        time_range=time_range,
        safesearch=safesearch,
    )

    # Project every hit onto a fixed set of keys, defaulting missing ones to "".
    wanted_fields = ("title", "url", "content", "engine", "category")
    return [
        {field: hit.get(field, "") for field in wanted_fields}
        for hit in response.get("results", [])
    ]
|
||||
|
||||
|
||||
@mcp.tool
async def fetch(
    url: Annotated[str, Field(description="URL of the page to fetch and extract.")],
    output_format: Annotated[
        Literal["markdown", "txt", "json"],
        Field(description="Output format for extracted content: markdown, txt, or json (includes metadata)."),
    ] = "markdown",
    include_tables: Annotated[
        bool,
        Field(description="Include tables in extracted content."),
    ] = True,
    include_images: Annotated[
        bool,
        Field(description="Include image descriptions in extracted content."),
    ] = False,
    include_links: Annotated[
        bool,
        Field(description="Include hyperlinks in extracted content."),
    ] = False,
    max_chars: Annotated[
        int,
        Field(description="Maximum characters to return. 0 means no limit.", ge=0),
    ] = 2000,
    start: Annotated[
        int,
        Field(description="Start character offset for slicing extracted content.", ge=0),
    ] = 0,
    end: Annotated[
        int,
        Field(description="End character offset for slicing extracted content. 0 means read to end of content.", ge=0),
    ] = 0,
    use_cache: Annotated[
        bool,
        Field(description="Return cached content if available. Set to false to force a fresh download."),
    ] = True,
) -> dict:
    """Fetch a URL and extract its main content, stripping navigation, ads, and boilerplate.

    Returns a preview of the content (up to max_chars) plus total_chars and truncated flag.
    If truncated, use start/end to page through the full content, or read the resource
    web://fetch?url=<url>&start=<n>&end=<m> for specific slices.
    When start/end are given, truncated tells whether content remains after the returned slice.
    """
    content = await _fetch_and_extract(url, output_format, include_tables, include_images, include_links, use_cache)
    total_chars = len(content)

    # Work out where the returned window stops; "truncated" then simply means
    # that the window ends before the content does.
    if start > 0 or end > 0:
        # An explicit start/end slice takes priority over max_chars windowing.
        stop = min(end, total_chars) if end > 0 else total_chars
        window = content[start:stop]
    elif max_chars > 0:
        stop = min(max_chars, total_chars)
        window = content[:stop]
    else:
        stop = total_chars
        window = content

    return {
        "content": window,
        "total_chars": total_chars,
        "truncated": stop < total_chars,
    }
|
||||
|
||||
|
||||
@mcp.resource(
    "web://fetch{?url,start,end,output_format,include_links,include_tables,include_images,use_cache}",
    mime_type="text/markdown",
)
async def fetch_slice(
    url: str = "",
    start: int = 0,
    end: int = 0,
    output_format: str = "markdown",
    include_links: bool = False,
    include_tables: bool = True,
    include_images: bool = False,
    use_cache: bool = True,
) -> str:
    """Fetch a URL and return a character slice of the extracted content.

    Use start/end to page through large documents (end=0 means read to end of content).
    Example: web://fetch?url=https://example.com/page&start=2000&end=4000
    """
    if not url:
        raise ValueError("url parameter is required")
    # Offsets are character positions from the start of the content. Reject
    # negatives instead of letting Python treat them as from-the-end indices,
    # matching the ge=0 constraint on the fetch tool's start/end.
    if start < 0 or end < 0:
        raise ValueError("start and end must be non-negative")

    content = await _fetch_and_extract(url, output_format, include_tables, include_images, include_links, use_cache)
    stop = end if end > 0 else None  # None slices through to the end
    return content[start:stop]
|
||||
Loading…
Add table
Add a link
Reference in a new issue