From 49a0662332c72598d5f3a5307878dd8840dd37d9 Mon Sep 17 00:00:00 2001 From: aliyanishfaq Date: Tue, 22 Jul 2025 11:57:49 -0700 Subject: [PATCH 1/3] handle client-side meta refresh redirects --- mcpdoc/main.py | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/mcpdoc/main.py b/mcpdoc/main.py index 76f82af..936e83f 100644 --- a/mcpdoc/main.py +++ b/mcpdoc/main.py @@ -1,7 +1,8 @@ """MCP Llms-txt server for docs.""" import os -from urllib.parse import urlparse +import re +from urllib.parse import urlparse, urljoin import httpx from markdownify import markdownify @@ -229,6 +230,7 @@ def list_doc_sources() -> str: @server.tool(description=fetch_docs_description) async def fetch_docs(url: str) -> str: nonlocal domains + url = url.strip() # Handle local file paths (either as file:// URLs or direct filesystem paths) if not _is_http_or_https(url): abs_path = _normalize_path(url) @@ -255,7 +257,23 @@ async def fetch_docs(url: str) -> str: try: response = await httpx_client.get(url, timeout=timeout) response.raise_for_status() - return markdownify(response.text) + content = response.text + + # Check for meta refresh tag which indicates a client-side redirect + match = re.search( + r' Date: Tue, 22 Jul 2025 13:00:32 -0700 Subject: [PATCH 2/3] fix: redirect domain check --- mcpdoc/main.py | 38 ++++++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 14 deletions(-) diff --git a/mcpdoc/main.py b/mcpdoc/main.py index 936e83f..c1071a7 100644 --- a/mcpdoc/main.py +++ b/mcpdoc/main.py @@ -229,7 +229,7 @@ def list_doc_sources() -> str: @server.tool(description=fetch_docs_description) async def fetch_docs(url: str) -> str: - nonlocal domains + nonlocal domains, follow_redirects url = url.strip() # Handle local file paths (either as file:// URLs or direct filesystem paths) if not _is_http_or_https(url): @@ -259,19 +259,29 @@ async def fetch_docs(url: str) -> str: response.raise_for_status() content = response.text - # Check for meta refresh tag which indicates a client-side redirect - match = re.search( - r' Date: Tue, 22 Jul 2025 13:04:00 -0700 Subject: [PATCH 3/3] chore: code cleaning --- mcpdoc/main.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/mcpdoc/main.py b/mcpdoc/main.py index c1071a7..905a7ad 100644 --- a/mcpdoc/main.py +++ b/mcpdoc/main.py @@ -270,7 +270,7 @@ async def fetch_docs(url: str) -> str: if match: redirect_url = match.group(1) new_url = urljoin(str(response.url), redirect_url) - + if "*" not in domains and not any( new_url.startswith(domain) for domain in domains ): @@ -278,7 +278,7 @@ async def fetch_docs(url: str) -> str: "Error: Redirect URL not allowed. Must start with one of the following domains: " + ", ".join(domains) ) - + response = await httpx_client.get(new_url, timeout=timeout) response.raise_for_status() content = response.text