From 49a0662332c72598d5f3a5307878dd8840dd37d9 Mon Sep 17 00:00:00 2001
From: aliyanishfaq <aliyanishfaq200@gmail.com>
Date: Tue, 22 Jul 2025 11:57:49 -0700
Subject: [PATCH 1/3] handle client-side meta refresh redirects

---
 mcpdoc/main.py | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/mcpdoc/main.py b/mcpdoc/main.py
index 76f82af..936e83f 100644
--- a/mcpdoc/main.py
+++ b/mcpdoc/main.py
@@ -1,7 +1,8 @@
 """MCP Llms-txt server for docs."""
 
 import os
-from urllib.parse import urlparse
+import re
+from urllib.parse import urlparse, urljoin
 
 import httpx
 from markdownify import markdownify
@@ -229,6 +230,7 @@ def list_doc_sources() -> str:
     @server.tool(description=fetch_docs_description)
     async def fetch_docs(url: str) -> str:
         nonlocal domains
+        url = url.strip()
         # Handle local file paths (either as file:// URLs or direct filesystem paths)
         if not _is_http_or_https(url):
             abs_path = _normalize_path(url)
@@ -255,7 +257,23 @@ async def fetch_docs(url: str) -> str:
             try:
                 response = await httpx_client.get(url, timeout=timeout)
                 response.raise_for_status()
-                return markdownify(response.text)
+                content = response.text
+
+                # Check for meta refresh tag which indicates a client-side redirect
+                match = re.search(
+                    r'<meta http-equiv="refresh" content="[^;]+;\s*url=([^"]+)"',
+                    content,
+                    re.IGNORECASE,
+                )
+
+                if match:
+                    redirect_url = match.group(1)
+                    new_url = urljoin(str(response.url), redirect_url)
+                    response = await httpx_client.get(new_url, timeout=timeout)
+                    response.raise_for_status()
+                    content = response.text
+
+                return markdownify(content)
             except (httpx.HTTPStatusError, httpx.RequestError) as e:
                 return f"Encountered an HTTP error: {str(e)}"
 

From 60a3cffbffa056c30aced601234f10b17cb5c8fb Mon Sep 17 00:00:00 2001
From: aliyanishfaq <aliyanishfaq200@gmail.com>
Date: Tue, 22 Jul 2025 13:00:32 -0700
Subject: [PATCH 2/3] fix: gate meta refresh redirects on follow_redirects and allowed domains

---
 mcpdoc/main.py | 38 ++++++++++++++++++++++++--------------
 1 file changed, 24 insertions(+), 14 deletions(-)

diff --git a/mcpdoc/main.py b/mcpdoc/main.py
index 936e83f..c1071a7 100644
--- a/mcpdoc/main.py
+++ b/mcpdoc/main.py
@@ -229,7 +229,7 @@ def list_doc_sources() -> str:
 
     @server.tool(description=fetch_docs_description)
     async def fetch_docs(url: str) -> str:
-        nonlocal domains
+        nonlocal domains, follow_redirects
         url = url.strip()
         # Handle local file paths (either as file:// URLs or direct filesystem paths)
         if not _is_http_or_https(url):
@@ -259,19 +259,29 @@ async def fetch_docs(url: str) -> str:
                 response.raise_for_status()
                 content = response.text
 
-                # Check for meta refresh tag which indicates a client-side redirect
-                match = re.search(
-                    r'<meta http-equiv="refresh" content="[^;]+;\s*url=([^"]+)"',
-                    content,
-                    re.IGNORECASE,
-                )
-
-                if match:
-                    redirect_url = match.group(1)
-                    new_url = urljoin(str(response.url), redirect_url)
-                    response = await httpx_client.get(new_url, timeout=timeout)
-                    response.raise_for_status()
-                    content = response.text
+                if follow_redirects:
+                    # Check for meta refresh tag which indicates a client-side redirect
+                    match = re.search(
+                        r'<meta http-equiv="refresh" content="[^;]+;\s*url=([^"]+)"',
+                        content,
+                        re.IGNORECASE,
+                    )
+
+                    if match:
+                        redirect_url = match.group(1)
+                        new_url = urljoin(str(response.url), redirect_url)
+                        
+                        if "*" not in domains and not any(
+                            new_url.startswith(domain) for domain in domains
+                        ):
+                            return (
+                                "Error: Redirect URL not allowed. Must start with one of the following domains: "
+                                + ", ".join(domains)
+                            )
+                        
+                        response = await httpx_client.get(new_url, timeout=timeout)
+                        response.raise_for_status()
+                        content = response.text
 
                 return markdownify(content)
             except (httpx.HTTPStatusError, httpx.RequestError) as e:

From e652664c99a7c0d4dfd054991d31feb81e6e61c8 Mon Sep 17 00:00:00 2001
From: aliyanishfaq <aliyanishfaq200@gmail.com>
Date: Tue, 22 Jul 2025 13:04:00 -0700
Subject: [PATCH 3/3] chore: strip trailing whitespace from blank lines

---
 mcpdoc/main.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/mcpdoc/main.py b/mcpdoc/main.py
index c1071a7..905a7ad 100644
--- a/mcpdoc/main.py
+++ b/mcpdoc/main.py
@@ -270,7 +270,7 @@ async def fetch_docs(url: str) -> str:
                     if match:
                         redirect_url = match.group(1)
                         new_url = urljoin(str(response.url), redirect_url)
-                        
+
                         if "*" not in domains and not any(
                             new_url.startswith(domain) for domain in domains
                         ):
@@ -278,7 +278,7 @@ async def fetch_docs(url: str) -> str:
                                 "Error: Redirect URL not allowed. Must start with one of the following domains: "
                                 + ", ".join(domains)
                             )
-                        
+
                         response = await httpx_client.get(new_url, timeout=timeout)
                         response.raise_for_status()
                         content = response.text