Skip to content

Commit 4489ee3

Browse files
committed
add redaction
1 parent d2331cd commit 4489ee3

File tree

10 files changed

+989
-35
lines changed

10 files changed

+989
-35
lines changed

pyproject.toml

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,8 @@ classifiers = [
1818
"Programming Language :: Python :: 3.12",
1919
]
2020
dependencies = [
21-
"datafog>=4.1.1",
2221
"mcp>=1.0.0",
23-
"mcpcat-api==0.1.1",
22+
"mcpcat-api==0.1.3",
2423
"pydantic>=2.0.0",
2524
]
2625

src/mcpcat/modules/event_queue.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
from .compatibility import get_mcp_compatible_error_message
1818
from .internal import get_server_tracking_data
1919
from .logging import write_to_log
20-
from .redaction import redact_event_sync
20+
from .redaction import redact_event
2121
from .session import get_session_info, set_last_activity
2222

2323

@@ -91,7 +91,7 @@ def _process_event(self, event: UnredactedEvent) -> None:
9191
if event and event.redaction_fn:
9292
# Redact sensitive information if a redaction function is provided
9393
try:
94-
redacted_event = redact_event_sync(event)
94+
redacted_event = redact_event(event, event.redaction_fn)
9595
# The redacted event is already the full event object, not a dict
9696
event = redacted_event
9797
event.redaction_fn = None # Clear the function to avoid reprocessing
@@ -192,10 +192,16 @@ def publish_event(server: Any, event: UnredactedEvent) -> None:
192192
session_info = get_session_info(server, data)
193193

194194
# Create full event with all required fields
195+
# Merge event data with session info (event fields take precedence)
196+
event_data = event.model_dump(exclude_none=True)
197+
session_data = session_info.model_dump(exclude_none=True)
198+
199+
# Event fields take precedence over session fields
200+
merged_data = {**event_data, **session_data}
201+
195202
full_event = UnredactedEvent(
203+
**merged_data,
196204
project_id=data.project_id,
197-
**event.model_dump(exclude={"project_id", "redaction_fn"}),
198-
**session_info.model_dump(exclude_none=True),
199205
redaction_fn=data.options.redact_sensitive_information,
200206
)
201207

src/mcpcat/modules/overrides/mcp_server.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from mcpcat.modules.tools import handle_report_missing
1313

1414
from ...types import EventType, MCPCatData, UnredactedEvent
15-
from ..session import get_server_session_id
15+
from ..session import get_client_info_from_request_context, get_server_session_id
1616

1717

1818
def safe_request_context(server: Server) -> Optional[RequestContext]:
@@ -43,7 +43,6 @@ async def wrapped_initialize_handler(request: InitializeRequest) -> ServerResult
4343
timestamp=datetime.now(),
4444
parameters=request.params.model_dump() if request.params else {},
4545
event_type=EventType.MCP_INITIALIZE.value,
46-
redaction_fn=data.options.redact_sensitive_information,
4746
)
4847

4948
# Call the original handler
@@ -60,14 +59,14 @@ async def wrapped_list_tools_handler(request: ListToolsRequest) -> ServerResult:
6059
"""Intercept list_tools requests to add MCPCat tools and modify existing ones."""
6160
session_id = get_server_session_id(server)
6261
request_context = safe_request_context(server)
62+
get_client_info_from_request_context(server, request_context)
6363
identify_session(server, request, request_context)
6464
event = UnredactedEvent(
6565
session_id=session_id,
6666
timestamp=datetime.now(),
6767

6868
parameters=request.params.model_dump() if request.params else {},
6969
event_type=EventType.MCP_TOOLS_LIST.value,
70-
redaction_fn=data.options.redact_sensitive_information,
7170
)
7271

7372
# Call the original handler to get the tools
@@ -137,6 +136,7 @@ async def wrapped_call_tool_handler(request: CallToolRequest) -> ServerResult:
137136
arguments = request.params.arguments or {}
138137
session_id = get_server_session_id(server)
139138
request_context = safe_request_context(server)
139+
get_client_info_from_request_context(server, request_context)
140140
identify_session(server, request, request_context)
141141

142142
write_to_log(f"Intercepted call to tool '{tool_name}' with arguments: {arguments} and request context: {request_context}")
@@ -147,7 +147,6 @@ async def wrapped_call_tool_handler(request: CallToolRequest) -> ServerResult:
147147
parameters=request.params.model_dump() if request.params else {},
148148
event_type=EventType.MCP_TOOLS_CALL.value,
149149
resource_name=tool_name,
150-
redaction_fn=data.options.redact_sensitive_information,
151150
)
152151

153152
# Handle report_missing tool directly

src/mcpcat/modules/redaction.py

Lines changed: 86 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,94 @@
11
"""PII redaction for MCPCat logs."""
22

3-
from typing import Any, TYPE_CHECKING
4-
5-
from datafog import DataFog
6-
3+
from typing import Any, TYPE_CHECKING, Callable, Set
74
if TYPE_CHECKING:
85
from mcpcat.types import Event, UnredactedEvent
96

107

11-
def defaultRedactor(text: str) -> str:
12-
"""Default redactor function for sensitive information."""
13-
# Basic implementation - can be enhanced with actual redaction logic
14-
return text
8+
# Top-level field names whose values are exempt from redaction.
9+
# These fields contain system-level identifiers and metadata that
10+
# need to be preserved for analytics tracking. Membership is only tested
# for keys at the top level (empty path) by redact_strings_in_object; a
# match exempts everything nested under that field as well.
11+
PROTECTED_FIELDS: Set[str] = {
12+
'session_id',
13+
'id',
14+
'project_id',
15+
'server',
16+
'identify_actor_given_id',
17+
'identify_actor_name',
18+
'identify_data',
19+
'resource_name',
20+
'event_type',
21+
'actor_id'
22+
}
23+
24+
25+
def redact_strings_in_object(
    obj: Any,
    redact_fn: Callable[[str], str],
    path: str = '',
    is_protected: bool = False
) -> Any:
    """
    Recursively apply a redaction function to every string value in an object.

    Strings are passed through ``redact_fn`` unless they sit inside a
    protected field. Lists, tuples and dicts are rebuilt with their contents
    redacted; dict keys themselves are never redacted. Any other value
    (numbers, booleans, callables, arbitrary objects) is returned unchanged.

    Args:
        obj: The object to redact strings from
        redact_fn: The redaction function to apply to each string
        path: The current path in the object tree; an empty path marks the
            top level, which is the only level where PROTECTED_FIELDS is
            consulted
        is_protected: Whether the current object/value is within a protected field

    Returns:
        A new object with all unprotected strings redacted. Dict entries
        whose value is None are omitted from the result.
    """
    if obj is None:
        return obj

    # Handle strings: leave untouched when this field or any parent is protected
    if isinstance(obj, str):
        return obj if is_protected else redact_fn(obj)

    # Handle sequences. Tuples are walked as well as lists so that strings
    # inside them are not passed through unredacted.
    if isinstance(obj, (list, tuple)):
        items = [
            redact_strings_in_object(item, redact_fn, f"{path}[{index}]", is_protected)
            for index, item in enumerate(obj)
        ]
        if isinstance(obj, list):
            return items
        if hasattr(obj, '_fields'):
            # namedtuple: rebuild with positional fields to keep its type
            return type(obj)(*items)
        return tuple(items)

    # Handle dictionaries/objects
    if isinstance(obj, dict):
        redacted_obj = {}

        for key, value in obj.items():
            # Skip None values
            if value is None:
                continue

            # Build the path for nested fields
            field_path = f"{path}.{key}" if path else key
            # Protection is inherited from the parent, or granted when a
            # top-level key is listed in PROTECTED_FIELDS
            is_field_protected = is_protected or (path == '' and key in PROTECTED_FIELDS)
            redacted_obj[key] = redact_strings_in_object(
                value, redact_fn, field_path, is_field_protected
            )

        return redacted_obj

    # For all other types (numbers, booleans, etc.), return as-is
    return obj
1579

1680

17-
def redact_event_sync(event: "UnredactedEvent") -> "Event":
18-
"""Synchronous version to redact sensitive information from an event."""
19-
return event
81+
def redact_event(event: "UnredactedEvent", redact_fn: Callable[[str], str]) -> "Event":
    """
    Applies the customer's redaction function to all string fields in an Event object.
    This is the main entry point for redacting sensitive information from events
    before they are sent to the analytics service.

    redact_strings_in_object only descends into str/list/dict values, so a
    model instance handed to it directly is returned untouched. Model events
    are therefore expanded into a top-level field mapping first, redacted, and
    the result is applied to a copy of the event.

    Args:
        event: The event to redact
        redact_fn: The customer's redaction function

    Returns:
        A new event object with all unprotected strings redacted. Inputs that
        are not model objects (e.g. plain dicts) are redacted directly.
    """
    # NOTE(review): assumes events are pydantic v2 models (other code in the
    # package calls model_dump() on them) -- confirm against mcpcat.types.
    copy_model = getattr(event, 'model_copy', None)
    if not callable(copy_model):
        return redact_strings_in_object(event, redact_fn, '', False)

    # dict(model) yields top-level (field, value) pairs without converting
    # nested values, so non-string field values keep their original types.
    redacted_fields = redact_strings_in_object(dict(event), redact_fn, '', False)

    # None-valued fields are dropped by the redactor; leaving them out of the
    # update keeps them as None on the copy.
    return copy_model(update=redacted_fields)

src/mcpcat/modules/session.py

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,14 @@
44
from datetime import datetime, timedelta
55
from typing import Any
66

7+
from mcp import Implementation
8+
from mcp.shared.context import RequestContext
79
from mcp.server import Server
10+
from mcp.server.session import ServerSession
811

912
from mcpcat.modules.constants import INACTIVITY_TIMEOUT_IN_MINUTES, SESSION_ID_PREFIX
1013
from mcpcat.modules.internal import get_server_tracking_data, set_server_tracking_data
14+
from mcpcat.modules.logging import write_to_log
1115

1216
from ..types import MCPCatData, SessionInfo
1317
from ..utils import generate_prefixed_ksuid
@@ -25,6 +29,24 @@ def get_mcpcat_version() -> str | None:
2529
return None
2630

2731

32+
def get_client_info_from_request_context(server: Server, request_context: RequestContext | None) -> None:
    """Record the client's name and version on the server's tracking data.

    Reads ``clientInfo`` from the request context's session client params and
    stores its name and version on ``data.session_info``. This is best-effort:
    any failure is logged and swallowed so the calling handler is not
    interrupted.

    Args:
        server: The server whose tracking data should be updated
        request_context: The current request context, or None when no
            request is active
    """
    data = get_server_tracking_data(server)
    if not data or request_context is None:
        return

    try:
        session_info = data.session_info
        if session_info is None:
            return

        # If client name and version are already set, no need to fetch again
        if session_info.client_name and session_info.client_version:
            return

        client_info = request_context.session.client_params.clientInfo
        if client_info is None:
            # Nothing to record; keep whatever is already stored
            return

        session_info.client_name = client_info.name
        session_info.client_version = client_info.version
        set_server_tracking_data(server, data)
    except Exception as e:
        # Deliberately broad: this lookup must never break request handling
        write_to_log(f"Failed to get client info from request context: {e}")
49+
2850
def get_session_info(server: Server, data: MCPCatData | None = None) -> SessionInfo:
2951
"""Get session information for the current MCP session."""
3052
actor_info = None
@@ -37,11 +59,11 @@ def get_session_info(server: Server, data: MCPCatData | None = None) -> SessionI
3759
mcpcat_version=get_mcpcat_version(),
3860
server_name=server.name if hasattr(server, 'name') else None,
3961
server_version=server.version if hasattr(server, 'version') else None,
40-
client_name=data.session_info.client_name if data else None,
41-
client_version=data.session_info.client_version if data else None,
62+
client_name=data.session_info.client_name if data and data.session_info else None,
63+
client_version=data.session_info.client_version if data and data.session_info else None,
4264
identify_actor_given_id=actor_info.userId if actor_info else None,
4365
identify_actor_name=actor_info.userName if actor_info else None,
44-
identify_actor_data=actor_info.userData if actor_info else None,
66+
identify_data=actor_info.userData if actor_info else None,
4567
)
4668

4769
if not data:

src/mcpcat/types.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,6 @@
1313
# Type alias for redaction function
1414
RedactionFunction = Callable[[str], str | Awaitable[str]]
1515

16-
# Import default redactor
17-
from .modules.redaction import defaultRedactor
1816

1917

2018
@dataclass
@@ -36,7 +34,7 @@ class SessionInfo(BaseModel):
3634
client_version: Optional[str] = None
3735
identify_actor_given_id: Optional[str] = None # Actor ID for mcpcat:identify events
3836
identify_actor_name: Optional[str] = None # Actor name for mcpcat:identify events
39-
identify_actor_data: Optional[dict[str, Any]] = None
37+
identify_data: Optional[dict[str, Any]] = None
4038

4139
class Event(PublishEventRequest):
4240
pass
@@ -68,7 +66,7 @@ class MCPCatOptions:
6866
enable_tracing: bool = True
6967
enable_tool_call_context: bool = True
7068
identify: IdentifyFunction | None = None
71-
redact_sensitive_information: RedactionFunction | None = defaultRedactor
69+
redact_sensitive_information: RedactionFunction | None = None
7270

7371
@dataclass
7472
class MCPCatData:

0 commit comments

Comments
 (0)