From e54881dc94fac32875639f4a3ca4810c78b21864 Mon Sep 17 00:00:00 2001 From: gayathrivijayakumar Date: Wed, 10 Sep 2025 15:42:58 +0530 Subject: [PATCH 01/20] Enhance adapter validation by adding URL security checks in AdapterProcessor and views --- .../adapter_processor_v2/adapter_processor.py | 42 +++++++++++++++++-- backend/adapter_processor_v2/views.py | 27 ++++++++++++ 2 files changed, 66 insertions(+), 3 deletions(-) diff --git a/backend/adapter_processor_v2/adapter_processor.py b/backend/adapter_processor_v2/adapter_processor.py index 31ad4a383a..6927a2729e 100644 --- a/backend/adapter_processor_v2/adapter_processor.py +++ b/backend/adapter_processor_v2/adapter_processor.py @@ -6,8 +6,12 @@ from cryptography.fernet import Fernet from django.conf import settings from django.core.exceptions import ObjectDoesNotExist -from platform_settings_v2.platform_auth_service import PlatformAuthenticationService -from tenant_account_v2.organization_member_service import OrganizationMemberService +from platform_settings_v2.platform_auth_service import ( + PlatformAuthenticationService, +) +from tenant_account_v2.organization_member_service import ( + OrganizationMemberService, +) from adapter_processor_v2.constants import AdapterKeys, AllowedDomains from adapter_processor_v2.exceptions import ( @@ -27,7 +31,9 @@ logger = logging.getLogger(__name__) try: - from plugins.subscription.time_trials.subscription_adapter import add_unstract_key + from plugins.subscription.time_trials.subscription_adapter import ( + add_unstract_key, + ) except ImportError: add_unstract_key = None @@ -130,6 +136,36 @@ def update_adapter_metadata(adapter_metadata_b: Any, **kwargs) -> Any: return adapter_metadata_b return adapter_metadata_b + @staticmethod + def validate_adapter_urls(adapter_id: str, adapter_metadata: dict) -> None: + """Validate URLs for an adapter configuration without full connection test. 
+ + This method only validates URLs for security (SSRF protection) without + attempting actual network connections. + + Args: + adapter_id: The adapter ID (e.g., "postgres|70ab6cc2...") + adapter_metadata: The adapter configuration metadata + + Raises: + AdapterError: If URL validation fails due to security violations + """ + try: + # Get the adapter class + adapterkit = Adapterkit() + adapter_class = adapterkit.get_adapter_class_by_adapter_id(adapter_id) + + # Create a temporary instance just to get configured URLs + # This will trigger URL validation in __init__ but not full connection test + adapter_class(adapter_metadata) + + # If we reach here, URL validation passed + logger.debug(f"URL validation passed for adapter {adapter_id}") + + except Exception as e: + logger.error(f"URL validation failed for adapter {adapter_id}: {str(e)}") + raise + @staticmethod def __fetch_adapters_by_key_value(key: str, value: Any) -> Adapter: """Fetches a list of adapters that have an attribute matching key and diff --git a/backend/adapter_processor_v2/views.py b/backend/adapter_processor_v2/views.py index 36a2db0116..05ae0a936d 100644 --- a/backend/adapter_processor_v2/views.py +++ b/backend/adapter_processor_v2/views.py @@ -1,7 +1,10 @@ +import json import logging import uuid from typing import Any +from cryptography.fernet import Fernet +from django.conf import settings from django.db import IntegrityError from django.db.models import ProtectedError, QuerySet from django.http import HttpRequest @@ -40,6 +43,7 @@ TestAdapterSerializer, UserDefaultAdapterSerializer, ) +from unstract.sdk.exceptions import SdkError from .constants import AdapterKeys as constant from .models import AdapterInstance, UserDefaultAdapter @@ -177,6 +181,29 @@ def create(self, request: Any) -> Response: use_platform_unstract_key = True serializer.is_valid(raise_exception=True) + + # Validate URLs for security without full adapter testing + adapter_id = serializer.validated_data.get(AdapterKeys.ADAPTER_ID) 
+ adapter_metadata_b = serializer.validated_data.get(AdapterKeys.ADAPTER_METADATA_B) + + # Decrypt metadata to get configuration + decrypted_metadata = json.loads( + Fernet(settings.ENCRYPTION_KEY.encode("utf-8")) + .decrypt(adapter_metadata_b) + .decode("utf-8") + ) + + # Validate URLs for this adapter configuration + try: + AdapterProcessor.validate_adapter_urls(adapter_id, decrypted_metadata) + except SdkError as e: + from rest_framework.exceptions import ValidationError + + # Format error message similar to test adapter API + adapter_name = decrypted_metadata.get(AdapterKeys.ADAPTER_NAME, "adapter") + error_detail = f"Error testing '{adapter_name}'. {str(e)}" + raise ValidationError(error_detail) + try: adapter_type = serializer.validated_data.get(AdapterKeys.ADAPTER_TYPE) From 0963f8d7529f066033efa2f5999522efc57a31cf Mon Sep 17 00:00:00 2001 From: gayathrivijayakumar Date: Wed, 10 Sep 2025 15:46:08 +0530 Subject: [PATCH 02/20] Add Whitelist to sample.env --- backend/sample.env | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/backend/sample.env b/backend/sample.env index dcd79b5a16..9945a5d57c 100644 --- a/backend/sample.env +++ b/backend/sample.env @@ -200,3 +200,8 @@ RUNNER_POLLING_INTERVAL_SECONDS=2 # Default: 1800 seconds (30 minutes) # Examples: 900 (15 min), 1800 (30 min), 3600 (60 min) MIN_SCHEDULE_INTERVAL_SECONDS=1800 + +# Whitelisted adapter URLs to allow user to connect to locally hosted adapters. 
+# Whitelisting 10.68.0.10 to allow frictionless adapter connection to +# managed Postgres for VectorDB +ALLOWED_ADAPTER_PRIVATE_ENDPOINTS="127.0.0.1, 10.68.0.10" From 21320bebf85d5f6f0abf91c6f10003deae76782e Mon Sep 17 00:00:00 2001 From: Gayathri <142381512+gaya3-zipstack@users.noreply.github.com> Date: Thu, 11 Sep 2025 10:40:11 +0530 Subject: [PATCH 03/20] Update backend/adapter_processor_v2/views.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Signed-off-by: Gayathri <142381512+gaya3-zipstack@users.noreply.github.com> --- backend/adapter_processor_v2/views.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/backend/adapter_processor_v2/views.py b/backend/adapter_processor_v2/views.py index 05ae0a936d..e2ff10d16f 100644 --- a/backend/adapter_processor_v2/views.py +++ b/backend/adapter_processor_v2/views.py @@ -186,24 +186,26 @@ def create(self, request: Any) -> Response: adapter_id = serializer.validated_data.get(AdapterKeys.ADAPTER_ID) adapter_metadata_b = serializer.validated_data.get(AdapterKeys.ADAPTER_METADATA_B) + from rest_framework.exceptions import ValidationError + if not adapter_metadata_b: + raise ValidationError("Missing adapter metadata for validation.") + # Decrypt metadata to get configuration - decrypted_metadata = json.loads( - Fernet(settings.ENCRYPTION_KEY.encode("utf-8")) - .decrypt(adapter_metadata_b) - .decode("utf-8") - ) + try: + fernet = Fernet(settings.ENCRYPTION_KEY.encode("utf-8")) + decrypted_json = fernet.decrypt(adapter_metadata_b) + decrypted_metadata = json.loads(decrypted_json.decode("utf-8")) + except Exception as e: # InvalidToken/JSONDecodeError/TypeError/etc. 
+ raise ValidationError("Invalid adapter metadata.") from e # Validate URLs for this adapter configuration try: AdapterProcessor.validate_adapter_urls(adapter_id, decrypted_metadata) - except SdkError as e: - from rest_framework.exceptions import ValidationError - + except Exception as e: # Format error message similar to test adapter API adapter_name = decrypted_metadata.get(AdapterKeys.ADAPTER_NAME, "adapter") - error_detail = f"Error testing '{adapter_name}'. {str(e)}" - raise ValidationError(error_detail) - + error_detail = f"Error testing '{adapter_name}'. {e!s}" + raise ValidationError(error_detail) from e try: adapter_type = serializer.validated_data.get(AdapterKeys.ADAPTER_TYPE) From 4307bf63b5ac47973e4e82f6e6a7e9fb9cf12d76 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 11 Sep 2025 05:10:26 +0000 Subject: [PATCH 04/20] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- backend/adapter_processor_v2/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/adapter_processor_v2/views.py b/backend/adapter_processor_v2/views.py index e2ff10d16f..54b4e09140 100644 --- a/backend/adapter_processor_v2/views.py +++ b/backend/adapter_processor_v2/views.py @@ -43,7 +43,6 @@ TestAdapterSerializer, UserDefaultAdapterSerializer, ) -from unstract.sdk.exceptions import SdkError from .constants import AdapterKeys as constant from .models import AdapterInstance, UserDefaultAdapter @@ -187,6 +186,7 @@ def create(self, request: Any) -> Response: adapter_metadata_b = serializer.validated_data.get(AdapterKeys.ADAPTER_METADATA_B) from rest_framework.exceptions import ValidationError + if not adapter_metadata_b: raise ValidationError("Missing adapter metadata for validation.") From c5e6359717e67a90d7071d5f8c6f5ecddaef0d80 Mon Sep 17 00:00:00 2001 From: gayathrivijayakumar Date: Thu, 11 Sep 2025 11:03:38 +0530 Subject: [PATCH 05/20] 
Add url validation for PUT API --- backend/adapter_processor_v2/views.py | 36 ++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/backend/adapter_processor_v2/views.py b/backend/adapter_processor_v2/views.py index 05ae0a936d..ca8b0cf6dd 100644 --- a/backend/adapter_processor_v2/views.py +++ b/backend/adapter_processor_v2/views.py @@ -22,7 +22,9 @@ from rest_framework.serializers import ModelSerializer from rest_framework.versioning import URLPathVersioning from rest_framework.viewsets import GenericViewSet, ModelViewSet -from tenant_account_v2.organization_member_service import OrganizationMemberService +from tenant_account_v2.organization_member_service import ( + OrganizationMemberService, +) from utils.filtering import FilterHelper from adapter_processor_v2.adapter_processor import AdapterProcessor @@ -386,6 +388,38 @@ def update( # Get the adapter instance for update adapter = self.get_object() + # Get serializer and validate data first + serializer = self.get_serializer(adapter, data=request.data, partial=True) + serializer.is_valid(raise_exception=True) + + # Validate URLs for security if metadata is being updated + if AdapterKeys.ADAPTER_METADATA_B in serializer.validated_data: + adapter_id = ( + serializer.validated_data.get(AdapterKeys.ADAPTER_ID) + or adapter.adapter_id + ) + adapter_metadata_b = serializer.validated_data.get( + AdapterKeys.ADAPTER_METADATA_B + ) + + # Decrypt metadata to get configuration + decrypted_metadata = json.loads( + Fernet(settings.ENCRYPTION_KEY.encode("utf-8")) + .decrypt(adapter_metadata_b) + .decode("utf-8") + ) + + # Validate URLs for this adapter configuration + try: + AdapterProcessor.validate_adapter_urls(adapter_id, decrypted_metadata) + except SdkError as e: + from rest_framework.exceptions import ValidationError + + # Format error message similar to test adapter API + adapter_name = decrypted_metadata.get(AdapterKeys.ADAPTER_NAME, "adapter") + error_detail = f"Error testing 
'{adapter_name}'. {str(e)}" + raise ValidationError(error_detail) + if use_platform_unstract_key: logger.error("Processing adapter with platform key") serializer = self.get_serializer(adapter, data=request.data, partial=True) From 51966084ec6e211028b050b2b7219a65adf868ba Mon Sep 17 00:00:00 2001 From: Gayathri <142381512+gaya3-zipstack@users.noreply.github.com> Date: Thu, 11 Sep 2025 11:08:06 +0530 Subject: [PATCH 06/20] Update backend/adapter_processor_v2/views.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Signed-off-by: Gayathri <142381512+gaya3-zipstack@users.noreply.github.com> --- backend/adapter_processor_v2/views.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/backend/adapter_processor_v2/views.py b/backend/adapter_processor_v2/views.py index 0a122b4ef8..b5becfe3dc 100644 --- a/backend/adapter_processor_v2/views.py +++ b/backend/adapter_processor_v2/views.py @@ -189,6 +189,16 @@ def create(self, request: Any) -> Response: from rest_framework.exceptions import ValidationError + if not adapter_metadata_b: + raise ValidationError("Missing adapter metadata for validation.") + + # Decrypt metadata to get configuration + try: + # Validate URLs for security without full adapter testing + adapter_id = serializer.validated_data.get(AdapterKeys.ADAPTER_ID) + adapter_metadata_b = serializer.validated_data.get(AdapterKeys.ADAPTER_METADATA_B) + + from rest_framework.exceptions import ValidationError if not adapter_metadata_b: raise ValidationError("Missing adapter metadata for validation.") @@ -197,9 +207,26 @@ def create(self, request: Any) -> Response: fernet = Fernet(settings.ENCRYPTION_KEY.encode("utf-8")) decrypted_json = fernet.decrypt(adapter_metadata_b) decrypted_metadata = json.loads(decrypted_json.decode("utf-8")) + # Ensure object shape + from rest_framework.exceptions import ValidationError + if not isinstance(decrypted_metadata, dict): + raise ValidationError("Invalid adapter 
metadata format: expected JSON object.") except Exception as e: # InvalidToken/JSONDecodeError/TypeError/etc. raise ValidationError("Invalid adapter metadata.") from e + # Validate URLs for this adapter configuration + try: + AdapterProcessor.validate_adapter_urls(adapter_id, decrypted_metadata) + except Exception as e: + # Format error message similar to test adapter API + adapter_name = ( + decrypted_metadata.get(AdapterKeys.ADAPTER_NAME, "adapter") + if isinstance(decrypted_metadata, dict) + else "adapter" + ) + error_detail = f"Error testing '{adapter_name}'. {e!s}" + raise ValidationError(error_detail) from e + # Validate URLs for this adapter configuration try: AdapterProcessor.validate_adapter_urls(adapter_id, decrypted_metadata) From 7fbcc33837bc0fa8a526a2281de03a6d9d52178d Mon Sep 17 00:00:00 2001 From: gayathrivijayakumar Date: Thu, 11 Sep 2025 11:24:15 +0530 Subject: [PATCH 07/20] Keep exception handling uniform across CREATE and UPDATE APIS --- backend/adapter_processor_v2/views.py | 34 +++++++++++++++------------ 1 file changed, 19 insertions(+), 15 deletions(-) diff --git a/backend/adapter_processor_v2/views.py b/backend/adapter_processor_v2/views.py index 0a122b4ef8..366ddef891 100644 --- a/backend/adapter_processor_v2/views.py +++ b/backend/adapter_processor_v2/views.py @@ -404,23 +404,27 @@ def update( AdapterKeys.ADAPTER_METADATA_B ) - # Decrypt metadata to get configuration - decrypted_metadata = json.loads( - Fernet(settings.ENCRYPTION_KEY.encode("utf-8")) - .decrypt(adapter_metadata_b) - .decode("utf-8") - ) + from rest_framework.exceptions import ValidationError + + if not adapter_metadata_b: + raise ValidationError("Missing adapter metadata for validation.") - # Validate URLs for this adapter configuration - try: - AdapterProcessor.validate_adapter_urls(adapter_id, decrypted_metadata) - except SdkError as e: - from rest_framework.exceptions import ValidationError + # Decrypt metadata to get configuration + try: + fernet = 
Fernet(settings.ENCRYPTION_KEY.encode("utf-8")) + decrypted_json = fernet.decrypt(adapter_metadata_b) + decrypted_metadata = json.loads(decrypted_json.decode("utf-8")) + except Exception as e: # InvalidToken/JSONDecodeError/TypeError/etc. + raise ValidationError("Invalid adapter metadata.") from e - # Format error message similar to test adapter API - adapter_name = decrypted_metadata.get(AdapterKeys.ADAPTER_NAME, "adapter") - error_detail = f"Error testing '{adapter_name}'. {str(e)}" - raise ValidationError(error_detail) + # Validate URLs for this adapter configuration + try: + AdapterProcessor.validate_adapter_urls(adapter_id, decrypted_metadata) + except Exception as e: + # Format error message similar to test adapter API + adapter_name = decrypted_metadata.get(AdapterKeys.ADAPTER_NAME, "adapter") + error_detail = f"Error testing '{adapter_name}'. {e!s}" + raise ValidationError(error_detail) from e if use_platform_unstract_key: logger.error("Processing adapter with platform key") From 9d6ce54ef2166a588340098a45f6fddeffa65bda Mon Sep 17 00:00:00 2001 From: gayathrivijayakumar Date: Thu, 11 Sep 2025 12:05:36 +0530 Subject: [PATCH 08/20] Keep exception handling uniform across CREATE and UPDATE APIS --- .../adapter_processor_v2/adapter_processor.py | 25 +++++++++++++------ backend/adapter_processor_v2/views.py | 11 +++----- 2 files changed, 21 insertions(+), 15 deletions(-) diff --git a/backend/adapter_processor_v2/adapter_processor.py b/backend/adapter_processor_v2/adapter_processor.py index 6927a2729e..46f17a2121 100644 --- a/backend/adapter_processor_v2/adapter_processor.py +++ b/backend/adapter_processor_v2/adapter_processor.py @@ -9,6 +9,7 @@ from platform_settings_v2.platform_auth_service import ( PlatformAuthenticationService, ) +from rest_framework.exceptions import ValidationError from tenant_account_v2.organization_member_service import ( OrganizationMemberService, ) @@ -98,8 +99,6 @@ def get_adapter_data_with_key(adapter_id: str, key_value: str) -> Any: 
def test_adapter(adapter_id: str, adapter_metadata: dict[str, Any]) -> bool: logger.info(f"Testing adapter: {adapter_id}") try: - adapter_class = Adapterkit().get_adapter_class_by_adapter_id(adapter_id) - if adapter_metadata.pop(AdapterKeys.ADAPTER_TYPE) == AdapterKeys.X2TEXT: if ( adapter_metadata.get(AdapterKeys.PLATFORM_PROVIDED_UNSTRACT_KEY) @@ -113,7 +112,17 @@ def test_adapter(adapter_id: str, adapter_metadata: dict[str, Any]) -> bool: platform_key.key ) - adapter_instance = adapter_class(adapter_metadata) + # Validate URLs for this adapter configuration + try: + adapter_instance = AdapterProcessor.validate_adapter_urls( + adapter_id, adapter_metadata + ) + except Exception as e: + # Format error message similar to test adapter API + adapter_name = adapter_metadata.get(AdapterKeys.ADAPTER_NAME, "adapter") + error_detail = f"Error testing '{adapter_name}'. {e!s}" + raise ValidationError(error_detail) from e + # adapter_instance = adapter_class(adapter_metadata) test_result: bool = adapter_instance.test_connection() return test_result except SdkError as e: @@ -137,7 +146,7 @@ def update_adapter_metadata(adapter_metadata_b: Any, **kwargs) -> Any: return adapter_metadata_b @staticmethod - def validate_adapter_urls(adapter_id: str, adapter_metadata: dict) -> None: + def validate_adapter_urls(adapter_id: str, adapter_metadata: dict) -> Adapter: """Validate URLs for an adapter configuration without full connection test. 
This method only validates URLs for security (SSRF protection) without @@ -147,6 +156,9 @@ def validate_adapter_urls(adapter_id: str, adapter_metadata: dict) -> None: adapter_id: The adapter ID (e.g., "postgres|70ab6cc2...") adapter_metadata: The adapter configuration metadata + Returns: + Adapter: The adapter instance if validation passes + Raises: AdapterError: If URL validation fails due to security violations """ @@ -157,10 +169,7 @@ def validate_adapter_urls(adapter_id: str, adapter_metadata: dict) -> None: # Create a temporary instance just to get configured URLs # This will trigger URL validation in __init__ but not full connection test - adapter_class(adapter_metadata) - - # If we reach here, URL validation passed - logger.debug(f"URL validation passed for adapter {adapter_id}") + return adapter_class(adapter_metadata) except Exception as e: logger.error(f"URL validation failed for adapter {adapter_id}: {str(e)}") diff --git a/backend/adapter_processor_v2/views.py b/backend/adapter_processor_v2/views.py index 366ddef891..086072f594 100644 --- a/backend/adapter_processor_v2/views.py +++ b/backend/adapter_processor_v2/views.py @@ -17,6 +17,7 @@ ) from rest_framework import status from rest_framework.decorators import action +from rest_framework.exceptions import ValidationError from rest_framework.request import Request from rest_framework.response import Response from rest_framework.serializers import ModelSerializer @@ -187,8 +188,6 @@ def create(self, request: Any) -> Response: adapter_id = serializer.validated_data.get(AdapterKeys.ADAPTER_ID) adapter_metadata_b = serializer.validated_data.get(AdapterKeys.ADAPTER_METADATA_B) - from rest_framework.exceptions import ValidationError - if not adapter_metadata_b: raise ValidationError("Missing adapter metadata for validation.") @@ -202,7 +201,7 @@ def create(self, request: Any) -> Response: # Validate URLs for this adapter configuration try: - AdapterProcessor.validate_adapter_urls(adapter_id, 
decrypted_metadata) + _ = AdapterProcessor.validate_adapter_urls(adapter_id, decrypted_metadata) except Exception as e: # Format error message similar to test adapter API adapter_name = decrypted_metadata.get(AdapterKeys.ADAPTER_NAME, "adapter") @@ -404,9 +403,7 @@ def update( AdapterKeys.ADAPTER_METADATA_B ) - from rest_framework.exceptions import ValidationError - - if not adapter_metadata_b: + if not adapter_id or adapter_metadata_b: raise ValidationError("Missing adapter metadata for validation.") # Decrypt metadata to get configuration @@ -419,7 +416,7 @@ def update( # Validate URLs for this adapter configuration try: - AdapterProcessor.validate_adapter_urls(adapter_id, decrypted_metadata) + _ = AdapterProcessor.validate_adapter_urls(adapter_id, decrypted_metadata) except Exception as e: # Format error message similar to test adapter API adapter_name = decrypted_metadata.get(AdapterKeys.ADAPTER_NAME, "adapter") From 6a7bb7cb960b5ebbcc95801a4de5ce6e594d2393 Mon Sep 17 00:00:00 2001 From: gayathrivijayakumar Date: Thu, 11 Sep 2025 12:40:59 +0530 Subject: [PATCH 09/20] Fix indentation errors --- backend/adapter_processor_v2/views.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/backend/adapter_processor_v2/views.py b/backend/adapter_processor_v2/views.py index b3d01c369b..6d31070a0b 100644 --- a/backend/adapter_processor_v2/views.py +++ b/backend/adapter_processor_v2/views.py @@ -188,16 +188,6 @@ def create(self, request: Any) -> Response: adapter_id = serializer.validated_data.get(AdapterKeys.ADAPTER_ID) adapter_metadata_b = serializer.validated_data.get(AdapterKeys.ADAPTER_METADATA_B) - if not adapter_metadata_b: - raise ValidationError("Missing adapter metadata for validation.") - - # Decrypt metadata to get configuration - try: - # Validate URLs for security without full adapter testing - adapter_id = serializer.validated_data.get(AdapterKeys.ADAPTER_ID) - adapter_metadata_b = 
serializer.validated_data.get(AdapterKeys.ADAPTER_METADATA_B) - - from rest_framework.exceptions import ValidationError if not adapter_metadata_b: raise ValidationError("Missing adapter metadata for validation.") @@ -207,9 +197,10 @@ def create(self, request: Any) -> Response: decrypted_json = fernet.decrypt(adapter_metadata_b) decrypted_metadata = json.loads(decrypted_json.decode("utf-8")) # Ensure object shape - from rest_framework.exceptions import ValidationError if not isinstance(decrypted_metadata, dict): - raise ValidationError("Invalid adapter metadata format: expected JSON object.") + raise ValidationError( + "Invalid adapter metadata format: expected JSON object." + ) except Exception as e: # InvalidToken/JSONDecodeError/TypeError/etc. raise ValidationError("Invalid adapter metadata.") from e From 08c50444f6d78168e086e70c53270748c532e560 Mon Sep 17 00:00:00 2001 From: gayathrivijayakumar Date: Thu, 11 Sep 2025 12:43:00 +0530 Subject: [PATCH 10/20] Fix logical condition --- backend/adapter_processor_v2/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/adapter_processor_v2/views.py b/backend/adapter_processor_v2/views.py index 6d31070a0b..d69479b624 100644 --- a/backend/adapter_processor_v2/views.py +++ b/backend/adapter_processor_v2/views.py @@ -421,7 +421,7 @@ def update( AdapterKeys.ADAPTER_METADATA_B ) - if not adapter_id or adapter_metadata_b: + if not adapter_id or not adapter_metadata_b: raise ValidationError("Missing adapter metadata for validation.") # Decrypt metadata to get configuration From db3acfff4d9f0f9b9206f52fe43c81979a307464 Mon Sep 17 00:00:00 2001 From: Gayathri <142381512+gaya3-zipstack@users.noreply.github.com> Date: Thu, 11 Sep 2025 14:49:56 +0530 Subject: [PATCH 11/20] Update backend/adapter_processor_v2/views.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Signed-off-by: Gayathri <142381512+gaya3-zipstack@users.noreply.github.com> --- 
backend/adapter_processor_v2/views.py | 46 +++++++++++++++------------ 1 file changed, 25 insertions(+), 21 deletions(-) diff --git a/backend/adapter_processor_v2/views.py b/backend/adapter_processor_v2/views.py index d69479b624..eb3c661992 100644 --- a/backend/adapter_processor_v2/views.py +++ b/backend/adapter_processor_v2/views.py @@ -411,6 +411,7 @@ def update( serializer = self.get_serializer(adapter, data=request.data, partial=True) serializer.is_valid(raise_exception=True) + # Validate URLs for security if metadata is being updated # Validate URLs for security if metadata is being updated if AdapterKeys.ADAPTER_METADATA_B in serializer.validated_data: adapter_id = ( @@ -420,27 +421,30 @@ def update( adapter_metadata_b = serializer.validated_data.get( AdapterKeys.ADAPTER_METADATA_B ) - - if not adapter_id or not adapter_metadata_b: - raise ValidationError("Missing adapter metadata for validation.") - - # Decrypt metadata to get configuration - try: - fernet = Fernet(settings.ENCRYPTION_KEY.encode("utf-8")) - decrypted_json = fernet.decrypt(adapter_metadata_b) - decrypted_metadata = json.loads(decrypted_json.decode("utf-8")) - except Exception as e: # InvalidToken/JSONDecodeError/TypeError/etc. - raise ValidationError("Invalid adapter metadata.") from e - - # Validate URLs for this adapter configuration - try: - _ = AdapterProcessor.validate_adapter_urls(adapter_id, decrypted_metadata) - except Exception as e: - # Format error message similar to test adapter API - adapter_name = decrypted_metadata.get(AdapterKeys.ADAPTER_NAME, "adapter") - error_detail = f"Error testing '{adapter_name}'. 
{e!s}" - raise ValidationError(error_detail) from e - + if not adapter_id or not adapter_metadata_b: + raise ValidationError("Missing adapter metadata for validation.") + # Decrypt metadata to get configuration + try: + fernet = Fernet(settings.ENCRYPTION_KEY.encode("utf-8")) + decrypted_json = fernet.decrypt(adapter_metadata_b) + decrypted_metadata = json.loads(decrypted_json.decode("utf-8")) + if not isinstance(decrypted_metadata, dict): + raise ValidationError( + "Invalid adapter metadata format: expected JSON object." + ) + except Exception as e: # InvalidToken/JSONDecodeError/TypeError/etc. + raise ValidationError("Invalid adapter metadata.") from e + # Validate URLs for this adapter configuration + try: + AdapterProcessor.validate_adapter_urls(adapter_id, decrypted_metadata) + except Exception as e: + adapter_name = ( + decrypted_metadata.get(AdapterKeys.ADAPTER_NAME, "adapter") + if isinstance(decrypted_metadata, dict) + else "adapter" + ) + error_detail = f"Error testing '{adapter_name}'. 
{e!s}" + raise ValidationError(error_detail) from e if use_platform_unstract_key: logger.error("Processing adapter with platform key") serializer = self.get_serializer(adapter, data=request.data, partial=True) From a756ddbde075a5e1e44f34cb9e1ad7db4ff4a20d Mon Sep 17 00:00:00 2001 From: gayathrivijayakumar Date: Fri, 12 Sep 2025 15:12:47 +0530 Subject: [PATCH 12/20] Refactoring for SONAR issue fixes --- .../adapter_processor_v2/adapter_processor.py | 1 - backend/adapter_processor_v2/views.py | 249 ++++++++---------- 2 files changed, 112 insertions(+), 138 deletions(-) diff --git a/backend/adapter_processor_v2/adapter_processor.py b/backend/adapter_processor_v2/adapter_processor.py index 46f17a2121..19d24f441d 100644 --- a/backend/adapter_processor_v2/adapter_processor.py +++ b/backend/adapter_processor_v2/adapter_processor.py @@ -122,7 +122,6 @@ def test_adapter(adapter_id: str, adapter_metadata: dict[str, Any]) -> bool: adapter_name = adapter_metadata.get(AdapterKeys.ADAPTER_NAME, "adapter") error_detail = f"Error testing '{adapter_name}'. 
{e!s}" raise ValidationError(error_detail) from e - # adapter_instance = adapter_class(adapter_metadata) test_result: bool = adapter_instance.test_connection() return test_result except SdkError as e: diff --git a/backend/adapter_processor_v2/views.py b/backend/adapter_processor_v2/views.py index d69479b624..95f35881fd 100644 --- a/backend/adapter_processor_v2/views.py +++ b/backend/adapter_processor_v2/views.py @@ -172,117 +172,138 @@ def get_serializer_class( return AdapterListSerializer return AdapterInstanceSerializer - def create(self, request: Any) -> Response: - serializer = self.get_serializer(data=request.data) - - use_platform_unstract_key = False - adapter_metadata = request.data.get(AdapterKeys.ADAPTER_METADATA) - if adapter_metadata and adapter_metadata.get( - AdapterKeys.PLATFORM_PROVIDED_UNSTRACT_KEY, False - ): - use_platform_unstract_key = True - - serializer.is_valid(raise_exception=True) - - # Validate URLs for security without full adapter testing - adapter_id = serializer.validated_data.get(AdapterKeys.ADAPTER_ID) - adapter_metadata_b = serializer.validated_data.get(AdapterKeys.ADAPTER_METADATA_B) - + def _decrypt_and_validate_metadata(self, adapter_metadata_b: bytes) -> dict[str, Any]: + """Decrypt adapter metadata and validate its format.""" if not adapter_metadata_b: raise ValidationError("Missing adapter metadata for validation.") - # Decrypt metadata to get configuration try: fernet = Fernet(settings.ENCRYPTION_KEY.encode("utf-8")) decrypted_json = fernet.decrypt(adapter_metadata_b) decrypted_metadata = json.loads(decrypted_json.decode("utf-8")) - # Ensure object shape + if not isinstance(decrypted_metadata, dict): raise ValidationError( "Invalid adapter metadata format: expected JSON object." ) - except Exception as e: # InvalidToken/JSONDecodeError/TypeError/etc. 
- raise ValidationError("Invalid adapter metadata.") from e - - # Validate URLs for this adapter configuration - try: - _ = AdapterProcessor.validate_adapter_urls(adapter_id, decrypted_metadata) + return decrypted_metadata except Exception as e: - # Format error message similar to test adapter API - adapter_name = ( - decrypted_metadata.get(AdapterKeys.ADAPTER_NAME, "adapter") - if isinstance(decrypted_metadata, dict) - else "adapter" - ) - error_detail = f"Error testing '{adapter_name}'. {e!s}" - raise ValidationError(error_detail) from e + raise ValidationError("Invalid adapter metadata.") from e - # Validate URLs for this adapter configuration + def _validate_adapter_urls( + self, adapter_id: str, decrypted_metadata: dict[str, Any] + ) -> None: + """Validate URLs for adapter configuration.""" try: AdapterProcessor.validate_adapter_urls(adapter_id, decrypted_metadata) except Exception as e: - # Format error message similar to test adapter API adapter_name = decrypted_metadata.get(AdapterKeys.ADAPTER_NAME, "adapter") error_detail = f"Error testing '{adapter_name}'. 
{e!s}" raise ValidationError(error_detail) from e - try: - adapter_type = serializer.validated_data.get(AdapterKeys.ADAPTER_TYPE) - if adapter_type == AdapterKeys.X2TEXT and use_platform_unstract_key: - adapter_metadata_b = serializer.validated_data.get( - AdapterKeys.ADAPTER_METADATA_B - ) - adapter_metadata_b = AdapterProcessor.update_adapter_metadata( - adapter_metadata_b - ) - # Update the validated data with the new adapter_metadata - serializer.validated_data[AdapterKeys.ADAPTER_METADATA_B] = ( - adapter_metadata_b - ) + def _check_platform_key_usage(self, request_data: dict[str, Any]) -> bool: + """Check if platform unstract key should be used.""" + adapter_metadata = request_data.get(AdapterKeys.ADAPTER_METADATA) + return bool( + adapter_metadata + and adapter_metadata.get(AdapterKeys.PLATFORM_PROVIDED_UNSTRACT_KEY, False) + ) - instance = serializer.save() - organization_member = OrganizationMemberService.get_user_by_id( - request.user.id + def _update_metadata_for_platform_key( + self, + serializer_validated_data: dict[str, Any], + adapter_type: str, + is_paid_subscription: bool = False, + ) -> None: + """Update adapter metadata when using platform key.""" + if adapter_type == AdapterKeys.X2TEXT: + adapter_metadata_b = serializer_validated_data.get( + AdapterKeys.ADAPTER_METADATA_B ) - - # Check to see if there is a default configured - # for this adapter_type and for the current user - ( - user_default_adapter, - created, - ) = UserDefaultAdapter.objects.get_or_create( - organization_member=organization_member + updated_metadata_b = AdapterProcessor.update_adapter_metadata( + adapter_metadata_b, is_paid_subscription=is_paid_subscription ) + serializer_validated_data[AdapterKeys.ADAPTER_METADATA_B] = updated_metadata_b + + def _set_default_adapter_if_needed( + self, adapter_instance: AdapterInstance, adapter_type: str, user_id: int + ) -> None: + """Set adapter as default if no default exists for this type.""" + organization_member = 
OrganizationMemberService.get_user_by_id(user_id) + user_default_adapter, created = UserDefaultAdapter.objects.get_or_create( + organization_member=organization_member + ) - if (adapter_type == AdapterKeys.LLM) and ( - not user_default_adapter.default_llm_adapter - ): - user_default_adapter.default_llm_adapter = instance + # Map adapter types to their default fields + adapter_type_mapping = { + AdapterKeys.LLM: "default_llm_adapter", + AdapterKeys.EMBEDDING: "default_embedding_adapter", + AdapterKeys.VECTOR_DB: "default_vector_db_adapter", + AdapterKeys.X2TEXT: "default_x2text_adapter", + } + + if adapter_type in adapter_type_mapping: + field_name = adapter_type_mapping[adapter_type] + if not getattr(user_default_adapter, field_name): + setattr(user_default_adapter, field_name, adapter_instance) + user_default_adapter.organization_member = organization_member + user_default_adapter.save() + + def _validate_update_metadata( + self, serializer_validated_data: dict[str, Any], current_adapter: AdapterInstance + ) -> tuple[str | None, dict[str, Any] | None]: + """Validate metadata for update operations.""" + if AdapterKeys.ADAPTER_METADATA_B not in serializer_validated_data: + return None, None + + adapter_id = ( + serializer_validated_data.get(AdapterKeys.ADAPTER_ID) + or current_adapter.adapter_id + ) + adapter_metadata_b = serializer_validated_data.get(AdapterKeys.ADAPTER_METADATA_B) - elif (adapter_type == AdapterKeys.EMBEDDING) and ( - not user_default_adapter.default_embedding_adapter - ): - user_default_adapter.default_embedding_adapter = instance - elif (adapter_type == AdapterKeys.VECTOR_DB) and ( - not user_default_adapter.default_vector_db_adapter - ): - user_default_adapter.default_vector_db_adapter = instance - elif (adapter_type == AdapterKeys.X2TEXT) and ( - not user_default_adapter.default_x2text_adapter - ): - user_default_adapter.default_x2text_adapter = instance + if not adapter_id or not adapter_metadata_b: + raise ValidationError("Missing adapter 
metadata for validation.") - organization_member = OrganizationMemberService.get_user_by_id( - request.user.id - ) - user_default_adapter.organization_member = organization_member + decrypted_metadata = self._decrypt_and_validate_metadata(adapter_metadata_b) + self._validate_adapter_urls(adapter_id, decrypted_metadata) - user_default_adapter.save() + return adapter_id, decrypted_metadata + + def create(self, request: Any) -> Response: + serializer = self.get_serializer(data=request.data) + use_platform_unstract_key = self._check_platform_key_usage(request.data) + + serializer.is_valid(raise_exception=True) + + # Extract and validate metadata + adapter_id = serializer.validated_data.get(AdapterKeys.ADAPTER_ID) + adapter_metadata_b = serializer.validated_data.get(AdapterKeys.ADAPTER_METADATA_B) + decrypted_metadata = self._decrypt_and_validate_metadata(adapter_metadata_b) + + # Validate URLs for security + self._validate_adapter_urls(adapter_id, decrypted_metadata) + + try: + adapter_type = serializer.validated_data.get(AdapterKeys.ADAPTER_TYPE) + + # Update metadata if using platform key + if use_platform_unstract_key: + self._update_metadata_for_platform_key( + serializer.validated_data, adapter_type + ) + + # Save the adapter instance + instance = serializer.save() + + # Set as default adapter if needed + self._set_default_adapter_if_needed(instance, adapter_type, request.user.id) except IntegrityError: raise DuplicateAdapterNameError( name=serializer.validated_data.get(AdapterKeys.ADAPTER_NAME) ) + headers = self.get_success_headers(serializer.data) return Response(serializer.data, status=status.HTTP_201_CREATED, headers=headers) @@ -394,76 +415,30 @@ def list_of_shared_users(self, request: HttpRequest, pk: Any = None) -> Response def update( self, request: Request, *args: tuple[Any], **kwargs: dict[str, Any] ) -> Response: - # Check if adapter metadata is being updated and contains the platform key flag - use_platform_unstract_key = False - adapter_metadata = 
request.data.get(AdapterKeys.ADAPTER_METADATA) - - if adapter_metadata and adapter_metadata.get( - AdapterKeys.PLATFORM_PROVIDED_UNSTRACT_KEY, False - ): - use_platform_unstract_key = True - logger.error(f"Platform key flag detected: {use_platform_unstract_key}") - - # Get the adapter instance for update + use_platform_unstract_key = self._check_platform_key_usage(request.data) adapter = self.get_object() - # Get serializer and validate data first + # Get serializer and validate data serializer = self.get_serializer(adapter, data=request.data, partial=True) serializer.is_valid(raise_exception=True) - # Validate URLs for security if metadata is being updated - if AdapterKeys.ADAPTER_METADATA_B in serializer.validated_data: - adapter_id = ( - serializer.validated_data.get(AdapterKeys.ADAPTER_ID) - or adapter.adapter_id - ) - adapter_metadata_b = serializer.validated_data.get( - AdapterKeys.ADAPTER_METADATA_B - ) - - if not adapter_id or not adapter_metadata_b: - raise ValidationError("Missing adapter metadata for validation.") - - # Decrypt metadata to get configuration - try: - fernet = Fernet(settings.ENCRYPTION_KEY.encode("utf-8")) - decrypted_json = fernet.decrypt(adapter_metadata_b) - decrypted_metadata = json.loads(decrypted_json.decode("utf-8")) - except Exception as e: # InvalidToken/JSONDecodeError/TypeError/etc. - raise ValidationError("Invalid adapter metadata.") from e - - # Validate URLs for this adapter configuration - try: - _ = AdapterProcessor.validate_adapter_urls(adapter_id, decrypted_metadata) - except Exception as e: - # Format error message similar to test adapter API - adapter_name = decrypted_metadata.get(AdapterKeys.ADAPTER_NAME, "adapter") - error_detail = f"Error testing '{adapter_name}'. 
{e!s}" - raise ValidationError(error_detail) from e + # Validate metadata if being updated + adapter_id, decrypted_metadata = self._validate_update_metadata( + serializer.validated_data, adapter + ) + # Handle platform key updates if use_platform_unstract_key: logger.error("Processing adapter with platform key") - serializer = self.get_serializer(adapter, data=request.data, partial=True) - serializer.is_valid(raise_exception=True) - - # Get adapter_type from validated data (consistent with create method) adapter_type = serializer.validated_data.get(AdapterKeys.ADAPTER_TYPE) logger.error(f"Adapter type from validated data: {adapter_type}") - if adapter_type == AdapterKeys.X2TEXT: - logger.error("Processing X2TEXT adapter with platform key") - adapter_metadata_b = serializer.validated_data.get( - AdapterKeys.ADAPTER_METADATA_B - ) - adapter_metadata_b = AdapterProcessor.update_adapter_metadata( - adapter_metadata_b, is_paid_subscription=True - ) - # Update the validated data with the new adapter_metadata - serializer.validated_data[AdapterKeys.ADAPTER_METADATA_B] = ( - adapter_metadata_b - ) + # Update metadata for platform key usage + self._update_metadata_for_platform_key( + serializer.validated_data, adapter_type, is_paid_subscription=True + ) - # Save the instance with updated metadata + # Save and return updated instance serializer.save() return Response(serializer.data) From ea273a15b62a52623a8d54388f5b6b238a8e21d4 Mon Sep 17 00:00:00 2001 From: gayathrivijayakumar Date: Fri, 12 Sep 2025 17:26:51 +0530 Subject: [PATCH 13/20] Fix SONAR issues - unused variable definition --- backend/adapter_processor_v2/views.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/backend/adapter_processor_v2/views.py b/backend/adapter_processor_v2/views.py index a9ab62ae02..49469803d9 100644 --- a/backend/adapter_processor_v2/views.py +++ b/backend/adapter_processor_v2/views.py @@ -230,7 +230,7 @@ def _set_default_adapter_if_needed( ) -> None: """Set adapter 
as default if no default exists for this type.""" organization_member = OrganizationMemberService.get_user_by_id(user_id) - user_default_adapter, created = UserDefaultAdapter.objects.get_or_create( + user_default_adapter, _ = UserDefaultAdapter.objects.get_or_create( organization_member=organization_member ) @@ -425,9 +425,7 @@ def update( serializer.is_valid(raise_exception=True) # Validate metadata if being updated - adapter_id, decrypted_metadata = self._validate_update_metadata( - serializer.validated_data, adapter - ) + _, _ = self._validate_update_metadata(serializer.validated_data, adapter) # Handle platform key updates if use_platform_unstract_key: From 7588621620f05d277913d8bfc3ce10ef08be13fd Mon Sep 17 00:00:00 2001 From: gayathrivijayakumar Date: Mon, 15 Sep 2025 22:23:47 +0530 Subject: [PATCH 14/20] pass validate_urls=true --- backend/adapter_processor_v2/adapter_processor.py | 11 +++++++---- backend/sample.env | 2 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/backend/adapter_processor_v2/adapter_processor.py b/backend/adapter_processor_v2/adapter_processor.py index 19d24f441d..7fefacf19b 100644 --- a/backend/adapter_processor_v2/adapter_processor.py +++ b/backend/adapter_processor_v2/adapter_processor.py @@ -166,12 +166,15 @@ def validate_adapter_urls(adapter_id: str, adapter_metadata: dict) -> Adapter: adapterkit = Adapterkit() adapter_class = adapterkit.get_adapter_class_by_adapter_id(adapter_id) - # Create a temporary instance just to get configured URLs - # This will trigger URL validation in __init__ but not full connection test - return adapter_class(adapter_metadata) + # Create a temporary instance just to validate URLs + # Pass validate_urls=True to trigger URL validation + return adapter_class(adapter_metadata, validate_urls=True) except Exception as e: - logger.error(f"URL validation failed for adapter {adapter_id}: {str(e)}") + logger.error( + f"URL validation failed for adapter {adapter_id}: {str(e)}", + exc_info=True, + 
) raise @staticmethod diff --git a/backend/sample.env b/backend/sample.env index 9945a5d57c..9d1e7089fb 100644 --- a/backend/sample.env +++ b/backend/sample.env @@ -204,4 +204,4 @@ MIN_SCHEDULE_INTERVAL_SECONDS=1800 # WHitelisted adapter URLs to allow user to connect to locally hosted adapters. # Whitelisting 10.68.0.10 to allow frictionless adapter connection to # managed Postgres for VectorDB -ALLOWED_ADAPTER_PRIVATE_ENDPOINTS="127.0.0.1, 10.68.0.10" +WHITELISTED_ENDPOINTS="10.68.0.10" From 082f56f2ad51b9698768ea730997744a634c5a41 Mon Sep 17 00:00:00 2001 From: Gayathri <142381512+gaya3-zipstack@users.noreply.github.com> Date: Tue, 16 Sep 2025 12:04:33 +0530 Subject: [PATCH 15/20] Update backend/adapter_processor_v2/views.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Signed-off-by: Gayathri <142381512+gaya3-zipstack@users.noreply.github.com> --- backend/adapter_processor_v2/views.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/backend/adapter_processor_v2/views.py b/backend/adapter_processor_v2/views.py index 49469803d9..6f82911f3c 100644 --- a/backend/adapter_processor_v2/views.py +++ b/backend/adapter_processor_v2/views.py @@ -283,7 +283,7 @@ def create(self, request: Any) -> Response: adapter_metadata_b = serializer.validated_data.get(AdapterKeys.ADAPTER_METADATA_B) decrypted_metadata = self._decrypt_and_validate_metadata(adapter_metadata_b) - # Validate URLs for security + # Validate URLs for security (pre-mutation) self._validate_adapter_urls(adapter_id, decrypted_metadata) try: @@ -291,9 +291,13 @@ def create(self, request: Any) -> Response: # Update metadata if using platform key if use_platform_unstract_key: - self._update_metadata_for_platform_key( + updated_b = self._update_metadata_for_platform_key( serializer.validated_data, adapter_type ) + if updated_b is not None: + # Re-validate post-mutation metadata before save + final_md = 
self._decrypt_and_validate_metadata(updated_b) + self._validate_adapter_urls(adapter_id, final_md) # Save the adapter instance instance = serializer.save() @@ -301,6 +305,10 @@ def create(self, request: Any) -> Response: # Set as default adapter if needed self._set_default_adapter_if_needed(instance, adapter_type, request.user.id) + except IntegrityError as e: + raise DuplicateAdapterNameError( + name=serializer.validated_data.get(AdapterKeys.ADAPTER_NAME) + ) from e except IntegrityError: raise DuplicateAdapterNameError( name=serializer.validated_data.get(AdapterKeys.ADAPTER_NAME) From d78e5fbe61702ef388c7161d2b170e22700e8418 Mon Sep 17 00:00:00 2001 From: gayathrivijayakumar Date: Tue, 16 Sep 2025 12:17:51 +0530 Subject: [PATCH 16/20] Revert changes in previous commit for coderabbit issue as it is not a use-case --- backend/adapter_processor_v2/adapter_processor.py | 5 ++++- backend/adapter_processor_v2/views.py | 10 +--------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/backend/adapter_processor_v2/adapter_processor.py b/backend/adapter_processor_v2/adapter_processor.py index 7fefacf19b..1f6564acfa 100644 --- a/backend/adapter_processor_v2/adapter_processor.py +++ b/backend/adapter_processor_v2/adapter_processor.py @@ -99,6 +99,8 @@ def get_adapter_data_with_key(adapter_id: str, key_value: str) -> Any: def test_adapter(adapter_id: str, adapter_metadata: dict[str, Any]) -> bool: logger.info(f"Testing adapter: {adapter_id}") try: + # Defensive copy; don't mutate caller dict + adapter_metadata = dict(adapter_metadata) if adapter_metadata.pop(AdapterKeys.ADAPTER_TYPE) == AdapterKeys.X2TEXT: if ( adapter_metadata.get(AdapterKeys.PLATFORM_PROVIDED_UNSTRACT_KEY) @@ -126,7 +128,8 @@ def test_adapter(adapter_id: str, adapter_metadata: dict[str, Any]) -> bool: return test_result except SdkError as e: raise TestAdapterError( - e, adapter_name=adapter_metadata[AdapterKeys.ADAPTER_NAME] + e, + adapter_name=adapter_metadata.get(AdapterKeys.ADAPTER_NAME, 
"adapter"), ) @staticmethod diff --git a/backend/adapter_processor_v2/views.py b/backend/adapter_processor_v2/views.py index 6f82911f3c..209dd0fb0f 100644 --- a/backend/adapter_processor_v2/views.py +++ b/backend/adapter_processor_v2/views.py @@ -291,13 +291,9 @@ def create(self, request: Any) -> Response: # Update metadata if using platform key if use_platform_unstract_key: - updated_b = self._update_metadata_for_platform_key( + self._update_metadata_for_platform_key( serializer.validated_data, adapter_type ) - if updated_b is not None: - # Re-validate post-mutation metadata before save - final_md = self._decrypt_and_validate_metadata(updated_b) - self._validate_adapter_urls(adapter_id, final_md) # Save the adapter instance instance = serializer.save() @@ -305,10 +301,6 @@ def create(self, request: Any) -> Response: # Set as default adapter if needed self._set_default_adapter_if_needed(instance, adapter_type, request.user.id) - except IntegrityError as e: - raise DuplicateAdapterNameError( - name=serializer.validated_data.get(AdapterKeys.ADAPTER_NAME) - ) from e except IntegrityError: raise DuplicateAdapterNameError( name=serializer.validated_data.get(AdapterKeys.ADAPTER_NAME) From 534a5771e73661be56e23c287747f6ce0ebc6965 Mon Sep 17 00:00:00 2001 From: gayathrivijayakumar Date: Wed, 17 Sep 2025 20:40:41 +0530 Subject: [PATCH 17/20] Add validation of endpoints for API/ETL and postprocessor hook --- .../provider/webhook/webhook.py | 8 ++ backend/sample.env | 2 +- prompt-service/sample.env | 5 ++ .../prompt_service/services/answer_prompt.py | 86 ++++--------------- 4 files changed, 31 insertions(+), 70 deletions(-) diff --git a/backend/notification_v2/provider/webhook/webhook.py b/backend/notification_v2/provider/webhook/webhook.py index 37fb7431bf..ff5f94e2bb 100644 --- a/backend/notification_v2/provider/webhook/webhook.py +++ b/backend/notification_v2/provider/webhook/webhook.py @@ -7,6 +7,7 @@ from backend.celery_service import app as celery_app from 
notification_v2.enums import AuthorizationType from notification_v2.provider.notification_provider import NotificationProvider +from unstract.sdk.adapters.url_validator import URLValidator logger = logging.getLogger(__name__) @@ -51,6 +52,13 @@ def validate(self): """ if not self.notification.url: raise ValueError("Webhook URL is required.") + + # Validate webhook URL for security + is_valid, error_message = URLValidator.validate_url(self.notification.url) + logger.info(f"Notification url {self.notification_url}") + if not is_valid: + raise ValueError(f"Webhook URL validation failed: {error_message}") + if not self.payload: raise ValueError("Payload is required.") return super().validate() diff --git a/backend/sample.env b/backend/sample.env index 9d1e7089fb..0fb453d633 100644 --- a/backend/sample.env +++ b/backend/sample.env @@ -201,7 +201,7 @@ RUNNER_POLLING_INTERVAL_SECONDS=2 # Examples: 900 (15 min), 1800 (30 min), 3600 (60 min) MIN_SCHEDULE_INTERVAL_SECONDS=1800 -# WHitelisted adapter URLs to allow user to connect to locally hosted adapters. +# Whitelisted adapter URLs to allow user to connect to locally hosted adapters. # Whitelisting 10.68.0.10 to allow frictionless adapter connection to # managed Postgres for VectorDB WHITELISTED_ENDPOINTS="10.68.0.10" diff --git a/prompt-service/sample.env b/prompt-service/sample.env index e26e6cbcd2..06408b5618 100644 --- a/prompt-service/sample.env +++ b/prompt-service/sample.env @@ -64,3 +64,8 @@ ADAPTER_LLMW_STATUS_RETRIES=5 # Rentroll Service RENTROLL_SERVICE_HOST=http://unstract-rentroll-service RENTROLL_SERVICE_PORT=5003 + +# Whitelisted adapter URLs to allow user to connect to locally hosted adapters. 
+# Whitelisting 10.68.0.10 to allow frictionless adapter connection to +# managed Postgres for VectorDB +WHITELISTED_ENDPOINTS="10.68.0.10" diff --git a/prompt-service/src/unstract/prompt_service/services/answer_prompt.py b/prompt-service/src/unstract/prompt_service/services/answer_prompt.py index bf92262daa..35e807e5bc 100644 --- a/prompt-service/src/unstract/prompt_service/services/answer_prompt.py +++ b/prompt-service/src/unstract/prompt_service/services/answer_prompt.py @@ -1,8 +1,5 @@ -import ipaddress -import socket from logging import Logger from typing import Any -from urllib.parse import urlparse from flask import current_app as app @@ -17,6 +14,7 @@ repair_json_with_best_structure, ) from unstract.prompt_service.utils.log import publish_log +from unstract.sdk.adapters.url_validator import URLValidator from unstract.sdk.constants import LogLevel from unstract.sdk.exceptions import RateLimitError as SdkRateLimitError from unstract.sdk.exceptions import SdkError @@ -26,58 +24,6 @@ from unstract.sdk.llm import LLM -def _is_safe_public_url(url: str) -> bool: - """Validate webhook URL for SSRF protection. - - Only allows HTTPS and blocks private/loopback/internal addresses. - Resolves all DNS records (A/AAAA) to prevent DNS rebinding attacks. 
- """ - try: - p = urlparse(url) - if p.scheme not in ("https",): # Only allow HTTPS for security - return False - host = p.hostname or "" - # Block obvious local hosts - if host in ("localhost",): - return False - - addrs: set[str] = set() - # If literal IP, validate directly; else resolve all records (A/AAAA) - try: - ipaddress.ip_address(host) - addrs.add(host) - except ValueError: - try: - for family, _type, _proto, _canonname, sockaddr in socket.getaddrinfo( - host, None, type=socket.SOCK_STREAM - ): - addr = sockaddr[0] - addrs.add(addr) - except Exception: - return False - - if not addrs: - return False - - # Validate all resolved addresses - for addr in addrs: - try: - ip = ipaddress.ip_address(addr) - except ValueError: - return False - if ( - ip.is_private - or ip.is_loopback - or ip.is_link_local - or ip.is_reserved - or ip.is_multicast - ): - return False - return True - except Exception: - return False - - class AnswerPromptService: @staticmethod def extract_variable( @@ -342,23 +288,25 @@ def handle_json( app.logger.warning( "Postprocessing webhook enabled but URL missing; skipping." ) - elif not _is_safe_public_url(webhook_url): - app.logger.warning( - "Postprocessing webhook URL is not allowed; skipping." - ) else: - try: - processed_data, updated_highlight_data = postprocess_data( - parsed_data, - webhook_enabled=True, - webhook_url=webhook_url, - highlight_data=highlight_data, - timeout=60, - ) - except Exception as e: + is_valid, error_message = URLValidator.validate_url(webhook_url) + if not is_valid: app.logger.warning( - f"Postprocessing webhook failed: {e}. Using unprocessed data." + f"Postprocessing webhook URL validation failed: {error_message}; skipping." ) + else: + try: + processed_data, updated_highlight_data = postprocess_data( + parsed_data, + webhook_enabled=True, + webhook_url=webhook_url, + highlight_data=highlight_data, + timeout=60, + ) + except Exception as e: + app.logger.warning( + f"Postprocessing webhook failed: {e}. 
Using unprocessed data." + ) structured_output[prompt_key] = processed_data From ac1d5c5e21d9d7f5338d5539fb6d8a4851d53a7a Mon Sep 17 00:00:00 2001 From: Gayathri <142381512+gaya3-zipstack@users.noreply.github.com> Date: Thu, 18 Sep 2025 12:02:49 +0530 Subject: [PATCH 18/20] Update backend/notification_v2/provider/webhook/webhook.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> Signed-off-by: Gayathri <142381512+gaya3-zipstack@users.noreply.github.com> --- backend/notification_v2/provider/webhook/webhook.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/backend/notification_v2/provider/webhook/webhook.py b/backend/notification_v2/provider/webhook/webhook.py index ff5f94e2bb..7934a7abcc 100644 --- a/backend/notification_v2/provider/webhook/webhook.py +++ b/backend/notification_v2/provider/webhook/webhook.py @@ -55,10 +55,9 @@ def validate(self): # Validate webhook URL for security is_valid, error_message = URLValidator.validate_url(self.notification.url) - logger.info(f"Notification url {self.notification_url}") + logger.info("Validating webhook URL.") if not is_valid: raise ValueError(f"Webhook URL validation failed: {error_message}") - if not self.payload: raise ValueError("Payload is required.") return super().validate() From cd129cf7f7ef343261de4bea183fc95a07c398a8 Mon Sep 17 00:00:00 2001 From: gayathrivijayakumar Date: Mon, 22 Sep 2025 16:40:02 +0530 Subject: [PATCH 19/20] URL validation in variable replacement --- .../controllers/answer_prompt.py | 42 ++++++++++++++----- .../helpers/variable_replacement.py | 22 +++++++++- .../services/variable_replacement.py | 3 ++ 3 files changed, 55 insertions(+), 12 deletions(-) diff --git a/prompt-service/src/unstract/prompt_service/controllers/answer_prompt.py b/prompt-service/src/unstract/prompt_service/controllers/answer_prompt.py index 35ad2d5c06..02cb631c5f 100644 --- a/prompt-service/src/unstract/prompt_service/controllers/answer_prompt.py +++ 
b/prompt-service/src/unstract/prompt_service/controllers/answer_prompt.py @@ -11,7 +11,9 @@ from unstract.prompt_service.exceptions import BadRequest from unstract.prompt_service.helpers.auth import AuthHelper from unstract.prompt_service.helpers.plugin import PluginManager -from unstract.prompt_service.helpers.prompt_ide_base_tool import PromptServiceBaseTool +from unstract.prompt_service.helpers.prompt_ide_base_tool import ( + PromptServiceBaseTool, +) from unstract.prompt_service.helpers.usage import UsageHelper from unstract.prompt_service.services.answer_prompt import AnswerPromptService from unstract.prompt_service.services.rentrolls_extractor.interface import ( @@ -86,15 +88,33 @@ def prompt_processor() -> Any: app.logger.info(f"[{tool_id}] chunk size: {chunk_size}") util = PromptServiceBaseTool(platform_key=platform_key) index = Index(tool=util, run_id=run_id, capture_metrics=True) - if VariableReplacementService.is_variables_present(prompt_text=prompt_text): - prompt_text = VariableReplacementService.replace_variables_in_prompt( - prompt=output, - structured_output=structured_output, - log_events_id=log_events_id, - tool_id=tool_id, - prompt_name=prompt_name, - doc_name=doc_name, + try: + if VariableReplacementService.is_variables_present(prompt_text=prompt_text): + prompt_text = VariableReplacementService.replace_variables_in_prompt( + prompt=output, + structured_output=structured_output, + log_events_id=log_events_id, + tool_id=tool_id, + prompt_name=prompt_name, + doc_name=doc_name, + ) + except BadRequest as e: + app.logger.error( + f"[{tool_id}] Error during variable replacement: {e}", + exc_info=True, + ) + publish_log( + log_events_id, + { + "tool_id": tool_id, + "prompt_key": prompt_name, + "doc_name": doc_name, + }, + LogLevel.ERROR, + RunLevel.RUN, + f"Error during variable replacement: {e}", ) + raise app.logger.info(f"[{tool_id}] Executing prompt: '{prompt_name}'") publish_log( @@ -243,7 +263,9 @@ def prompt_processor() -> Any: # Track token 
usage by sending to the audit service try: - from unstract.sdk.utils.token_counter import TokenCounter + from unstract.sdk.utils.token_counter import ( + TokenCounter, + ) # Get metrics from the extraction result metrics = extraction_result.get("metrics", {}) diff --git a/prompt-service/src/unstract/prompt_service/helpers/variable_replacement.py b/prompt-service/src/unstract/prompt_service/helpers/variable_replacement.py index 8b10092522..87c7d7a8fe 100644 --- a/prompt-service/src/unstract/prompt_service/helpers/variable_replacement.py +++ b/prompt-service/src/unstract/prompt_service/helpers/variable_replacement.py @@ -6,7 +6,9 @@ from flask import current_app as app from unstract.prompt_service.constants import VariableConstants, VariableType +from unstract.prompt_service.exceptions import BadRequest from unstract.prompt_service.utils.request import HTTPMethod, make_http_request +from unstract.sdk.adapters.url_validator import URLValidator class VariableReplacementHelper: @@ -22,7 +24,9 @@ def replace_static_variable( static_variable_marker_string = "".join(["{{", variable, "}}"]) replaced_prompt: str = VariableReplacementHelper.replace_generic_string_value( - prompt=prompt, variable=static_variable_marker_string, value=output_value + prompt=prompt, + variable=static_variable_marker_string, + value=output_value, ) return replaced_prompt @@ -71,7 +75,12 @@ def identify_variable_type(variable: str) -> VariableType: def replace_dynamic_variable( prompt: str, variable: str, structured_output: dict[str, Any] ) -> str: - url = re.search(VariableConstants.DYNAMIC_VARIABLE_URL_REGEX, variable).group(0) + url_match = re.search(VariableConstants.DYNAMIC_VARIABLE_URL_REGEX, variable) + if not url_match: + app.logger.error(f"No URL found in dynamic variable: {variable}") + return prompt + + url = url_match.group(0) data = re.findall(VariableConstants.DYNAMIC_VARIABLE_DATA_REGEX, variable)[0] output_value = VariableReplacementHelper.check_static_variable_run_status( 
structure_output=structured_output, variable=data @@ -108,6 +117,15 @@ def fetch_dynamic_variable_value(url: str, data: str) -> Any: # Future versions may include support for # authentication and other input formats. + # Validate URL before making the request + is_valid, error_message = URLValidator.validate_url(url) + if not is_valid: + # app.logger.error( + # f"Invalid or unsafe URL detected: {url} - {error_message}", + # exc_info=True, + # ) + raise BadRequest(f"Invalid or unsafe URL: {url} - {error_message}") + verb: HTTPMethod = HTTPMethod.POST headers = {"Content-Type": "text/plain"} response: Any = make_http_request(verb=verb, url=url, data=data, headers=headers) diff --git a/prompt-service/src/unstract/prompt_service/services/variable_replacement.py b/prompt-service/src/unstract/prompt_service/services/variable_replacement.py index 6d6184a7ec..c76b0ae434 100644 --- a/prompt-service/src/unstract/prompt_service/services/variable_replacement.py +++ b/prompt-service/src/unstract/prompt_service/services/variable_replacement.py @@ -4,6 +4,7 @@ from unstract.prompt_service.constants import PromptServiceConstants as PSKeys from unstract.prompt_service.constants import RunLevel, VariableType +from unstract.prompt_service.exceptions import BadRequest from unstract.prompt_service.helpers.variable_replacement import ( VariableReplacementHelper, ) @@ -74,6 +75,8 @@ def replace_variables_in_prompt( prompt_text = VariableReplacementService._execute_variable_replacement( prompt_text=prompt_text, variable_map=structured_output ) + except BadRequest: + raise finally: app.logger.info( f"[{tool_id}] Prompt after variable replacement: {prompt_text}" From 2f5213f5e5e15e24449c24d3d2db3cb9aef8637b Mon Sep 17 00:00:00 2001 From: gayathrivijayakumar Date: Mon, 22 Sep 2025 16:45:08 +0530 Subject: [PATCH 20/20] Add sample env variable --- prompt-service/sample.env | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/prompt-service/sample.env b/prompt-service/sample.env 
index 06408b5618..f70d895086 100644 --- a/prompt-service/sample.env +++ b/prompt-service/sample.env @@ -66,6 +66,5 @@ RENTROLL_SERVICE_HOST=http://unstract-rentroll-service RENTROLL_SERVICE_PORT=5003 # Whitelisted adapter URLs to allow user to connect to locally hosted adapters. -# Whitelisting 10.68.0.10 to allow frictionless adapter connection to -# managed Postgres for VectorDB +# Whitelisting 10.68.0.10 to allow URLs in variable replacement and postprocessor hooks WHITELISTED_ENDPOINTS="10.68.0.10"