Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion .env-template
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,17 @@ VITE_API_STREAMING=true
OPENAI_API_BASE=
OPENAI_API_VERSION=
AZURE_DEPLOYMENT_NAME=
AZURE_EMBEDDINGS_DEPLOYMENT_NAME=
AZURE_EMBEDDINGS_DEPLOYMENT_NAME=

#Azure AD Application (client) ID
MICROSOFT_CLIENT_ID=your-azure-ad-client-id
#Azure AD Application client secret
MICROSOFT_CLIENT_SECRET=your-azure-ad-client-secret
#Azure AD Tenant ID (or 'common' for multi-tenant)
MICROSOFT_TENANT_ID=your-azure-ad-tenant-id
#If you are using a Microsoft Entra ID tenant,
#configure the AUTHORITY variable as
#"https://login.microsoftonline.com/TENANT_GUID"
#or "https://login.microsoftonline.com/contoso.onmicrosoft.com".
#Alternatively, use "https://login.microsoftonline.com/common" for multi-tenant app.
MICROSOFT_AUTHORITY=https://{tenentId}.ciamlogin.com/{tenentId}
12 changes: 8 additions & 4 deletions application/api/connector/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -298,10 +298,14 @@ def get(self):
session_token = str(uuid.uuid4())

try:
credentials = auth.create_credentials_from_token_info(token_info)
service = auth.build_drive_service(credentials)
user_info = service.about().get(fields="user").execute()
user_email = user_info.get('user', {}).get('emailAddress', 'Connected User')
if provider == "google_drive":
credentials = auth.create_credentials_from_token_info(token_info)
service = auth.build_drive_service(credentials)
user_info = service.about().get(fields="user").execute()
user_email = user_info.get('user', {}).get('emailAddress', 'Connected User')
else:
user_email = token_info.get('user_info', {}).get('email', 'Connected User')

except Exception as e:
current_app.logger.warning(f"Could not get user info: {e}")
user_email = 'Connected User'
Expand Down
5 changes: 5 additions & 0 deletions application/core/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,11 @@ class Settings(BaseSettings):
"http://127.0.0.1:7091/api/connectors/callback" ##add redirect url as it is to your provider's console(gcp)
)

# Microsoft Entra ID (Azure AD) integration
MICROSOFT_CLIENT_ID: Optional[str] = None # Azure AD Application (client) ID
MICROSOFT_CLIENT_SECRET: Optional[str] = None # Azure AD Application client secret
MICROSOFT_TENANT_ID: Optional[str] = "common" # Azure AD Tenant ID (or 'common' for multi-tenant)
MICROSOFT_AUTHORITY: Optional[str] = None # e.g., "https://login.microsoftonline.com/{tenant_id}"
# GitHub source
GITHUB_ACCESS_TOKEN: Optional[str] = None # PAT token with read repo access

Expand Down
4 changes: 4 additions & 0 deletions application/parser/connectors/connector_creator.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from application.parser.connectors.google_drive.loader import GoogleDriveLoader
from application.parser.connectors.google_drive.auth import GoogleDriveAuth
from application.parser.connectors.share_point.auth import SharePointAuth
from application.parser.connectors.share_point.loader import SharePointLoader


class ConnectorCreator:
Expand All @@ -12,10 +14,12 @@ class ConnectorCreator:

connectors = {
"google_drive": GoogleDriveLoader,
"share_point": SharePointLoader,
}

auth_providers = {
"google_drive": GoogleDriveAuth,
"share_point": SharePointAuth,
}

@classmethod
Expand Down
10 changes: 10 additions & 0 deletions application/parser/connectors/share_point/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
"""
Share Point connector package for DocsGPT.

This module provides authentication and document loading capabilities for Share Point.
"""

from .auth import SharePointAuth
from .loader import SharePointLoader

__all__ = ['SharePointAuth', 'SharePointLoader']
91 changes: 91 additions & 0 deletions application/parser/connectors/share_point/auth.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
import logging
import datetime
from typing import Optional, Dict, Any

from msal import ConfidentialClientApplication

from application.core.settings import settings
from application.parser.connectors.base import BaseConnectorAuth


class SharePointAuth(BaseConnectorAuth):
"""
Handles Microsoft OAuth 2.0 authentication.

# Documentation:
- https://learn.microsoft.com/en-us/entra/identity-platform/v2-oauth2-auth-code-flow
- https://learn.microsoft.com/en-gb/entra/msal/python/
"""

# Microsoft Graph scopes for SharePoint access
SCOPES = [
"User.Read",
]

def __init__(self):
self.client_id = settings.MICROSOFT_CLIENT_ID
self.client_secret = settings.MICROSOFT_CLIENT_SECRET

if not self.client_id or not self.client_secret:
raise ValueError(
"Microsoft OAuth credentials not configured. Please set MICROSOFT_CLIENT_ID and MICROSOFT_CLIENT_SECRET in settings."
)

self.redirect_uri = settings.CONNECTOR_REDIRECT_BASE_URI
self.tenant_id = settings.MICROSOFT_TENANT_ID
self.authority = getattr(settings, "MICROSOFT_AUTHORITY", f"https://{self.tenant_id}.ciamlogin.com/{self.tenant_id}")

self.auth_app = ConfidentialClientApplication(
client_id=self.client_id, client_credential=self.client_secret, authority=self.authority
)

def get_authorization_url(self, state: Optional[str] = None) -> str:
return self.auth_app.get_authorization_request_url(
scopes=self.SCOPES, state=state, redirect_uri=self.redirect_uri
)

def exchange_code_for_tokens(self, authorization_code: str) -> Dict[str, Any]:
result = self.auth_app.acquire_token_by_authorization_code(
code=authorization_code, scopes=self.SCOPES, redirect_uri=self.redirect_uri
)

if "error" in result:
logging.error(f"Error acquiring token: {result.get('error_description')}")
raise ValueError(f"Error acquiring token: {result.get('error_description')}")

return self.map_token_response(result)

def refresh_access_token(self, refresh_token: str) -> Dict[str, Any]:
result = self.auth_app.acquire_token_by_refresh_token(refresh_token=refresh_token, scopes=self.SCOPES)

if "error" in result:
logging.error(f"Error acquiring token: {result.get('error_description')}")
raise ValueError(f"Error acquiring token: {result.get('error_description')}")

return self.map_token_response(result)

def is_token_expired(self, token_info: Dict[str, Any]) -> bool:
if not token_info or "expiry" not in token_info:
# If no expiry info, consider token expired to be safe
return True

# Get expiry timestamp and current time
expiry_timestamp = token_info["expiry"]
current_timestamp = int(datetime.datetime.now().timestamp())

# Token is expired if current time is greater than or equal to expiry time
return current_timestamp >= expiry_timestamp

def map_token_response(self, result) -> Dict[str, Any]:
return {
"access_token": result.get("access_token"),
"refresh_token": result.get("refresh_token"),
"token_uri": result.get("id_token_claims", {}).get("iss"),
"scopes": result.get("scope"),
"expiry": result.get("id_token_claims", {}).get("exp"),
"user_info": {
"name": result.get("id_token_claims", {}).get("name"),
"email": result.get("id_token_claims", {}).get("preferred_username"),
},
"raw_token": result,
}
44 changes: 44 additions & 0 deletions application/parser/connectors/share_point/loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
from typing import List, Dict, Any
from application.parser.connectors.base import BaseConnectorLoader
from application.parser.schema.base import Document


class SharePointLoader(BaseConnectorLoader):
def __init__(self, session_token: str):
pass

def load_data(self, inputs: Dict[str, Any]) -> List[Document]:
"""
Load documents from the external knowledge base.

Args:
inputs: Configuration dictionary containing:
- file_ids: Optional list of specific file IDs to load
- folder_ids: Optional list of folder IDs to browse/download
- limit: Maximum number of items to return
- list_only: If True, return metadata without content
- recursive: Whether to recursively process folders

Returns:
List of Document objects
"""
pass

def download_to_directory(self, local_dir: str, source_config: Dict[str, Any] = None) -> Dict[str, Any]:
"""
Download files/folders to a local directory.

Args:
local_dir: Local directory path to download files to
source_config: Configuration for what to download

Returns:
Dictionary containing download results:
- files_downloaded: Number of files downloaded
- directory_path: Path where files were downloaded
- empty_result: Whether no files were downloaded
- source_type: Type of connector
- config_used: Configuration that was used
- error: Error message if download failed (optional)
"""
pass
3 changes: 2 additions & 1 deletion application/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ markupsafe==3.0.2
marshmallow==3.26.1
mpmath==1.3.0
multidict==6.4.3
msal==1.34.0
mypy-extensions==1.0.0
networkx==3.4.2
numpy==2.2.1
Expand Down Expand Up @@ -87,4 +88,4 @@ werkzeug>=3.1.0,<3.1.2
yarl==1.20.0
markdownify==1.1.0
tldextract==5.1.3
websockets==14.1
websockets==14.1
16 changes: 16 additions & 0 deletions frontend/src/assets/sharepoint.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
13 changes: 13 additions & 0 deletions frontend/src/components/ConnectedStateSkeleton.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
const ConnectedStateSkeleton = () => (
<div className="mb-4">
<div className="flex w-full animate-pulse items-center justify-between rounded-[10px] bg-gray-200 px-4 py-2 dark:bg-gray-700">
<div className="flex items-center gap-2">
<div className="h-4 w-4 rounded bg-gray-300 dark:bg-gray-600"></div>
<div className="h-4 w-32 rounded bg-gray-300 dark:bg-gray-600"></div>
</div>
<div className="h-4 w-16 rounded bg-gray-300 dark:bg-gray-600"></div>
</div>
</div>
);

export default ConnectedStateSkeleton;
2 changes: 1 addition & 1 deletion frontend/src/components/ConnectorAuth.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -150,7 +150,7 @@ const ConnectorAuth: React.FC<ConnectorAuthProps> = ({
{isConnected ? (
<div className="mb-4">
<div className="flex w-full items-center justify-between rounded-[10px] bg-[#8FDD51] px-4 py-2 text-sm font-medium text-[#212121]">
<div className="flex items-center gap-2">
<div className="flex max-w-[500px] items-center gap-2">
<svg className="h-4 w-4" viewBox="0 0 24 24">
<path
fill="currentColor"
Expand Down
13 changes: 13 additions & 0 deletions frontend/src/components/FileSelectionSkeleton.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
const FilesSectionSkeleton = () => (
<div className="rounded-lg border border-[#EEE6FF78] dark:border-[#6A6A6A]">
<div className="p-4">
<div className="mb-4 flex items-center justify-between">
<div className="h-5 w-24 animate-pulse rounded bg-gray-200 dark:bg-gray-700"></div>
<div className="h-8 w-24 animate-pulse rounded bg-gray-200 dark:bg-gray-700"></div>
</div>
<div className="h-4 w-40 animate-pulse rounded bg-gray-200 dark:bg-gray-700"></div>
</div>
</div>
);

export default FilesSectionSkeleton;
44 changes: 6 additions & 38 deletions frontend/src/components/GoogleDrivePicker.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@ import {
getSessionToken,
setSessionToken,
removeSessionToken,
validateProviderSession,
} from '../utils/providerUtils';
import ConnectedStateSkeleton from './ConnectedStateSkeleton';
import FilesSectionSkeleton from './FileSelectionSkeleton';

interface PickerFile {
id: string;
Expand Down Expand Up @@ -50,20 +53,9 @@ const GoogleDrivePicker: React.FC<GoogleDrivePickerProps> = ({

const validateSession = async (sessionToken: string) => {
try {
const apiHost = import.meta.env.VITE_API_HOST;
const validateResponse = await fetch(
`${apiHost}/api/connectors/validate-session`,
{
method: 'POST',
headers: {
'Content-Type': 'application/json',
Authorization: `Bearer ${token}`,
},
body: JSON.stringify({
provider: 'google_drive',
session_token: sessionToken,
}),
},
const validateResponse = await validateProviderSession(
token,
'google_drive',
);

if (!validateResponse.ok) {
Expand Down Expand Up @@ -234,30 +226,6 @@ const GoogleDrivePicker: React.FC<GoogleDrivePickerProps> = ({
onSelectionChange([], []);
};

const ConnectedStateSkeleton = () => (
<div className="mb-4">
<div className="flex w-full animate-pulse items-center justify-between rounded-[10px] bg-gray-200 px-4 py-2 dark:bg-gray-700">
<div className="flex items-center gap-2">
<div className="h-4 w-4 rounded bg-gray-300 dark:bg-gray-600"></div>
<div className="h-4 w-32 rounded bg-gray-300 dark:bg-gray-600"></div>
</div>
<div className="h-4 w-16 rounded bg-gray-300 dark:bg-gray-600"></div>
</div>
</div>
);

const FilesSectionSkeleton = () => (
<div className="rounded-lg border border-[#EEE6FF78] dark:border-[#6A6A6A]">
<div className="p-4">
<div className="mb-4 flex items-center justify-between">
<div className="h-5 w-24 animate-pulse rounded bg-gray-200 dark:bg-gray-700"></div>
<div className="h-8 w-24 animate-pulse rounded bg-gray-200 dark:bg-gray-700"></div>
</div>
<div className="h-4 w-40 animate-pulse rounded bg-gray-200 dark:bg-gray-700"></div>
</div>
</div>
);

return (
<div>
{isValidating ? (
Expand Down
Loading