diff --git a/CHANGELOG.md b/CHANGELOG.md index 8d16acc8a4..c4db3af158 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to ### Added - ✨(frontend) add duplicate action to doc tree #1175 +- ✨(api) add API route to fetch document content #1206 ### Changed diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 7919033e16..bdeba5a23d 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -1,6 +1,7 @@ """API endpoints""" # pylint: disable=too-many-lines +import base64 import json import logging import uuid @@ -37,6 +38,15 @@ from core import authentication, choices, enums, models from core.services.ai_services import AIService from core.services.collaboration_services import CollaborationService +from core.services.converter_services import ( + ServiceUnavailableError as YProviderServiceUnavailableError, +) +from core.services.converter_services import ( + ValidationError as YProviderValidationError, +) +from core.services.converter_services import ( + YdocConverter, +) from core.tasks.mail import send_ask_for_access_mail from core.utils import extract_attachments, filter_descendants @@ -1443,6 +1453,69 @@ def cors_proxy(self, request, *args, **kwargs): status=status.HTTP_500_INTERNAL_SERVER_ERROR, ) + @drf.decorators.action( + detail=True, + methods=["get"], + url_path="content", + name="Get document content in different formats", + ) + def content(self, request, pk=None): + """ + Retrieve document content in different formats (JSON, Markdown, HTML). + + Query parameters: + - content_format: The desired output format (json, markdown, html) + + Returns: + JSON response with content in the specified format. + """ + + document = self.get_object() + + content_format = request.query_params.get("content_format", "json").lower() + if content_format not in {"json", "markdown", "html"}: + raise drf.exceptions.ValidationError( + "Invalid format. 
Must be one of: json, markdown, html" + ) + + # Get the base64 content from the document + content = None + base64_content = document.content + if base64_content is not None: + # Convert using the y-provider service + try: + yprovider = YdocConverter() + result = yprovider.convert( + base64.b64decode(base64_content), + "application/vnd.yjs.doc", + { + "markdown": "text/markdown", + "html": "text/html", + "json": "application/json", + }[content_format], + ) + content = result + except YProviderValidationError as e: + return drf_response.Response( + {"error": str(e)}, status=status.HTTP_400_BAD_REQUEST + ) + except YProviderServiceUnavailableError as e: + logger.error("Error getting content for document %s: %s", pk, e) + return drf_response.Response( + {"error": "Failed to get document content"}, + status=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + + return drf_response.Response( + { + "id": str(document.id), + "title": document.title, + "content": content, + "created_at": document.created_at, + "updated_at": document.updated_at, + } + ) + class DocumentAccessViewSet( ResourceAccessViewsetMixin, diff --git a/src/backend/core/models.py b/src/backend/core/models.py index d6e79c5100..1098bbe940 100644 --- a/src/backend/core/models.py +++ b/src/backend/core/models.py @@ -786,6 +786,7 @@ def get_abilities(self, user): "children_list": can_get, "children_create": can_update and user.is_authenticated, "collaboration_auth": can_get, + "content": can_get, "cors_proxy": can_get, "descendants": can_get, "destroy": is_owner, diff --git a/src/backend/core/services/converter_services.py b/src/backend/core/services/converter_services.py index d6a6dbf4b1..9c79a7192d 100644 --- a/src/backend/core/services/converter_services.py +++ b/src/backend/core/services/converter_services.py @@ -1,4 +1,4 @@ -"""Converter services.""" +"""Y-Provider API services.""" from base64 import b64encode @@ -28,25 +28,44 @@ def auth_header(self): # Note: Yprovider microservice accepts only raw token, 
which is not recommended return f"Bearer {settings.Y_PROVIDER_API_KEY}" - def convert(self, text): + def _request(self, url, data, content_type, accept): + """Make a request to the Y-Provider API.""" + response = requests.post( + url, + data=data, + headers={ + "Authorization": self.auth_header, + "Content-Type": content_type, + "Accept": accept, + }, + timeout=settings.CONVERSION_API_TIMEOUT, + verify=settings.CONVERSION_API_SECURE, + ) + response.raise_for_status() + return response + + def convert( + self, text, content_type="text/markdown", accept="application/vnd.yjs.doc" + ): """Convert a Markdown text into our internal format using an external microservice.""" if not text: raise ValidationError("Input text cannot be empty") try: - response = requests.post( + response = self._request( f"{settings.Y_PROVIDER_API_BASE_URL}{settings.CONVERSION_API_ENDPOINT}/", - data=text, - headers={ - "Authorization": self.auth_header, - "Content-Type": "text/markdown", - }, - timeout=settings.CONVERSION_API_TIMEOUT, - verify=settings.CONVERSION_API_SECURE, + text, + content_type, + accept, ) - response.raise_for_status() - return b64encode(response.content).decode("utf-8") + if accept == "application/vnd.yjs.doc": + return b64encode(response.content).decode("utf-8") + if accept in {"text/markdown", "text/html"}: + return response.text + if accept == "application/json": + return response.json() + raise ValidationError("Unsupported format") except requests.RequestException as err: raise ServiceUnavailableError( "Failed to connect to conversion service", diff --git a/src/backend/core/tests/documents/test_api_documents_content.py b/src/backend/core/tests/documents/test_api_documents_content.py new file mode 100644 index 0000000000..459bf5a0ce --- /dev/null +++ b/src/backend/core/tests/documents/test_api_documents_content.py @@ -0,0 +1,176 @@ +""" +Tests for Documents API endpoint in impress's core app: content +""" + +import base64 +from unittest.mock import patch + +import 
pytest +import requests +from rest_framework import status +from rest_framework.test import APIClient + +from core import factories + +pytestmark = pytest.mark.django_db + + +@pytest.mark.parametrize( + "reach, role", + [ + ("public", "reader"), + ("public", "editor"), + ], +) +@patch("core.services.converter_services.YdocConverter.convert") +def test_api_documents_content_public(mock_content, reach, role): + """Anonymous users should be allowed to access content of public documents.""" + document = factories.DocumentFactory(link_reach=reach, link_role=role) + mock_content.return_value = {"some": "data"} + + response = APIClient().get(f"/api/v1.0/documents/{document.id!s}/content/") + + assert response.status_code == status.HTTP_200_OK + data = response.json() + assert data["id"] == str(document.id) + assert data["title"] == document.title + assert data["content"] == {"some": "data"} + mock_content.assert_called_once_with( + base64.b64decode(document.content), + "application/vnd.yjs.doc", + "application/json", + ) + + +@pytest.mark.parametrize( + "reach, doc_role, user_role", + [ + ("restricted", "reader", "reader"), + ("restricted", "reader", "editor"), + ("restricted", "reader", "administrator"), + ("restricted", "reader", "owner"), + ("restricted", "editor", "reader"), + ("restricted", "editor", "editor"), + ("restricted", "editor", "administrator"), + ("restricted", "editor", "owner"), + ("authenticated", "reader", None), + ("authenticated", "editor", None), + ], +) +@patch("core.services.converter_services.YdocConverter.convert") +def test_api_documents_content_not_public(mock_content, reach, doc_role, user_role): + """Authenticated users need access to get non-public document content.""" + user = factories.UserFactory() + document = factories.DocumentFactory(link_reach=reach, link_role=doc_role) + mock_content.return_value = {"some": "data"} + + # First anonymous request should fail + client = APIClient() + response = 
client.get(f"/api/v1.0/documents/{document.id!s}/content/") + + assert response.status_code == status.HTTP_401_UNAUTHORIZED + mock_content.assert_not_called() + + # Login and try again + client.force_login(user) + response = client.get(f"/api/v1.0/documents/{document.id!s}/content/") + + # If restricted, we still should not have access + if user_role is not None: + assert response.status_code == status.HTTP_403_FORBIDDEN + mock_content.assert_not_called() + + # Create an access with the parametrized role. This should unlock the access. + factories.UserDocumentAccessFactory( + document=document, user=user, role=user_role + ) + + response = client.get(f"/api/v1.0/documents/{document.id!s}/content/") + + assert response.status_code == status.HTTP_200_OK + data = response.json() + assert data["id"] == str(document.id) + assert data["title"] == document.title + assert data["content"] == {"some": "data"} + mock_content.assert_called_once_with( + base64.b64decode(document.content), + "application/vnd.yjs.doc", + "application/json", + ) + + +@pytest.mark.parametrize( + "content_format, accept", + [ + ("markdown", "text/markdown"), + ("html", "text/html"), + ("json", "application/json"), + ], +) +@patch("core.services.converter_services.YdocConverter.convert") +def test_api_documents_content_format(mock_content, content_format, accept): + """Test that the content endpoint returns a specific format.""" + document = factories.DocumentFactory(link_reach="public") + mock_content.return_value = {"some": "data"} + + response = APIClient().get( + f"/api/v1.0/documents/{document.id!s}/content/?content_format={content_format}" + ) + + assert response.status_code == status.HTTP_200_OK + data = response.json() + assert data["id"] == str(document.id) + assert data["title"] == document.title + assert data["content"] == {"some": "data"} + mock_content.assert_called_once_with( + base64.b64decode(document.content), "application/vnd.yjs.doc", accept + ) + + 
+@patch("core.services.converter_services.YdocConverter._request") +def test_api_documents_content_invalid_format(mock_request): + """Test that the content endpoint rejects invalid formats.""" + document = factories.DocumentFactory(link_reach="public") + + response = APIClient().get( + f"/api/v1.0/documents/{document.id!s}/content/?content_format=invalid" + ) + assert response.status_code == status.HTTP_400_BAD_REQUEST + mock_request.assert_not_called() + + +@patch("core.services.converter_services.YdocConverter._request") +def test_api_documents_content_yservice_error(mock_request): + """Test that service errors are handled properly.""" + document = factories.DocumentFactory(link_reach="public") + mock_request.side_effect = requests.RequestException() + + response = APIClient().get(f"/api/v1.0/documents/{document.id!s}/content/") + mock_request.assert_called_once() + assert response.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR + + +@patch("core.services.converter_services.YdocConverter._request") +def test_api_documents_content_nonexistent_document(mock_request): + """Test that accessing a nonexistent document returns 404.""" + client = APIClient() + response = client.get( + "/api/v1.0/documents/00000000-0000-0000-0000-000000000000/content/" + ) + assert response.status_code == status.HTTP_404_NOT_FOUND + mock_request.assert_not_called() + + +@patch("core.services.converter_services.YdocConverter._request") +def test_api_documents_content_empty_document(mock_request): + """Test that accessing an empty document returns empty content.""" + document = factories.DocumentFactory(link_reach="public", content="") + + response = APIClient().get(f"/api/v1.0/documents/{document.id!s}/content/") + + assert response.status_code == status.HTTP_200_OK + data = response.json() + assert data["id"] == str(document.id) + assert data["title"] == document.title + assert data["content"] is None + mock_request.assert_not_called() diff --git 
a/src/backend/core/tests/documents/test_api_documents_retrieve.py b/src/backend/core/tests/documents/test_api_documents_retrieve.py index b229adb5c9..63880f2e69 100644 --- a/src/backend/core/tests/documents/test_api_documents_retrieve.py +++ b/src/backend/core/tests/documents/test_api_documents_retrieve.py @@ -37,6 +37,7 @@ def test_api_documents_retrieve_anonymous_public_standalone(): "children_list": True, "collaboration_auth": True, "cors_proxy": True, + "content": True, "descendants": True, "destroy": False, "duplicate": False, @@ -112,6 +113,7 @@ def test_api_documents_retrieve_anonymous_public_parent(): "collaboration_auth": True, "descendants": True, "cors_proxy": True, + "content": True, "destroy": False, "duplicate": False, # Anonymous user can't favorite a document even with read access @@ -216,6 +218,7 @@ def test_api_documents_retrieve_authenticated_unrelated_public_or_authenticated( "collaboration_auth": True, "descendants": True, "cors_proxy": True, + "content": True, "destroy": False, "duplicate": True, "favorite": True, @@ -297,6 +300,7 @@ def test_api_documents_retrieve_authenticated_public_or_authenticated_parent(rea "collaboration_auth": True, "descendants": True, "cors_proxy": True, + "content": True, "destroy": False, "duplicate": True, "favorite": True, @@ -490,6 +494,7 @@ def test_api_documents_retrieve_authenticated_related_parent(): "collaboration_auth": True, "descendants": True, "cors_proxy": True, + "content": True, "destroy": access.role == "owner", "duplicate": True, "favorite": True, diff --git a/src/backend/core/tests/documents/test_api_documents_trashbin.py b/src/backend/core/tests/documents/test_api_documents_trashbin.py index 9e80539777..66a023fdbe 100644 --- a/src/backend/core/tests/documents/test_api_documents_trashbin.py +++ b/src/backend/core/tests/documents/test_api_documents_trashbin.py @@ -81,6 +81,7 @@ def test_api_documents_trashbin_format(): "collaboration_auth": True, "descendants": True, "cors_proxy": True, + 
"content": True, "destroy": True, "duplicate": True, "favorite": True, diff --git a/src/backend/core/tests/test_models_documents.py b/src/backend/core/tests/test_models_documents.py index 57427fb9c8..72d9aa574f 100644 --- a/src/backend/core/tests/test_models_documents.py +++ b/src/backend/core/tests/test_models_documents.py @@ -161,6 +161,7 @@ def test_models_documents_get_abilities_forbidden( "collaboration_auth": False, "descendants": False, "cors_proxy": False, + "content": False, "destroy": False, "duplicate": False, "favorite": False, @@ -223,6 +224,7 @@ def test_models_documents_get_abilities_reader( "collaboration_auth": True, "descendants": True, "cors_proxy": True, + "content": True, "destroy": False, "duplicate": is_authenticated, "favorite": is_authenticated, @@ -287,6 +289,7 @@ def test_models_documents_get_abilities_editor( "collaboration_auth": True, "descendants": True, "cors_proxy": True, + "content": True, "destroy": False, "duplicate": is_authenticated, "favorite": is_authenticated, @@ -340,6 +343,7 @@ def test_models_documents_get_abilities_owner(django_assert_num_queries): "collaboration_auth": True, "descendants": True, "cors_proxy": True, + "content": True, "destroy": True, "duplicate": True, "favorite": True, @@ -390,6 +394,7 @@ def test_models_documents_get_abilities_administrator(django_assert_num_queries) "collaboration_auth": True, "descendants": True, "cors_proxy": True, + "content": True, "destroy": False, "duplicate": True, "favorite": True, @@ -443,6 +448,7 @@ def test_models_documents_get_abilities_editor_user(django_assert_num_queries): "collaboration_auth": True, "descendants": True, "cors_proxy": True, + "content": True, "destroy": False, "duplicate": True, "favorite": True, @@ -503,6 +509,7 @@ def test_models_documents_get_abilities_reader_user( "collaboration_auth": True, "descendants": True, "cors_proxy": True, + "content": True, "destroy": False, "duplicate": True, "favorite": True, @@ -561,6 +568,7 @@ def 
test_models_documents_get_abilities_preset_role(django_assert_num_queries): "collaboration_auth": True, "descendants": True, "cors_proxy": True, + "content": True, "destroy": False, "duplicate": True, "favorite": True, diff --git a/src/backend/core/tests/test_services_converter_services.py b/src/backend/core/tests/test_services_converter_services.py index 01773f8570..086d132b35 100644 --- a/src/backend/core/tests/test_services_converter_services.py +++ b/src/backend/core/tests/test_services_converter_services.py @@ -1,4 +1,4 @@ -"""Test converter services.""" +"""Test y-provider services.""" from base64 import b64decode from unittest.mock import MagicMock, patch @@ -84,6 +84,42 @@ def test_convert_full_integration(mock_post, settings): headers={ "Authorization": "Bearer test-key", "Content-Type": "text/markdown", + "Accept": "application/vnd.yjs.doc", + }, + timeout=5, + verify=False, + ) + + +@patch("requests.post") +def test_convert_full_integration_with_specific_headers(mock_post, settings): + """Test successful conversion with specific content type and accept headers.""" + settings.Y_PROVIDER_API_BASE_URL = "http://test.com/" + settings.Y_PROVIDER_API_KEY = "test-key" + settings.CONVERSION_API_ENDPOINT = "conversion-endpoint" + settings.CONVERSION_API_TIMEOUT = 5 + settings.CONVERSION_API_SECURE = False + + converter = YdocConverter() + + expected_response = "# Test Document\n\nThis is test content." 
+ mock_response = MagicMock() + mock_response.text = expected_response + mock_response.raise_for_status.return_value = None + mock_post.return_value = mock_response + + result = converter.convert( + b"test_content", "application/vnd.yjs.doc", "text/markdown" + ) + + assert result == expected_response + mock_post.assert_called_once_with( + "http://test.com/conversion-endpoint/", + data=b"test_content", + headers={ + "Authorization": "Bearer test-key", + "Content-Type": "application/vnd.yjs.doc", + "Accept": "text/markdown", }, timeout=5, verify=False, diff --git a/src/frontend/servers/y-provider/__tests__/convert.test.ts b/src/frontend/servers/y-provider/__tests__/convert.test.ts index 67de07cf1f..44c21c2870 100644 --- a/src/frontend/servers/y-provider/__tests__/convert.test.ts +++ b/src/frontend/servers/y-provider/__tests__/convert.test.ts @@ -18,6 +18,9 @@ import { COLLABORATION_SERVER_ORIGIN as origin, } from '../src/env'; +const expectedMarkdown = '# Example document\n\nLorem ipsum dolor sit amet.'; +const expectedHTML = + '
<h1>Example document</h1><p>Lorem ipsum dolor sit amet.</p>
'; const expectedBlocks = [ { children: [], @@ -121,23 +124,43 @@ describe('Server Tests', () => { }); }); + test('POST /api/convert with unsupported Content-Type returns 415', async () => { + const app = initApp(); + const response = await request(app) + .post('/api/convert') + .set('origin', origin) + .set('authorization', apiKey) + .set('content-type', 'image/png') + .send('randomdata'); + expect(response.status).toBe(415); + expect(response.body).toStrictEqual({ error: 'Unsupported Content-Type' }); + }); + + test('POST /api/convert with unsupported Accept returns 406', async () => { + const app = initApp(); + const response = await request(app) + .post('/api/convert') + .set('origin', origin) + .set('authorization', apiKey) + .set('content-type', 'text/markdown') + .set('accept', 'image/png') + .send('# Header'); + expect(response.status).toBe(406); + expect(response.body).toStrictEqual({ error: 'Unsupported format' }); + }); + test.each([[apiKey], [`Bearer ${apiKey}`]])( 'POST /api/convert with correct content with Authorization: %s', async (authHeader) => { const app = initApp(); - const document = [ - '# Example document', - '', - 'Lorem ipsum dolor sit amet.', - '', - ].join('\n'); - const response = await request(app) .post('/api/convert') .set('Origin', origin) .set('Authorization', authHeader) - .send(document); + .set('content-type', 'text/markdown') + .set('accept', 'application/vnd.yjs.doc') + .send(expectedMarkdown); expect(response.status).toBe(200); expect(response.body).toBeInstanceOf(Buffer); @@ -150,4 +173,95 @@ describe('Server Tests', () => { expect(blocks).toStrictEqual(expectedBlocks); }, ); + + test('POST /api/convert Yjs to HTML', async () => { + const app = initApp(); + const editor = ServerBlockNoteEditor.create(); + const blocks = await editor.tryParseMarkdownToBlocks(expectedMarkdown); + const yDocument = editor.blocksToYDoc(blocks, 'document-store'); + const yjsUpdate = Y.encodeStateAsUpdate(yDocument); + const response = await 
request(app) + .post('/api/convert') + .set('origin', origin) + .set('authorization', apiKey) + .set('content-type', 'application/vnd.yjs.doc') + .set('accept', 'text/html') + .send(Buffer.from(yjsUpdate)); + expect(response.status).toBe(200); + expect(response.header['content-type']).toBe('text/html; charset=utf-8'); + expect(typeof response.text).toBe('string'); + expect(response.text).toBe(expectedHTML); + }); + + test('POST /api/convert Yjs to Markdown', async () => { + const app = initApp(); + const editor = ServerBlockNoteEditor.create(); + const blocks = await editor.tryParseMarkdownToBlocks(expectedMarkdown); + const yDocument = editor.blocksToYDoc(blocks, 'document-store'); + const yjsUpdate = Y.encodeStateAsUpdate(yDocument); + const response = await request(app) + .post('/api/convert') + .set('origin', origin) + .set('authorization', apiKey) + .set('content-type', 'application/vnd.yjs.doc') + .set('accept', 'text/markdown') + .send(Buffer.from(yjsUpdate)); + expect(response.status).toBe(200); + expect(response.header['content-type']).toBe( + 'text/markdown; charset=utf-8', + ); + expect(typeof response.text).toBe('string'); + expect(response.text.trim()).toBe(expectedMarkdown); + }); + + test('POST /api/convert Yjs to JSON', async () => { + const app = initApp(); + const editor = ServerBlockNoteEditor.create(); + const blocks = await editor.tryParseMarkdownToBlocks(expectedMarkdown); + const yDocument = editor.blocksToYDoc(blocks, 'document-store'); + const yjsUpdate = Y.encodeStateAsUpdate(yDocument); + const response = await request(app) + .post('/api/convert') + .set('origin', origin) + .set('authorization', apiKey) + .set('content-type', 'application/vnd.yjs.doc') + .set('accept', 'application/json') + .send(Buffer.from(yjsUpdate)); + expect(response.status).toBe(200); + expect(response.header['content-type']).toBe( + 'application/json; charset=utf-8', + ); + expect(Array.isArray(response.body)).toBe(true); + 
expect(response.body).toStrictEqual(expectedBlocks); + }); + + test('POST /api/convert Markdown to JSON', async () => { + const app = initApp(); + const response = await request(app) + .post('/api/convert') + .set('origin', origin) + .set('authorization', apiKey) + .set('content-type', 'text/markdown') + .set('accept', 'application/json') + .send(expectedMarkdown); + expect(response.status).toBe(200); + expect(response.header['content-type']).toBe( + 'application/json; charset=utf-8', + ); + expect(Array.isArray(response.body)).toBe(true); + expect(response.body).toStrictEqual(expectedBlocks); + }); + + test('POST /api/convert with invalid Yjs content returns 400', async () => { + const app = initApp(); + const response = await request(app) + .post('/api/convert') + .set('origin', origin) + .set('authorization', apiKey) + .set('content-type', 'application/vnd.yjs.doc') + .set('accept', 'application/json') + .send(Buffer.from('notvalidyjs')); + expect(response.status).toBe(400); + expect(response.body).toStrictEqual({ error: 'Invalid Yjs content' }); + }); }); diff --git a/src/frontend/servers/y-provider/package.json b/src/frontend/servers/y-provider/package.json index 7364781bfc..6a89231815 100644 --- a/src/frontend/servers/y-provider/package.json +++ b/src/frontend/servers/y-provider/package.json @@ -10,7 +10,7 @@ "dev": "cross-env COLLABORATION_LOGGING=true && nodemon --config nodemon.json", "start": "node ./dist/start-server.js", "lint": "eslint . 
--ext .ts", - "test": "vitest --run" + "test": "vitest --run --disable-console-intercept" }, "engines": { "node": ">=22" diff --git a/src/frontend/servers/y-provider/src/handlers/convertHandler.ts b/src/frontend/servers/y-provider/src/handlers/convertHandler.ts index 15bab784d5..9d8cd237c8 100644 --- a/src/frontend/servers/y-provider/src/handlers/convertHandler.ts +++ b/src/frontend/servers/y-provider/src/handlers/convertHandler.ts @@ -1,3 +1,4 @@ +import { PartialBlock } from '@blocknote/core'; import { ServerBlockNoteEditor } from '@blocknote/server-util'; import { Request, Response } from 'express'; import * as Y from 'yjs'; @@ -12,29 +13,79 @@ const editor = ServerBlockNoteEditor.create(); export const convertHandler = async ( req: Request