Skip to content

✨(api) add API route to fetch document content #1213

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 5 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ and this project adheres to
### Added

- ✨(frontend) add duplicate action to doc tree #1175
- ✨(api) add API route to fetch document content #1206

### Changed

Expand Down
73 changes: 73 additions & 0 deletions src/backend/core/api/viewsets.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""API endpoints"""
# pylint: disable=too-many-lines

import base64
import json
import logging
import uuid
Expand Down Expand Up @@ -37,6 +38,15 @@
from core import authentication, choices, enums, models
from core.services.ai_services import AIService
from core.services.collaboration_services import CollaborationService
from core.services.converter_services import (
ServiceUnavailableError as YProviderServiceUnavailableError,
)
from core.services.converter_services import (
ValidationError as YProviderValidationError,
)
from core.services.converter_services import (
YdocConverter,
)
from core.tasks.mail import send_ask_for_access_mail
from core.utils import extract_attachments, filter_descendants

Expand Down Expand Up @@ -1443,6 +1453,69 @@ def cors_proxy(self, request, *args, **kwargs):
status=status.HTTP_500_INTERNAL_SERVER_ERROR,
)

@drf.decorators.action(
    detail=True,
    methods=["get"],
    url_path="content",
    name="Get document content in different formats",
)
def content(self, request, pk=None):
    """
    Return a document's content converted to JSON, Markdown or HTML.

    Query parameters:
    - content_format: one of "json" (default), "markdown" or "html",
      matched case-insensitively.

    Returns:
        JSON response holding the document id, title, converted content
        and timestamps. The content is null when the document has no
        stored content. Responds with 400 when the converter raises a
        validation error and with 500 when it reports the service as
        unavailable.
    """
    # MIME type requested from the converter for each supported format.
    accept_by_format = {
        "markdown": "text/markdown",
        "html": "text/html",
        "json": "application/json",
    }

    document = self.get_object()

    content_format = request.query_params.get("content_format", "json").lower()
    if content_format not in accept_by_format:
        raise drf.exceptions.ValidationError(
            "Invalid format. Must be one of: json, markdown, html"
        )

    # The document stores its content base64-encoded; it is decoded before
    # being handed to the y-provider conversion service.
    stored_content = document.content
    converted = None
    if stored_content is not None:
        try:
            converted = YdocConverter().convert(
                base64.b64decode(stored_content),
                "application/vnd.yjs.doc",
                accept_by_format[content_format],
            )
        except YProviderValidationError as err:
            return drf_response.Response(
                {"error": str(err)}, status=status.HTTP_400_BAD_REQUEST
            )
        except YProviderServiceUnavailableError as err:
            logger.error("Error getting content for document %s: %s", pk, err)
            return drf_response.Response(
                {"error": "Failed to get document content"},
                status=status.HTTP_500_INTERNAL_SERVER_ERROR,
            )

    payload = {
        "id": str(document.id),
        "title": document.title,
        "content": converted,
        "created_at": document.created_at,
        "updated_at": document.updated_at,
    }
    return drf_response.Response(payload)


class DocumentAccessViewSet(
ResourceAccessViewsetMixin,
Expand Down
1 change: 1 addition & 0 deletions src/backend/core/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -786,6 +786,7 @@ def get_abilities(self, user):
"children_list": can_get,
"children_create": can_update and user.is_authenticated,
"collaboration_auth": can_get,
"content": can_get,
"cors_proxy": can_get,
"descendants": can_get,
"destroy": is_owner,
Expand Down
43 changes: 31 additions & 12 deletions src/backend/core/services/converter_services.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
"""Converter services."""
"""Y-Provider API services."""

from base64 import b64encode

Expand Down Expand Up @@ -28,25 +28,44 @@ def auth_header(self):
# Note: Yprovider microservice accepts only raw token, which is not recommended
return f"Bearer {settings.Y_PROVIDER_API_KEY}"

def convert(self, text):
def _request(self, url, data, content_type, accept):
    """
    POST a payload to the Y-Provider API and return the HTTP response.

    The request carries the service authorization header along with the
    given Content-Type and Accept headers. ``raise_for_status`` is called
    on the response before it is returned.
    """
    request_headers = {
        "Authorization": self.auth_header,
        "Content-Type": content_type,
        "Accept": accept,
    }
    reply = requests.post(
        url,
        data=data,
        headers=request_headers,
        timeout=settings.CONVERSION_API_TIMEOUT,
        verify=settings.CONVERSION_API_SECURE,
    )
    reply.raise_for_status()
    return reply

def convert(
self, text, content_type="text/markdown", accept="application/vnd.yjs.doc"
):
"""Convert a Markdown text into our internal format using an external microservice."""

if not text:
raise ValidationError("Input text cannot be empty")

try:
response = requests.post(
response = self._request(
f"{settings.Y_PROVIDER_API_BASE_URL}{settings.CONVERSION_API_ENDPOINT}/",
data=text,
headers={
"Authorization": self.auth_header,
"Content-Type": "text/markdown",
},
timeout=settings.CONVERSION_API_TIMEOUT,
verify=settings.CONVERSION_API_SECURE,
text,
content_type,
accept,
)
response.raise_for_status()
return b64encode(response.content).decode("utf-8")
if accept == "application/vnd.yjs.doc":
return b64encode(response.content).decode("utf-8")
if accept in {"text/markdown", "text/html"}:
return response.text
if accept == "application/json":
return response.json()
raise ValidationError("Unsupported format")
except requests.RequestException as err:
raise ServiceUnavailableError(
"Failed to connect to conversion service",
Expand Down
176 changes: 176 additions & 0 deletions src/backend/core/tests/documents/test_api_documents_content.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
"""
Tests for Documents API endpoint in impress's core app: content
"""

import base64
from unittest.mock import patch

import pytest
import requests
from rest_framework import status
from rest_framework.test import APIClient

from core import factories

pytestmark = pytest.mark.django_db


@pytest.mark.parametrize(
    "reach, role",
    [
        ("public", "reader"),
        ("public", "editor"),
    ],
)
@patch("core.services.converter_services.YdocConverter.convert")
def test_api_documents_content_public(mock_content, reach, role):
    """Anonymous users can fetch the content of a public document."""
    mock_content.return_value = {"some": "data"}
    document = factories.DocumentFactory(link_reach=reach, link_role=role)
    url = f"/api/v1.0/documents/{document.id!s}/content/"

    response = APIClient().get(url)

    assert response.status_code == status.HTTP_200_OK
    payload = response.json()
    assert payload["id"] == str(document.id)
    assert payload["title"] == document.title
    assert payload["content"] == {"some": "data"}

    # With no content_format given, the converter is asked for JSON output.
    expected_input = base64.b64decode(document.content)
    mock_content.assert_called_once_with(
        expected_input,
        "application/vnd.yjs.doc",
        "application/json",
    )


@pytest.mark.parametrize(
    "reach, doc_role, user_role",
    [
        ("restricted", "reader", "reader"),
        ("restricted", "reader", "editor"),
        ("restricted", "reader", "administrator"),
        ("restricted", "reader", "owner"),
        ("restricted", "editor", "reader"),
        ("restricted", "editor", "editor"),
        ("restricted", "editor", "administrator"),
        ("restricted", "editor", "owner"),
        ("authenticated", "reader", None),
        ("authenticated", "editor", None),
    ],
)
@patch("core.services.converter_services.YdocConverter.convert")
def test_api_documents_content_not_public(mock_content, reach, doc_role, user_role):
    """Non-public document content requires a logged-in user with access."""
    user = factories.UserFactory()
    document = factories.DocumentFactory(link_reach=reach, link_role=doc_role)
    mock_content.return_value = {"some": "data"}
    url = f"/api/v1.0/documents/{document.id!s}/content/"
    client = APIClient()

    # An anonymous request is rejected and never reaches the converter.
    response = client.get(url)
    assert response.status_code == status.HTTP_401_UNAUTHORIZED
    mock_content.assert_not_called()

    # Retry the same request as a logged-in user.
    client.force_login(user)
    response = client.get(url)

    # Only the "restricted" cases carry a user_role: there, being logged in
    # is not enough until an access with that role is granted.
    needs_explicit_access = user_role is not None
    if needs_explicit_access:
        assert response.status_code == status.HTTP_403_FORBIDDEN
        mock_content.assert_not_called()

        factories.UserDocumentAccessFactory(
            document=document, user=user, role=user_role
        )
        response = client.get(url)

    assert response.status_code == status.HTTP_200_OK
    payload = response.json()
    assert payload["id"] == str(document.id)
    assert payload["title"] == document.title
    assert payload["content"] == {"some": "data"}
    mock_content.assert_called_once_with(
        base64.b64decode(document.content),
        "application/vnd.yjs.doc",
        "application/json",
    )


@pytest.mark.parametrize(
    "content_format, accept",
    [
        ("markdown", "text/markdown"),
        ("html", "text/html"),
        ("json", "application/json"),
    ],
)
@patch("core.services.converter_services.YdocConverter.convert")
def test_api_documents_content_format(mock_content, content_format, accept):
    """Each content_format value requests the matching MIME type from the converter."""
    mock_content.return_value = {"some": "data"}
    document = factories.DocumentFactory(link_reach="public")
    url = (
        f"/api/v1.0/documents/{document.id!s}/content/?content_format={content_format}"
    )

    response = APIClient().get(url)

    assert response.status_code == status.HTTP_200_OK
    payload = response.json()
    assert payload["id"] == str(document.id)
    assert payload["title"] == document.title
    assert payload["content"] == {"some": "data"}

    expected_input = base64.b64decode(document.content)
    mock_content.assert_called_once_with(
        expected_input, "application/vnd.yjs.doc", accept
    )


@patch("core.services.converter_services.YdocConverter._request")
def test_api_documents_content_invalid_format(mock_request):
    """An unsupported content_format is rejected with a 400, without calling the service."""
    document = factories.DocumentFactory(link_reach="public")
    url = f"/api/v1.0/documents/{document.id!s}/content/?content_format=invalid"

    response = APIClient().get(url)

    assert response.status_code == status.HTTP_400_BAD_REQUEST
    mock_request.assert_not_called()


@patch("core.services.converter_services.YdocConverter._request")
def test_api_documents_content_yservice_error(mock_request):
    """A failing request to the conversion service results in a 500 response."""
    # Make the HTTP call to the conversion service fail.
    mock_request.side_effect = requests.RequestException()
    document = factories.DocumentFactory(link_reach="public")
    url = f"/api/v1.0/documents/{document.id!s}/content/"

    response = APIClient().get(url)

    mock_request.assert_called_once()
    assert response.status_code == status.HTTP_500_INTERNAL_SERVER_ERROR


@patch("core.services.converter_services.YdocConverter._request")
def test_api_documents_content_nonexistent_document(mock_request):
    """Requesting the content of an unknown document id yields a 404."""
    url = "/api/v1.0/documents/00000000-0000-0000-0000-000000000000/content/"

    response = APIClient().get(url)

    assert response.status_code == status.HTTP_404_NOT_FOUND
    mock_request.assert_not_called()


@patch("core.services.converter_services.YdocConverter._request")
def test_api_documents_content_empty_document(mock_request):
    """A document created with empty content returns null content and skips conversion."""
    document = factories.DocumentFactory(link_reach="public", content="")
    url = f"/api/v1.0/documents/{document.id!s}/content/"

    response = APIClient().get(url)

    assert response.status_code == status.HTTP_200_OK
    payload = response.json()
    assert payload["id"] == str(document.id)
    assert payload["title"] == document.title
    assert payload["content"] is None
    mock_request.assert_not_called()
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ def test_api_documents_retrieve_anonymous_public_standalone():
"children_list": True,
"collaboration_auth": True,
"cors_proxy": True,
"content": True,
"descendants": True,
"destroy": False,
"duplicate": False,
Expand Down Expand Up @@ -112,6 +113,7 @@ def test_api_documents_retrieve_anonymous_public_parent():
"collaboration_auth": True,
"descendants": True,
"cors_proxy": True,
"content": True,
"destroy": False,
"duplicate": False,
# Anonymous user can't favorite a document even with read access
Expand Down Expand Up @@ -216,6 +218,7 @@ def test_api_documents_retrieve_authenticated_unrelated_public_or_authenticated(
"collaboration_auth": True,
"descendants": True,
"cors_proxy": True,
"content": True,
"destroy": False,
"duplicate": True,
"favorite": True,
Expand Down Expand Up @@ -297,6 +300,7 @@ def test_api_documents_retrieve_authenticated_public_or_authenticated_parent(rea
"collaboration_auth": True,
"descendants": True,
"cors_proxy": True,
"content": True,
"destroy": False,
"duplicate": True,
"favorite": True,
Expand Down Expand Up @@ -490,6 +494,7 @@ def test_api_documents_retrieve_authenticated_related_parent():
"collaboration_auth": True,
"descendants": True,
"cors_proxy": True,
"content": True,
"destroy": access.role == "owner",
"duplicate": True,
"favorite": True,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ def test_api_documents_trashbin_format():
"collaboration_auth": True,
"descendants": True,
"cors_proxy": True,
"content": True,
"destroy": True,
"duplicate": True,
"favorite": True,
Expand Down
Loading
Loading