Skip to content

Commit

Permalink
feat: add optional dms integration
Browse files Browse the repository at this point in the history
This adds the feature to be able to send docx, odt to DMS to be
converted to pdf and saved in alexandria.
  • Loading branch information
Yelinz committed Feb 22, 2024
1 parent fe314ca commit 38ecbec
Show file tree
Hide file tree
Showing 5 changed files with 136 additions and 7 deletions.
16 changes: 11 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -112,11 +112,17 @@ A list of configuration options which you need
To use SSE-C in development make sure to generate a certificate for the minio container and set `ALEXANDRIA_S3_VERIFY` to `false`.

- ClamAV
- `ALEXANDRIA_CLAMD_ENABLED`: Set this to `True` to enable ClamAV (virus scanner).
- `ALEXANDRIA_CLAMD_SOCKET`: ClamAV socket
- `ALEXANDRIA_CLAMD_USE_TCP`: Use TCP to connect to ClamAV service
- `ALEXANDRIA_CLAMD_TCP_SOCKET`: ClamAV service socket
- `ALEXANDRIA_CLAMD_TCP_ADDR`: ClamAV service address
- `ALEXANDRIA_CLAMD_ENABLED`: Set this to `True` to enable ClamAV (virus scanner).
- `ALEXANDRIA_CLAMD_SOCKET`: ClamAV socket
- `ALEXANDRIA_CLAMD_USE_TCP`: Use TCP to connect to ClamAV service
- `ALEXANDRIA_CLAMD_TCP_SOCKET`: ClamAV service socket
- `ALEXANDRIA_CLAMD_TCP_ADDR`: ClamAV service address

- [Document Merge Service](https://github.com/adfinis/document-merge-service)
Enable conversion of docx/odt files to pdf directly in alexandria.
- `ALEXANDRIA_ENABLE_PDF_CONVERSION`: Set this to `True` to enable the pdf conversion endpoint.
- `ALEXANDRIA_DMS_URL`: URL where the document merge service is running


For development, you can also set the following environment variables to help you:

Expand Down
56 changes: 56 additions & 0 deletions alexandria/core/tests/test_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
HTTP_200_OK,
HTTP_201_CREATED,
HTTP_400_BAD_REQUEST,
HTTP_401_UNAUTHORIZED,
HTTP_403_FORBIDDEN,
HTTP_404_NOT_FOUND,
)
Expand Down Expand Up @@ -441,3 +442,58 @@ def test_presigned_url_different_file(admin_client, file, file_factory):

response = admin_client.get(url)
assert response.status_code == HTTP_400_BAD_REQUEST


def test_convert_document(
    admin_client, document_factory, file_factory, settings, mocker
):
    """A successful DMS conversion yields a new document holding `foo.pdf`."""
    settings.ALEXANDRIA_ENABLE_PDF_CONVERSION = True
    source_document = document_factory()
    file_factory(document=source_document, name="foo")

    # Stand-in for the DMS answer so no real HTTP request leaves the test.
    dms_reply = mocker.Mock(status_code=HTTP_200_OK, content=b"pdfdata")
    mocker.patch("requests.post", return_value=dms_reply)

    convert_url = reverse("document-convert", args=[source_document.pk])
    result = admin_client.get(convert_url)

    assert result.status_code == HTTP_200_OK

    # The source document plus the newly created converted document.
    assert Document.objects.count() == 2
    assert File.objects.count() == 3
    converted_originals = File.objects.filter(
        name="foo.pdf", variant=File.Variant.ORIGINAL
    )
    assert converted_originals.exists()


def test_convert_document_not_enabled(
    admin_client, document_factory, file_factory, settings, mocker
):
    """The convert endpoint answers 400 while PDF conversion is switched off."""
    settings.ALEXANDRIA_ENABLE_PDF_CONVERSION = False
    source_document = document_factory()
    file_factory(document=source_document, name="foo")

    # The DMS is still patched so the test can never reach the network.
    dms_reply = mocker.Mock(status_code=HTTP_200_OK, content=b"pdfdata")
    mocker.patch("requests.post", return_value=dms_reply)

    convert_url = reverse("document-convert", args=[source_document.pk])
    result = admin_client.get(convert_url)

    assert result.status_code == HTTP_400_BAD_REQUEST


def test_convert_document_token_expired(
    admin_client, document_factory, file_factory, settings, mocker
):
    """A 401 from the DMS is passed on to the client as a 401."""
    settings.ALEXANDRIA_ENABLE_PDF_CONVERSION = True
    source_document = document_factory()
    file_factory(document=source_document, name="foo")

    # Simulate the DMS rejecting the forwarded authorization header.
    dms_reply = mocker.Mock(status_code=HTTP_401_UNAUTHORIZED, content=b"pdfdata")
    mocker.patch("requests.post", return_value=dms_reply)

    convert_url = reverse("document-convert", args=[source_document.pk])
    result = admin_client.get(convert_url)

    assert result.status_code == HTTP_401_UNAUTHORIZED
50 changes: 48 additions & 2 deletions alexandria/core/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,30 @@
import zipfile
from tempfile import NamedTemporaryFile

import requests
from django.conf import settings
from django.core.exceptions import ValidationError as DjangoCoreValidationError
from django.core.files.base import ContentFile
from django.http import FileResponse
from django.utils.translation import gettext as _
from generic_permissions.permissions import AllowAny, PermissionViewMixin
from generic_permissions.visibilities import VisibilityViewMixin
from rest_framework.authentication import get_authorization_header
from rest_framework.decorators import action, permission_classes
from rest_framework.exceptions import NotFound, PermissionDenied, ValidationError
from rest_framework.exceptions import (
AuthenticationFailed,
NotFound,
PermissionDenied,
ValidationError,
)
from rest_framework.mixins import (
CreateModelMixin,
DestroyModelMixin,
ListModelMixin,
RetrieveModelMixin,
)
from rest_framework.response import Response
from rest_framework.status import HTTP_201_CREATED
from rest_framework.status import HTTP_200_OK, HTTP_201_CREATED, HTTP_401_UNAUTHORIZED
from rest_framework.viewsets import GenericViewSet
from rest_framework_json_api.views import (
AutoPrefetchMixin,
Expand Down Expand Up @@ -100,6 +108,44 @@ def update(self, request, *args, **kwargs):

return response

@action(methods=["get"], detail=True)
def convert(self, request, pk=None):
    """Convert the document's latest original file to PDF and store it.

    The file returned by ``document.get_latest_original()`` is posted to the
    ``/convert`` endpoint of the document merge service (DMS) together with
    the caller's authorization header. The PDF in the DMS response is saved
    as the file of a newly created document that copies the description,
    category and date of the source document.

    Args:
        request: The incoming DRF request; its authorization header is
            forwarded to the DMS unchanged.
        pk: Primary key of the document to convert (resolved through
            ``self.get_object()``).

    Returns:
        An empty ``Response`` with status 200 once the converted document,
        its file and the thumbnail have been created.

    Raises:
        ValidationError: If ``ALEXANDRIA_ENABLE_PDF_CONVERSION`` is falsy.
        AuthenticationFailed: If the DMS answers with 401.
        requests.HTTPError: For any other non-success status from the DMS.
        requests.Timeout: If the DMS does not answer within the timeout.
    """
    # Function-scope import: keeps this block self-contained without
    # touching the module's import section.
    from os.path import splitext

    if not settings.ALEXANDRIA_ENABLE_PDF_CONVERSION:
        raise ValidationError(_("PDF conversion is not enabled."))

    document = self.get_object()
    file = document.get_latest_original()

    # Tolerate a trailing slash in the configured base URL so we never
    # request ".../api/v1//convert".
    convert_url = f"{settings.ALEXANDRIA_DMS_URL.rstrip('/')}/convert"

    response = requests.post(
        convert_url,
        data={"target_format": "pdf"},
        headers={"authorization": get_authorization_header(request)},
        files={"file": file.content},
        # Without a timeout a stalled DMS would block this worker forever.
        timeout=60,
    )

    # Checked before raise_for_status() so the client gets a 401 instead
    # of the unhandled HTTPError the generic check would raise.
    if response.status_code == HTTP_401_UNAUTHORIZED:
        raise AuthenticationFailed(_("Token has expired."))

    response.raise_for_status()

    converted_document = models.Document.objects.create(
        title={k: v + ".pdf" for k, v in document.title.items()},
        description=document.description,
        category=document.category,
        date=document.date,
    )
    # Replace the source extension instead of appending to it, so
    # "report.docx" becomes "report.pdf" rather than "report.docx.pdf".
    # Names without an extension ("foo") still become "foo.pdf".
    file_name = f"{splitext(file.name)[0]}.pdf"
    converted_file = models.File.objects.create(
        document=converted_document,
        name=file_name,
        content=ContentFile(response.content, file_name),
        mime_type="application/pdf",
        size=len(response.content),
    )
    converted_file.create_thumbnail()

    return Response(status=HTTP_200_OK)


class FileViewSet(
PermissionViewMixin,
Expand Down
6 changes: 6 additions & 0 deletions alexandria/settings/alexandria.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,9 @@ def default(default_dev=env.NOTSET, default_prod=env.NOTSET):
# ClamAV (virus scanner) connection settings and enable switch.
# NOTE(review): CLAMD_TCP_SOCKET is parsed with env.str but its default is
# the int 3310 — confirm consumers cope with both str and int.
CLAMD_TCP_SOCKET = env.str("ALEXANDRIA_CLAMD_TCP_SOCKET", default=3310)
CLAMD_TCP_ADDR = env.str("ALEXANDRIA_CLAMD_TCP_ADDR", default="localhost")
CLAMD_ENABLED = env.bool("ALEXANDRIA_CLAMD_ENABLED", default=False)

# Document merge service (DMS)
# ALEXANDRIA_ENABLE_PDF_CONVERSION gates the document "convert" endpoint;
# it is off unless explicitly enabled.
ALEXANDRIA_ENABLE_PDF_CONVERSION = env.bool(
"ALEXANDRIA_ENABLE_PDF_CONVERSION", default=False
)
# Base URL of the DMS API; the convert view appends "/convert" to it.
ALEXANDRIA_DMS_URL = env.str("ALEXANDRIA_DMS_URL", default="http://dms:8000/api/v1")
15 changes: 15 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,21 @@ services:
# https://docs.djangoproject.com/en/2.1/ref/settings/#password
# same as postgres password above
# - DATABASE_PASSWORD=
# Document Merge Service container; alexandria's default ALEXANDRIA_DMS_URL
# (http://dms:8000/api/v1) points at this service name.
dms:
# Pinned to a fixed release.
image: ghcr.io/adfinis/document-merge-service:6.4.4
depends_on:
- db
environment:
# NOTE(review): database name and credentials are all "alexandria" —
# presumably the same database the alexandria service uses; confirm that
# sharing it is intended.
- DATABASE_ENGINE=django.db.backends.postgresql
- DATABASE_HOST=db
- DATABASE_PORT=5432
- DATABASE_NAME=alexandria
- DATABASE_USER=alexandria
- DATABASE_PASSWORD=alexandria
# Must include the hostname alexandria uses to reach this service.
- ALLOWED_HOSTS=dms
- OIDC_BEARER_TOKEN_REVALIDATION_TIME=300
# NOTE(review): looks like a development-only placeholder — never reuse
# this value outside local setups.
- SECRET_KEY=aaa
- DOCXTEMPLATE_JINJA_EXTENSIONS=
minio:
image: minio/minio:RELEASE.2023-11-06T22-26-08Z
volumes:
Expand Down

0 comments on commit 38ecbec

Please sign in to comment.