From 6d30dcd0533be084b8596cc1536ca69894fdc174 Mon Sep 17 00:00:00 2001 From: yelinz Date: Tue, 24 Sep 2024 15:06:33 +0200 Subject: [PATCH] feat(file): move thumbnail generation to celery --- alexandria/conftest.py | 4 ++ .../commands/generate_missing_thumbnails.py | 3 +- alexandria/core/models.py | 56 ++----------------- alexandria/core/tasks.py | 47 ++++++++++++++++ .../tests/__snapshots__/test_viewsets.ambr | 12 ++-- .../storages/tests/test_dynamic_field.py | 2 +- compose.override.yaml | 4 -- 7 files changed, 67 insertions(+), 61 deletions(-) diff --git a/alexandria/conftest.py b/alexandria/conftest.py index 3bf027c6..f439ee7a 100644 --- a/alexandria/conftest.py +++ b/alexandria/conftest.py @@ -64,6 +64,10 @@ def mock_celery(mocker): "alexandria.core.tasks.set_content_vector.delay", side_effect=lambda id: tasks.set_content_vector(id), ) + mocker.patch( + "alexandria.core.tasks.create_thumbnail.delay", + side_effect=lambda id: tasks.create_thumbnail(id), + ) @pytest.fixture diff --git a/alexandria/core/management/commands/generate_missing_thumbnails.py b/alexandria/core/management/commands/generate_missing_thumbnails.py index bb48e9f2..a85f5a19 100644 --- a/alexandria/core/management/commands/generate_missing_thumbnails.py +++ b/alexandria/core/management/commands/generate_missing_thumbnails.py @@ -3,6 +3,7 @@ from tqdm import tqdm from alexandria.core.models import File +from alexandria.core.tasks import create_thumbnail class Command(BaseCommand): @@ -12,7 +13,7 @@ def handle(self, *args, **options): for file in tqdm( File.objects.filter(variant="original", renderings__isnull=True) ): - file.create_thumbnail() + create_thumbnail(file.pk) if virtual_memory().available < 300_000_000: print("about to run out of memory, stopping") break diff --git a/alexandria/core/models.py b/alexandria/core/models.py index 786c3a02..3e87ffb8 100644 --- a/alexandria/core/models.py +++ b/alexandria/core/models.py @@ -1,7 +1,5 @@ -import logging import re import uuid -from mimetypes import guess_extension from pathlib import Path from tempfile import NamedTemporaryFile @@ -13,18 +11,14 @@ from django.core.files import File as DjangoFile from django.core.validators import RegexValidator from django.db import models, transaction -from django.db.models.fields.files import ImageFile from django.dispatch import receiver from django.utils.translation import gettext_lazy as _ from localized_fields.fields import LocalizedCharField, LocalizedTextField from manabi.token import Key, Token -from preview_generator.manager import PreviewManager from alexandria.core.presign_urls import make_signature_components from alexandria.storages.fields import DynamicStorageFileField -log = logging.getLogger(__name__) - def upload_file_content_to(instance, _): return f"{instance.pk}_{instance.name}" @@ -255,49 +249,6 @@ def get_webdav_url(self, username, group, host="http://localhost:8000"): f"{handler}{host}{settings.ALEXANDRIA_MANABI_DAV_URL_PATH}/{token.as_url()}" ) - def create_thumbnail(self): - if ( - self.variant != File.Variant.ORIGINAL - or self.renderings.count() > 0 - or not settings.ALEXANDRIA_ENABLE_THUMBNAIL_GENERATION - ): - return - - with NamedTemporaryFile() as tmp: - temp_file = Path(tmp.name) - manager = PreviewManager(str(temp_file.parent)) - with temp_file.open("wb") as f: - f.write(self.content.file.file.read()) - extension = guess_extension(self.mime_type) - preview_kwargs = {"file_ext": extension} - if settings.ALEXANDRIA_THUMBNAIL_WIDTH: # pragma: no cover - preview_kwargs["width"] = settings.ALEXANDRIA_THUMBNAIL_WIDTH - if settings.ALEXANDRIA_THUMBNAIL_HEIGHT: # pragma: no cover - preview_kwargs["height"] = settings.ALEXANDRIA_THUMBNAIL_HEIGHT - try: - path_to_preview_image = Path( - manager.get_jpeg_preview(str(temp_file), **preview_kwargs) - ) - # thumbnail generation can throw many different exceptions, catch all - except Exception: # noqa: B902 - log.exception("Thumbnail generation failed") - return None - - with path_to_preview_image.open("rb") as thumb: - file = ImageFile(thumb) - thumb_file = File.objects.create( - name=f"{self.name}_preview.jpg", - document=self.document, - variant=File.Variant.THUMBNAIL.value, - original=self, - encryption_status=self.encryption_status, - content=file, - mime_type="image/jpeg", - size=file.size, - ) - - return thumb_file - def get_download_url(self, request): if not request: return None @@ -336,4 +287,9 @@ def set_file_attributes(sender, instance, **kwargs): ): tasks.set_content_vector.delay_on_commit(instance.pk) - instance.create_thumbnail() + if ( + instance.variant == File.Variant.ORIGINAL + and instance.renderings.count() < 1 + and settings.ALEXANDRIA_ENABLE_THUMBNAIL_GENERATION + ): + tasks.create_thumbnail.delay_on_commit(instance.pk) diff --git a/alexandria/core/tasks.py b/alexandria/core/tasks.py index ed00dea9..eb11d6c2 100644 --- a/alexandria/core/tasks.py +++ b/alexandria/core/tasks.py @@ -1,14 +1,21 @@ import hashlib +from mimetypes import guess_extension from pathlib import Path +from tempfile import NamedTemporaryFile import tika.language import tika.parser from django.conf import settings from django.contrib.postgres.search import SearchVector from django.db.models import Value +from django.db.models.fields.files import ImageFile +from preview_generator.manager import PreviewManager from alexandria.core.models import File from celery import shared_task +from celery.utils.log import get_task_logger + +logger = get_task_logger(__name__) @shared_task(soft_time_limit=301) @@ -56,3 +63,43 @@ def set_checksum(file_pk: str): def make_checksum(bytes_: bytes) -> str: return f"sha256:{hashlib.sha256(bytes_).hexdigest()}" + + +@shared_task +def create_thumbnail(file_pk: str): + file = File.objects.get(pk=file_pk) + + with NamedTemporaryFile() as tmp: + temp_file = Path(tmp.name) + manager = PreviewManager(str(temp_file.parent)) + with temp_file.open("wb") as f: + f.write(file.content.file.file.read()) + extension = guess_extension(file.mime_type) + preview_kwargs = {"file_ext": extension} + if settings.ALEXANDRIA_THUMBNAIL_WIDTH: # pragma: no cover + preview_kwargs["width"] = settings.ALEXANDRIA_THUMBNAIL_WIDTH + if settings.ALEXANDRIA_THUMBNAIL_HEIGHT: # pragma: no cover + preview_kwargs["height"] = settings.ALEXANDRIA_THUMBNAIL_HEIGHT + try: + path_to_preview_image = Path( + manager.get_jpeg_preview(str(temp_file), **preview_kwargs) + ) + # thumbnail generation can throw many different exceptions, catch all + except Exception: # noqa: B902 + logger.exception("Thumbnail generation failed") + return None + + with path_to_preview_image.open("rb") as thumb: + image = ImageFile(thumb) + thumb_file = File.objects.create( + name=f"{file.name}_preview.jpg", + document=file.document, + variant=File.Variant.THUMBNAIL.value, + original=file, + encryption_status=file.encryption_status, + content=image, + mime_type="image/jpeg", + size=file.size, + ) + + return thumb_file diff --git a/alexandria/core/tests/__snapshots__/test_viewsets.ambr b/alexandria/core/tests/__snapshots__/test_viewsets.ambr index e5a374d3..f28dddfa 100644 --- a/alexandria/core/tests/__snapshots__/test_viewsets.ambr +++ b/alexandria/core/tests/__snapshots__/test_viewsets.ambr @@ -84,12 +84,13 @@ 'SELECT "alexandria_core_file"."created_at", "alexandria_core_file"."created_by_user", "alexandria_core_file"."created_by_group", "alexandria_core_file"."modified_at", "alexandria_core_file"."modified_by_user", "alexandria_core_file"."modified_by_group", "alexandria_core_file"."metainfo", "alexandria_core_file"."id", "alexandria_core_file"."variant", "alexandria_core_file"."original_id", "alexandria_core_file"."name", "alexandria_core_file"."document_id", "alexandria_core_file"."checksum", "alexandria_core_file"."encryption_status", "alexandria_core_file"."content_vector", "alexandria_core_file"."language", "alexandria_core_file"."content", "alexandria_core_file"."mime_type", "alexandria_core_file"."size" FROM "alexandria_core_file" WHERE "alexandria_core_file"."id" = \'ea416ed0759d46a8de58f63a59077499\'::uuid LIMIT 21', 'UPDATE "alexandria_core_file" SET "content_vector" = (setweight(to_tsvector(COALESCE(\'content\', \'\')), \'A\') || setweight(to_tsvector(\'english\'::regconfig, COALESCE(\'Important text\', \'\')), \'B\')), "language" = \'en\' WHERE "alexandria_core_file"."id" = \'ea416ed0759d46a8de58f63a59077499\'::uuid', 'SELECT COUNT(*) AS "__count" FROM "alexandria_core_file" WHERE "alexandria_core_file"."original_id" = \'ea416ed0759d46a8de58f63a59077499\'::uuid', + 'SELECT "alexandria_core_file"."created_at", "alexandria_core_file"."created_by_user", "alexandria_core_file"."created_by_group", "alexandria_core_file"."modified_at", "alexandria_core_file"."modified_by_user", "alexandria_core_file"."modified_by_group", "alexandria_core_file"."metainfo", "alexandria_core_file"."id", "alexandria_core_file"."variant", "alexandria_core_file"."original_id", "alexandria_core_file"."name", "alexandria_core_file"."document_id", "alexandria_core_file"."checksum", "alexandria_core_file"."encryption_status", "alexandria_core_file"."content_vector", "alexandria_core_file"."language", "alexandria_core_file"."content", "alexandria_core_file"."mime_type", "alexandria_core_file"."size" FROM "alexandria_core_file" WHERE "alexandria_core_file"."id" = \'ea416ed0759d46a8de58f63a59077499\'::uuid LIMIT 21', 'COMMIT', 'SELECT "alexandria_core_file"."created_at", "alexandria_core_file"."created_by_user", "alexandria_core_file"."created_by_group", "alexandria_core_file"."modified_at", "alexandria_core_file"."modified_by_user", "alexandria_core_file"."modified_by_group", "alexandria_core_file"."metainfo", "alexandria_core_file"."id", "alexandria_core_file"."variant", "alexandria_core_file"."original_id", "alexandria_core_file"."name", "alexandria_core_file"."document_id", "alexandria_core_file"."checksum", "alexandria_core_file"."encryption_status", "alexandria_core_file"."content_vector", "alexandria_core_file"."language", "alexandria_core_file"."content", "alexandria_core_file"."mime_type", "alexandria_core_file"."size" FROM "alexandria_core_file" WHERE "alexandria_core_file"."document_id" = \'f561aaf6ef0bf14d4208bb46a4ccb3ad\'::uuid ORDER BY "alexandria_core_file"."created_at" DESC', 'SELECT "alexandria_core_tag"."created_at", "alexandria_core_tag"."created_by_user", "alexandria_core_tag"."created_by_group", "alexandria_core_tag"."modified_at", "alexandria_core_tag"."modified_by_user", "alexandria_core_tag"."modified_by_group", "alexandria_core_tag"."metainfo", "alexandria_core_tag"."id", "alexandria_core_tag"."name", "alexandria_core_tag"."description", "alexandria_core_tag"."tag_synonym_group_id" FROM "alexandria_core_tag" INNER JOIN "alexandria_core_document_tags" ON ("alexandria_core_tag"."id" = "alexandria_core_document_tags"."tag_id") WHERE "alexandria_core_document_tags"."document_id" = \'f561aaf6ef0bf14d4208bb46a4ccb3ad\'::uuid', 'SELECT "alexandria_core_mark"."created_at", "alexandria_core_mark"."created_by_user", "alexandria_core_mark"."created_by_group", "alexandria_core_mark"."modified_at", "alexandria_core_mark"."modified_by_user", "alexandria_core_mark"."modified_by_group", "alexandria_core_mark"."metainfo", "alexandria_core_mark"."slug", "alexandria_core_mark"."name", "alexandria_core_mark"."description" FROM "alexandria_core_mark" INNER JOIN "alexandria_core_document_marks" ON ("alexandria_core_mark"."slug" = "alexandria_core_document_marks"."mark_id") WHERE "alexandria_core_document_marks"."document_id" = \'f561aaf6ef0bf14d4208bb46a4ccb3ad\'::uuid', ]), - 'query_count': 15, + 'query_count': 16, 'request': dict({ 'CONTENT_LENGTH': '397', 'CONTENT_TYPE': 'multipart/form-data; boundary=BoUnDaRyStRiNg; charset=utf-8', @@ -172,10 +173,11 @@ 'SELECT "alexandria_core_file"."created_at", "alexandria_core_file"."created_by_user", "alexandria_core_file"."created_by_group", "alexandria_core_file"."modified_at", "alexandria_core_file"."modified_by_user", "alexandria_core_file"."modified_by_group", "alexandria_core_file"."metainfo", "alexandria_core_file"."id", "alexandria_core_file"."variant", "alexandria_core_file"."original_id", "alexandria_core_file"."name", "alexandria_core_file"."document_id", "alexandria_core_file"."checksum", "alexandria_core_file"."encryption_status", "alexandria_core_file"."content_vector", "alexandria_core_file"."language", "alexandria_core_file"."content", "alexandria_core_file"."mime_type", "alexandria_core_file"."size" FROM "alexandria_core_file" WHERE "alexandria_core_file"."id" = \'ea416ed0759d46a8de58f63a59077499\'::uuid LIMIT 21', 'UPDATE "alexandria_core_file" SET "content_vector" = (setweight(to_tsvector(COALESCE(\'father\', \'\')), \'A\') || setweight(to_tsvector(\'english\'::regconfig, COALESCE(\'Important text\', \'\')), \'B\')), "language" = \'en\' WHERE "alexandria_core_file"."id" = \'ea416ed0759d46a8de58f63a59077499\'::uuid', 'SELECT COUNT(*) AS "__count" FROM "alexandria_core_file" WHERE "alexandria_core_file"."original_id" = \'ea416ed0759d46a8de58f63a59077499\'::uuid', + 'SELECT "alexandria_core_file"."created_at", "alexandria_core_file"."created_by_user", "alexandria_core_file"."created_by_group", "alexandria_core_file"."modified_at", "alexandria_core_file"."modified_by_user", "alexandria_core_file"."modified_by_group", "alexandria_core_file"."metainfo", "alexandria_core_file"."id", "alexandria_core_file"."variant", "alexandria_core_file"."original_id", "alexandria_core_file"."name", "alexandria_core_file"."document_id", "alexandria_core_file"."checksum", "alexandria_core_file"."encryption_status", "alexandria_core_file"."content_vector", "alexandria_core_file"."language", "alexandria_core_file"."content", "alexandria_core_file"."mime_type", "alexandria_core_file"."size" FROM "alexandria_core_file" WHERE "alexandria_core_file"."id" = \'ea416ed0759d46a8de58f63a59077499\'::uuid LIMIT 21', 'SELECT "alexandria_core_file"."created_at", "alexandria_core_file"."created_by_user", "alexandria_core_file"."created_by_group", "alexandria_core_file"."modified_at", "alexandria_core_file"."modified_by_user", "alexandria_core_file"."modified_by_group", "alexandria_core_file"."metainfo", "alexandria_core_file"."id", "alexandria_core_file"."variant", "alexandria_core_file"."original_id", "alexandria_core_file"."name", "alexandria_core_file"."document_id", "alexandria_core_file"."checksum", "alexandria_core_file"."encryption_status", "alexandria_core_file"."content_vector", "alexandria_core_file"."language", "alexandria_core_file"."content", "alexandria_core_file"."mime_type", "alexandria_core_file"."size" FROM "alexandria_core_file" WHERE "alexandria_core_file"."original_id" = \'ea416ed0759d46a8de58f63a59077499\'::uuid ORDER BY "alexandria_core_file"."created_at" DESC', 'SELECT 1 AS "a" FROM "alexandria_core_document" WHERE ("alexandria_core_document"."id" = \'9dd4e461268c8034f5c8564e155c67a6\'::uuid AND "alexandria_core_document"."id" = \'9dd4e461268c8034f5c8564e155c67a6\'::uuid) LIMIT 1', ]), - 'query_count': 10, + 'query_count': 11, 'request': dict({ 'CONTENT_LENGTH': '345', 'CONTENT_TYPE': 'multipart/form-data; boundary=BoUnDaRyStRiNg; charset=utf-8', @@ -870,7 +872,7 @@ 'modified-by-group': None, 'modified-by-user': None, 'name': 'father.png_preview.jpg', - 'size': 2257, + 'size': 3030, 'variant': 'thumbnail', }), 'id': 'f561aaf6-ef0b-f14d-4208-bb46a4ccb3ad', @@ -1563,7 +1565,7 @@ 'modified-by-group': None, 'modified-by-user': None, 'name': 'father.png_preview.jpg', - 'size': 2257, + 'size': 3030, 'variant': 'thumbnail', }), 'id': 'f561aaf6-ef0b-f14d-4208-bb46a4ccb3ad', @@ -1646,7 +1648,7 @@ 'modified-by-group': None, 'modified-by-user': None, 'name': 'run.png_preview.jpg', - 'size': 2257, + 'size': 7738, 'variant': 'thumbnail', }), 'id': 'dad3a37a-a9d5-0688-b515-7698acfd7aee', diff --git a/alexandria/storages/tests/test_dynamic_field.py b/alexandria/storages/tests/test_dynamic_field.py index e7a47380..a374461f 100644 --- a/alexandria/storages/tests/test_dynamic_field.py +++ b/alexandria/storages/tests/test_dynamic_field.py @@ -24,7 +24,7 @@ def test_dynamic_storage_select_global_ssec( # Patch away file opens mocker.patch("alexandria.core.tasks.set_checksum.delay", side_effect=None) mocker.patch("alexandria.core.tasks.set_content_vector.delay", side_effect=None) - mocker.patch("alexandria.core.models.File.create_thumbnail") + mocker.patch("alexandria.core.tasks.create_thumbnail.delay", side_effect=None) if raises is not None: with pytest.raises(raises): file_factory() diff --git a/compose.override.yaml b/compose.override.yaml index 35a4891b..bca9cbda 100644 --- a/compose.override.yaml +++ b/compose.override.yaml @@ -50,10 +50,6 @@ services: - CONCURRENT_DATABASE_RELOAD=false - ENABLE_DEFINITIONS_UPDATE=false - redis: - environment: - - REDIS_PASSWORD=redis - celery: <<: *alexandria ports: