Skip to content

Commit

Permalink
[Issue #6414] File system operations do not adhere to Django file sto…
Browse files Browse the repository at this point in the history
…rage API (#6425)

* [Issue #6414] Use Django storage API in delete_orphaned_* functions

* [Issue #6414] Use Django storage API in geonode.qgis_server.tests.test_views

* [Issue #6414] Use Django storage API when generating document thumbnails

* [Issue #6414] Thumbnail generation fix for local storage

* Add thumbnail convenience functions

* Cleanup Django storage API changes
  • Loading branch information
Matthew Northcott authored Sep 23, 2020
1 parent 6b62716 commit 771e154
Show file tree
Hide file tree
Showing 11 changed files with 194 additions and 132 deletions.
16 changes: 8 additions & 8 deletions geonode/base/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@
send_notification,
get_notification_recipients)
from geonode.people.enumerations import ROLE_VALUES
from geonode.base.thumb_utils import (
thumb_path,
remove_thumbs)

from pyproj import transform, Proj

Expand Down Expand Up @@ -1304,7 +1307,8 @@ def has_thumbnail(self):
# Note - you should probably broadcast layer#post_save() events to ensure
# that indexing (or other listeners) are notified
def save_thumbnail(self, filename, image):
upload_path = os.path.join('thumbs/', filename)
upload_path = thumb_path(filename)

try:
# Check that the image is valid
from PIL import Image
Expand All @@ -1313,18 +1317,14 @@ def save_thumbnail(self, filename, image):
im = Image.open(content_data)
im.verify() # verify that it is, in fact an image

thumbnail_name, ext = os.path.splitext(filename)
_, _thumbs = storage.listdir("thumbs")
for _thumb in _thumbs:
if _thumb.startswith(thumbnail_name):
storage.delete(os.path.join("thumbs", _thumb))
logger.debug("Deleted existing thumbnail: " + _thumb)
name, ext = os.path.splitext(filename)
remove_thumbs(name)

if upload_path and image:
actual_name = storage.save(upload_path, ContentFile(image))
url = storage.url(actual_name)
_url = urlparse(url)
_upload_path = os.path.join('thumbs/', os.path.basename(_url.path))
_upload_path = thumb_path(os.path.basename(_url.path))
if upload_path != _upload_path:
if storage.exists(_upload_path):
storage.delete(_upload_path)
Expand Down
38 changes: 38 additions & 0 deletions geonode/base/thumb_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import os

from django.conf import settings
from django.core.files.storage import default_storage as storage


def thumb_path(filename):
"""Return the complete path of the provided thumbnail file accessible
via Django storage API"""
return os.path.join(settings.THUMBNAIL_LOCATION, filename)


def thumb_exists(filename):
"""Determine if a thumbnail file exists in storage"""
return storage.exists(thumb_path(filename))


def get_thumbs():
"""Fetches a list of all stored thumbnails"""
if not storage.exists(settings.THUMBNAIL_LOCATION):
return []

subdirs, thumbs = storage.listdir(settings.THUMBNAIL_LOCATION)

return thumbs


def remove_thumb(filename):
"""Delete a thumbnail from storage"""
storage.delete(thumb_path(filename))


def remove_thumbs(name):
"""Removes all stored thumbnails that start with the same name as the
file specified"""
for thumb in get_thumbs():
if thumb.startswith(name):
remove_thumb(thumb)
49 changes: 29 additions & 20 deletions geonode/base/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,14 @@
"""

# Standard Modules
import os
import re
import logging
from dateutil.parser import isoparse
from datetime import datetime, timedelta

# Django functionality
from django.conf import settings
from django.contrib.auth import get_user_model
from django.core.files.storage import FileSystemStorage
from django.core.files.storage import default_storage as storage

# Geonode functionality
from guardian.shortcuts import get_perms, remove_perm, assign_perm
Expand All @@ -42,6 +40,9 @@
from geonode.geoserver.helpers import ogc_server_settings
from geonode.maps.models import Map
from geonode.services.models import Service
from geonode.base.thumb_utils import (
get_thumbs,
remove_thumb)

logger = logging.getLogger('geonode.base.utils')

Expand All @@ -50,28 +51,36 @@
'ESRI Shapefile', 'View in Google Earth', 'KML', 'KMZ', 'Atom', 'DIF',
'Dublin Core', 'ebRIM', 'FGDC', 'ISO', 'ISO with XSL']

thumb_filename_regex = re.compile(
r"^(document|map|layer)-([a-f\d]{8}-[a-f\d]{4}-[a-f\d]{4}-[a-f\d]{4}-[a-f\d]{12})-thumb\.png$")


def get_thumb_uuid(filename):
"""Fetches the UUID associated with the given thumbnail file"""
result = thumb_filename_regex.search(filename)
uuid = result.group(2) if result else None

return uuid


def delete_orphaned_thumbs():
"""
Deletes orphaned thumbnails.
"""
if isinstance(storage, FileSystemStorage):
documents_path = os.path.join(settings.MEDIA_ROOT, 'thumbs')
else:
documents_path = os.path.join(settings.STATIC_ROOT, 'thumbs')
if os.path.exists(documents_path):
for filename in os.listdir(documents_path):
fn = os.path.join(documents_path, filename)
model = filename.split('-')[0]
uuid = filename.replace(model, '').replace('-thumb.png', '')[1:]
if ResourceBase.objects.filter(uuid=uuid).count() == 0:
print('Removing orphan thumb %s' % fn)
logger.debug('Removing orphan thumb %s' % fn)
try:
os.remove(fn)
except OSError:
print('Could not delete file %s' % fn)
logger.error('Could not delete file %s' % fn)
deleted = []
thumb_uuids = {get_thumb_uuid(filename): filename for filename in get_thumbs()}
db_uuids = ResourceBase.objects.filter(uuid__in=thumb_uuids.keys()).values_list("uuid", flat=True)
orphaned_uuids = set(thumb_uuids.keys()) - set(db_uuids)
orphaned_thumbs = (thumb_uuids[uuid] for uuid in orphaned_uuids if uuid is not None)

for filename in orphaned_thumbs:
try:
remove_thumb(filename)
deleted.append(filename)
except NotImplementedError as e:
logger.error("Failed to delete orphaned thumbnail '{}': {}".format(filename, e))

return deleted


def remove_duplicate_links(resource):
Expand Down
31 changes: 15 additions & 16 deletions geonode/documents/renderers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,16 @@
#########################################################################

import io
import os
import subprocess
import traceback
import tempfile

from django.conf import settings
from threading import Timer
from mimetypes import guess_type
from urllib.request import pathname2url

from tempfile import NamedTemporaryFile


class ConversionError(Exception):
"""Raise when conversion was unsuccessful."""
Expand Down Expand Up @@ -58,28 +58,23 @@ def render_document(document_path, extension="png"):

# workaround: https://github.com/dagwieers/unoconv/issues/167
# first convert a document to PDF and continue
if extension == "pdf":
temp_path = document_path
elif guess_mimetype(document_path) == 'application/pdf':
temp_path = document_path
else:
temp = render_document(document_path, extension="pdf")
temp_path = temp.name
dispose_input = False
if extension != "pdf" and guess_mimetype(document_path) != 'application/pdf':
document_path = render_document(document_path, extension="pdf")
dispose_input = True

# spawn subprocess and render the document
output = NamedTemporaryFile(suffix='.{}'.format(extension))
output_path = None
if settings.UNOCONV_ENABLE:
timeout = None
_, output_path = tempfile.mkstemp(suffix=".{}".format(extension))
try:
def kill(process):
return process.kill()

unoconv = subprocess.Popen(
[settings.UNOCONV_EXECUTABLE, "-v", "-e", "PageRange=1-2",
"-f", extension, "-o", output.name, temp_path],
"-f", extension, "-o", output_path, document_path],
stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
timeout = Timer(settings.UNOCONV_TIMEOUT, kill, [unoconv])
timeout = Timer(settings.UNOCONV_TIMEOUT, unoconv.kill)
timeout.start()
stdout, stderr = unoconv.communicate()
except Exception as e:
Expand All @@ -88,8 +83,12 @@ def kill(process):
finally:
if timeout:
timeout.cancel()
if dispose_input and document_path is not None:
os.remove(document_path)
else:
raise NotImplementedError("unoconv is disabled. Set 'UNOCONV_ENABLE' to enable.")

return output
return output_path


def generate_thumbnail_content(image_path, size=(200, 150)):
Expand Down
59 changes: 34 additions & 25 deletions geonode/documents/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
#########################################################################

import os
from os import access, R_OK
from os.path import isfile

from geonode.celery_app import app
from celery.utils.log import get_task_logger
Expand All @@ -29,7 +27,8 @@
from geonode.documents.renderers import render_document
from geonode.documents.renderers import generate_thumbnail_content
from geonode.documents.renderers import ConversionError
from geonode.documents.renderers import MissingPILError

from django.core.files.storage import default_storage as storage

logger = get_task_logger(__name__)

Expand All @@ -47,38 +46,48 @@ def create_document_thumbnail(self, object_id):
logger.error("Document #{} does not exist.".format(object_id))
return

if not storage.exists(document.doc_file.name):
logger.error("Document #{} exists but its location could not be resolved.".format(object_id))
return

image_path = None
image_file = None

if document.is_image():
image_path = document.doc_file.path
image_file = storage.open(document.doc_file.name, 'rb')
elif document.is_file():
try:
image_file = render_document(document.doc_file.path)
image_path = image_file.name
except ConversionError as e:
logger.debug("Could not convert document #{}: {}."
.format(object_id, e))

try:
if image_path:
assert isfile(image_path) and access(image_path, R_OK) and os.stat(image_path).st_size > 0
except (AssertionError, TypeError):
image_path = None
document_location = storage.path(document.doc_file.name)
except NotImplementedError as e:
logger.debug(e)
document_location = storage.url(document.doc_file.name)

if not image_path:
image_path = document.find_placeholder()

if not image_path or not os.path.exists(image_path):
logger.debug("Could not find placeholder for document #{}"
.format(object_id))
return
try:
image_path = render_document(document_location)
if image_path is not None:
image_file = open(image_path, 'rb')
else:
logger.debug("Failed to render document #{}".format(object_id))
except ConversionError as e:
logger.debug("Could not convert document #{}: {}.".format(object_id, e))
except NotImplementedError as e:
logger.debug("Failed to render document #{}: {}".format(object_id, e))

thumbnail_content = None
try:
thumbnail_content = generate_thumbnail_content(image_path)
except MissingPILError:
logger.error('Pillow not installed, could not generate thumbnail.')
try:
thumbnail_content = generate_thumbnail_content(image_file)
except Exception:
thumbnail_content = generate_thumbnail_content(document.find_placeholder())
except Exception as e:
logger.error("Could not generate thumbnail: {}".format(e))
return
finally:
if image_file is not None:
image_file.close()

if image_path is not None:
os.remove(image_path)

if not thumbnail_content:
logger.warning("Thumbnail for document #{} empty.".format(object_id))
Expand Down
27 changes: 15 additions & 12 deletions geonode/documents/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@

import gisdata
from datetime import datetime
from django.conf import settings
from django.urls import reverse
from django.contrib.auth.models import Group
from django.contrib.auth import get_user_model
Expand All @@ -49,6 +48,7 @@
from geonode.maps.models import Map
from geonode.layers.models import Layer
from geonode.compat import ensure_string
from geonode.base.thumb_utils import get_thumbs
from geonode.base.models import License, Region
from geonode.documents import DocumentsAppConfig
from geonode.documents.forms import DocumentFormMixin
Expand Down Expand Up @@ -449,9 +449,6 @@ class DocumentModerationTestCase(GeoNodeBaseTestSupport):

def setUp(self):
super(DocumentModerationTestCase, self).setUp()
thumbs_dir = os.path.join(settings.MEDIA_ROOT, "thumbs")
if not os.path.exists(thumbs_dir):
os.mkdir(thumbs_dir)
self.user = 'admin'
self.passwd = 'admin'
create_models(type=b'document')
Expand Down Expand Up @@ -493,18 +490,24 @@ def test_moderated_upload(self):
_d.delete()

from geonode.documents.utils import delete_orphaned_document_files
delete_orphaned_document_files()
_, document_files_before = storage.listdir("documents")
deleted = delete_orphaned_document_files()
_, document_files_after = storage.listdir("documents")
self.assertTrue(len(deleted) > 0)
self.assertEqual(set(deleted), set(document_files_before) - set(document_files_after))

from geonode.base.utils import delete_orphaned_thumbs
delete_orphaned_thumbs()
thumb_files_before = get_thumbs()
deleted = delete_orphaned_thumbs()
thumb_files_after = get_thumbs()
self.assertTrue(len(deleted) > 0)
self.assertEqual(set(deleted), set(thumb_files_before) - set(thumb_files_after))

documents_path = os.path.join(settings.MEDIA_ROOT, 'documents')
fn = os.path.join(documents_path, os.path.basename(input_path))
self.assertFalse(os.path.isfile(fn))
fn = os.path.join("documents", os.path.basename(input_path))
self.assertFalse(storage.exists(fn))

_, files = storage.listdir("thumbs")
_cnt = sum(1 for fn in files if uuid in fn)
self.assertTrue(_cnt == 0)
files = [thumb for thumb in get_thumbs() if uuid in thumb]
self.assertEqual(len(files), 0)

with self.settings(ADMIN_MODERATE_UPLOADS=True):
self.client.login(username=self.user, password=self.passwd)
Expand Down
Loading

0 comments on commit 771e154

Please sign in to comment.