Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Issue #6414] File system operations do not adhere to Django file storage API #6425

Merged
merged 6 commits into from
Sep 23, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 8 additions & 8 deletions geonode/base/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@
send_notification,
get_notification_recipients)
from geonode.people.enumerations import ROLE_VALUES
from geonode.base.thumb_utils import (
thumb_path,
remove_thumbs)

from pyproj import transform, Proj

Expand Down Expand Up @@ -1306,7 +1309,8 @@ def has_thumbnail(self):
# Note - you should probably broadcast layer#post_save() events to ensure
# that indexing (or other listeners) are notified
def save_thumbnail(self, filename, image):
upload_path = os.path.join('thumbs/', filename)
upload_path = thumb_path(filename)

try:
# Check that the image is valid
from PIL import Image
Expand All @@ -1315,18 +1319,14 @@ def save_thumbnail(self, filename, image):
im = Image.open(content_data)
im.verify() # verify that it is, in fact an image

thumbnail_name, ext = os.path.splitext(filename)
_, _thumbs = storage.listdir("thumbs")
for _thumb in _thumbs:
if _thumb.startswith(thumbnail_name):
storage.delete(os.path.join("thumbs", _thumb))
logger.debug("Deleted existing thumbnail: " + _thumb)
name, ext = os.path.splitext(filename)
remove_thumbs(name)

if upload_path and image:
actual_name = storage.save(upload_path, ContentFile(image))
url = storage.url(actual_name)
_url = urlparse(url)
_upload_path = os.path.join('thumbs/', os.path.basename(_url.path))
_upload_path = thumb_path(os.path.basename(_url.path))
if upload_path != _upload_path:
if storage.exists(_upload_path):
storage.delete(_upload_path)
Expand Down
38 changes: 38 additions & 0 deletions geonode/base/thumb_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import os

from django.conf import settings
from django.core.files.storage import default_storage as storage


def thumb_path(filename):
"""Return the complete path of the provided thumbnail file accessible
via Django storage API"""
return os.path.join(settings.THUMBNAIL_LOCATION, filename)


def thumb_exists(filename):
"""Determine if a thumbnail file exists in storage"""
return storage.exists(thumb_path(filename))


def get_thumbs():
"""Fetches a list of all stored thumbnails"""
if not storage.exists(settings.THUMBNAIL_LOCATION):
return []

subdirs, thumbs = storage.listdir(settings.THUMBNAIL_LOCATION)

return thumbs


def remove_thumb(filename):
"""Delete a thumbnail from storage"""
storage.delete(thumb_path(filename))


def remove_thumbs(name):
"""Removes all stored thumbnails that start with the same name as the
file specified"""
for thumb in get_thumbs():
if thumb.startswith(name):
remove_thumb(thumb)
49 changes: 29 additions & 20 deletions geonode/base/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,16 +22,14 @@
"""

# Standard Modules
import os
import re
import logging
from dateutil.parser import isoparse
from datetime import datetime, timedelta

# Django functionality
from django.conf import settings
from django.contrib.auth import get_user_model
from django.core.files.storage import FileSystemStorage
from django.core.files.storage import default_storage as storage

# Geonode functionality
from guardian.shortcuts import get_perms, remove_perm, assign_perm
Expand All @@ -42,6 +40,9 @@
from geonode.geoserver.helpers import ogc_server_settings
from geonode.maps.models import Map
from geonode.services.models import Service
from geonode.base.thumb_utils import (
get_thumbs,
remove_thumb)

logger = logging.getLogger('geonode.base.utils')

Expand All @@ -50,28 +51,36 @@
'ESRI Shapefile', 'View in Google Earth', 'KML', 'KMZ', 'Atom', 'DIF',
'Dublin Core', 'ebRIM', 'FGDC', 'ISO', 'ISO with XSL']

thumb_filename_regex = re.compile(
r"^(document|map|layer)-([a-f\d]{8}-[a-f\d]{4}-[a-f\d]{4}-[a-f\d]{4}-[a-f\d]{12})-thumb\.png$")


def get_thumb_uuid(filename):
"""Fetches the UUID associated with the given thumbnail file"""
result = thumb_filename_regex.search(filename)
uuid = result.group(2) if result else None

return uuid


def delete_orphaned_thumbs():
"""
Deletes orphaned thumbnails.
"""
if isinstance(storage, FileSystemStorage):
documents_path = os.path.join(settings.MEDIA_ROOT, 'thumbs')
else:
documents_path = os.path.join(settings.STATIC_ROOT, 'thumbs')
if os.path.exists(documents_path):
for filename in os.listdir(documents_path):
fn = os.path.join(documents_path, filename)
model = filename.split('-')[0]
uuid = filename.replace(model, '').replace('-thumb.png', '')[1:]
if ResourceBase.objects.filter(uuid=uuid).count() == 0:
print('Removing orphan thumb %s' % fn)
logger.debug('Removing orphan thumb %s' % fn)
try:
os.remove(fn)
except OSError:
print('Could not delete file %s' % fn)
logger.error('Could not delete file %s' % fn)
deleted = []
thumb_uuids = {get_thumb_uuid(filename): filename for filename in get_thumbs()}
db_uuids = ResourceBase.objects.filter(uuid__in=thumb_uuids.keys()).values_list("uuid", flat=True)
orphaned_uuids = set(thumb_uuids.keys()) - set(db_uuids)
orphaned_thumbs = (thumb_uuids[uuid] for uuid in orphaned_uuids if uuid is not None)

for filename in orphaned_thumbs:
try:
remove_thumb(filename)
deleted.append(filename)
except NotImplementedError as e:
logger.error("Failed to delete orphaned thumbnail '{}': {}".format(filename, e))

return deleted


def remove_duplicate_links(resource):
Expand Down
31 changes: 15 additions & 16 deletions geonode/documents/renderers.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,16 +19,16 @@
#########################################################################

import io
import os
import subprocess
import traceback
import tempfile

from django.conf import settings
from threading import Timer
from mimetypes import guess_type
from urllib.request import pathname2url

from tempfile import NamedTemporaryFile


class ConversionError(Exception):
"""Raise when conversion was unsuccessful."""
Expand Down Expand Up @@ -58,28 +58,23 @@ def render_document(document_path, extension="png"):

# workaround: https://github.com/dagwieers/unoconv/issues/167
# first convert a document to PDF and continue
if extension == "pdf":
temp_path = document_path
elif guess_mimetype(document_path) == 'application/pdf':
temp_path = document_path
else:
temp = render_document(document_path, extension="pdf")
temp_path = temp.name
dispose_input = False
if extension != "pdf" and guess_mimetype(document_path) != 'application/pdf':
document_path = render_document(document_path, extension="pdf")
dispose_input = True

# spawn subprocess and render the document
output = NamedTemporaryFile(suffix='.{}'.format(extension))
output_path = None
if settings.UNOCONV_ENABLE:
timeout = None
_, output_path = tempfile.mkstemp(suffix=".{}".format(extension))
try:
def kill(process):
return process.kill()

unoconv = subprocess.Popen(
[settings.UNOCONV_EXECUTABLE, "-v", "-e", "PageRange=1-2",
"-f", extension, "-o", output.name, temp_path],
"-f", extension, "-o", output_path, document_path],
stdout=subprocess.PIPE, stderr=subprocess.PIPE
)
timeout = Timer(settings.UNOCONV_TIMEOUT, kill, [unoconv])
timeout = Timer(settings.UNOCONV_TIMEOUT, unoconv.kill)
timeout.start()
stdout, stderr = unoconv.communicate()
except Exception as e:
Expand All @@ -88,8 +83,12 @@ def kill(process):
finally:
if timeout:
timeout.cancel()
if dispose_input and document_path is not None:
os.remove(document_path)
else:
raise NotImplementedError("unoconv is disabled. Set 'UNOCONV_ENABLE' to enable.")

return output
return output_path


def generate_thumbnail_content(image_path, size=(200, 150)):
Expand Down
59 changes: 34 additions & 25 deletions geonode/documents/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@
#########################################################################

import os
from os import access, R_OK
from os.path import isfile

from geonode.celery_app import app
from celery.utils.log import get_task_logger
Expand All @@ -29,7 +27,8 @@
from geonode.documents.renderers import render_document
from geonode.documents.renderers import generate_thumbnail_content
from geonode.documents.renderers import ConversionError
from geonode.documents.renderers import MissingPILError

from django.core.files.storage import default_storage as storage

logger = get_task_logger(__name__)

Expand All @@ -47,38 +46,48 @@ def create_document_thumbnail(self, object_id):
logger.error("Document #{} does not exist.".format(object_id))
return

if not storage.exists(document.doc_file.name):
logger.error("Document #{} exists but its location could not be resolved.".format(object_id))
return

image_path = None
image_file = None

if document.is_image():
image_path = document.doc_file.path
image_file = storage.open(document.doc_file.name, 'rb')
elif document.is_file():
try:
image_file = render_document(document.doc_file.path)
image_path = image_file.name
except ConversionError as e:
logger.debug("Could not convert document #{}: {}."
.format(object_id, e))

try:
if image_path:
assert isfile(image_path) and access(image_path, R_OK) and os.stat(image_path).st_size > 0
except (AssertionError, TypeError):
image_path = None
document_location = storage.path(document.doc_file.name)
except NotImplementedError as e:
logger.debug(e)
document_location = storage.url(document.doc_file.name)

if not image_path:
image_path = document.find_placeholder()

if not image_path or not os.path.exists(image_path):
logger.debug("Could not find placeholder for document #{}"
.format(object_id))
return
try:
image_path = render_document(document_location)
if image_path is not None:
image_file = open(image_path, 'rb')
else:
logger.debug("Failed to render document #{}".format(object_id))
except ConversionError as e:
logger.debug("Could not convert document #{}: {}.".format(object_id, e))
except NotImplementedError as e:
logger.debug("Failed to render document #{}: {}".format(object_id, e))

thumbnail_content = None
try:
thumbnail_content = generate_thumbnail_content(image_path)
except MissingPILError:
logger.error('Pillow not installed, could not generate thumbnail.')
try:
thumbnail_content = generate_thumbnail_content(image_file)
except Exception:
thumbnail_content = generate_thumbnail_content(document.find_placeholder())
except Exception as e:
logger.error("Could not generate thumbnail: {}".format(e))
return
finally:
if image_file is not None:
image_file.close()

if image_path is not None:
os.remove(image_path)

if not thumbnail_content:
logger.warning("Thumbnail for document #{} empty.".format(object_id))
Expand Down
27 changes: 15 additions & 12 deletions geonode/documents/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@

import gisdata
from datetime import datetime
from django.conf import settings
from django.urls import reverse
from django.contrib.auth.models import Group
from django.contrib.auth import get_user_model
Expand All @@ -49,6 +48,7 @@
from geonode.maps.models import Map
from geonode.layers.models import Layer
from geonode.compat import ensure_string
from geonode.base.thumb_utils import get_thumbs
from geonode.base.models import License, Region
from geonode.documents import DocumentsAppConfig
from geonode.documents.forms import DocumentFormMixin
Expand Down Expand Up @@ -449,9 +449,6 @@ class DocumentModerationTestCase(GeoNodeBaseTestSupport):

def setUp(self):
super(DocumentModerationTestCase, self).setUp()
thumbs_dir = os.path.join(settings.MEDIA_ROOT, "thumbs")
if not os.path.exists(thumbs_dir):
os.mkdir(thumbs_dir)
self.user = 'admin'
self.passwd = 'admin'
create_models(type=b'document')
Expand Down Expand Up @@ -493,18 +490,24 @@ def test_moderated_upload(self):
_d.delete()

from geonode.documents.utils import delete_orphaned_document_files
delete_orphaned_document_files()
_, document_files_before = storage.listdir("documents")
deleted = delete_orphaned_document_files()
_, document_files_after = storage.listdir("documents")
self.assertTrue(len(deleted) > 0)
self.assertEqual(set(deleted), set(document_files_before) - set(document_files_after))

from geonode.base.utils import delete_orphaned_thumbs
delete_orphaned_thumbs()
thumb_files_before = get_thumbs()
deleted = delete_orphaned_thumbs()
thumb_files_after = get_thumbs()
self.assertTrue(len(deleted) > 0)
self.assertEqual(set(deleted), set(thumb_files_before) - set(thumb_files_after))

documents_path = os.path.join(settings.MEDIA_ROOT, 'documents')
fn = os.path.join(documents_path, os.path.basename(input_path))
self.assertFalse(os.path.isfile(fn))
fn = os.path.join("documents", os.path.basename(input_path))
self.assertFalse(storage.exists(fn))

_, files = storage.listdir("thumbs")
_cnt = sum(1 for fn in files if uuid in fn)
self.assertTrue(_cnt == 0)
files = [thumb for thumb in get_thumbs() if uuid in thumb]
self.assertEqual(len(files), 0)

with self.settings(ADMIN_MODERATE_UPLOADS=True):
self.client.login(username=self.user, password=self.passwd)
Expand Down
Loading