diff --git a/geonode/base/models.py b/geonode/base/models.py index c05dbd78fd4..ed77fc31ae6 100644 --- a/geonode/base/models.py +++ b/geonode/base/models.py @@ -74,6 +74,9 @@ send_notification, get_notification_recipients) from geonode.people.enumerations import ROLE_VALUES +from geonode.base.thumb_utils import ( + thumb_path, + remove_thumbs) from pyproj import transform, Proj @@ -1306,7 +1309,8 @@ def has_thumbnail(self): # Note - you should probably broadcast layer#post_save() events to ensure # that indexing (or other listeners) are notified def save_thumbnail(self, filename, image): - upload_path = os.path.join('thumbs/', filename) + upload_path = thumb_path(filename) + try: # Check that the image is valid from PIL import Image @@ -1315,18 +1319,14 @@ def save_thumbnail(self, filename, image): im = Image.open(content_data) im.verify() # verify that it is, in fact an image - thumbnail_name, ext = os.path.splitext(filename) - _, _thumbs = storage.listdir("thumbs") - for _thumb in _thumbs: - if _thumb.startswith(thumbnail_name): - storage.delete(os.path.join("thumbs", _thumb)) - logger.debug("Deleted existing thumbnail: " + _thumb) + name, ext = os.path.splitext(filename) + remove_thumbs(name) if upload_path and image: actual_name = storage.save(upload_path, ContentFile(image)) url = storage.url(actual_name) _url = urlparse(url) - _upload_path = os.path.join('thumbs/', os.path.basename(_url.path)) + _upload_path = thumb_path(os.path.basename(_url.path)) if upload_path != _upload_path: if storage.exists(_upload_path): storage.delete(_upload_path) diff --git a/geonode/base/thumb_utils.py b/geonode/base/thumb_utils.py new file mode 100644 index 00000000000..49c27bd50a5 --- /dev/null +++ b/geonode/base/thumb_utils.py @@ -0,0 +1,38 @@ +import os + +from django.conf import settings +from django.core.files.storage import default_storage as storage + + +def thumb_path(filename): + """Return the complete path of the provided thumbnail file accessible + via Django storage API""" + return os.path.join(settings.THUMBNAIL_LOCATION, filename) + + +def thumb_exists(filename): + """Determine if a thumbnail file exists in storage""" + return storage.exists(thumb_path(filename)) + + +def get_thumbs(): + """Fetches a list of all stored thumbnails""" + if not storage.exists(settings.THUMBNAIL_LOCATION): + return [] + + subdirs, thumbs = storage.listdir(settings.THUMBNAIL_LOCATION) + + return thumbs + + +def remove_thumb(filename): + """Delete a thumbnail from storage""" + storage.delete(thumb_path(filename)) + + +def remove_thumbs(name): + """Removes all stored thumbnails that start with the same name as the + file specified""" + for thumb in get_thumbs(): + if thumb.startswith(name): + remove_thumb(thumb) diff --git a/geonode/base/utils.py b/geonode/base/utils.py index c17c4909d82..e21d7c7d9f8 100644 --- a/geonode/base/utils.py +++ b/geonode/base/utils.py @@ -22,7 +22,7 @@ """ # Standard Modules -import os +import re import logging from dateutil.parser import isoparse from datetime import datetime, timedelta @@ -30,8 +30,6 @@ # Django functionality from django.conf import settings from django.contrib.auth import get_user_model -from django.core.files.storage import FileSystemStorage -from django.core.files.storage import default_storage as storage # Geonode functionality from guardian.shortcuts import get_perms, remove_perm, assign_perm @@ -42,6 +40,9 @@ from geonode.geoserver.helpers import ogc_server_settings from geonode.maps.models import Map from geonode.services.models import Service +from geonode.base.thumb_utils import ( + get_thumbs, + remove_thumb) logger = logging.getLogger('geonode.base.utils') @@ -50,28 +51,36 @@ 'ESRI Shapefile', 'View in Google Earth', 'KML', 'KMZ', 'Atom', 'DIF', 'Dublin Core', 'ebRIM', 'FGDC', 'ISO', 'ISO with XSL'] +thumb_filename_regex = re.compile( + r"^(document|map|layer)-([a-f\d]{8}-[a-f\d]{4}-[a-f\d]{4}-[a-f\d]{4}-[a-f\d]{12})-thumb\.png$") + + +def get_thumb_uuid(filename): + """Fetches the UUID associated with the given thumbnail file""" + result = thumb_filename_regex.search(filename) + uuid = result.group(2) if result else None + + return uuid + def delete_orphaned_thumbs(): """ Deletes orphaned thumbnails. """ - if isinstance(storage, FileSystemStorage): - documents_path = os.path.join(settings.MEDIA_ROOT, 'thumbs') - else: - documents_path = os.path.join(settings.STATIC_ROOT, 'thumbs') - if os.path.exists(documents_path): - for filename in os.listdir(documents_path): - fn = os.path.join(documents_path, filename) - model = filename.split('-')[0] - uuid = filename.replace(model, '').replace('-thumb.png', '')[1:] - if ResourceBase.objects.filter(uuid=uuid).count() == 0: - print('Removing orphan thumb %s' % fn) - logger.debug('Removing orphan thumb %s' % fn) - try: - os.remove(fn) - except OSError: - print('Could not delete file %s' % fn) - logger.error('Could not delete file %s' % fn) + deleted = [] + thumb_uuids = {get_thumb_uuid(filename): filename for filename in get_thumbs()} + db_uuids = ResourceBase.objects.filter(uuid__in=thumb_uuids.keys()).values_list("uuid", flat=True) + orphaned_uuids = set(thumb_uuids.keys()) - set(db_uuids) + orphaned_thumbs = (thumb_uuids[uuid] for uuid in orphaned_uuids if uuid is not None) + + for filename in orphaned_thumbs: + try: + remove_thumb(filename) + deleted.append(filename) + except NotImplementedError as e: + logger.error("Failed to delete orphaned thumbnail '{}': {}".format(filename, e)) + + return deleted def remove_duplicate_links(resource): diff --git a/geonode/documents/renderers.py b/geonode/documents/renderers.py index e4669762dbe..819798658a5 100644 --- a/geonode/documents/renderers.py +++ b/geonode/documents/renderers.py @@ -19,16 +19,16 @@ ######################################################################### import io +import os import subprocess import traceback +import tempfile from django.conf import settings from threading import Timer from mimetypes import guess_type from urllib.request import pathname2url -from tempfile import NamedTemporaryFile - class ConversionError(Exception): """Raise when conversion was unsuccessful.""" @@ -58,28 +58,23 @@ def render_document(document_path, extension="png"): # workaround: https://github.com/dagwieers/unoconv/issues/167 # first convert a document to PDF and continue - if extension == "pdf": - temp_path = document_path - elif guess_mimetype(document_path) == 'application/pdf': - temp_path = document_path - else: - temp = render_document(document_path, extension="pdf") - temp_path = temp.name + dispose_input = False + if extension != "pdf" and guess_mimetype(document_path) != 'application/pdf': + document_path = render_document(document_path, extension="pdf") + dispose_input = True # spawn subprocess and render the document - output = NamedTemporaryFile(suffix='.{}'.format(extension)) + output_path = None if settings.UNOCONV_ENABLE: timeout = None + _, output_path = tempfile.mkstemp(suffix=".{}".format(extension)) try: - def kill(process): - return process.kill() - unoconv = subprocess.Popen( [settings.UNOCONV_EXECUTABLE, "-v", "-e", "PageRange=1-2", - "-f", extension, "-o", output.name, temp_path], + "-f", extension, "-o", output_path, document_path], stdout=subprocess.PIPE, stderr=subprocess.PIPE ) - timeout = Timer(settings.UNOCONV_TIMEOUT, kill, [unoconv]) + timeout = Timer(settings.UNOCONV_TIMEOUT, unoconv.kill) timeout.start() stdout, stderr = unoconv.communicate() except Exception as e: @@ -88,8 +83,12 @@ def kill(process): finally: if timeout: timeout.cancel() + if dispose_input and document_path is not None: + os.remove(document_path) + else: + raise NotImplementedError("unoconv is disabled. Set 'UNOCONV_ENABLE' to enable.") - return output + return output_path def generate_thumbnail_content(image_path, size=(200, 150)): diff --git a/geonode/documents/tasks.py b/geonode/documents/tasks.py index 18f2606b9ce..27c6274cc3c 100644 --- a/geonode/documents/tasks.py +++ b/geonode/documents/tasks.py @@ -19,8 +19,6 @@ ######################################################################### import os -from os import access, R_OK -from os.path import isfile from geonode.celery_app import app from celery.utils.log import get_task_logger @@ -29,7 +27,8 @@ from geonode.documents.renderers import render_document from geonode.documents.renderers import generate_thumbnail_content from geonode.documents.renderers import ConversionError -from geonode.documents.renderers import MissingPILError + +from django.core.files.storage import default_storage as storage logger = get_task_logger(__name__) @@ -47,38 +46,48 @@ def create_document_thumbnail(self, object_id): logger.error("Document #{} does not exist.".format(object_id)) return + if not storage.exists(document.doc_file.name): + logger.error("Document #{} exists but its location could not be resolved.".format(object_id)) + return + image_path = None + image_file = None if document.is_image(): - image_path = document.doc_file.path + image_file = storage.open(document.doc_file.name, 'rb') elif document.is_file(): try: - image_file = render_document(document.doc_file.path) - image_path = image_file.name - except ConversionError as e: - logger.debug("Could not convert document #{}: {}." - .format(object_id, e)) - - try: - if image_path: - assert isfile(image_path) and access(image_path, R_OK) and os.stat(image_path).st_size > 0 - except (AssertionError, TypeError): - image_path = None + document_location = storage.path(document.doc_file.name) + except NotImplementedError as e: + logger.debug(e) + document_location = storage.url(document.doc_file.name) - if not image_path: - image_path = document.find_placeholder() - - if not image_path or not os.path.exists(image_path): - logger.debug("Could not find placeholder for document #{}" - .format(object_id)) - return + try: + image_path = render_document(document_location) + if image_path is not None: + image_file = open(image_path, 'rb') + else: + logger.debug("Failed to render document #{}".format(object_id)) + except ConversionError as e: + logger.debug("Could not convert document #{}: {}.".format(object_id, e)) + except NotImplementedError as e: + logger.debug("Failed to render document #{}: {}".format(object_id, e)) thumbnail_content = None try: - thumbnail_content = generate_thumbnail_content(image_path) - except MissingPILError: - logger.error('Pillow not installed, could not generate thumbnail.') + try: + thumbnail_content = generate_thumbnail_content(image_file) + except Exception: + thumbnail_content = generate_thumbnail_content(document.find_placeholder()) + except Exception as e: + logger.error("Could not generate thumbnail: {}".format(e)) return + finally: + if image_file is not None: + image_file.close() + + if image_path is not None: + os.remove(image_path) if not thumbnail_content: logger.warning("Thumbnail for document #{} empty.".format(object_id)) diff --git a/geonode/documents/tests.py b/geonode/documents/tests.py index 20bc0d07445..7875676f5d2 100644 --- a/geonode/documents/tests.py +++ b/geonode/documents/tests.py @@ -31,7 +31,6 @@ import gisdata from datetime import datetime -from django.conf import settings from django.urls import reverse from django.contrib.auth.models import Group from django.contrib.auth import get_user_model @@ -49,6 +48,7 @@ from geonode.maps.models import Map from geonode.layers.models import Layer from geonode.compat import ensure_string +from geonode.base.thumb_utils import get_thumbs from geonode.base.models import License, Region from geonode.documents import DocumentsAppConfig from geonode.documents.forms import DocumentFormMixin @@ -449,9 +449,6 @@ class DocumentModerationTestCase(GeoNodeBaseTestSupport): def setUp(self): super(DocumentModerationTestCase, self).setUp() - thumbs_dir = os.path.join(settings.MEDIA_ROOT, "thumbs") - if not os.path.exists(thumbs_dir): - os.mkdir(thumbs_dir) self.user = 'admin' self.passwd = 'admin' create_models(type=b'document') @@ -493,18 +490,24 @@ def test_moderated_upload(self): _d.delete() from geonode.documents.utils import delete_orphaned_document_files - delete_orphaned_document_files() + _, document_files_before = storage.listdir("documents") + deleted = delete_orphaned_document_files() + _, document_files_after = storage.listdir("documents") + self.assertTrue(len(deleted) > 0) + self.assertEqual(set(deleted), set(document_files_before) - set(document_files_after)) from geonode.base.utils import delete_orphaned_thumbs - delete_orphaned_thumbs() + thumb_files_before = get_thumbs() + deleted = delete_orphaned_thumbs() + thumb_files_after = get_thumbs() + self.assertTrue(len(deleted) > 0) + self.assertEqual(set(deleted), set(thumb_files_before) - set(thumb_files_after)) - documents_path = os.path.join(settings.MEDIA_ROOT, 'documents') - fn = os.path.join(documents_path, os.path.basename(input_path)) - self.assertFalse(os.path.isfile(fn)) + fn = os.path.join("documents", os.path.basename(input_path)) + self.assertFalse(storage.exists(fn)) - _, files = storage.listdir("thumbs") - _cnt = sum(1 for fn in files if uuid in fn) - self.assertTrue(_cnt == 0) + files = [thumb for thumb in get_thumbs() if uuid in thumb] + self.assertEqual(len(files), 0) with self.settings(ADMIN_MODERATE_UPLOADS=True): self.client.login(username=self.user, password=self.passwd) diff --git a/geonode/documents/utils.py b/geonode/documents/utils.py index 8ec1c71ba78..2c9b2c8cb84 100644 --- a/geonode/documents/utils.py +++ b/geonode/documents/utils.py @@ -26,7 +26,7 @@ import logging # Django functionality -from django.conf import settings +from django.core.files.storage import default_storage as storage # Geonode functionality from geonode.documents.models import Document @@ -38,14 +38,17 @@ def delete_orphaned_document_files(): """ Deletes orphaned files of deleted documents. """ - documents_path = os.path.join(settings.MEDIA_ROOT, 'documents') - for filename in os.listdir(documents_path): - fn = os.path.join(documents_path, filename) + deleted = [] + _, files = storage.listdir("documents") + + for filename in files: if Document.objects.filter(doc_file__contains=filename).count() == 0: - message = 'Removing orphan document {}'.format(fn) - logger.debug(message) + logger.debug("Deleting orphaned document " + filename) try: - os.remove(fn) - except OSError: - message = 'Could not delete file {}'.format(fn) - logger.error(message) + storage.delete(os.path.join("documents", filename)) + deleted.append(filename) + except NotImplementedError as e: + logger.error( + "Failed to delete orphaned document '{}': {}".format(filename, e)) + + return deleted diff --git a/geonode/layers/utils.py b/geonode/layers/utils.py index d30fa9c0659..b463a49de99 100644 --- a/geonode/layers/utils.py +++ b/geonode/layers/utils.py @@ -57,6 +57,7 @@ from geonode import GeoNodeException, geoserver, qgis_server from geonode.people.utils import get_valid_user from geonode.layers.models import UploadSession, LayerFile +from geonode.base.thumb_utils import thumb_exists from geonode.base.models import Link, SpatialRepresentationType, \ TopicCategory, Region, License, ResourceBase from geonode.layers.models import shp_exts, csv_exts, vec_exts, cov_exts, Layer @@ -955,15 +956,8 @@ def create_thumbnail(instance, thumbnail_remote_url, thumbnail_create_url=None, thumbnail_name = 'layer-%s-thumb.png' % instance.uuid elif isinstance(instance, Map): thumbnail_name = 'map-%s-thumb.png' % instance.uuid - _thumb_exists = False - try: - _thumbnail_dir = os.path.join(settings.MEDIA_ROOT, 'thumbs') - _thumbnail_path = os.path.join(_thumbnail_dir, thumbnail_name) - _thumb_exists = storage.exists(_thumbnail_path) - except Exception: - _thumbnail_dir = os.path.join(settings.STATIC_ROOT, 'thumbs') - _thumbnail_path = os.path.join(_thumbnail_dir, thumbnail_name) - _thumb_exists = storage.exists(_thumbnail_path) + + _thumb_exists = thumb_exists(thumbnail_name) if overwrite or not _thumb_exists: BBOX_DIFFERENCE_THRESHOLD = 1e-5 @@ -1233,15 +1227,20 @@ def create_gs_thumbnail_geonode(instance, overwrite=False, check_bbox=False): def delete_orphaned_layers(): """Delete orphaned layer files.""" - layer_path = os.path.join(settings.MEDIA_ROOT, 'layers') - for filename in os.listdir(layer_path): - fn = os.path.join(layer_path, filename) + deleted = [] + _, files = storage.listdir("layers") + + for filename in files: if LayerFile.objects.filter(file__icontains=filename).count() == 0: - logger.debug('Removing orphan layer file %s' % fn) + logger.debug("Deleting orphaned layer file " + filename) try: - os.remove(fn) - except OSError: - logger.warn('Could not delete file %s' % fn) + storage.delete(os.path.join("layers", filename)) + deleted.append(filename) + except NotImplementedError as e: + logger.error( + "Failed to delete orphaned layer file '{}': {}".format(filename, e)) + + return deleted def set_layers_permissions(permissions_name, resources_names=None, diff --git a/geonode/qgis_server/tests/test_helpers.py b/geonode/qgis_server/tests/test_helpers.py index 2f0de27ca2e..cde5f15ac18 100644 --- a/geonode/qgis_server/tests/test_helpers.py +++ b/geonode/qgis_server/tests/test_helpers.py @@ -35,6 +35,7 @@ import requests from django.conf import settings from django.core.management import call_command +from django.core.files.storage import default_storage as storage from django.urls import reverse from geonode import qgis_server @@ -251,22 +252,23 @@ def test_delete_orphan(self): # register file list layer_path = settings.QGIS_SERVER_CONFIG['layer_directory'] tiles_path = settings.QGIS_SERVER_CONFIG['tiles_directory'] - geonode_layer_path = os.path.join(settings.MEDIA_ROOT, 'layers') - qgis_layer_list = set(os.listdir(layer_path)) - tile_cache_list = set(os.listdir(tiles_path)) - geonode_layer_list = set(os.listdir(geonode_layer_path)) + # Use sets to perform difference operation later + qgis_layers = set(os.listdir(layer_path)) + tile_caches = set(os.listdir(tiles_path)) + # storage.listdir returns a (directories, files) tuple + geonode_layers = set(storage.listdir("layers")[1]) # run management command. should not change anything call_command('delete_orphaned_qgis_server_layers') - actual_qgis_layer_list = set(os.listdir(layer_path)) - actual_tile_cache_list = set(os.listdir(tiles_path)) - actual_geonode_layer_list = set(os.listdir(geonode_layer_path)) + actual_qgis_layers = set(os.listdir(layer_path)) + actual_tile_caches = set(os.listdir(tiles_path)) + actual_geonode_layers = set(storage.listdir("layers")[1]) - self.assertEqual(qgis_layer_list, actual_qgis_layer_list) - self.assertEqual(tile_cache_list, actual_tile_cache_list) - self.assertEqual(geonode_layer_list, actual_geonode_layer_list) + self.assertEqual(qgis_layers, actual_qgis_layers) + self.assertEqual(tile_caches, actual_tile_caches) + self.assertEqual(geonode_layers, actual_geonode_layers) # now create random file without reference shutil.copy( @@ -275,13 +277,12 @@ def test_delete_orphan(self): shutil.copytree( os.path.join(tiles_path, 'test_grid'), os.path.join(tiles_path, 'test_grid_copy')) - shutil.copy( - os.path.join(geonode_layer_path, 'test_grid.tif'), - os.path.join(geonode_layer_path, 'test_grid_copy.tif')) + with storage.open(os.path.join("layers", "test_grid.tif"), 'rb') as f: + storage.save(os.path.join("layers", "test_grid_copy.tif"), f) - actual_qgis_layer_list = set(os.listdir(layer_path)) - actual_tile_cache_list = set(os.listdir(tiles_path)) - actual_geonode_layer_list = set(os.listdir(geonode_layer_path)) + actual_qgis_layers = set(os.listdir(layer_path)) + actual_tile_caches = set(os.listdir(tiles_path)) + actual_geonode_layers = set(storage.listdir("layers")[1]) # run management command. This should clear the files. But preserve # registered files (the one that is saved in database) @@ -289,13 +290,13 @@ def test_delete_orphan(self): self.assertEqual( {'test_grid_copy.tif'}, - actual_qgis_layer_list - qgis_layer_list) + actual_qgis_layers - qgis_layers) self.assertEqual( {'test_grid_copy'}, - actual_tile_cache_list - tile_cache_list) + actual_tile_caches - tile_caches) self.assertEqual( {'test_grid_copy.tif'}, - actual_geonode_layer_list - geonode_layer_list) + actual_geonode_layers - geonode_layers) # cleanup uploaded.delete() diff --git a/geonode/qgis_server/tests/test_views.py b/geonode/qgis_server/tests/test_views.py index 622f462a4da..43c1ea16c9b 100644 --- a/geonode/qgis_server/tests/test_views.py +++ b/geonode/qgis_server/tests/test_views.py @@ -33,10 +33,12 @@ import gisdata from django.conf import settings from django.contrib.staticfiles.templatetags import staticfiles +from django.core.files.storage import default_storage as storage from django.urls import reverse from geonode import qgis_server from geonode.compat import ensure_string +from geonode.base.thumb_utils import thumb_path from geonode.decorators import on_ogc_backend from geonode.layers.utils import file_upload from geonode.maps.models import Map @@ -572,13 +574,12 @@ def test_thumbnail_links(self): response = self.client.get(remote_thumbnail_url) - thumbnail_dir = os.path.join(settings.MEDIA_ROOT, 'thumbs') - thumbnail_path = os.path.join(thumbnail_dir, 'layer-thumb.png') + thumbnail_path = thumb_path("layer-thumb.png") layer.save_thumbnail(thumbnail_path, ensure_string(response.content)) # Check thumbnail created - self.assertTrue(os.path.exists(thumbnail_path)) + self.assertTrue(storage.exists(thumbnail_path)) self.assertEqual(what(thumbnail_path), 'png') # Check that now we have thumbnail @@ -646,13 +647,12 @@ def test_map_thumbnail(self): response = self.client.get(remote_thumbnail_url) - thumbnail_dir = os.path.join(settings.MEDIA_ROOT, 'thumbs') - thumbnail_path = os.path.join(thumbnail_dir, 'map-thumb.png') + thumbnail_path = thumb_path("map-thumb.png") map.save_thumbnail(thumbnail_path, ensure_string(response.content)) # Check thumbnail created - self.assertTrue(os.path.exists(thumbnail_path)) + self.assertTrue(storage.exists(thumbnail_path)) self.assertEqual(what(thumbnail_path), 'png') # Check that now we have thumbnail diff --git a/geonode/settings.py b/geonode/settings.py index 8fd6464f62b..d88e645de4c 100644 --- a/geonode/settings.py +++ b/geonode/settings.py @@ -266,6 +266,7 @@ STATICFILES_LOCATION = 'static' MEDIAFILES_LOCATION = 'uploaded' +THUMBNAIL_LOCATION = 'thumbs' # Absolute path to the directory that holds media. # Example: "/home/media/media.lawrence.com/"