From 54b92dafd8c2f2d01480019bb42f216e3fc9ff47 Mon Sep 17 00:00:00 2001 From: ThrawnCA Date: Wed, 6 Apr 2022 09:20:44 +1000 Subject: [PATCH] Apply QGOV patchces to 2.9.7 base [QOL-7544] apply QGOV patches to 2.9.5 base update psycopg2 to support PostgreSQL 12, #5796 [QOL-9275] add config flag to control private dataset behaviour - If set to True, then return Forbidden or redirect to login page, instead of hiding the dataset's existence. Bump moment from 2.26.0 to 2.29.4 Bumps [moment](https://github.com/moment/moment) from 2.26.0 to 2.29.4. - [Release notes](https://github.com/moment/moment/releases) - [Changelog](https://github.com/moment/moment/blob/develop/CHANGELOG.md) - [Commits](https://github.com/moment/moment/compare/2.26.0...2.29.4) --- updated-dependencies: - dependency-name: moment dependency-type: direct:production ... Signed-off-by: dependabot[bot] fix logic import [DC-32816] update sqlparse to handle CSV contents more robustly, GitHub #5822 - This is already fixed on master but not backported to 2.9 [DC-31965] handle missing resources in activity stream - Copy cleanup from upstream Revert "Add test and changelog" This reverts commit 3745031fe213b7b742500568d692f552ccfd7b69. Revert "Check if locale exists on i18n JS API" This reverts commit c906ee1d0b22552de8d73fb576e9ab80f264ffb0. Revert "Fixing tests." This reverts commit 233be7e39ed353939699efbcbd33999ac04b0c3b. Add test and changelog Fixing tests. Check if locale exists on i18n JS API fix errno2 check for resource remove extra comma (cherry picked from commit fb27f2a42cd9b579e1989271f5ecf1a3f4c4e35d) [QOL-8785] only import Pylons response when relevant [QOL-8785] fix missing imports and make Flake8 happy [QOL-8785] reuse uploader mimetype if present [QOL-8785] store resource URL in uploader so we can retrieve it for Pylons downloads [QOL-8785] parse config flag as a boolean [QOL-8785] skip test for now [QOL-8785] drop test environment for post [QOL-8785] make mailer test syntax match other tests [QOL-8785] add blank password fields to pass validation [QOL-8785] restore 'save' flag to test form [QOL-8785] add debugging statement for test failure [QOL-8785] add username to test environment [QOL-8785] adjust test assertion for better failure message [QOL-8785] fix test assertion syntax [QOL-8785] adjust test username change parameters [QOL-8785] fix test assertion syntax [QOL-8785] fix test assertion syntax [QOL-8785] oops only add version to emails if flag is not set [QOL-8785] use string not bool for test config value [QOL-8785] add mail server parameter to test [QOL-8785] make post-reset landing page configurable [QOL-8785] use 'download' function to simplify controller [QOL-8785] drop 'REMOTE_USER' value in test as it's confusing the dashboard [QOL-8785] update assertion syntax for pytest [QOL-8515] get test app from fixtures instead of directly retrieving it [QOL-8515] make pep8 happy [QOL-8515] skip updating package modified timestamp if only updating resources - copied from 2.8.8 QGOV branch [QOL-8515] allow sysadmins to update usernames - copied from 2.8.8 QGOV branch [QOL-8515] optionally redirect to a different page after password resets - copied from 2.8.8 QGOV branch [QOL-8515] optionally hide CKAN version in site status and emails - copied from 2.8.8 QGOV branch [QOL-8515] make pep8 happy [QOL-8515] make pep8 happy [QOL-8515] fix import spelling [QOL-8515] make pep8 happy [QOL-8515] add functionality to delete uploaded files from storage on resource deletion [QOL-8515] adjust mocking of 'open' function to handle PY2 and PY3 [QOL-8515] oops provide necessary test import [QOL-8515] add testing of extra IUploader functions add metadata to uploader interface, next step, surface it via package * add metadata to IUploader interface's * Add new resource_file_metadata_show get api to surface metadata checks, usage case is archiver or similar verifying files have not change or disappeared * missed storage_path from file delete/download [QOL-8576] simplify imports [QOL-8576] fix isoformat syntax so lint is happy - raw string literals aren't allowed, but Python 2 'isoformat' can't handle unicode, so use the 'str' function, which is documented to have the same effect Fix isoformat argument for Python 2 [QOL-7544] handle storage directory already existing, #6521 remove white space lint check if dir already exists [QOL-7544] apply QGOV patches to 2.9.5 base - render full name as 'Displayed Name' since that better describes its purpose - handle race condition in creation of site user - ignore authentication when loading user profile during password reset --- CHANGELOG.rst | 19 ++++ ckan/cli/user.py | 10 +- ckan/config/middleware/pylons_app.py | 18 ++- ckan/controllers/package.py | 12 +- ckan/controllers/user.py | 9 +- ckan/i18n/en_AU/LC_MESSAGES/ckan.po | 8 +- ckan/lib/mailer.py | 3 +- ckan/lib/uploader.py | 145 ++++++++++++++++++++++-- ckan/logic/action/delete.py | 13 ++- ckan/logic/action/get.py | 50 +++++++- ckan/logic/action/patch.py | 1 + ckan/logic/action/update.py | 12 +- ckan/logic/auth/get.py | 4 + ckan/logic/validators.py | 5 +- ckan/plugins/interfaces.py | 63 +++++++++- ckan/templates/user/edit_user_form.html | 6 +- ckan/tests/controllers/test_package.py | 55 +++++++++ ckan/tests/controllers/test_user.py | 22 ++++ ckan/tests/lib/test_mailer.py | 38 +++++++ ckan/tests/logic/action/test_get.py | 79 ++++++++++++- ckan/views/dataset.py | 16 ++- ckan/views/resource.py | 17 ++- ckan/views/user.py | 12 +- package-lock.json | 6 +- package.json | 2 +- requirements-py2.in | 2 +- requirements-py2.txt | 2 +- requirements.in | 2 +- requirements.txt | 2 +- 29 files changed, 558 insertions(+), 75 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 3cf2a61cef7..ca3959e2802 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -9,6 +9,25 @@ Changelog .. towncrier release notes start +v.2.9.x 2022-xx-xx +================== + +Major features +-------------- + +- Update to Interface IUploader, on get_uploader and get_resource_uploader, new to include new method signature metadata() which can be utilised by archiver and other plugins instead of trying on local disk directly +- Add get api `resource_file_metadata_show` which takes resource id and returns { 'content_type': content_type, 'size': length, 'hash': hash } if found +- Add delete resource binary when resource id 'deleted' by author/sysadmin +- Redirect user to login if private dataset/resource and not logged in +- SysAdmin can edit username +- mailer.py, get.py - Hide ckan version on mail and response headers via config flag +- pylons_app,py - verify folder is directory and skip instead of always mkdir (nfs may hang on network) +- cleanup user reset to always go to reset success page even on error instead of homepage +- ckan.po - 'Full name' renamed to 'Displayed Name' +- patch.py - resource_patch context ignore auth passthrough for plugin integration to patch without user context (archiver/validator/etc) +- update.py - Reduce deadlock on resource -> package parent updates where not required :: On package_resource_reorder add context resources_only = True, On Package update, don't update package level metadata_modified if resources_only is True + + v.2.9.7 2022-10-26 ================== diff --git a/ckan/cli/user.py b/ckan/cli/user.py index 34f759e8f09..8e3fd100ba1 100644 --- a/ckan/cli/user.py +++ b/ckan/cli/user.py @@ -6,9 +6,7 @@ from six import text_type from datetime import datetime -import ckan.logic as logic -import ckan.plugins as plugin -import ckan.model as model +from ckan import logic, model, plugins as plugin from ckan.cli import error_shout from ckan.common import json @@ -56,8 +54,6 @@ def add_user(ctx, username, args): # pprint(u'Creating user: %r' % username) try: - import ckan.logic as logic - import ckan.model as model site_user = logic.get_action(u'get_site_user')({ u'model': model, u'ignore_auth': True}, @@ -90,7 +86,6 @@ def get_user_str(user): @user.command(u'list', short_help=u'List all users') def list_users(): - import ckan.model as model click.secho(u'Users:') users = model.Session.query(model.User).filter_by(state=u'active') click.secho(u'count = %i' % users.count()) @@ -102,7 +97,6 @@ def list_users(): @click.argument(u'username') @click.pass_context def remove_user(ctx, username): - import ckan.model as model if not username: error_shout(u'Please specify the username to be removed') return @@ -117,7 +111,6 @@ def remove_user(ctx, username): @user.command(u'show', short_help=u'Show user') @click.argument(u'username') def show_user(username): - import ckan.model as model if not username: error_shout(u'Please specify the username for the user') return @@ -128,7 +121,6 @@ def show_user(username): @user.command(u'setpass', short_help=u'Set password for the user') @click.argument(u'username') def set_password(username): - import ckan.model as model if not username: error_shout(u'Need name of the user.') return diff --git a/ckan/config/middleware/pylons_app.py b/ckan/config/middleware/pylons_app.py index 126fe21f022..a3c39cc9d07 100644 --- a/ckan/config/middleware/pylons_app.py +++ b/ckan/config/middleware/pylons_app.py @@ -154,12 +154,18 @@ def make_pylons_stack(conf, full_stack=True, static_files=True, storage_directory = uploader.get_storage_path() if storage_directory: path = os.path.join(storage_directory, 'storage') - try: - os.makedirs(path) - except OSError as e: - # errno 17 is file already exists - if e.errno != 17: - raise + + # check if the storage directory is already created by + # the user or third-party + if os.path.isdir(path): + pass + else: + try: + os.makedirs(path) + except OSError as e: + # errno 17 is file already exists + if e.errno != 17: + raise storage_app = StaticURLParser(path, cache_max_age=static_max_age) static_parsers.insert(0, storage_app) diff --git a/ckan/controllers/package.py b/ckan/controllers/package.py index ebfe82c70c0..33189a04e02 100644 --- a/ckan/controllers/package.py +++ b/ckan/controllers/package.py @@ -1166,19 +1166,11 @@ def resource_download(self, id, resource_id, filename=None): if rsc.get('url_type') == 'upload': upload = uploader.get_resource_uploader(rsc) - filepath = upload.get_path(rsc['id']) - fileapp = paste.fileapp.FileApp(filepath) try: - status, headers, app_iter = request.call_application(fileapp) + return upload.download(rsc['id'], filename) except OSError: + # includes FileNotFoundError abort(404, _('Resource data not found')) - response.headers.update(dict(headers)) - content_type, content_enc = mimetypes.guess_type( - rsc.get('url', '')) - if content_type: - response.headers['Content-Type'] = content_type - response.status = status - return app_iter elif 'url' not in rsc: abort(404, _('No download is available')) h.redirect_to(rsc['url']) diff --git a/ckan/controllers/user.py b/ckan/controllers/user.py index 87388ebad9f..c4671225c61 100644 --- a/ckan/controllers/user.py +++ b/ckan/controllers/user.py @@ -493,13 +493,15 @@ def request_reset(self): 'or contact an administrator for help') ) log.exception(e) - return h.redirect_to(u'/') + return h.redirect_to(config.get( + u'ckan.user_reset_landing_page', u'home.index')) # always tell the user it succeeded, because otherwise we reveal # which accounts exist or not h.flash_success( _(u'A reset link has been emailed to you ' '(unless the account specified does not exist)')) - return h.redirect_to(u'/') + return h.redirect_to(config.get( + u'ckan.user_reset_landing_page', u'home.index')) return render('user/request_reset.html') def perform_reset(self, id): @@ -541,7 +543,8 @@ def perform_reset(self, id): mailer.create_reset_key(user_obj) h.flash_success(_("Your password has been reset.")) - h.redirect_to(u'home.index') + return h.redirect_to(config.get( + u'ckan.user_reset_landing_page', u'home.index')) except NotAuthorized: h.flash_error(_('Unauthorized to edit user %s') % id) except NotFound as e: diff --git a/ckan/i18n/en_AU/LC_MESSAGES/ckan.po b/ckan/i18n/en_AU/LC_MESSAGES/ckan.po index 69ab3db4c34..b5fc9a3a482 100644 --- a/ckan/i18n/en_AU/LC_MESSAGES/ckan.po +++ b/ckan/i18n/en_AU/LC_MESSAGES/ckan.po @@ -2,10 +2,10 @@ # Copyright (C) 2020 ORGANIZATION # This file is distributed under the same license as the ckan project. # FIRST AUTHOR , 2020. -# +# # Translators: # AdriĆ  Mercader , 2020 -# +# #, fuzzy msgid "" msgstr "" @@ -4777,7 +4777,7 @@ msgstr "Change details" #: ckan/templates/user/edit_user_form.html:10 msgid "Full name" -msgstr "Full name" +msgstr "Displayed name" #: ckan/templates/user/edit_user_form.html:10 msgid "eg. Joe Bloggs" @@ -4931,7 +4931,7 @@ msgstr "username" #: ckan/templates/user/new_user_form.html:6 msgid "Full Name" -msgstr "Full Name" +msgstr "Displayed Name" #: ckan/templates/user/new_user_form.html:27 msgid "Create Account" diff --git a/ckan/lib/mailer.py b/ckan/lib/mailer.py index dc83eb0f1cd..e2276650ec1 100644 --- a/ckan/lib/mailer.py +++ b/ckan/lib/mailer.py @@ -58,7 +58,8 @@ def _mail_recipient(recipient_name, recipient_email, msg['From'] = _("%s <%s>") % (sender_name, mail_from) msg['To'] = u"%s <%s>" % (recipient_name, recipient_email) msg['Date'] = utils.formatdate(time()) - msg['X-Mailer'] = "CKAN %s" % ckan.__version__ + if not ckan.common.asbool(config.get('ckan.hide_version')): + msg['X-Mailer'] = "CKAN %s" % ckan.__version__ # Check if extension is setting reply-to via headers or use config option if reply_to and reply_to != '' and not msg['Reply-to']: msg['Reply-to'] = reply_to diff --git a/ckan/lib/uploader.py b/ckan/lib/uploader.py index f8a5f31bac0..848970a8d34 100644 --- a/ckan/lib/uploader.py +++ b/ckan/lib/uploader.py @@ -1,11 +1,13 @@ # encoding: utf-8 +import hashlib import os import cgi import datetime import logging import magic import mimetypes +import six from six.moves.urllib.parse import urlparse from werkzeug.datastructures import FileStorage as FlaskFileStorage @@ -13,7 +15,7 @@ import ckan.lib.munge as munge import ckan.logic as logic import ckan.plugins as plugins -from ckan.common import config, aslist +from ckan.common import config, aslist, request ALLOWED_UPLOAD_TYPES = (cgi.FieldStorage, FlaskFileStorage) MB = 1 << 20 @@ -108,6 +110,34 @@ def get_max_resource_size(): return _max_resource_size +def _file_hashnlength(local_path): + BLOCKSIZE = 65536 + hasher = hashlib.sha1() + length = 0 + + with open(local_path, 'rb') as afile: + buf = afile.read(BLOCKSIZE) + while len(buf) > 0: + hasher.update(buf) + length += len(buf) + + buf = afile.read(BLOCKSIZE) + + return (six.text_type(hasher.hexdigest()), length) + + +def _add_download_headers(file_path, mime_type, response): + """ Add appropriate 'Content-Type' and 'Content-Disposition' headers + to a a file download. + """ + if mime_type: + response.headers['Content-Type'] = mime_type + if mime_type != 'application/pdf': + file_name = file_path.split('/')[-1] + response.headers['Content-Disposition'] = \ + 'attachment; filename=' + file_name + + class Upload(object): def __init__(self, object_type, old_filename=None): ''' Setup upload by creating a subdirectory of the storage directory @@ -150,6 +180,7 @@ def update_data_dict(self, data_dict, url_field, file_field, clear_field): self.clear = data_dict.pop(clear_field, None) self.file_field = file_field self.upload_field_storage = data_dict.pop(file_field, None) + self.preserve_filename = data_dict.get('preserve_filename', None) if not self.storage_path: return @@ -157,7 +188,9 @@ def update_data_dict(self, data_dict, url_field, file_field, clear_field): if isinstance(self.upload_field_storage, ALLOWED_UPLOAD_TYPES): if self.upload_field_storage.filename: self.filename = self.upload_field_storage.filename - self.filename = str(datetime.datetime.utcnow()) + self.filename + if not self.preserve_filename: + self.filename = str(datetime.datetime.utcnow()) \ + + self.filename self.filename = munge.munge_filename_legacy(self.filename) self.filepath = os.path.join(self.storage_path, self.filename) data_dict[url_field] = self.filename @@ -224,6 +257,53 @@ def verify_type(self): if types and type_ not in types: raise logic.ValidationError(err) + def delete(self, filename): + ''' Delete file we are pointing at''' + if not filename.startswith('http'): + try: + # Delete file from storage_path and filename + os.remove(os.path.join(self.storage_path, filename)) + except OSError: + pass + + def download(self, filename): + ''' Generate file stream or redirect for file''' + filepath = os.path.join(self.storage_path, filename) + if hasattr(request, 'call_application'): + return self._pylons_download(filepath) + else: + return self._flask_download(id, filepath) + + def _pylons_download(self, filepath): + import paste + from pylons import response + fileapp = paste.fileapp.FileApp(filepath) + + status, headers, app_iter = request.call_application(fileapp) + response.headers.update(dict(headers)) + content_type, content_enc = mimetypes.guess_type(filepath) + _add_download_headers(filepath, content_type, response) + response.status = status + return app_iter + + def _flask_download(self, filepath): + import flask + resp = flask.send_file(filepath) + content_type, content_enc = mimetypes.guess_type(filepath) + _add_download_headers(filepath, content_type, resp) + return resp + + def metadata(self, filename): + ''' Return metadata of file''' + try: + filepath = os.path.join(self.storage_path, filename) + content_type, content_encoding = mimetypes.guess_type(filepath) + hash, length = _file_hashnlength(filepath) + return {'content_type': content_type, 'size': length, 'hash': hash} + except IOError as e: + log.error("Could not retrieve meta data, IOError thrown: %s", e) + return e + class ResourceUpload(object): def __init__(self, resource): @@ -243,13 +323,14 @@ def __init__(self, resource): self.filename = None self.mimetype = None - url = resource.get('url') + self.url = resource.get('url') upload_field_storage = resource.pop('upload', None) self.clear = resource.pop('clear_upload', None) - if url and config_mimetype_guess == 'file_ext' and urlparse(url).path: - self.mimetype = mimetypes.guess_type(url)[0] + if self.url and config_mimetype_guess == 'file_ext' \ + and urlparse(self.url).path: + self.mimetype = mimetypes.guess_type(self.url)[0] if bool(upload_field_storage) and \ isinstance(upload_field_storage, ALLOWED_UPLOAD_TYPES): @@ -275,7 +356,7 @@ def __init__(self, resource): self.mimetype = magic.from_buffer(self.upload_file.read(), mime=True) self.upload_file.seek(0, os.SEEK_SET) - except IOError as e: + except IOError: # Not that important if call above fails self.mimetype = None @@ -341,5 +422,55 @@ def upload(self, id, max_size=10): if self.clear: try: os.remove(filepath) - except OSError as e: + except OSError: pass + + def delete(self, id, filename=None): + ''' Delete file we are pointing at''' + try: + os.remove(self.get_path(id)) + except OSError: + pass + + def download(self, id, filename=None): + ''' Generate file stream or redirect for file''' + filepath = self.get_path(id) + if hasattr(request, 'call_application'): + return self._pylons_download(filepath) + else: + return self._flask_download(id, filepath) + + def _pylons_download(self, filepath): + import paste + from pylons import response + fileapp = paste.fileapp.FileApp(filepath) + # may throw OSError, which should be handled by the controller + # which will wrap it with abort(404, _('Resource data not found')) + status, headers, app_iter = request.call_application(fileapp) + + response.headers.update(dict(headers)) + content_type, content_enc = mimetypes.guess_type( + self.url) + _add_download_headers(filepath, content_type, response) + response.status = status + return app_iter + + def _flask_download(self, resource_id, filepath): + import flask + resp = flask.send_file(filepath) + _add_download_headers(filepath, self.mimetype, resp) + return resp + + def metadata(self, id, filename=None): + ''' Return meta details of file''' + try: + filepath = self.get_path(id) + if self.mimetype: + content_type = self.mimetype + else: + content_type = mimetypes.guess_type(self.url)[0] + hash, length = _file_hashnlength(filepath) + return {'content_type': content_type, 'size': length, 'hash': hash} + except IOError as e: + log.error("Could not retrieve meta data, IOError thrown: %s", e) + return e diff --git a/ckan/logic/action/delete.py b/ckan/logic/action/delete.py index 6d4adcae632..d2a9771f2cf 100644 --- a/ckan/logic/action/delete.py +++ b/ckan/logic/action/delete.py @@ -7,13 +7,11 @@ import sqlalchemy as sqla import six -import ckan.lib.jobs as jobs +from ckan.lib import api_token, dictization, jobs, uploader import ckan.logic import ckan.logic.action import ckan.plugins as plugins -import ckan.lib.dictization as dictization import ckan.lib.dictization.model_dictize as model_dictize -import ckan.lib.api_token as api_token from ckan import authz from ckan.common import _ @@ -196,6 +194,15 @@ def resource_delete(context, data_dict): plugin.before_delete(context, data_dict, pkg_dict.get('resources', [])) + # Delete file if it was uploaded + if entity.get('url_type') == 'upload': + upload = uploader.get_resource_uploader(entity) + # Don't break if old plugin/interface is found + if hasattr(upload, "delete"): + upload.delete(id) + else: + logging.warning("%s does not have delete function, could not cleanup: %s", type(upload).__name__, id) + pkg_dict = _get_action('package_show')(context, {'id': package_id}) if pkg_dict.get('resources'): diff --git a/ckan/logic/action/get.py b/ckan/logic/action/get.py index f767c443e16..6fad464de5c 100644 --- a/ckan/logic/action/get.py +++ b/ckan/logic/action/get.py @@ -18,14 +18,11 @@ import ckan.logic.action import ckan.logic.schema import ckan.lib.dictization.model_dictize as model_dictize -import ckan.lib.jobs as jobs import ckan.lib.navl.dictization_functions import ckan.model as model import ckan.model.misc as misc import ckan.plugins as plugins -import ckan.lib.search as search -import ckan.lib.plugins as lib_plugins -import ckan.lib.datapreview as datapreview +from ckan.lib import datapreview, jobs, plugins as lib_plugins, search, uploader import ckan.authz as authz from ckan.common import _ @@ -1150,6 +1147,45 @@ def resource_view_list(context, data_dict): return model_dictize.resource_view_list_dictize(resource_views, context) +def resource_file_metadata_show(context, data_dict): + '''Get file metadata from a resource if it was uploaded. + + :param id: the id of the resource + :type id: string + + :returns: the meta data of resource file { 'content_type': content_type, 'size': length, 'hash': hash } + :rtype: dictionary + ''' + model = context['model'] + id = _get_or_bust(data_dict, 'id') + + resource = model.Resource.get(id) + resource_context = dict(context, resource=resource) + + if not resource: + raise NotFound + + _check_access('resource_file_metadata_show', resource_context, data_dict) + + pkg_dict = logic.get_action('package_show')( + dict(context), + {'id': resource.package.id, + 'include_tracking': asbool(data_dict.get('include_tracking', False))}) + + for resource_dict in pkg_dict['resources']: + if resource_dict['id'] == id: + break + else: + log.error('Could not find resource %s after all', id) + raise NotFound(_('Resource was not found.')) + + upload = uploader.get_resource_uploader(resource_dict) + try: + return upload.metadata(id) + except IOError: + raise NotFound + + def _group_or_org_show(context, data_dict, is_org=False): model = context['model'] id = _get_or_bust(data_dict, 'id') @@ -2422,15 +2458,17 @@ def status_show(context, data_dict): ''' _check_access('status_show', context, data_dict) - return { + site_info = { 'site_title': config.get('ckan.site_title'), 'site_description': config.get('ckan.site_description'), 'site_url': config.get('ckan.site_url'), - 'ckan_version': ckan.__version__, 'error_emails_to': config.get('email_to'), 'locale_default': config.get('ckan.locale_default'), 'extensions': config.get('ckan.plugins').split(), } + if not asbool(config.get('ckan.hide_version')): + site_info['ckan_version'] = ckan.__version__ + return site_info def vocabulary_list(context, data_dict): diff --git a/ckan/logic/action/patch.py b/ckan/logic/action/patch.py index 7d41f2d9d40..b9a30055103 100644 --- a/ckan/logic/action/patch.py +++ b/ckan/logic/action/patch.py @@ -70,6 +70,7 @@ def resource_patch(context, data_dict): 'user': context['user'], 'auth_user_obj': context['auth_user_obj'], 'for_update': True, + 'ignore_auth': context.get('ignore_auth', False), } resource_dict = _get_action('resource_show')( diff --git a/ckan/logic/action/update.py b/ckan/logic/action/update.py index c926a009c81..abd9a362378 100644 --- a/ckan/logic/action/update.py +++ b/ckan/logic/action/update.py @@ -310,10 +310,11 @@ def package_update(context, data_dict): model.Session.rollback() raise ValidationError(errors) - #avoid revisioning by updating directly - model.Session.query(model.Package).filter_by(id=pkg.id).update( - {"metadata_modified": datetime.datetime.utcnow()}) - model.Session.refresh(pkg) + if not context.get('resources_only', False): + #avoid revisioning by updating directly + model.Session.query(model.Package).filter_by(id=pkg.id).update( + {"metadata_modified": datetime.datetime.utcnow()}) + model.Session.refresh(pkg) pkg = model_save.package_dict_save(data, context) @@ -568,6 +569,7 @@ def package_resource_reorder(context, data_dict): new_resources = ordered_resources + existing_resources package_dict['resources'] = new_resources + context['resources_only'] = True _check_access('package_resource_reorder', context, package_dict) _get_action('package_update')(context, package_dict) @@ -815,7 +817,7 @@ def user_update(context, data_dict): '''Update a user account. Normal users can only update their own user accounts. Sysadmins can update - any user account. Can not modify exisiting user's name. + any user account and modify existing usernames. .. note:: Update methods may delete parameters not explicitly provided in the data_dict. If you want to edit only a specific attribute use `user_patch` diff --git a/ckan/logic/auth/get.py b/ckan/logic/auth/get.py index 967776c6d6a..ef3dda222d1 100644 --- a/ckan/logic/auth/get.py +++ b/ckan/logic/auth/get.py @@ -161,6 +161,10 @@ def resource_view_list(context, data_dict): return authz.is_authorized('resource_show', context, data_dict) +def resource_file_metadata_show(context, data_dict): + return authz.is_authorized('resource_show', context, data_dict) + + def group_show(context, data_dict): user = context.get('user') group = get_group_object(context, data_dict) diff --git a/ckan/logic/validators.py b/ckan/logic/validators.py index fdc0820e946..70eccd47832 100644 --- a/ckan/logic/validators.py +++ b/ckan/logic/validators.py @@ -583,10 +583,13 @@ def user_name_validator(key, data, errors, context): return else: # Otherwise return an error: there's already another user with that - # name, so you can create a new user with that name or update an + # name, so you can't create a new user with that name or update an # existing user's name to that name. errors[key].append(_('That login name is not available.')) elif user_obj_from_context: + requester = context.get('auth_user_obj', None) + if requester and requester.sysadmin == True: + return old_user = model.User.get(user_obj_from_context.id) if old_user is not None and old_user.state != model.State.PENDING: errors[key].append(_('That login name can not be modified.')) diff --git a/ckan/plugins/interfaces.py b/ckan/plugins/interfaces.py index 39a3ce15ca2..06598ac3f06 100644 --- a/ckan/plugins/interfaces.py +++ b/ckan/plugins/interfaces.py @@ -1757,8 +1757,10 @@ def get_uploader(self, upload_to, old_filename): ``update_data_dict(data_dict, url_field, file_field, clear_field)`` - Allow the data_dict to be manipulated before it reaches any - validators. + Allow the data_dict to be manipulated before it reaches any validators. + Optionally, this data_dict can have the following attribute set: + preserve_filename (boolean): If false, then the current UTC time + (datetime.utcnow) will be prepended to the filename. :param data_dict: data_dict to be updated :type data_dict: dictionary @@ -1768,6 +1770,8 @@ def get_uploader(self, upload_to, old_filename): :param file_field: name of the key where the FieldStorage is kept (i.e the field where the file data actually is). + Must be an instance of either cgi.FieldStorage or + werkzeug.datastructures.FileStorage :type file_field: string :param clear_field: name of a boolean field which requests the upload @@ -1781,6 +1785,30 @@ def get_uploader(self, upload_to, old_filename): :param max_size: upload size can be limited by this value in MBs. :type max_size: int + ``delete(filename)`` + + Perform a delete. + + :param filename: The filename to use when deleting a file, + may be None depending on the storage provider. + :type filename: string + + ``download(filename)`` + + Provide redirect url or file stream + + :param filename: The filename to use when downloading a file, + may be None depending on the storage provider. + :type filename: string + + ``metadata(filename)`` + + Collect metadata of file. Returns dict + { 'content_type': content_type, 'size': length, 'hash': hash } + Throws IOError if file does not exist + + :param filename: The filename to use when collecting metadata + :type filename: string ''' def get_resource_uploader(self): @@ -1818,6 +1846,37 @@ def get_resource_uploader(self): :param id: resource id :type id: string + ``delete(id, filename)`` + + Perform a delete. + + :param id: resource id, used to delete file + :type id: string + :param filename: The filename to use when storing a resource, + may be None depending on the storage provider. + :type filename: string + + ``download(id, filename)`` + + Provide redirect url or file stream + + :param id: resource id, used to locate file + :type id: string + :param filename: The filename to use when storing a resource, + may be None depending on the storage provider. + :type filename: string + + ``metadata(id, filename)`` + + Collect metadata of resource. Returns dict + { 'content_type': content_type, 'size': length, 'hash': hash } + Throws IOError if file does not exist + + :param id: resource id, used to locate file + :type id: string + :param filename: The filename to use when collecting metadata, + may be None depending on the storage provider. + :type filename: string ''' diff --git a/ckan/templates/user/edit_user_form.html b/ckan/templates/user/edit_user_form.html index f63298b2aa3..726d55e39aa 100644 --- a/ckan/templates/user/edit_user_form.html +++ b/ckan/templates/user/edit_user_form.html @@ -5,7 +5,11 @@
{{ _('Change details') }} - {{ form.input('name', label=_('Username'), id='field-username', value=data.name, error=errors.name, classes=['control-medium'], attrs={'readonly': '', 'class': 'form-control'}) }} + {% if c.userobj.sysadmin == True %} + {{ form.input('name', label=_('Username'), id='field-username', value=data.name, error=errors.name, classes=['control-medium'], is_required=true) }} + {% else %} + {{ form.input('name', label=_('Username'), id='field-username', value=data.name, error=errors.name, classes=['control-medium'], attrs={'readonly': '', 'class': 'form-control'}) }} + {% endif %} {{ form.input('fullname', label=_('Full name'), id='field-fullname', value=data.fullname, error=errors.fullname, placeholder=_('eg. Joe Bloggs'), classes=['control-medium']) }} diff --git a/ckan/tests/controllers/test_package.py b/ckan/tests/controllers/test_package.py index f2872c22ce3..55b2385971d 100644 --- a/ckan/tests/controllers/test_package.py +++ b/ckan/tests/controllers/test_package.py @@ -752,6 +752,31 @@ def test_read_dataset_as_it_used_to_be_but_is_unmigrated(self, app): status=404, ) + # Test the 'reveal_private_datasets' flag + + @pytest.mark.ckan_config("ckan.auth.reveal_private_datasets", "True") + def test_anonymous_users_cannot_read_private_datasets(self, app): + organization = factories.Organization() + dataset = factories.Dataset(owner_org=organization["id"], private=True) + response = app.get( + url_for("dataset.read", id=dataset["name"]), + follow_redirects=False + ) + assert 302 == response.status_code + assert '/login' in response.headers[u"Location"] + + @pytest.mark.ckan_config("ckan.auth.reveal_private_datasets", "True") + def test_user_not_in_organization_cannot_read_private_datasets(self, app): + user = factories.User() + organization = factories.Organization() + dataset = factories.Dataset(owner_org=organization["id"], private=True) + response = app.get( + url_for("dataset.read", id=dataset["name"]), + extra_environ={"REMOTE_USER": six.ensure_str(user["name"])}, + status=403, + ) + assert 403 == response.status_code + @pytest.mark.usefixtures("clean_db", "with_request_context") class TestPackageDelete(object): @@ -1309,6 +1334,36 @@ def test_anonymous_users_cannot_read_private_datasets(self, app): ) assert 404 == response.status_code + # Test the 'reveal_private_datasets' flag + + @pytest.mark.ckan_config("ckan.auth.reveal_private_datasets", "True") + def test_user_not_in_organization_cannot_read_resources_in_private_dataset(self, app): + user = factories.User() + env = {"REMOTE_USER": six.ensure_str(user["name"])} + organization = factories.Organization() + dataset = factories.Dataset(owner_org=organization["id"], private=True) + resource = factories.Resource(package_id=dataset["id"]) + + url = url_for( + "{}_resource.read".format(dataset["type"]), + id=dataset["id"], resource_id=resource["id"] + ) + + response = app.get(url, status=403, extra_environ=env) + + @pytest.mark.ckan_config("ckan.auth.reveal_private_datasets", "True") + def test_anonymous_users_cannot_read_resources_in_private_datasets(self, app): + organization = factories.Organization() + dataset = factories.Dataset(owner_org=organization["id"], private=True) + resource = factories.Resource(package_id=dataset["id"]) + response = app.get( + url_for("{}_resource.read".format(dataset["type"]), + id=dataset["id"], resource_id=resource["id"]), + follow_redirects=False + ) + assert 302 == response.status_code + assert '/login' in response.headers[u"Location"] + @pytest.mark.usefixtures("clean_db", "with_request_context") class TestResourceDelete(object): diff --git a/ckan/tests/controllers/test_user.py b/ckan/tests/controllers/test_user.py index 680287dd79a..556a5586a64 100644 --- a/ckan/tests/controllers/test_user.py +++ b/ckan/tests/controllers/test_user.py @@ -352,6 +352,28 @@ def test_edit_user_logged_in_username_change_by_id(self, app): assert "That login name can not be modified" in response + @pytest.mark.skip(reason="Works in actual usage but test runs into context problems") + def test_edit_user_logged_in_username_change_by_sysadmin(self, app): + user_pass = 'TestPassword1' + user = factories.Sysadmin(password=user_pass) + + # Have to do an actual login as this test relies on repoze cookie handling. + response = app.post("/login_generic?came_from=/user/logged_in", data={ + "login": user["name"], + "password": user_pass, + "save": "" + }) + + response = app.post(url=url_for("user.edit", id=user["name"]), data={ + "email": user["email"], + "save": "", + "old_password": user_pass, + "password1": "", + "password2": "", + "name": "new-name", + }) + assert 'Profile updated' in response.body + def test_perform_reset_for_key_change(self, app): password = "TestPassword1" params = {"password1": password, "password2": password} diff --git a/ckan/tests/lib/test_mailer.py b/ckan/tests/lib/test_mailer.py index 7fddbd5c4ff..be35da73ae5 100644 --- a/ckan/tests/lib/test_mailer.py +++ b/ckan/tests/lib/test_mailer.py @@ -2,6 +2,7 @@ import base64 import hashlib +import logging import pytest import six from email.header import decode_header @@ -15,6 +16,7 @@ import ckan.tests.helpers as helpers from ckan.common import config +log = logging.getLogger(__name__) class MailerBase(object): def mime_encode(self, msg, recipient_name, subtype='plain'): @@ -34,6 +36,7 @@ def get_email_subject(self, msg): @pytest.mark.usefixtures("with_request_context", "clean_db") class TestMailer(MailerBase): def test_mail_recipient(self, mail_server): + config['ckan.hide_version'] = 'False' user = factories.User() msgs = mail_server.get_smtp_messages() assert msgs == [] @@ -63,6 +66,41 @@ def test_mail_recipient(self, mail_server): ) assert expected_body in msg[3] + def test_mail_recipient_hiding_mailer(self, mail_server): + user = factories.User() + config['ckan.hide_version'] = 'True' + + msgs = mail_server.get_smtp_messages() + assert msgs == [] + + # send email + test_email = { + "recipient_name": "Bob", + "recipient_email": user["email"], + "subject": "Meeting", + "body": "The meeting is cancelled.", + "headers": {"header1": "value1"}, + } + mailer.mail_recipient(**test_email) + + # check it went to the mock smtp server + msgs = mail_server.get_smtp_messages() + assert len(msgs) == 1 + msg = msgs[0] + assert msg[1] == config["smtp.mail_from"] + assert msg[2] == [test_email["recipient_email"]] + assert list(test_email["headers"].keys())[0] in msg[3], msg[3] + assert list(test_email["headers"].values())[0] in msg[3], msg[3] + assert test_email["subject"] in msg[3], msg[3] + assert msg[3].startswith('Content-Type: text/plain'), msg[3] + log.debug("Checking %s for X-Mailer header", msg) + assert 'X-Mailer' not in msg[3], \ + "Should have skipped X-Mailer header" + expected_body = self.mime_encode( + test_email["body"], test_email["recipient_name"] + ) + assert expected_body in msg[3] + def test_mail_recipient_with_html(self, mail_server): user = factories.User() diff --git a/ckan/tests/logic/action/test_get.py b/ckan/tests/logic/action/test_get.py index 82dd9193734..0c2b0b5c2b0 100644 --- a/ckan/tests/logic/action/test_get.py +++ b/ckan/tests/logic/action/test_get.py @@ -4,8 +4,9 @@ import re import copy +import mock import pytest -from six import text_type +from six import PY3, text_type from six.moves import xrange from ckan import model @@ -16,7 +17,17 @@ import ckan.tests.helpers as helpers import ckan.plugins.toolkit as tk from ckan import __version__ +from ckan.lib import uploader as ckan_uploader from ckan.lib.search.common import SearchError +from pyfakefs import fake_filesystem + +fs = fake_filesystem.FakeFilesystem() +fake_os = fake_filesystem.FakeOsModule(fs) +fake_open = fake_filesystem.FakeFileOpen(fs) +if PY3: + mock_open_path = 'builtins.open' +else: + mock_open_path = '__builtin__.open' @pytest.mark.usefixtures("clean_db", "with_request_context") @@ -2446,6 +2457,58 @@ def test_resource_view_show_id_not_found(self): helpers.call_action("resource_view_show", id="does_not_exist") +def mock_open_if_open_fails(*args, **kwargs): + try: + return real_open(*args, **kwargs) + except (OSError, IOError): + return fake_open(*args, **kwargs) + + +class ShowResourceFileMetadata(object): + + @helpers.change_config('ckan.storage_path', '/doesnt_exist') + @mock.patch(mock_open_path, side_effect=mock_open_if_open_fails) + @mock.patch.object(ckan_uploader, 'os', fake_os) + @mock.patch.object(ckan_uploader, '_storage_path', new='/doesnt_exist') + def test_resource_file_metadata_show_meta_data_returned(self, _): + user = factories.User() + pkg = factories.Dataset(creator_user_id=user['id']) + + url = url_for( + controller='api', + action='action', + logic_function='resource_create', ver='/3') + env = {'REMOTE_USER': user['name'].encode('ascii')} + postparams = { + 'name': 'test-flask-upload', + 'package_id': pkg['id'] + } + upload_content = 'test-content,and2' + upload_info = ('upload', 'test-upload.csv', upload_content) + app = self._get_test_app() + resp = app.post( + url, params=postparams, + upload_files=[upload_info], + extra_environ=env + ) + result = resp.json['result'] + eq('upload', result['url_type']) + eq(len(upload_content), result['size']) + + metaresult = helpers.call_action('resource_file_metadata_show', id=result['id']) + eq(len(upload_content), metaresult['size']) + eq('text/csv', metaresult['content_type']) + eq('', metaresult['hash']) + + def test_resource_file_metadata_show_id_missing(self): + with pytest.raises(logic.ValidationError): + helpers.call_action('resource_file_metadata_show') + + def test_resource_file_metadata_show_id_not_found(self): + with pytest.raises(logic.NotFound): + helpers.call_action('resource_file_metadata_show', id='does_not_exist') + + class TestGetHelpShow(object): def test_help_show_basic(self): @@ -2820,6 +2883,20 @@ def test_status_show(self): assert type(status[u"extensions"]) == list assert status[u"extensions"] == [u"stats"] + @helpers.change_config('ckan.hide_version', 'True') + def test_status_show_hiding_version(self): + + status = helpers.call_action(u'status_show') + + assert 'ckan_version' not in status, "Should have skipped CKAN version" + assert status[u'site_url'] == u'http://test.ckan.net' + assert status[u'site_title'] == u'CKAN' + assert status[u'site_description'] == u'' + assert status[u'locale_default'] == u'en' + + assert type(status[u'extensions']) == list + assert status[u'extensions'] == [u'stats'] + class TestJobList(helpers.FunctionalRQTestBase): def test_all_queues(self): diff --git a/ckan/views/dataset.py b/ckan/views/dataset.py index e057dcaab93..14ecd180f5f 100644 --- a/ckan/views/dataset.py +++ b/ckan/views/dataset.py @@ -8,7 +8,6 @@ from flask import Blueprint from flask.views import MethodView from werkzeug.datastructures import MultiDict -from ckan.common import asbool import six from six import string_types, text_type @@ -20,7 +19,7 @@ import ckan.model as model import ckan.plugins as plugins import ckan.authz as authz -from ckan.common import _, config, g, request +from ckan.common import _, asbool, config, g, request from ckan.views.home import CACHE_PARAMETERS from ckan.lib.plugins import lookup_package_plugin from ckan.lib.render import TemplateNotFound @@ -434,7 +433,18 @@ def read(package_type, id): try: pkg_dict = get_action(u'package_show')(context, data_dict) pkg = context[u'package'] - except (NotFound, NotAuthorized): + except NotFound: + return base.abort(404, _(u'Dataset not found')) + except NotAuthorized: + if asbool(config.get(u'ckan.auth.reveal_private_datasets')): + if context['user']: + return base.abort( + 403, _(u'Unauthorized to read package %s') % id) + else: + return h.redirect_to( + u'user.login', + came_from=h.url_for(u'{}.read'.format(package_type), id=id) + ) return base.abort(404, _(u'Dataset not found')) g.pkg_dict = pkg_dict diff --git a/ckan/views/resource.py b/ckan/views/resource.py index 3debf7d8e1a..49e7e7d7418 100644 --- a/ckan/views/resource.py +++ b/ckan/views/resource.py @@ -15,7 +15,7 @@ import ckan.logic as logic import ckan.model as model import ckan.plugins as plugins -from ckan.common import _, g, request +from ckan.common import _, asbool, config, g, request from ckan.views.home import CACHE_PARAMETERS from ckan.views.dataset import ( _get_pkg_template, _get_package_type, _setup_template_variables @@ -59,7 +59,20 @@ def read(package_type, id, resource_id): try: package = get_action(u'package_show')(context, {u'id': id}) - except (NotFound, NotAuthorized): + except NotFound: + return base.abort(404, _(u'Dataset not found')) + except NotAuthorized: + if asbool(config.get(u'ckan.auth.reveal_private_datasets')): + if context['user']: + return base.abort( + 403, _(u'Unauthorized to read resource %s') % resource_id) + else: + return h.redirect_to( + u'user.login', + came_from=h.url_for( + u'{}_resource.read'.format(package_type), + id=id, resource_id=resource_id) + ) return base.abort(404, _(u'Dataset not found')) activity_id = request.params.get(u'activity_id') if activity_id: diff --git a/ckan/views/user.py b/ckan/views/user.py index b09b2faff44..31612d9e136 100644 --- a/ckan/views/user.py +++ b/ckan/views/user.py @@ -666,14 +666,16 @@ def post(self): h.flash_error(_(u'Error sending the email. Try again later ' 'or contact an administrator for help')) log.exception(e) - return h.redirect_to(u'home.index') + return h.redirect_to(config.get( + u'ckan.user_reset_landing_page', u'home.index')) # always tell the user it succeeded, because otherwise we reveal # which accounts exist or not h.flash_success( _(u'A reset link has been emailed to you ' '(unless the account specified does not exist)')) - return h.redirect_to(u'home.index') + return h.redirect_to(config.get( + u'ckan.user_reset_landing_page', u'home.index')) def get(self): self._prepare() @@ -695,10 +697,13 @@ def _prepare(self, id): except logic.NotAuthorized: base.abort(403, _(u'Unauthorized to reset password.')) + # user_show is restricted so ignore_auth + context[u'ignore_auth'] = True try: user_dict = logic.get_action(u'user_show')(context, {u'id': id}) except logic.NotFound: base.abort(404, _(u'User not found')) + del context[u'ignore_auth'] user_obj = context[u'user_obj'] g.reset_key = request.params.get(u'key') if not mailer.verify_reset_link(user_obj, g.reset_key): @@ -747,7 +752,8 @@ def post(self, id): ) mailer.create_reset_key(context[u'user_obj']) h.flash_success(_(u'Your password has been reset.')) - return h.redirect_to(u'home.index') + return h.redirect_to(config.get( + u'ckan.user_reset_landing_page', u'home.index')) except logic.NotAuthorized: h.flash_error(_(u'Unauthorized to edit user %s') % id) except logic.NotFound: diff --git a/package-lock.json b/package-lock.json index a4883a134bd..148d8e365d0 100644 --- a/package-lock.json +++ b/package-lock.json @@ -3001,9 +3001,9 @@ } }, "moment": { - "version": "2.26.0", - "resolved": "https://registry.npmjs.org/moment/-/moment-2.26.0.tgz", - "integrity": "sha512-oIixUO+OamkUkwjhAVE18rAMfRJNsNe/Stid/gwHSOfHrOtw9EhAY2AHvdKZ/k/MggcYELFCJz/Sn2pL8b8JMw==" + "version": "2.29.4", + "resolved": "https://registry.npmjs.org/moment/-/moment-2.29.4.tgz", + "integrity": "sha512-5LC9SOxjSc2HF6vO2CyuTDNivEdoz2IvyJJGj6X8DJ0eFyfszE0QiEd+iXmBvUP3WHxSjFH/vIsA0EN00cgr8w==" }, "ms": { "version": "2.0.0", diff --git a/package.json b/package.json index b3484febe4e..f4ce6d9d761 100644 --- a/package.json +++ b/package.json @@ -13,7 +13,7 @@ "gulp-sourcemaps": "^2.6.5", "jquery": "^3.5.0", "less": "^3.11.2", - "moment": "^2.24.0", + "moment": "^2.29.4", "qs": "^6.9.4" }, "devDependencies": { diff --git a/requirements-py2.in b/requirements-py2.in index b8d4f662f58..8687e6c2f60 100644 --- a/requirements-py2.in +++ b/requirements-py2.in @@ -35,7 +35,7 @@ rq==1.0 simplejson==3.10.0 sqlalchemy-migrate==0.12.0 SQLAlchemy==1.3.5 -sqlparse==0.3.0 +sqlparse==0.3.1 tzlocal==1.3 unicodecsv>=0.9 webassets==0.12.1 diff --git a/requirements-py2.txt b/requirements-py2.txt index 81574618ea1..a81111079d9 100644 --- a/requirements-py2.txt +++ b/requirements-py2.txt @@ -59,7 +59,7 @@ simplejson==3.10.0 six==1.12.0 # via bleach, pastescript, python-dateutil, pyutilib.component.core, sqlalchemy-migrate sqlalchemy-migrate==0.12.0 sqlalchemy==1.3.5 -sqlparse==0.3.0 +sqlparse==0.3.1 tempita==0.5.2 # via pylons, sqlalchemy-migrate, weberror tzlocal==1.3 unicodecsv==0.14.1 diff --git a/requirements.in b/requirements.in index a03d77fba2f..271a6468e09 100644 --- a/requirements.in +++ b/requirements.in @@ -29,7 +29,7 @@ Routes==1.13 rq==1.0 simplejson==3.10.0 SQLAlchemy==1.3.5 -sqlparse==0.3.0 +sqlparse==0.4.2 tzlocal==1.3 unicodecsv>=0.9 watchdog==2.1.5 # For py3 support diff --git a/requirements.txt b/requirements.txt index 3a3ff8eeae3..620a4b1dc79 100644 --- a/requirements.txt +++ b/requirements.txt @@ -47,7 +47,7 @@ shutilwhich==1.1.0 # via fanstatic simplejson==3.10.0 # via -r requirements.in six==1.16.0 # via bleach, python-dateutil, pyutilib sqlalchemy==1.3.5 # via -r requirements.in, alembic -sqlparse==0.3.0 # via -r requirements.in +sqlparse==0.4.2 # via -r requirements.in tzlocal==1.3 # via -r requirements.in unicodecsv==0.14.1 # via -r requirements.in urllib3==1.26.6 # via requests