Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prevent hosted resource's url from being set by client side #2545

Merged
merged 8 commits into from
Oct 4, 2020
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@

## Current (in progress)

- Nothing yet
- :warning: Resources and community resources creation API change [#2545](https://github.com/opendatateam/udata/pull/2545):
- Remove the RESOURCES_FILE_ALLOWED_DOMAINS setting and mechanism.
- The community resource's/resource's url could be set from the client side, event in the case of a hosted one, which is illogical.
A hosted community resource's/resource's url should only be the matter of the backend.
- Consequently, the POST endpoint of the community resources/resources API is only meant for the remote ones.
- The PUT endpoint of the community resources/resources API will take the existing resource's url to override the one sent by the client.

## 2.3.0 (2020-09-29)

Expand Down
9 changes: 9 additions & 0 deletions udata/core/dataset/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,8 @@ def post(self, dataset):
ResourceEditPermission(dataset).test()
form = api.validate(ResourceForm)
resource = Resource()
if form._fields.get('filetype').data != 'remote':
return 'This endpoint only supports remote resources', 400
form.populate_obj(resource)
dataset.add_resource(resource)
dataset.last_modified = datetime.now()
Expand Down Expand Up @@ -353,6 +355,9 @@ def put(self, dataset, rid):
ResourceEditPermission(dataset).test()
resource = self.get_resource_or_404(dataset, rid)
form = api.validate(ResourceForm, resource)
# ensure API client does not override url on self-hosted resources
if resource.filetype == 'file':
form._fields.get('url').data = resource.url
form.populate_obj(resource)
resource.modified = datetime.now()
dataset.last_modified = datetime.now()
Expand Down Expand Up @@ -397,6 +402,8 @@ def get(self):
def post(self):
'''Create a new community resource'''
form = api.validate(CommunityResourceForm)
if form._fields.get('filetype').data != 'remote':
return 'This endpoint only supports remote community resources', 400
resource = CommunityResource()
form.populate_obj(resource)
if not resource.dataset:
Expand Down Expand Up @@ -428,6 +435,8 @@ def put(self, community):
'''Update a given community resource'''
ResourceEditPermission(community).test()
form = api.validate(CommunityResourceForm, community)
if community.filetype == 'file':
form._fields.get('url').data = community.url
form.populate_obj(community)
if not community.organization and not community.owner:
community.owner = current_user._get_current_object()
Expand Down
23 changes: 2 additions & 21 deletions udata/core/dataset/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from .models import (
Dataset, Resource, License, Checksum, CommunityResource,
UPDATE_FREQUENCIES, DEFAULT_FREQUENCY, RESOURCE_FILETYPES, CHECKSUM_TYPES,
LEGACY_FREQUENCIES, RESOURCE_TYPES, RESOURCE_FILETYPE_FILE,
LEGACY_FREQUENCIES, RESOURCE_TYPES,
ResourceSchema,
)

Expand All @@ -31,24 +31,6 @@ def normalize_format(data):
return data.strip().lower()


def enforce_filetype_file(form, field):
'''Only allowed domains in resource.url when filetype is file'''
if form._fields.get('filetype').data != RESOURCE_FILETYPE_FILE:
return
domain = urlparse(field.data).netloc
allowed_domains = current_app.config['RESOURCES_FILE_ALLOWED_DOMAINS']
allowed_domains += [current_app.config.get('SERVER_NAME')]
if current_app.config.get('CDN_DOMAIN'):
allowed_domains.append(current_app.config['CDN_DOMAIN'])
if '*' in allowed_domains:
return
if domain and domain not in allowed_domains:
message = _('Domain "{domain}" not allowed for filetype "{filetype}"')
raise validators.ValidationError(message.format(
domain=domain, filetype=RESOURCE_FILETYPE_FILE
))


def enforce_allowed_schemas(form, field):
schema = field.data
allowed_schemas = [s['id'] for s in ResourceSchema.objects()]
Expand All @@ -73,8 +55,7 @@ class BaseResourceForm(ModelForm):
choices=list(RESOURCE_TYPES.items()), default='other',
description=_('Resource type (documentation, API...)'))
url = fields.UploadableURLField(
_('URL'), [validators.DataRequired(), enforce_filetype_file],
storage=resources)
_('URL'), [validators.DataRequired()], storage=resources)
format = fields.StringField(
_('Format'),
filters=[normalize_format],
Expand Down
4 changes: 0 additions & 4 deletions udata/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,9 +281,6 @@ class Defaults(object):
# RDF
'rdf', 'ttl', 'n3',
]
# Whitelist of urls domains for resource with filetype `file`
# SERVER_NAME is always included, `*` is a supported value (wildcard)
RESOURCES_FILE_ALLOWED_DOMAINS = []

# How much time upload chunks are kept before cleanup
UPLOAD_MAX_RETENTION = 24 * HOUR
Expand Down Expand Up @@ -471,7 +468,6 @@ class Testing(object):
ACTIVATE_TERRITORIES = False
LOGGER_HANDLER_POLICY = 'never'
CELERYD_HIJACK_ROOT_LOGGER = False
RESOURCES_FILE_ALLOWED_DOMAINS = ['*']
URLS_ALLOW_LOCAL = True # Test server URL is local.test
URLS_ALLOWED_TLDS = tld_set | set(['test'])
URLS_ALLOW_PRIVATE = False
Expand Down
97 changes: 63 additions & 34 deletions udata/tests/api/test_datasets_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from udata.core.dataset.factories import (
DatasetFactory, VisibleDatasetFactory, CommunityResourceFactory,
LicenseFactory, ResourceFactory)
from udata.core.dataset.models import RESOURCE_FILETYPE_FILE, ResourceMixin
from udata.core.dataset.models import ResourceMixin
from udata.core.user.factories import UserFactory, AdminFactory
from udata.core.badges.factories import badge_factory
from udata.core.organization.factories import OrganizationFactory
Expand Down Expand Up @@ -602,6 +602,7 @@ def test_get(self):
def test_create(self):
data = ResourceFactory.as_dict()
data['extras'] = {'extra:id': 'id'}
data['filetype'] = 'remote'
with self.api_user():
response = self.post(url_for('api.resources',
dataset=self.dataset), data)
Expand All @@ -610,9 +611,19 @@ def test_create(self):
self.assertEqual(len(self.dataset.resources), 1)
self.assertEqual(self.dataset.resources[0].extras, {'extra:id': 'id'})

def test_unallowed_create_filetype_file(self):
data = ResourceFactory.as_dict()
data['filetype'] = 'file' # to be explicit
with self.api_user():
response = self.post(url_for('api.resources',
dataset=self.dataset), data)
# should fail because the POST endpoint only supports URL setting for remote resources
self.assert400(response)

def test_create_normalize_format(self):
_format = ' FORMAT '
data = ResourceFactory.as_dict()
data['filetype'] = 'remote'
data['format'] = _format
with self.api_user():
response = self.post(url_for('api.resources',
Expand All @@ -627,6 +638,7 @@ def test_create_2nd(self):
self.dataset.save()

data = ResourceFactory.as_dict()
data['filetype'] = 'remote'
with self.api_user():
response = self.post(url_for('api.resources',
dataset=self.dataset), data)
Expand Down Expand Up @@ -698,37 +710,6 @@ def test_create_with_file_chunks(self):
data = json.loads(response.data)
self.assertEqual(data['title'], 'test.txt')

def test_create_filetype_file_unallowed_domain(self):
self.app.config['RESOURCES_FILE_ALLOWED_DOMAINS'] = []
data = ResourceFactory.as_dict()
data['filetype'] = RESOURCE_FILETYPE_FILE
with self.api_user():
response = self.post(url_for('api.resources',
dataset=self.dataset), data)
self.assert400(response)

def test_create_filetype_file_allowed_domain(self):
self.app.config['RESOURCES_FILE_ALLOWED_DOMAINS'] = [
'udata.gouv.fr',
]
data = ResourceFactory.as_dict()
data['filetype'] = RESOURCE_FILETYPE_FILE
data['url'] = 'http://udata.gouv.fr/resource'
with self.api_user():
response = self.post(url_for('api.resources',
dataset=self.dataset), data)
self.assert201(response)

def test_create_filetype_file_server_name(self):
self.app.config['RESOURCES_FILE_ALLOWED_DOMAINS'] = []
data = ResourceFactory.as_dict()
data['filetype'] = RESOURCE_FILETYPE_FILE
data['url'] = 'http://%s/resource' % self.app.config['SERVER_NAME']
with self.api_user():
response = self.post(url_for('api.resources',
dataset=self.dataset), data)
self.assert201(response)

def test_reorder(self):
# Register an extra field in order to test
# https://github.com/opendatateam/udata/issues/1794
Expand All @@ -755,7 +736,7 @@ def test_reorder(self):
[str(r['id']) for r in expected_order])
self.assertEqual(self.dataset.last_modified, initial_last_modified)

def test_update(self):
def test_update_local(self):
resource = ResourceFactory()
self.dataset.resources.append(resource)
self.dataset.save()
Expand All @@ -779,6 +760,37 @@ def test_update(self):
updated = self.dataset.resources[0]
self.assertEqual(updated.title, data['title'])
self.assertEqual(updated.description, data['description'])
# Url should NOT have been updated as it is a hosted resource
self.assertNotEqual(updated.url, data['url'])
self.assertEqual(updated.extras, {'extra:id': 'id'})
self.assertEqualDates(updated.published, now)

def test_update_remote(self):
resource = ResourceFactory()
resource.filetype = 'remote'
self.dataset.resources.append(resource)
self.dataset.save()
now = datetime.now()
data = {
'title': faker.sentence(),
'description': faker.text(),
'url': faker.url(),
'published': now.isoformat(),
'extras': {
'extra:id': 'id',
}
}
with self.api_user():
response = self.put(url_for('api.resource',
dataset=self.dataset,
rid=str(resource.id)), data)
self.assert200(response)
self.dataset.reload()
self.assertEqual(len(self.dataset.resources), 1)
updated = self.dataset.resources[0]
self.assertEqual(updated.title, data['title'])
self.assertEqual(updated.description, data['description'])
# Url should have been updated as it is a remote resource
self.assertEqual(updated.url, data['url'])
self.assertEqual(updated.extras, {'extra:id': 'id'})
self.assertEqualDates(updated.published, now)
Expand Down Expand Up @@ -1283,6 +1295,7 @@ def test_community_resource_api_create_remote(self):
user = self.login()
dataset = VisibleDatasetFactory()
attrs = CommunityResourceFactory.as_dict()
attrs['filetype'] = 'remote'
attrs['dataset'] = str(dataset.id)
response = self.post(
url_for('api.community_resources'),
Expand All @@ -1298,15 +1311,31 @@ def test_community_resource_api_create_remote(self):
self.assertEqual(community_resource.owner, user)
self.assertIsNone(community_resource.organization)

def test_community_resource_api_unallowed_create_filetype_file(self):
'''It should create a remote community resource from the API'''
user = self.login()
dataset = VisibleDatasetFactory()
attrs = CommunityResourceFactory.as_dict()
attrs['filetype'] = 'file' # to be explicit
attrs['dataset'] = str(dataset.id)
response = self.post(
url_for('api.community_resources'),
attrs
)
# should fail because the POST endpoint only supports URL setting for remote community resources
self.assert400(response)

def test_community_resource_api_create_remote_needs_dataset(self):
'''
It should prevent remote community resource creation without dataset
from the API
'''
self.login()
attrs = CommunityResourceFactory.as_dict()
attrs['filetype'] = 'remote'
response = self.post(
url_for('api.community_resources'),
CommunityResourceFactory.as_dict()
attrs
)
self.assertStatus(response, 400)
data = json.loads(response.data)
Expand Down