From 603e431b574006c7bc5ae3a281ca58e3d4a9cfad Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Mon, 23 Oct 2023 11:59:29 -0400 Subject: [PATCH 1/5] user.awsverification: new module. This module contains functions for authenticating a person's AWS identity (account number, username, and user ID) by means of signed URLs. Amazon S3 authenticates clients using a per-request "signature" that incorporates the request path and headers together with a secret key held by the client. This means that the client can pre-compute this signature and send it to someone else, allowing the recipient to perform that request on that client's behalf, without revealing the secret key itself. We can arrange to create an S3 URL that can only be accessed by a particular AWS identity, and then ask someone to pre-compute the signature that they would use to access that resource (which they can do using the AWS CLI or other S3-compatible tools and libraries.) If we then submit that signature to S3 and it succeeds, we know that the requester holds the secret key for that identity. In fact, the resource in question doesn't need to actually exist, as long as we can tell the difference between an unauthorized request (HTTP 403) and an authorized request for something that doesn't exist (HTTP 404). AWS supports many types of identities. This module only supports "IAM user" identities (arn:aws:iam::*:user/*) and not any other types, both as a matter of policy (data access permissions are granted to individual people, not to groups, organizations, or computers) and because IAM users have a stable, fixed-length user ID. 
--- physionet-django/physionet/settings/base.py | 3 + physionet-django/user/awsverification.py | 421 ++++++++++++++++++++ 2 files changed, 424 insertions(+) create mode 100644 physionet-django/user/awsverification.py diff --git a/physionet-django/physionet/settings/base.py b/physionet-django/physionet/settings/base.py index 9afa819d0f..bdb5a1aa96 100644 --- a/physionet-django/physionet/settings/base.py +++ b/physionet-django/physionet/settings/base.py @@ -250,6 +250,9 @@ AWS_HEADER_VALUE2 = config('AWS_VALUE2', default=False) AWS_CLOUD_FORMATION = config('AWS_CLOUD_FORMATION', default=False) +# User verification bucket (see user/awsverification.py) +AWS_VERIFICATION_BUCKET_NAME = config('AWS_VERIFICATION_BUCKET_NAME', default=None) + # Tags for the DataCite API used for DOI DATACITE_API_URL = config('DATACITE_API_URL', default='https://api.test.datacite.org/dois') DATACITE_PREFIX = config('DATACITE_PREFIX', default='') diff --git a/physionet-django/user/awsverification.py b/physionet-django/user/awsverification.py new file mode 100644 index 0000000000..73d6fe4a00 --- /dev/null +++ b/physionet-django/user/awsverification.py @@ -0,0 +1,421 @@ +import json +import re +import urllib.parse + +import boto3 +from django.conf import settings +from django.forms import ValidationError +import requests + + +# As of January 2024, boto3 (and older versions of awscli) cannot +# generate correct signed URLs for arbitrary AWS regions. Always use +# us-east-1 for compatibility. +AWS_VERIFICATION_BUCKET_REGION = 'us-east-1' + + +def aws_verification_available(): + """ + Check whether the site is configured for AWS account authentication. + """ + return bool(settings.AWS_VERIFICATION_BUCKET_NAME) + + +def parse_aws_user_arn(aws_user_arn, aws_account): + """ + Check whether an AWS identity is supported for verification. + + AWS clients support numerous types of identities, described by + ARNs (Amazon Resource Names). 
For identity verification purposes, + only "IAM user" identities are supported. If the given ARN refers + to an IAM user, this function returns a dictionary containing the + account number and username. Otherwise an UnsupportedUserARN + exception is raised. + """ + # This must match the principal pattern in the bucket policy + # (see configure_aws_verification_bucket below). + # For the set of characters allowed in path and username, see: + # https://docs.aws.amazon.com/IAM/latest/APIReference/API_CreateUser.html + match = re.fullmatch(r'arn:aws:iam::(?P<account>[0-9]+):user/' + r'(?:[\x21-\x2e\x30-\x7e]*/)*' + r'(?P<username>[0-9a-zA-Z+=,.@_\-]+)', + aws_user_arn) + if not match or match['account'] != aws_account: + raise UnsupportedUserARN + return match.groupdict() + + +def get_aws_verification_key(site_domain, user_email, + aws_account, aws_userid, aws_user_arn): + """ + Generate an S3 key used to authenticate an AWS user. + + This is a string that must be signed by the user, and then + verified by Amazon S3, to verify the user's credentials. + """ + info = parse_aws_user_arn(aws_user_arn, aws_account) + aws_username = info['username'] + + # user_email is quoted to avoid slashes, as well as any shell + # metacharacters in the verification command. (The other + # variables used here shouldn't contain any slashes or shell + # metacharacters.) This is unrelated to the URL-encoding + # performed by S3 itself. + quoted_email = urllib.parse.quote(user_email, safe='@:+,') + + # This must match the resource pattern in the bucket policy + # (see configure_aws_verification_bucket below). + return (f'{site_domain}-verification/' + f'email={quoted_email}/' + f'userid={aws_userid}/' + f'account={aws_account}/' + f'username={aws_username}/') + + +def get_aws_verification_command(site_domain, user_email, + aws_account, aws_userid, aws_user_arn): + """ + Generate a shell command used to authenticate an AWS user. 
+ + After the user enters their account ID and user ID, they will be + asked to run this command and copy its output into the form. The + output of the command is a signed URL: it proves that the person + who generated it has appropriate AWS credentials, without + revealing the person's secret key. + """ + bucket = settings.AWS_VERIFICATION_BUCKET_NAME + region = AWS_VERIFICATION_BUCKET_REGION + if not bucket or not region: + raise AWSVerificationNotConfigured + + key = get_aws_verification_key(site_domain, user_email, + aws_account, aws_userid, aws_user_arn) + return f'aws s3 presign s3://{bucket}/{key} --region {region}' + + +def check_aws_verification_url(site_domain, user_email, + aws_account, aws_userid, aws_user_arn, + signed_url): + """ + Verify a signed URL to determine a user's AWS identity. + + To verify their AWS identity, the user is asked to generate a + specific signed URL. If the URL is correct and valid, this + function returns a dictionary containing the person's verified + identity information. + + For this to work, the verification bucket must be configured by + calling configure_aws_verification_bucket(). + + Note that only the "account" and "username" portions of + aws_user_arn are verified (not the "path"). + """ + bucket = settings.AWS_VERIFICATION_BUCKET_NAME + region = AWS_VERIFICATION_BUCKET_REGION + if not bucket or not region: + raise AWSVerificationNotConfigured + + try: + unsigned_url, query = signed_url.split('?') + query_dict = urllib.parse.parse_qs(query) + except ValueError: + raise InvalidSignedURL + + # Check whether this appears to be an AWS signed URL (either old + # or new format). 
+ query_keys = set(query_dict.keys()) + if query_keys >= {'X-Amz-Algorithm', 'X-Amz-Credential', + 'X-Amz-Date', 'X-Amz-Expires', + 'X-Amz-SignedHeaders', 'X-Amz-Signature'}: + pass + elif query_keys >= {'AWSAccessKeyId', 'Signature', 'Expires'}: + pass + else: + raise InvalidSignedURL + + # Check whether the URL corresponds to the correct bucket name. + # Any of these base URLs might be used depending on the region and + # the client configuration. + base_urls = [ + f'https://{bucket}.s3.{region}.amazonaws.com/', + f'https://s3.{region}.amazonaws.com/{bucket}/', + f'https://{bucket}.s3.amazonaws.com/', + f'https://s3.amazonaws.com/{bucket}/', + ] + for base_url in base_urls: + if unsigned_url.startswith(base_url): + key = unsigned_url[len(base_url):] + break + else: + raise InvalidS3Hostname + + # Check that the URL path matches the expected key. ('aws s3 + # presign' uses escaping identical to urllib.parse.quote.) + + expected_key = get_aws_verification_key(site_domain, user_email, + aws_account, aws_userid, + aws_user_arn) + if key != urllib.parse.quote(expected_key): + raise InvalidVerificationKey + + # Finally, verify the signature. + + with requests.Session() as session: + # If the signature is correct, and the identity is correct as + # determined by the bucket policy, then S3 should return a 404 + # response (because the resource doesn't, in fact, exist.) + response = session.get(signed_url) + if response.status_code != 404: + raise InvalidAWSSignature + + # As a sanity check, verify that S3 returns a 403 response if + # the AWS signature is missing. 
+ response = session.get(unsigned_url) + if response.status_code != 403: + raise BadBucketPolicy + + return { + 'account': aws_account, + 'userid': aws_userid, + 'arn': aws_user_arn, + } + + +class AWSVerificationFailed(ValidationError): + """Generic exception used if AWS user cannot be verified.""" + + +class AWSVerificationNotConfigured(AWSVerificationFailed): + """Required settings for AWS verification are not defined.""" + def __init__(self): + super().__init__( + 'AWS identity verification is currently unavailable.' + ) + + +class UnsupportedUserARN(AWSVerificationFailed): + """Client-supplied ARN is not valid or cannot be verified.""" + def __init__(self): + super().__init__( + 'Invalid ARN. Please use an IAM user identity ' + '(arn:aws:iam::111111111111:user/NAME) rather than ' + 'a root user or IAM role.' + ) + + +class InvalidSignedURL(AWSVerificationFailed): + """Client-supplied URL does not appear to be an AWS signed URL.""" + def __init__(self): + super().__init__( + 'Invalid verification code (not an AWS signed URL). ' + 'Please run the command exactly as shown, and copy ' + 'and paste the output.' + ) + + +class InvalidS3Hostname(AWSVerificationFailed): + """Client-supplied URL does not match expected S3 hostname.""" + def __init__(self): + super().__init__( + 'Invalid verification code (incorrect hostname). ' + 'Please run the command exactly as shown, and copy ' + 'and paste the output.' + ) + + +class InvalidVerificationKey(AWSVerificationFailed): + """Client-supplied URL does not match expected verification key.""" + def __init__(self): + super().__init__( + 'Invalid verification code (incorrect path). ' + 'Please run the command exactly as shown, and copy ' + 'and paste the output.' + ) + + +class InvalidAWSSignature(AWSVerificationFailed): + """Client-supplied URL cannot be verified by AWS.""" + def __init__(self): + super().__init__( + 'Invalid verification code (incorrect signature). 
' + 'Please run the command exactly as shown, and copy ' + 'and paste the output.' + ) + + +class BadBucketPolicy(AWSVerificationFailed): + """Verification bucket is not correctly configured.""" + def __init__(self): + super().__init__( + 'AWS identity verification is currently unavailable.' + ) + + +def configure_aws_verification_bucket(bucket_name): + """ + Configure an S3 bucket to be used for identity verification. + """ + s3 = boto3.client('s3', region_name=AWS_VERIFICATION_BUCKET_REGION) + try: + s3.create_bucket(Bucket=bucket_name) + except s3.exceptions.BucketAlreadyOwnedByYou: + pass + + s3.put_public_access_block( + Bucket=bucket_name, + PublicAccessBlockConfiguration={ + "BlockPublicAcls": False, + "IgnorePublicAcls": False, + "BlockPublicPolicy": False, + "RestrictPublicBuckets": False, + }, + ) + + # This must match the set of allowed principals + # (see parse_aws_user_arn above). + principal = "arn:aws:iam::*:user/*" + + # This must match the required verification key + # (see get_aws_verification_key above). + resource = ("*-verification/" + + "email=*/" + + "userid=${aws:userid}/" + + "account=${aws:PrincipalAccount}/" + + "username=${aws:username}/") + + # https://docs.aws.amazon.com/AmazonS3/latest/API/API_GetObject.html: + # + # You need the relevant read object (or version) permission + # for [the GetObject] operation. For more information, see + # Specifying Permissions in a Policy. If the object that you + # request doesn't exist, the error that Amazon S3 returns + # depends on whether you also have the s3:ListBucket + # permission. + # + # If you have the s3:ListBucket permission on the bucket, + # Amazon S3 returns an HTTP status code 404 (Not Found) error. + # + # If you don't have the s3:ListBucket permission, Amazon S3 + # returns an HTTP status code 403 ("access denied") error. 
+ # + # The documentation doesn't say so, but (as of November 2023) it + # appears sufficient for the client to have permission to perform + # an s3:ListBucket action with s3:prefix exactly equal to the + # requested key. + # + # For example, assuming the object doesn't exist, + # https://xxxx.s3.amazonaws.com/a/b/c returns 404 if + # https://xxxx.s3.amazonaws.com/?prefix=a/b/c returns 200. + # + # Moreover, the s3:GetObject permission may not actually be + # required in this case. Both the s3:GetObject and s3:ListBucket + # permissions are included here for future-proofing. + + policy = json.dumps({ + "Version": "2012-10-17", + "Statement": [ + { + "Effect": "Allow", + "Principal": principal, + "Action": "s3:GetObject", + "Resource": f"arn:aws:s3:::{bucket_name}/{resource}", + }, + { + "Effect": "Allow", + "Principal": principal, + "Action": "s3:ListBucket", + "Resource": f"arn:aws:s3:::{bucket_name}", + "Condition": { + "StringLike": { + "s3:prefix": resource, + }, + }, + }, + ], + }) + + s3.put_bucket_policy(Bucket=bucket_name, Policy=policy) + + +def test_aws_verification_bucket(bucket_name): + """ + Test functionality of an identity verification bucket. 
+ """ + s3 = boto3.client('s3', region_name=AWS_VERIFICATION_BUCKET_REGION) + sts = boto3.client('sts') + + identity = sts.get_caller_identity() + aws_account = identity['Account'] + aws_userid = identity['UserId'] + aws_arn = identity['Arn'] + + def assert_response(url, expected_status): + response = requests.get(url) + if response.status_code != expected_status: + raise Exception( + f"Expected {expected_status} for {url}, got instead:\n" + f" {response.status_code} {response.reason}\n\n" + f" {response.content}\n" + ) + + def tweak(string): + return string.translate({ord(i): ord(j) for i, j in zip( + '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/', + '1032547698BADCFEHGJILKNMPORQTSVUXWZYbadcfehgjilknmporqtsvuxwzy/+' + )}) + + def tweak_part(string, sep, n): + parts = string.split(sep) + parts[n] = tweak(parts[n]) + return sep.join(parts) + + site_domain = 'physionet.org' + user_email = 'root@example.com' + + # Correct signed URL should give a 404 + signed_url = s3.generate_presigned_url('get_object', Params={ + 'Bucket': bucket_name, + 'Key': get_aws_verification_key(site_domain, user_email, + aws_account, aws_userid, aws_arn), + }) + assert_response(signed_url, 404) + + # URL without signature should give a 403 + unsigned_url, query = signed_url.split('?') + assert_response(unsigned_url, 403) + + # Wrong signature should give a 403 + query_dict = dict(urllib.parse.parse_qsl(query)) + for key in ('Signature', 'X-Amz-Signature'): + if key in query_dict: + query_dict[key] = tweak(query_dict[key]) + wrong_url = unsigned_url + '?' 
+ urllib.parse.urlencode(query_dict) + assert_response(wrong_url, 403) + + # Signed URL with wrong user ID should give a 403 + wrong_userid = tweak(aws_userid) + wrong_url = s3.generate_presigned_url('get_object', Params={ + 'Bucket': bucket_name, + 'Key': get_aws_verification_key(site_domain, user_email, + aws_account, wrong_userid, aws_arn), + }) + assert_response(wrong_url, 403) + + # Signed URL with wrong account ID should give a 403 + wrong_account = tweak(aws_account) + wrong_arn = tweak_part(aws_arn, ':', 4) + wrong_url = s3.generate_presigned_url('get_object', Params={ + 'Bucket': bucket_name, + 'Key': get_aws_verification_key(site_domain, user_email, + wrong_account, aws_userid, wrong_arn), + }) + assert_response(wrong_url, 403) + + # Signed URL with wrong username should give a 403 + wrong_arn = tweak_part(aws_arn, '/', -1) + wrong_url = s3.generate_presigned_url('get_object', Params={ + 'Bucket': bucket_name, + 'Key': get_aws_verification_key(site_domain, user_email, + aws_account, aws_userid, wrong_arn), + }) + assert_response(wrong_url, 403) From b8a797566e51d88fea0d41b8a995fedb6d3ae944 Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Fri, 10 Nov 2023 17:38:17 -0500 Subject: [PATCH 2/5] Add settings and documentation for AWS verification. 
--- .env.example | 3 +++ deploy/README.md | 28 ++++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/.env.example b/.env.example index 31f34d5ab7..bb4cd505fc 100644 --- a/.env.example +++ b/.env.example @@ -53,6 +53,9 @@ PAUSE_CREDENTIALING_MESSAGE='PhysioNet will not be taking new applications for c # GOOGLE_APPLICATION_CREDENTIALS=json GCP_DELEGATION_EMAIL=email +# AWS user authentication bucket (see deploy/README.md) +#AWS_VERIFICATION_BUCKET_NAME=example-bucket + # AWS # Used to provide MIMIC through AWS, this will include S3, Redshift, Spark # Key and key2 are predefined by AWS, can be changed but IT WILL BREAK ALL diff --git a/deploy/README.md b/deploy/README.md index c381fc3dd2..7f9ade625e 100644 --- a/deploy/README.md +++ b/deploy/README.md @@ -242,6 +242,34 @@ AWS_CLOUD_FORMATION=URL This functionality will send the AWS ID to a Lambda function in the AWS Cloud Formation. That ID will be then added to the storage bucket and databases. +### User authentication for AWS + +Before accessing restricted data via AWS, users will need to add their AWS account on the "Cloud" page of their user profile. + +In order for this option to appear on the site, the site operator must create a *verification bucket* and configure the `AWS_VERIFICATION_BUCKET_NAME` setting in `.env`. A "verification bucket" is a special S3 bucket that doesn't contain any files. + +For demo/testing purposes, you can use the same verification bucket that PhysioNet uses (the bucket name isn't secret.) For production use, each site should have a verification bucket that is owned and controlled by the site's own AWS account. To do that: + +- Log in to the AWS console, and create an IAM user with full privileges for S3 administration. (This can be the same user that will be used for managing S3 project buckets.) +- Generate an access key for this user, and configure the AWS CLI (`aws configure`). 
+- Open a Python shell (`manage.py shell`) and run: +``` +import user.awsverification +user.awsverification.configure_aws_verification_bucket(BUCKET) +``` +where BUCKET is the bucket name you want to use (`AWS_VERIFICATION_BUCKET_NAME`). +- Delete the user / access key if you're not going to use them again. + +To test that a verification bucket is functioning correctly: + +- Log in to the AWS console, and create an IAM user with no added privileges. +- Generate an access key for this user, and configure the AWS CLI (`aws configure`). +- Open a Python shell (`manage.py shell`) and run: +``` +import user.awsverification +user.awsverification.test_aws_verification_bucket(BUCKET) +``` + ## ORCID account integration Obtaining a client_id / client_secret for interacting with the ORCID API: From 93f755a96fb22a7297871f49113c5130abff61e1 Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Tue, 24 Oct 2023 14:12:34 -0400 Subject: [PATCH 3/5] Add model fields for storing AWS identity information. We want to verify a person's AWS identity in order to permit them to access restricted resources via direct cloud APIs; and possibly for other purposes in the future. An AWS account ID is not an identity. An account may contain many identities (known as "userids" or "unique IDs"), which might or might not belong to the same person. (Even if they do all belong to the same person, it doesn't mean that person wants or should want to give all of their identities the ability to access sensitive data.) Here, we add fields to store the userid alongside the account ID and ARN (which may be of interest in the future), and the date and time that these credentials were verified. 
--- ...58_cloudinformation_aws_userid_and_more.py | 41 +++++++++++++++++++ physionet-django/user/models.py | 23 +++++++++++ physionet-django/user/validators.py | 29 ++++++++++++- 3 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 physionet-django/user/migrations/0058_cloudinformation_aws_userid_and_more.py diff --git a/physionet-django/user/migrations/0058_cloudinformation_aws_userid_and_more.py b/physionet-django/user/migrations/0058_cloudinformation_aws_userid_and_more.py new file mode 100644 index 0000000000..701b2dfd95 --- /dev/null +++ b/physionet-django/user/migrations/0058_cloudinformation_aws_userid_and_more.py @@ -0,0 +1,41 @@ +# Generated by Django 4.1.10 on 2023-10-27 22:00 + +from django.db import migrations, models +import user.validators + + +class Migration(migrations.Migration): + + dependencies = [ + ("user", "0057_alter_cloudinformation_aws_id"), + ] + + operations = [ + migrations.AddField( + model_name="cloudinformation", + name="aws_userid", + field=models.CharField( + blank=True, + max_length=30, + null=True, + unique=True, + validators=[user.validators.validate_aws_userid], + ), + ), + migrations.AddField( + model_name="cloudinformation", + name="aws_user_arn", + field=models.CharField( + blank=True, + max_length=2048, + null=True, + unique=True, + validators=[user.validators.validate_aws_user_arn], + ), + ), + migrations.AddField( + model_name="cloudinformation", + name="aws_verification_datetime", + field=models.DateTimeField(null=True), + ), + ] diff --git a/physionet-django/user/models.py b/physionet-django/user/models.py index a126ad1106..57fcc76222 100644 --- a/physionet-django/user/models.py +++ b/physionet-django/user/models.py @@ -1218,6 +1218,10 @@ class CloudInformation(models.Model): on_delete=models.CASCADE) gcp_email = models.OneToOneField('user.AssociatedEmail', related_name='gcp_email', on_delete=models.SET_NULL, null=True) + + # AWS account ID. 
For historical reasons this field may be + # non-null even if the account is not verified. This field is + # mainly informational and should not generally be used. aws_id = models.CharField( max_length=60, null=True, @@ -1225,6 +1229,25 @@ class CloudInformation(models.Model): default=None, validators=[validators.validate_aws_id], ) + # AWS unique user ID. This is a (typically) 21-character string + # starting with AIDA and uniquely identifies the user. Not to be + # confused with access key ID (one user may have multiple access + # keys.) This field is verified and should be used for + # authentication. + aws_userid = models.CharField( + max_length=30, null=True, blank=True, unique=True, + validators=[validators.validate_aws_userid], + ) + # Structured, readable name (ARN) for the user in AWS. This field + # is not necessarily stable (users can be renamed, for example.) + # Note that only the account and username are verified. + aws_user_arn = models.CharField( + max_length=2048, null=True, blank=True, unique=True, + validators=[validators.validate_aws_user_arn], + ) + # Date and time that the 'aws_id', 'aws_userid', and + # 'aws_user_arn' values were verified. + aws_verification_datetime = models.DateTimeField(null=True) class Meta: default_permissions = () diff --git a/physionet-django/user/validators.py b/physionet-django/user/validators.py index 455a8619c2..a73b067f7f 100644 --- a/physionet-django/user/validators.py +++ b/physionet-django/user/validators.py @@ -277,8 +277,33 @@ def validate_aws_id(value): """" Validate an AWS ID. """ - aws_id_pattern = r"\b\d{12}\b" - if value is not None and not re.search(aws_id_pattern, value): + if not re.fullmatch(r"\d{12}", value): raise ValidationError( "Invalid AWS ID. Please provide a valid AWS ID, which should be a 12-digit number." ) + + +def validate_aws_userid(value): + """ + Validate an AWS user ID. 
+ """ + # Officially, "minimum length of 16, maximum length of 128", but + # that includes all types of unique IDs, not just IAM user IDs. + # Some examples in AWS documentation show 16-character user IDs, + # while others show 21-character IDs. The size matters since we + # need to store a fixed number of IDs in a fixed-size JSON file. + if not re.fullmatch('AIDA[A-Z0-9]{12,17}', value): + raise ValidationError( + 'Invalid AWS user ID. Your user ID should be 16 to 21 ' + 'characters long, beginning with "AIDA".' + ) + + +def validate_aws_user_arn(value): + """ + Validate an AWS IAM ARN, a string that identifies a user in AWS. + """ + # https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_identifiers.html + if not re.fullmatch(r'arn:aws:iam::\d+:[\w\-]+(?:/[\x21-\x7e]*)?', + value, re.ASCII): + raise ValidationError('Invalid ARN.') From 8608d8f518a607693ffc44764f37457de794bf6b Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Mon, 30 Oct 2023 16:14:59 -0400 Subject: [PATCH 4/5] Add views and forms for verifying AWS user identity. In order for a person to verify their AWS identity, they need to provide a digital signature, in the form of a signed URL that includes their AWS account information in the path. We further require the URL to include the domain name of the site, and the user's primary email address, to prevent misuse. This signed URL can be generated using the AWS CLI. However, the URL must be exactly correct; if it is wrong, it is difficult to tell why. In order to hopefully avoid confusion, we first ask the person to run 'aws sts get-caller-identity'; based on that, we tell them the exact 'aws s3 presign' command they need to run. 
--- physionet-django/user/forms.py | 130 +++++++++++++++++- .../user/templates/user/edit_cloud.html | 75 +++++++++- .../user/templates/user/edit_cloud_aws.html | 28 ++++ physionet-django/user/urls.py | 1 + physionet-django/user/views.py | 89 +++++++++++- 5 files changed, 314 insertions(+), 9 deletions(-) create mode 100644 physionet-django/user/templates/user/edit_cloud_aws.html diff --git a/physionet-django/user/forms.py b/physionet-django/user/forms.py index dc9e7d08a6..cf2ac8f7cd 100644 --- a/physionet-django/user/forms.py +++ b/physionet-django/user/forms.py @@ -1,4 +1,5 @@ import datetime +import json import time from django import forms @@ -14,6 +15,12 @@ from django.utils.html import mark_safe from django.utils.translation import gettext_lazy from physionet.utility import validate_pdf_file_type +from user.awsverification import ( + AWSVerificationFailed, + get_aws_verification_command, + check_aws_verification_url, + parse_aws_user_arn, +) from user.models import ( AssociatedEmail, CloudInformation, @@ -28,8 +35,15 @@ ) from user.trainingreport import TrainingCertificateError, find_training_report_url from user.userfiles import UserFiles -from user.validators import UsernameValidator, validate_name, validate_training_file_size -from user.validators import validate_institutional_email +from user.validators import ( + UsernameValidator, + validate_aws_id, + validate_aws_user_arn, + validate_aws_userid, + validate_institutional_email, + validate_name, + validate_training_file_size, +) from user.widgets import ProfilePhotoInput from django.db.models import OuterRef, Exists @@ -671,14 +685,13 @@ def save(self): class CloudForm(forms.ModelForm): """ - Form to store the AWS ID, and point to the google GCP email. + Form to store the email address used for Google Cloud authentication. 
""" class Meta: model = CloudInformation - fields = ('gcp_email','aws_id',) + fields = ('gcp_email',) labels = { 'gcp_email': 'Google (Email)', - 'aws_id': 'Amazon (ID)', } def __init__(self, *args, **kwargs): # Email choices are those belonging to a user @@ -688,6 +701,113 @@ def __init__(self, *args, **kwargs): self.fields['gcp_email'].required = False +class AWSIdentityForm(forms.Form): + """ + Form to request the identity information for Amazon Web Services. + + Verifying a user's AWS identity is a two-step process. In the + first step, the user is asked to run the 'get-caller-identity' + command. We do this because it's the easiest way for them to find + out exactly what identity they're currently using. + + The information submitted here will be passed along to the + AWSVerificationForm. + """ + aws_identity = forms.CharField( + label="Caller identity", max_length=2000, + widget=forms.Textarea(attrs={ + 'rows': 5, + 'placeholder': json.dumps({ + "UserId": "...", + "Account": "...", + "Arn": "...", + }, indent=4) + }) + ) + + def clean(self): + try: + identity = super().clean()['aws_identity'] + data = json.loads(identity) + aws_account = data['Account'] + aws_userid = data['UserId'] + aws_user_arn = data['Arn'] + except (TypeError, KeyError, ValueError): + raise forms.ValidationError( + mark_safe("Copy and paste the output of the " + "aws sts get-caller-identity command.")) + parse_aws_user_arn(aws_user_arn, aws_account) + validate_aws_id(aws_account) + validate_aws_userid(aws_userid) + validate_aws_user_arn(aws_user_arn) + return { + 'aws_account': aws_account, + 'aws_userid': aws_userid, + 'aws_user_arn': aws_user_arn, + } + + +class AWSVerificationForm(forms.Form): + """ + Form to verify the identity information for Amazon Web Services. + + Verifying a user's AWS identity is a two-step process. 
In the + second step, the user is asked to generate a signed URL that + proves they have the credentials for the identity they submitted + in the first step (the AWSIdentityForm). + + Once this information is validated, it is stored in the + CloudInformation model. + """ + signed_url = forms.CharField( + label="Signed URL", max_length=2000, + widget=forms.Textarea(attrs={ + 'rows': 8, + 'placeholder': (f'https://{settings.AWS_VERIFICATION_BUCKET_NAME}' + '.s3.amazonaws.com/...') + }) + ) + + def __init__(self, user, site_domain, aws_account, aws_userid, + aws_user_arn, **kwargs): + super().__init__(**kwargs) + self.user = user + self.site_domain = site_domain + self.aws_account = aws_account + self.aws_userid = aws_userid + self.aws_user_arn = aws_user_arn + + def aws_verification_command(self): + return get_aws_verification_command(site_domain=self.site_domain, + user_email=self.user.email, + aws_account=self.aws_account, + aws_userid=self.aws_userid, + aws_user_arn=self.aws_user_arn) + + def clean(self): + data = super().clean() + signed_url = data['signed_url'].strip() + validate_aws_id(self.aws_account) + validate_aws_userid(self.aws_userid) + validate_aws_user_arn(self.aws_user_arn) + info = check_aws_verification_url(site_domain=self.site_domain, + user_email=self.user.email, + aws_account=self.aws_account, + aws_userid=self.aws_userid, + aws_user_arn=self.aws_user_arn, + signed_url=signed_url) + data.update(info) + return data + + def save(self): + cloud_info = CloudInformation.objects.get_or_create(user=self.user)[0] + cloud_info.aws_id = self.cleaned_data['account'] + cloud_info.aws_userid = self.cleaned_data['userid'] + cloud_info.aws_user_arn = self.cleaned_data['arn'] + cloud_info.aws_verification_datetime = timezone.now() + cloud_info.save() + + # class ActivationForm(forms.ModelForm): class ActivationForm(forms.Form): """A form for creating new users. 
Includes all the required diff --git a/physionet-django/user/templates/user/edit_cloud.html b/physionet-django/user/templates/user/edit_cloud.html index 9056e0eb40..7a4f48a027 100644 --- a/physionet-django/user/templates/user/edit_cloud.html +++ b/physionet-django/user/templates/user/edit_cloud.html @@ -12,12 +12,83 @@

Edit Cloud Details

  • Follow the instructions to request access. If instructions for cloud access are not shown, the project is not currently available on the cloud.
  • +

    Google Cloud Platform

    + +{% if aws_verification_available %} +

    Amazon Web Services

    +
    + {% csrf_token %} + {% if user.cloud_information.aws_verification_datetime %} +
    +
      +
    • + +
      +
      Account
      +
      {{ user.cloud_information.aws_id }}
      +
      User ID
      +
      {{ user.cloud_information.aws_userid }}
      +
      ARN
      +
      {{ user.cloud_information.aws_user_arn }}
      +
      +
    • +
    +
    + + {# modal start #} + + {# modal end #} + {% else %} +
    +

    To link your Amazon Web Services account using the + AWS Command Line Interface: +

    +
      +
    1. + Open a terminal and run the following command: +
      aws sts get-caller-identity
      +
    2. +
    3. + Copy and paste the output into the box below. + {% include "form_snippet_no_labels.html" with form=aws_form %} +
    4. +
    + + {% endif %} +
    +{% endif %} {% endblock %} diff --git a/physionet-django/user/templates/user/edit_cloud_aws.html b/physionet-django/user/templates/user/edit_cloud_aws.html new file mode 100644 index 0000000000..0985807f9b --- /dev/null +++ b/physionet-django/user/templates/user/edit_cloud_aws.html @@ -0,0 +1,28 @@ +{% extends "user/settings.html" %} + +{% block title %}Verify AWS Account{% endblock %} + +{% block main_content %} +

    Verify AWS Account

    +
    + +
    + {% csrf_token %} +

    + To verify your Amazon Web Services account using the + AWS Command Line Interface: +

    +
      +
    1. + Open a terminal and run the following command (one line): +
      {{ form.aws_verification_command }}
      +
    2. +
    3. + Copy and paste the output into the box below. + {% include "form_snippet_no_labels.html" %} +
    4. +
    + +
    +{% endblock %} diff --git a/physionet-django/user/urls.py b/physionet-django/user/urls.py index 8723f37e62..0fae8e5b58 100644 --- a/physionet-django/user/urls.py +++ b/physionet-django/user/urls.py @@ -15,6 +15,7 @@ path("settings/emails/", views.edit_emails, name="edit_emails"), path("settings/username/", views.edit_username, name="edit_username"), path("settings/cloud/", views.edit_cloud, name="edit_cloud"), + path("settings/cloud/aws/", views.edit_cloud_aws, name="edit_cloud_aws"), path("settings/orcid/", views.edit_orcid, name="edit_orcid"), path("authorcid/", views.auth_orcid, name="auth_orcid"), path( diff --git a/physionet-django/user/views.py b/physionet-django/user/views.py index 509b18282b..fcb9de00d1 100644 --- a/physionet-django/user/views.py +++ b/physionet-django/user/views.py @@ -46,6 +46,7 @@ from project.models import Author, DUASignature, DUA, PublishedProject from requests_oauthlib import OAuth2Session from user import forms, validators +from user.awsverification import aws_verification_available from user.models import ( AssociatedEmail, CodeOfConduct, @@ -926,11 +927,26 @@ def credential_reference_verification(request, application_slug, verification_to def edit_cloud(request): """ Page to add the information for cloud usage. + + This page allows the user to specify their credentials for + accessing cloud services, of which two are currently supported: + + - Google Cloud Platform authentication is based on the email + address associated with the person's GCP account. They must + verify this address through the usual AssociatedEmail + verification process, after which they can select it as their + GCP address on this page. + + - Amazon Web Services authentication is based on the AWS "userid" + (a mostly-random string of alphanumeric characters.) This + identity is verified through a two-step process: this page asks + the person who they are, and the 'edit_cloud_aws' page asks them + to enter a code to prove it. 
""" user = request.user cloud_info = CloudInformation.objects.get_or_create(user=user)[0] form = forms.CloudForm(instance=cloud_info) - if request.method == 'POST': + if request.method == 'POST' and 'save-gcp' in request.POST: form = forms.CloudForm(instance=cloud_info, data=request.POST) if form.is_valid(): form.save() @@ -938,7 +954,76 @@ def edit_cloud(request): else: messages.error(request, 'Invalid submission. See errors below.') - return render(request, 'user/edit_cloud.html', {'form':form, 'user':user}) + if request.method == 'POST' and 'delete-aws' in request.POST: + cloud_info.aws_id = None + cloud_info.aws_userid = None + cloud_info.aws_user_arn = None + cloud_info.aws_verification_datetime = None + cloud_info.save() + + aws_form = forms.AWSIdentityForm() + if request.method == 'POST' and 'save-aws' in request.POST: + aws_form = forms.AWSIdentityForm(data=request.POST) + if aws_form.is_valid(): + request.session['new_aws_account'] = \ + aws_form.cleaned_data['aws_account'] + request.session['new_aws_userid'] = \ + aws_form.cleaned_data['aws_userid'] + request.session['new_aws_user_arn'] = \ + aws_form.cleaned_data['aws_user_arn'] + return redirect('edit_cloud_aws') + else: + messages.error(request, 'Invalid submission. See errors below.') + + return render(request, 'user/edit_cloud.html', { + 'user': user, + 'gcp_form': form, + 'aws_form': aws_form, + 'aws_verification_available': aws_verification_available(), + }) + + +@login_required +def edit_cloud_aws(request): + """ + Page to submit a code for AWS identity verification. + + Verifying AWS identity is a two-step process. After entering your + AWS identity information (account and userid) on the 'edit_cloud' + page, this page asks you to enter a code (a signed URL) that + proves you have the appropriate credentials (AWS access key). 
+ """ + if not aws_verification_available(): + return redirect('edit_cloud') + + site_domain = get_current_site(request).domain + aws_account = request.session.get('new_aws_account', '') + aws_userid = request.session.get('new_aws_userid', '') + aws_user_arn = request.session.get('new_aws_user_arn', '') + form = forms.AWSVerificationForm(user=request.user, + site_domain=site_domain, + aws_account=aws_account, + aws_userid=aws_userid, + aws_user_arn=aws_user_arn) + if request.method == 'POST' and 'signed_url' in request.POST: + form = forms.AWSVerificationForm(user=request.user, + site_domain=site_domain, + aws_account=aws_account, + aws_userid=aws_userid, + aws_user_arn=aws_user_arn, + data=request.POST) + if form.is_valid(): + form.save() + request.session.pop('new_aws_account') + request.session.pop('new_aws_userid') + request.session.pop('new_aws_user_arn') + messages.success(request, 'Your cloud information has been saved.') + return redirect('edit_cloud') + else: + messages.error(request, 'Invalid submission. See errors below.') + + return render(request, 'user/edit_cloud_aws.html', {'form': form}) + @login_required def view_agreements(request): From e36f7a4acf66e0de83454ddfbb7df5b1ed1d44e3 Mon Sep 17 00:00:00 2001 From: Benjamin Moody Date: Tue, 7 Nov 2023 14:05:37 -0500 Subject: [PATCH 5/5] Add test cases for AWS identity verification. 
--- physionet-django/user/test_views.py | 193 +++++++++++++++++++++++++++- 1 file changed, 191 insertions(+), 2 deletions(-) diff --git a/physionet-django/user/test_views.py b/physionet-django/user/test_views.py index 3c36d906b8..f2c5f81676 100644 --- a/physionet-django/user/test_views.py +++ b/physionet-django/user/test_views.py @@ -1,12 +1,15 @@ import contextlib import datetime +import json import logging import os import pdb import re import shutil import time +from unittest import mock +import boto3 from django.conf import settings from lightwave.views import DBCAL_FILE, ORIGINAL_DBCAL_FILE @@ -16,12 +19,22 @@ from django.core import mail from django.core.management import call_command from django.core.files.uploadedfile import SimpleUploadedFile -from django.test import RequestFactory, TestCase +from django.test import RequestFactory, TestCase, override_settings from django.urls import reverse from django.utils import timezone +import requests_mock from user.enums import TrainingStatus -from user.models import AssociatedEmail, Profile, User, Training, TrainingType, Question, TrainingQuestion +from user.models import ( + AssociatedEmail, + CloudInformation, + Profile, + Question, + Training, + TrainingQuestion, + TrainingType, + User, +) from user.views import (activate_user, edit_emails, edit_profile, edit_password_complete, public_profile, register, user_settings, verify_email) @@ -666,3 +679,179 @@ def test_reject_training_invalid(self, mock_get_info_from_certificate_pdf): self.assertEqual(response.status_code, 200) self.assertEqual(self.training.status, TrainingStatus.REVIEW) + + +@override_settings( + AWS_VERIFICATION_BUCKET_NAME='example-bucket', + AWS_VERIFICATION_BUCKET_REGION='us-east-1', +) +class TestAWSVerification(TestCase): + """ + Test AWS user verification + """ + + USER_EMAIL = 'admin@mit.edu' + AWS_ACCOUNT = '314159265359' + AWS_USERID = 'AIDAAAAAAAAAAAAAAAAAA' + AWS_USERNAME = 'tim' + AWS_ARN = 'arn:aws:iam::314159265359:user/tim' + 
IDENTITY_JSON = json.dumps({ + 'UserId': AWS_USERID, + 'Account': AWS_ACCOUNT, + 'Arn': AWS_ARN, + }) + + S3_RESPONSE_403 = ( + '' + 'AccessDenied' + 'Access Denied' + ) + S3_RESPONSE_404 = ( + '' + 'NoSuchKey' + 'The specified key does not exist.' + ) + + def get_cloud_information(self): + user = User.objects.get(email=self.USER_EMAIL) + cloud_info, _ = CloudInformation.objects.get_or_create(user=user) + return cloud_info + + def mock_s3_presign(self, response_text): + match = re.search(r'aws s3 presign s3://([\w\-]+)/(\S+) ' + r'--region ([\w\-]+)', + response_text) + self.assertIsNotNone(match) + + with mock.patch.dict(os.environ, { + 'AWS_SHARED_CREDENTIALS_FILE': os.path.join( + settings.DEMO_FILE_ROOT, 'aws_credentials'), + 'AWS_PROFILE': 'default', + 'AWS_ACCESS_KEY_ID': '', + 'AWS_SECRET_ACCESS_KEY': '', + 'AWS_SECURITY_TOKEN': '', + 'AWS_SESSION_TOKEN': '', + 'AWS_DEFAULT_REGION': '', + }): + client = boto3.client('s3', region_name=match.group(3)) + return client.generate_presigned_url('get_object', Params={ + 'Bucket': match.group(1), + 'Key': match.group(2), + }) + + @requests_mock.Mocker() + def test_verify_success(self, mocker): + """ + Test successfully adding a valid AWS identity + """ + self.client.login(username=self.USER_EMAIL, password='Tester11!') + + cloud_info = self.get_cloud_information() + cloud_info.aws_id = None + cloud_info.aws_userid = None + cloud_info.aws_user_arn = None + cloud_info.aws_verification_datetime = None + cloud_info.save() + + response = self.client.post( + reverse('edit_cloud'), + data={'save-aws': '', 'aws_identity': self.IDENTITY_JSON}, + ) + self.assertEqual(response.status_code, 302) + self.assertEqual(response['location'], reverse('edit_cloud_aws')) + + response = self.client.get(response['location']) + self.assertEqual(response.status_code, 200) + signed_url = self.mock_s3_presign(response.content.decode()) + + # Assuming signature is correct, this URL should give a 404. 
+ # With signature missing, it should give 403. + mocker.get( + signed_url, complete_qs=True, + status_code=404, text=self.S3_RESPONSE_404, + ) + mocker.get( + signed_url.split('?')[0], complete_qs=True, + status_code=403, text=self.S3_RESPONSE_403, + ) + response = self.client.post( + reverse('edit_cloud_aws'), + data={'signed_url': signed_url}, + ) + self.assertEqual(response.status_code, 302) + self.assertEqual(response['location'], reverse('edit_cloud')) + + cloud_info = self.get_cloud_information() + self.assertEqual(cloud_info.aws_id, self.AWS_ACCOUNT) + self.assertEqual(cloud_info.aws_userid, self.AWS_USERID) + self.assertEqual(cloud_info.aws_user_arn, self.AWS_ARN) + self.assertIsNotNone(cloud_info.aws_verification_datetime) + + @requests_mock.Mocker() + def test_verify_failure(self, mocker): + """ + Test failure to add an invalid AWS identity + """ + self.client.login(username=self.USER_EMAIL, password='Tester11!') + + cloud_info = self.get_cloud_information() + cloud_info.aws_id = None + cloud_info.aws_userid = None + cloud_info.aws_user_arn = None + cloud_info.aws_verification_datetime = None + cloud_info.save() + + response = self.client.post( + reverse('edit_cloud'), + data={'save-aws': '', 'aws_identity': self.IDENTITY_JSON}, + ) + self.assertEqual(response.status_code, 302) + self.assertEqual(response['location'], reverse('edit_cloud_aws')) + + response = self.client.get(response['location']) + self.assertEqual(response.status_code, 200) + signed_url = self.mock_s3_presign(response.content.decode()) + + # Assuming signature is wrong, this URL should give a 403. 
+ + mocker.get( + signed_url, complete_qs=True, + status_code=403, text=self.S3_RESPONSE_403, + ) + response = self.client.post( + reverse('edit_cloud_aws'), + data={'signed_url': signed_url}, + ) + self.assertEqual(response.status_code, 200) + + cloud_info = self.get_cloud_information() + self.assertIsNone(cloud_info.aws_id) + self.assertIsNone(cloud_info.aws_userid) + self.assertIsNone(cloud_info.aws_user_arn) + self.assertIsNone(cloud_info.aws_verification_datetime) + + @requests_mock.Mocker() + def test_delete_info(self, mocker): + """ + Test deleting an existing AWS identity + """ + self.client.login(username=self.USER_EMAIL, password='Tester11!') + + cloud_info = self.get_cloud_information() + cloud_info.aws_id = self.AWS_ACCOUNT + cloud_info.aws_userid = self.AWS_USERID + cloud_info.aws_user_arn = self.AWS_ARN + cloud_info.aws_verification_datetime = timezone.now() + cloud_info.save() + + response = self.client.post( + reverse('edit_cloud'), + data={'delete-aws': ''}, + ) + self.assertEqual(response.status_code, 200) + + cloud_info = self.get_cloud_information() + self.assertIsNone(cloud_info.aws_id) + self.assertIsNone(cloud_info.aws_userid) + self.assertIsNone(cloud_info.aws_user_arn) + self.assertIsNone(cloud_info.aws_verification_datetime)