From a671f1b124de2f5ccec1ea3d40dcfd9cd1522b6c Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Thu, 15 Feb 2018 16:17:19 -0800 Subject: [PATCH 1/7] Decouple signature version of S3 addressing style Botocore currently couples `s3v4` signing and path style addressing. Using the older `s3` signature version results in the `auto` style addressing (virtual hosted by default with a fallback to path style if needed). Support for `s3v4` was added in b30a39356 in support for the new `cn-north-1` region. The virtual hosting logic didn't work with the new `.aws.cn` TLD so the code was updated to just use path style addressing when using `s3v4`. This created an inconsistency in the default behavior where if you're using the old signature version we'll use virtual hosted addressing, but if you use s3v4 you get path style by default. Combine this with some regions only supporting s3v4, additional edge cases such as us-gov-west-1, and dual stack and accelerate endpoints, the logic was becoming hard to follow. The new logic is simpler because it's decoupled from region and signature version. We'll try to use virtual hosted addressing by default, and fall back to path style if necessary. --- botocore/utils.py | 26 ++++++------- tests/functional/test_s3.py | 66 ++++++++++++++++++++++++++------ tests/unit/test_s3_addressing.py | 2 +- 3 files changed, 67 insertions(+), 27 deletions(-) diff --git a/botocore/utils.py b/botocore/utils.py index 107dcce7b9..6c8cf4bc7b 100644 --- a/botocore/utils.py +++ b/botocore/utils.py @@ -682,12 +682,7 @@ def fix_s3_host(request, signature_version, region_name, addressing. This allows us to avoid 301 redirects for all bucket names that can be CNAME'd. """ - # By default we do not use virtual hosted style addressing when - # signed with signature version 4. - if signature_version is not botocore.UNSIGNED and \ - 's3v4' in signature_version: - return - elif not _allowed_region(region_name): + if not _allowed_region(region_name): return try: switch_to_virtual_host_style( @@ -904,14 +899,17 @@ def redirect_from_error(self, request_dict, response, operation, **kwargs): error = response[1].get('Error', {}) error_code = error.get('Code') - if error_code == '301': - # A raw 301 error might be returned for several reasons, but we - # only want to try to redirect it if it's a HeadObject or - # HeadBucket because all other operations will return - # PermanentRedirect if region is incorrect. - if operation.name not in ['HeadObject', 'HeadBucket']: - return - elif error_code != 'PermanentRedirect': + is_raw_redirect = ( + error_code == '301' and + operation.name in ['HeadObject', 'HeadBucket'] + ) + is_wrong_signing_region = ( + error_code == 'AuthorizationHeaderMalformed' and + 'Region' in error + ) + is_permanent_redirect = error_code == 'PermanentRedirect' + if not any([is_raw_redirect, is_wrong_signing_region, + is_permanent_redirect]): return bucket = request_dict['context']['signing']['bucket'] diff --git a/tests/functional/test_s3.py b/tests/functional/test_s3.py index f9ceaacbd8..8885b77452 100644 --- a/tests/functional/test_s3.py +++ b/tests/functional/test_s3.py @@ -223,7 +223,10 @@ class TestRegionRedirect(BaseS3OperationTest): def setUp(self): super(TestRegionRedirect, self).setUp() self.client = self.session.create_client( - 's3', 'us-west-2', config=Config(signature_version='s3v4')) + 's3', 'us-west-2', config=Config( + signature_version='s3v4', + s3={'addressing_style': 'path'}, + )) self.redirect_response = mock.Mock() self.redirect_response.headers = { @@ -242,6 +245,23 @@ def setUp(self): b' foo.s3.eu-central-1.amazonaws.com' b'') + self.bad_signing_region_response = mock.Mock() + self.bad_signing_region_response.headers = { + 'x-amz-bucket-region': 'eu-central-1' + } + self.bad_signing_region_response.status_code = 400 + self.bad_signing_region_response.content = ( + b'' + b'' + b' AuthorizationHeaderMalformed' + b' the region us-west-2 is wrong; ' + b'expecting eu-central-1' + b' eu-central-1' + b' BD9AA1730D454E39' + b' ' + b'' + ) + self.success_response = mock.Mock() self.success_response.headers = {} self.success_response.status_code = 200 @@ -295,6 +315,29 @@ def test_region_redirect_cache(self): self.assertEqual(calls[1].url, fixed_url) self.assertEqual(calls[2].url, fixed_url) + def test_resign_request_with_region_when_needed(self): + self.http_session_send_mock.side_effect = [ + self.bad_signing_region_response, self.success_response, + ] + + # Create a client with no explicit configuration so we can + # verify the default behavior. + client = self.session.create_client( + 's3', 'us-west-2') + first_response = client.list_objects(Bucket='foo') + self.assertEqual( + first_response['ResponseMetadata']['HTTPStatusCode'], 200) + + self.assertEqual(self.http_session_send_mock.call_count, 2) + calls = [c[0][0] for c in self.http_session_send_mock.call_args_list] + initial_url = ('https://foo.s3.amazonaws.com/' + '?encoding-type=url') + self.assertEqual(calls[0].url, initial_url) + + fixed_url = ('https://foo.s3.amazonaws.com/' + '?encoding-type=url') + self.assertEqual(calls[1].url, fixed_url) + class TestGeneratePresigned(BaseS3OperationTest): def test_generate_unauthed_url(self): @@ -414,31 +457,30 @@ def test_correct_url_used_for_s3(): signature_version='s3', is_secure=False, expected_url='http://bucket.s3.amazonaws.com/key') - # The default behavior for sigv4. DNS compatible buckets still get path - # style addresses. + # Virtual host addressing is independent of signature version. yield t.case(region='us-west-2', bucket='bucket', key='key', signature_version='s3v4', expected_url=( - 'https://s3.us-west-2.amazonaws.com/bucket/key')) + 'https://bucket.s3.amazonaws.com/key')) yield t.case(region='us-east-1', bucket='bucket', key='key', signature_version='s3v4', - expected_url='https://s3.amazonaws.com/bucket/key') + expected_url='https://bucket.s3.amazonaws.com/key') yield t.case(region='us-west-1', bucket='bucket', key='key', signature_version='s3v4', expected_url=( - 'https://s3.us-west-1.amazonaws.com/bucket/key')) + 'https://bucket.s3.amazonaws.com/key')) yield t.case(region='us-west-1', bucket='bucket', key='key', signature_version='s3v4', is_secure=False, expected_url=( - 'http://s3.us-west-1.amazonaws.com/bucket/key')) + 'http://bucket.s3.amazonaws.com/key')) # Regions outside of the 'aws' partition. - # We're expecting path style because this is the default with - # 's3v4'. + # These should still default to virtual hosted addressing + # unless explicitly configured otherwise. yield t.case(region='cn-north-1', bucket='bucket', key='key', signature_version='s3v4', expected_url=( - 'https://s3.cn-north-1.amazonaws.com.cn/bucket/key')) + 'https://bucket.s3.cn-north-1.amazonaws.com.cn/key')) # This isn't actually supported because cn-north-1 is sigv4 only, # but we'll still double check that our internal logic is correct # when building the expected url. @@ -608,11 +650,11 @@ def test_correct_url_used_for_s3(): yield t.case( region='us-east-1', bucket='bucket', key='key', s3_config=use_dualstack, signature_version='s3v4', - expected_url='https://s3.dualstack.us-east-1.amazonaws.com/bucket/key') + expected_url='https://bucket.s3.dualstack.us-east-1.amazonaws.com/key') yield t.case( region='us-west-2', bucket='bucket', key='key', s3_config=use_dualstack, signature_version='s3v4', - expected_url='https://s3.dualstack.us-west-2.amazonaws.com/bucket/key') + expected_url='https://bucket.s3.dualstack.us-west-2.amazonaws.com/key') # Non DNS compatible buckets use path style for dual stack. yield t.case( region='us-west-2', bucket='bucket.dot', key='key', diff --git a/tests/unit/test_s3_addressing.py b/tests/unit/test_s3_addressing.py index 050fe1b517..8001665854 100644 --- a/tests/unit/test_s3_addressing.py +++ b/tests/unit/test_s3_addressing.py @@ -81,7 +81,7 @@ def test_list_objects_unicode_query_string_eu_central_1(self): prepared_request = self.get_prepared_request('list_objects', params) self.assertEqual( prepared_request.url, - ('https://s3.eu-central-1.amazonaws.com/safename' + ('https://safename.s3.amazonaws.com/' '?marker=%C3%A4%C3%B6%C3%BC-01.txt') ) From cfeaae6a566058a3d1551e0fa34902d8611cd375 Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Thu, 15 Feb 2018 22:10:17 -0800 Subject: [PATCH 2/7] Remove hardcoded 's3.amazonaws.com' for virtual hosted addressing This removes the hard coded references to 's3.amazonaws.com' when using the virtual hosted addressing mode of S3 and instead uses regionalized endpoints when converting to virtual hosted addressing. For example, given a bucket 'foo' in us-west-2 and a key 'bar', we would convert the URL from `s3.us-west-2.amazonaws.com/foo/bar` to `foo.s3.amazonaws.com/bar`. With this change we'll now convert the URL to `foo.s3.us-west-2.amazonaws.com/bar`. When the initial code for 's3.amazonaws.com' was first added to botocore, it provided a number of benefits: 1. You could avoid a 301 response by using `.s3.amazonaws.com`. This is because the DNS would resolve to an endpoint in the correct region, and the older signature version `s3` didn't include a region name as part of its signing process. The end result is that a user did not have to correctly configure a region for an S3 bucket, they'd automatically get to the correct region due to the DNS resolution. 2. The 301 PermanentRedirect responses did not include any structured data about the correct region to use so it wasn't easy to know which region you _should_ be sending the request to. As a result, 301 responses weren't automatically handled and ended up just raising an exception back to the user. Since this code was first introduced there were several things that have changed: 1. The introduction of the `s3v4` signature version, which requires a correct region name as part of its signature. Signing a request with the wrong region results in a 400 response. As a result, it didn't matter if `foo.s3.amazonaws.com` got you to the right region, if the request was _signed_ with the wrong region, you'd get a 400 response. 2. The 301 response (as well as most responses from S3) contain request metadata that tell you which region a bucket is in. This means that it's now possible to automatically handle 301 responses because we know which region to send the request to. 3. The introduction of various partitions outside of the `aws` partition, such as `aws-cn` meant there were other TLDs we needed to handle. The "hack" put in place in botocore was to just disable virtual hosted addressing in certain scenarios. Given all this, it makes sense to no longer hardcode `s3.amazonaws.com` when converting to virtual hosted addressing. There's already a growing number of edge cases where we have to disable this, and most importantly it's not needed anymore. --- botocore/utils.py | 2 +- tests/functional/test_s3.py | 20 ++++++++++---------- tests/integration/test_s3.py | 14 +++++++------- tests/unit/test_s3_addressing.py | 4 ++-- tests/unit/test_utils.py | 6 +++--- 5 files changed, 23 insertions(+), 23 deletions(-) diff --git a/botocore/utils.py b/botocore/utils.py index 6c8cf4bc7b..0a84020f17 100644 --- a/botocore/utils.py +++ b/botocore/utils.py @@ -672,7 +672,7 @@ def check_dns_name(bucket_name): def fix_s3_host(request, signature_version, region_name, - default_endpoint_url='s3.amazonaws.com', **kwargs): + default_endpoint_url=None, **kwargs): """ This handler looks at S3 requests just before they are signed. If there is a bucket name on the path (true for everything except diff --git a/tests/functional/test_s3.py b/tests/functional/test_s3.py index 8885b77452..805dd709c4 100644 --- a/tests/functional/test_s3.py +++ b/tests/functional/test_s3.py @@ -330,11 +330,11 @@ def test_resign_request_with_region_when_needed(self): self.assertEqual(self.http_session_send_mock.call_count, 2) calls = [c[0][0] for c in self.http_session_send_mock.call_args_list] - initial_url = ('https://foo.s3.amazonaws.com/' + initial_url = ('https://foo.s3.us-west-2.amazonaws.com/' '?encoding-type=url') self.assertEqual(calls[0].url, initial_url) - fixed_url = ('https://foo.s3.amazonaws.com/' + fixed_url = ('https://foo.s3.eu-central-1.amazonaws.com/' '?encoding-type=url') self.assertEqual(calls[1].url, fixed_url) @@ -349,7 +349,7 @@ def test_generate_unauthed_url(self): 'Bucket': 'foo', 'Key': 'bar' }) - self.assertEqual(url, 'https://foo.s3.amazonaws.com/bar') + self.assertEqual(url, 'https://foo.s3.us-west-2.amazonaws.com/bar') def test_generate_unauthed_post(self): config = Config(signature_version=botocore.UNSIGNED) @@ -357,7 +357,7 @@ def test_generate_unauthed_post(self): parts = client.generate_presigned_post(Bucket='foo', Key='bar') expected = { 'fields': {'key': 'bar'}, - 'url': 'https://foo.s3.amazonaws.com/' + 'url': 'https://foo.s3.us-west-2.amazonaws.com/' } self.assertEqual(parts, expected) @@ -446,33 +446,33 @@ def test_correct_url_used_for_s3(): # The default behavior for sigv2. DNS compatible buckets yield t.case(region='us-west-2', bucket='bucket', key='key', signature_version='s3', - expected_url='https://bucket.s3.amazonaws.com/key') + expected_url='https://bucket.s3.us-west-2.amazonaws.com/key') yield t.case(region='us-east-1', bucket='bucket', key='key', signature_version='s3', expected_url='https://bucket.s3.amazonaws.com/key') yield t.case(region='us-west-1', bucket='bucket', key='key', signature_version='s3', - expected_url='https://bucket.s3.amazonaws.com/key') + expected_url='https://bucket.s3.us-west-1.amazonaws.com/key') yield t.case(region='us-west-1', bucket='bucket', key='key', signature_version='s3', is_secure=False, - expected_url='http://bucket.s3.amazonaws.com/key') + expected_url='http://bucket.s3.us-west-1.amazonaws.com/key') # Virtual host addressing is independent of signature version. yield t.case(region='us-west-2', bucket='bucket', key='key', signature_version='s3v4', expected_url=( - 'https://bucket.s3.amazonaws.com/key')) + 'https://bucket.s3.us-west-2.amazonaws.com/key')) yield t.case(region='us-east-1', bucket='bucket', key='key', signature_version='s3v4', expected_url='https://bucket.s3.amazonaws.com/key') yield t.case(region='us-west-1', bucket='bucket', key='key', signature_version='s3v4', expected_url=( - 'https://bucket.s3.amazonaws.com/key')) + 'https://bucket.s3.us-west-1.amazonaws.com/key')) yield t.case(region='us-west-1', bucket='bucket', key='key', signature_version='s3v4', is_secure=False, expected_url=( - 'http://bucket.s3.amazonaws.com/key')) + 'http://bucket.s3.us-west-1.amazonaws.com/key')) # Regions outside of the 'aws' partition. # These should still default to virtual hosted addressing diff --git a/tests/integration/test_s3.py b/tests/integration/test_s3.py index 88b2bf9445..aa1aeb9552 100644 --- a/tests/integration/test_s3.py +++ b/tests/integration/test_s3.py @@ -582,7 +582,7 @@ def test_presign_sigv4(self): 'get_object', Params={'Bucket': self.bucket_name, 'Key': self.key}) self.assertTrue( presigned_url.startswith( - 'https://s3.amazonaws.com/%s/%s' % ( + 'https://%s.s3.amazonaws.com/%s' % ( self.bucket_name, self.key)), "Host was suppose to be the us-east-1 endpoint, instead " "got: %s" % presigned_url) @@ -647,7 +647,7 @@ def test_presign_post_sigv4(self): # Make sure the correct endpoint is being used self.assertTrue( post_args['url'].startswith( - 'https://s3.amazonaws.com/%s' % self.bucket_name), + 'https://%s.s3.amazonaws.com/' % self.bucket_name), "Host was suppose to use us-east-1 endpoint, instead " "got: %s" % post_args['url']) @@ -671,8 +671,8 @@ def test_presign_sigv2(self): 'get_object', Params={'Bucket': self.bucket_name, 'Key': self.key}) self.assertTrue( presigned_url.startswith( - 'https://%s.s3.amazonaws.com/%s' % ( - self.bucket_name, self.key)), + 'https://%s.s3.%s.amazonaws.com/%s' % ( + self.bucket_name, self.region, self.key)), "Host was suppose to use DNS style, instead " "got: %s" % presigned_url) # Try to retrieve the object using the presigned url. @@ -687,7 +687,7 @@ def test_presign_sigv4(self): self.assertTrue( presigned_url.startswith( - 'https://s3.us-west-2.amazonaws.com/%s/%s' % ( + 'https://%s.s3.us-west-2.amazonaws.com/%s' % ( self.bucket_name, self.key)), "Host was suppose to be the us-west-2 endpoint, instead " "got: %s" % presigned_url) @@ -715,7 +715,7 @@ def test_presign_post_sigv2(self): # Make sure the correct endpoint is being used self.assertTrue( post_args['url'].startswith( - 'https://%s.s3.amazonaws.com' % self.bucket_name), + 'https://%s.s3.us-west-2.amazonaws.com' % self.bucket_name), "Host was suppose to use DNS style, instead " "got: %s" % post_args['url']) @@ -748,7 +748,7 @@ def test_presign_post_sigv4(self): # Make sure the correct endpoint is being used self.assertTrue( post_args['url'].startswith( - 'https://s3.us-west-2.amazonaws.com/%s' % self.bucket_name), + 'https://%s.s3.us-west-2.amazonaws.com/' % self.bucket_name), "Host was suppose to use DNS style, instead " "got: %s" % post_args['url']) diff --git a/tests/unit/test_s3_addressing.py b/tests/unit/test_s3_addressing.py index 8001665854..60279224b8 100644 --- a/tests/unit/test_s3_addressing.py +++ b/tests/unit/test_s3_addressing.py @@ -72,7 +72,7 @@ def test_list_objects_dns_name_non_classic(self): prepared_request = self.get_prepared_request('list_objects', params, force_hmacv1=True) self.assertEqual(prepared_request.url, - 'https://safename.s3.amazonaws.com/') + 'https://safename.s3.us-west-2.amazonaws.com/') def test_list_objects_unicode_query_string_eu_central_1(self): self.region_name = 'eu-central-1' @@ -81,7 +81,7 @@ def test_list_objects_unicode_query_string_eu_central_1(self): prepared_request = self.get_prepared_request('list_objects', params) self.assertEqual( prepared_request.url, - ('https://safename.s3.amazonaws.com/' + ('https://safename.s3.eu-central-1.amazonaws.com/' '?marker=%C3%A4%C3%B6%C3%BC-01.txt') ) diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index c1cc1b9c31..a0b9202bf5 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -677,13 +677,13 @@ def test_fix_s3_host_initial(self): request=request, signature_version=signature_version, region_name=region_name) self.assertEqual(request.url, - 'https://bucket.s3.amazonaws.com/key.txt') + 'https://bucket.s3-us-west-2.amazonaws.com/key.txt') self.assertEqual(request.auth_path, '/bucket/key.txt') def test_fix_s3_host_only_applied_once(self): request = AWSRequest( method='PUT', headers={}, - url='https://s3-us-west-2.amazonaws.com/bucket/key.txt' + url='https://s3.us-west-2.amazonaws.com/bucket/key.txt' ) region_name = 'us-west-2' signature_version = 's3' @@ -695,7 +695,7 @@ def test_fix_s3_host_only_applied_once(self): request=request, signature_version=signature_version, region_name=region_name) self.assertEqual(request.url, - 'https://bucket.s3.amazonaws.com/key.txt') + 'https://bucket.s3.us-west-2.amazonaws.com/key.txt') # This was a bug previously. We want to make sure that # calling fix_s3_host() again does not alter the auth_path. # Otherwise we'll get signature errors. From dc552fdb5c90639bc4beff0305b7dfe5d7768d22 Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Thu, 15 Feb 2018 22:32:16 -0800 Subject: [PATCH 3/7] Remove special case govcloud handling Similar to cn-north-1 (though implemented differently), using a govcloud region would disable virtual hosted addressing. This removes the whole "allowed regions" concept in the fix_s3_host code. Now the addressing style logic is completely separate of both signature version and region. --- botocore/client.py | 10 ---------- botocore/utils.py | 14 ++------------ tests/functional/test_s3.py | 5 ++--- tests/unit/test_s3_addressing.py | 4 ++-- 4 files changed, 6 insertions(+), 27 deletions(-) diff --git a/botocore/client.py b/botocore/client.py index 18b0243f57..662b7b8e43 100644 --- a/botocore/client.py +++ b/botocore/client.py @@ -189,16 +189,6 @@ def _get_s3_addressing_handler(self, endpoint_url, s3_config, logger.debug("Defaulting to S3 virtual host style addressing with " "path style addressing fallback.") - # For dual stack mode, we need to clear the default endpoint url in - # order to use the existing netloc if the bucket is dns compatible. - # Also, the default_endpoint_url of 's3.amazonaws.com' only works - # if we're in the 'aws' partition. Anywhere else we should - # just use the existing netloc. - if s3_config.get('use_dualstack_endpoint', False) or \ - partition != 'aws': - return functools.partial( - fix_s3_host, default_endpoint_url=None) - # By default, try to use virtual style with path fallback. return fix_s3_host diff --git a/botocore/utils.py b/botocore/utils.py index 0a84020f17..d46a743c0e 100644 --- a/botocore/utils.py +++ b/botocore/utils.py @@ -41,10 +41,6 @@ # Based on rfc2986, section 2.3 SAFE_CHARS = '-._~' LABEL_RE = re.compile(r'[a-z0-9][a-z0-9\-]*[a-z0-9]') -RESTRICTED_REGIONS = [ - 'us-gov-west-1', - 'fips-us-gov-west-1', -] RETRYABLE_HTTP_ERRORS = (requests.Timeout, requests.ConnectionError) S3_ACCELERATE_WHITELIST = ['dualstack'] @@ -679,11 +675,9 @@ def fix_s3_host(request, signature_version, region_name, ListAllBuckets) it checks to see if that bucket name conforms to the DNS naming conventions. If it does, it alters the request to use ``virtual hosting`` style addressing rather than ``path-style`` - addressing. This allows us to avoid 301 redirects for all - bucket names that can be CNAME'd. + addressing. + """ - if not _allowed_region(region_name): - return try: switch_to_virtual_host_style( request, signature_version, default_endpoint_url) @@ -760,10 +754,6 @@ def _is_get_bucket_location_request(request): return request.url.endswith('?location') -def _allowed_region(region_name): - return region_name not in RESTRICTED_REGIONS - - def instance_cache(func): """Method decorator for caching method calls to a single instance. diff --git a/tests/functional/test_s3.py b/tests/functional/test_s3.py index 805dd709c4..2ef79d5e5a 100644 --- a/tests/functional/test_s3.py +++ b/tests/functional/test_s3.py @@ -564,15 +564,14 @@ def test_correct_url_used_for_s3(): s3_config=virtual_hosting, expected_url='https://bucket.s3.us-gov-west-1.amazonaws.com/key') - # Test restricted regions not do virtual host by default yield t.case( region='us-gov-west-1', bucket='bucket', key='key', signature_version='s3', - expected_url='https://s3.us-gov-west-1.amazonaws.com/bucket/key') + expected_url='https://bucket.s3.us-gov-west-1.amazonaws.com/key') yield t.case( region='fips-us-gov-west-1', bucket='bucket', key='key', signature_version='s3', - expected_url='https://s3-fips-us-gov-west-1.amazonaws.com/bucket/key') + expected_url='https://bucket.s3-fips-us-gov-west-1.amazonaws.com/key') # Test path style addressing. diff --git a/tests/unit/test_s3_addressing.py b/tests/unit/test_s3_addressing.py index 60279224b8..b39dddb8b7 100644 --- a/tests/unit/test_s3_addressing.py +++ b/tests/unit/test_s3_addressing.py @@ -91,7 +91,7 @@ def test_list_objects_in_restricted_regions(self): prepared_request = self.get_prepared_request('list_objects', params) # Note how we keep the region specific endpoint here. self.assertEqual(prepared_request.url, - 'https://s3.us-gov-west-1.amazonaws.com/safename') + 'https://safename.s3.us-gov-west-1.amazonaws.com/') def test_list_objects_in_fips(self): self.region_name = 'fips-us-gov-west-1' @@ -100,7 +100,7 @@ def test_list_objects_in_fips(self): # Note how we keep the region specific endpoint here. self.assertEqual( prepared_request.url, - 'https://s3-fips-us-gov-west-1.amazonaws.com/safename') + 'https://safename.s3-fips-us-gov-west-1.amazonaws.com/') def test_list_objects_non_dns_name_non_classic(self): self.region_name = 'us-west-2' From 0ef645c58bd3396968d0a569b0bf2e832f8a2644 Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Fri, 16 Feb 2018 12:26:12 -0800 Subject: [PATCH 4/7] Account for 400 response from HeadBucket/HeadObject When you sign a HeadBucket/HeadObject request with the wrong region you'll get a 400 response with no body (expected for HEAD requests). We need to update the special casing for these operations to also catch this case and redirect to the new region appropriately. --- botocore/utils.py | 10 +++++++--- tests/unit/test_utils.py | 20 ++++++++++++++++++++ 2 files changed, 27 insertions(+), 3 deletions(-) diff --git a/botocore/utils.py b/botocore/utils.py index d46a743c0e..39febf9486 100644 --- a/botocore/utils.py +++ b/botocore/utils.py @@ -889,8 +889,12 @@ def redirect_from_error(self, request_dict, response, operation, **kwargs): error = response[1].get('Error', {}) error_code = error.get('Code') - is_raw_redirect = ( - error_code == '301' and + # We have to account for 400 responses because + # if we sign a Head* request with the wrong region, + # we'll get a 400 Bad Request but we won't get a + # body saying it's an "AuthorizationHeaderMalformed". + is_special_head_object = ( + error_code in ['301', '400'] and operation.name in ['HeadObject', 'HeadBucket'] ) is_wrong_signing_region = ( @@ -898,7 +902,7 @@ def redirect_from_error(self, request_dict, response, operation, **kwargs): 'Region' in error ) is_permanent_redirect = error_code == 'PermanentRedirect' - if not any([is_raw_redirect, is_wrong_signing_region, + if not any([is_special_head_object, is_wrong_signing_region, is_permanent_redirect]): return diff --git a/tests/unit/test_utils.py b/tests/unit/test_utils.py index a0b9202bf5..150849fe44 100644 --- a/tests/unit/test_utils.py +++ b/tests/unit/test_utils.py @@ -1393,6 +1393,26 @@ def test_redirects_301(self): request_dict, response, self.operation) self.assertIsNone(redirect_response) + def test_redirects_400_head_bucket(self): + request_dict = {'url': 'https://us-west-2.amazonaws.com/foo', + 'context': {'signing': {'bucket': 'foo'}}} + response = (None, { + 'Error': {'Code': '400', 'Message': 'Bad Request'}, + 'ResponseMetadata': { + 'HTTPHeaders': {'x-amz-bucket-region': 'eu-central-1'} + } + }) + + self.operation.name = 'HeadObject' + redirect_response = self.redirector.redirect_from_error( + request_dict, response, self.operation) + self.assertEqual(redirect_response, 0) + + self.operation.name = 'ListObjects' + redirect_response = self.redirector.redirect_from_error( + request_dict, response, self.operation) + self.assertIsNone(redirect_response) + def test_does_not_redirect_if_None_response(self): request_dict = {'url': 'https://us-west-2.amazonaws.com/foo', 'context': {'signing': {'bucket': 'foo'}}} From ad05e889e6026eb73eebd00fe4454907d023eb15 Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Tue, 20 Feb 2018 15:50:25 -0800 Subject: [PATCH 5/7] Use global endpoint for s3 presigning This puts back the original behavior to use the global endpoint for s3 presigning for backwards compatibility. --- botocore/signers.py | 9 ++- botocore/utils.py | 3 + tests/functional/test_s3.py | 116 ++++++++++++++++++++++++++++++++++- tests/integration/test_s3.py | 17 +++-- 4 files changed, 136 insertions(+), 9 deletions(-) diff --git a/botocore/signers.py b/botocore/signers.py index 79ac731557..fd392ce7f1 100644 --- a/botocore/signers.py +++ b/botocore/signers.py @@ -558,7 +558,8 @@ def generate_presigned_url(self, ClientMethod, Params=None, ExpiresIn=3600, expires_in = ExpiresIn http_method = HttpMethod context = { - 'is_presign_request': True + 'is_presign_request': True, + 'partition': self.meta.partition, } request_signer = self._request_signer @@ -586,6 +587,8 @@ def generate_presigned_url(self, ClientMethod, Params=None, ExpiresIn=3600, prepare_request_dict( request_dict, endpoint_url=self.meta.endpoint_url, context=context) + request_dict['context']['partition'] = self.meta.partition + # Generate the presigned url. return request_signer.generate_presigned_url( request_dict=request_dict, expires_in=expires_in, @@ -686,7 +689,9 @@ def generate_presigned_post(self, Bucket, Key, Fields=None, Conditions=None, # Prepare the request dict by including the client's endpoint url. prepare_request_dict( - request_dict, endpoint_url=self.meta.endpoint_url) + request_dict, endpoint_url=self.meta.endpoint_url, + context={'is_presign_request': True, 'partition': self.meta.partition}, + ) # Append that the bucket name to the list of conditions. conditions.append({'bucket': bucket}) diff --git a/botocore/utils.py b/botocore/utils.py index 39febf9486..eddbae0d68 100644 --- a/botocore/utils.py +++ b/botocore/utils.py @@ -678,6 +678,9 @@ def fix_s3_host(request, signature_version, region_name, addressing. """ + if request.context.get('is_presign_request', False): + if request.context.get('partition', '') == 'aws': + default_endpoint_url = 's3.amazonaws.com' try: switch_to_virtual_host_style( request, signature_version, default_endpoint_url) diff --git a/tests/functional/test_s3.py b/tests/functional/test_s3.py index 2ef79d5e5a..f80df0f351 100644 --- a/tests/functional/test_s3.py +++ b/tests/functional/test_s3.py @@ -15,6 +15,7 @@ import botocore.session from botocore.config import Config +from botocore.compat import urlsplit from botocore.exceptions import ParamValidationError from botocore import UNSIGNED @@ -349,7 +350,7 @@ def test_generate_unauthed_url(self): 'Bucket': 'foo', 'Key': 'bar' }) - self.assertEqual(url, 'https://foo.s3.us-west-2.amazonaws.com/bar') + self.assertEqual(url, 'https://foo.s3.amazonaws.com/bar') def test_generate_unauthed_post(self): config = Config(signature_version=botocore.UNSIGNED) @@ -357,7 +358,7 @@ def test_generate_unauthed_post(self): parts = client.generate_presigned_post(Bucket='foo', Key='bar') expected = { 'fields': {'key': 'bar'}, - 'url': 'https://foo.s3.us-west-2.amazonaws.com/' + 'url': 'https://foo.s3.amazonaws.com/' } self.assertEqual(parts, expected) @@ -775,6 +776,7 @@ def _verify_expected_endpoint_url(region, bucket, key, s3_config, environ['AWS_ACCESS_KEY_ID'] = 'access_key' environ['AWS_SECRET_ACCESS_KEY'] = 'secret_key' environ['AWS_CONFIG_FILE'] = 'no-exist-foo' + environ['AWS_SHARED_CREDENTIALS_FILE'] = 'no-exist-foo' session = create_session() session.config_filename = 'no-exist-foo' config = Config( @@ -790,3 +792,113 @@ def _verify_expected_endpoint_url(region, bucket, key, s3_config, Key=key, Body=b'bar') request_sent = mock_send.call_args[0][0] assert_equal(request_sent.url, expected_url) + + +def _create_s3_client(region, is_secure, endpoint_url, s3_config, + signature_version): + environ = {} + with mock.patch('os.environ', environ): + environ['AWS_ACCESS_KEY_ID'] = 'access_key' + environ['AWS_SECRET_ACCESS_KEY'] = 'secret_key' + environ['AWS_CONFIG_FILE'] = 'no-exist-foo' + environ['AWS_SHARED_CREDENTIALS_FILE'] = 'no-exist-foo' + session = create_session() + session.config_filename = 'no-exist-foo' + config = Config( + signature_version=signature_version, + s3=s3_config + ) + s3 = session.create_client('s3', region_name=region, use_ssl=is_secure, + config=config, + endpoint_url=endpoint_url) + return s3 + + +def test_addressing_for_presigned_urls(): + # See TestGeneratePresigned for more detailed test cases + # on presigned URLs. Here's we're just focusing on the + # adddressing mode used for presigned URLs. + # We special case presigned URLs due to backwards + # compatibility. + t = S3AddressingCases(_verify_presigned_url_addressing) + + # us-east-1, or the "global" endpoint. A signature version of + # None means the user doesn't have signature version configured. + yield t.case(region='us-east-1', bucket='bucket', key='key', + signature_version=None, + expected_url='https://bucket.s3.amazonaws.com/key') + yield t.case(region='us-east-1', bucket='bucket', key='key', + signature_version='s3', + expected_url='https://bucket.s3.amazonaws.com/key') + yield t.case(region='us-east-1', bucket='bucket', key='key', + signature_version='s3v4', + expected_url='https://bucket.s3.amazonaws.com/key') + yield t.case(region='us-east-1', bucket='bucket', key='key', + signature_version='s3v4', + s3_config={'addressing_style': 'path'}, + expected_url='https://s3.amazonaws.com/bucket/key') + + # A region that supports both 's3' and 's3v4'. + yield t.case(region='us-west-2', bucket='bucket', key='key', + signature_version=None, + expected_url='https://bucket.s3.amazonaws.com/key') + yield t.case(region='us-west-2', bucket='bucket', key='key', + signature_version='s3', + expected_url='https://bucket.s3.amazonaws.com/key') + yield t.case(region='us-west-2', bucket='bucket', key='key', + signature_version='s3v4', + expected_url='https://bucket.s3.amazonaws.com/key') + yield t.case(region='us-west-2', bucket='bucket', key='key', + signature_version='s3v4', + s3_config={'addressing_style': 'path'}, + expected_url='https://s3.us-west-2.amazonaws.com/bucket/key') + + # An 's3v4' only region. + yield t.case(region='us-east-2', bucket='bucket', key='key', + signature_version=None, + expected_url='https://bucket.s3.amazonaws.com/key') + yield t.case(region='us-east-2', bucket='bucket', key='key', + signature_version='s3', + expected_url='https://bucket.s3.amazonaws.com/key') + yield t.case(region='us-east-2', bucket='bucket', key='key', + signature_version='s3v4', + expected_url='https://bucket.s3.amazonaws.com/key') + yield t.case(region='us-east-2', bucket='bucket', key='key', + signature_version='s3v4', + s3_config={'addressing_style': 'path'}, + expected_url='https://s3.us-east-2.amazonaws.com/bucket/key') + + # Dualstack endpoints + yield t.case(region='us-west-2', bucket='bucket', key='key', + signature_version=None, + s3_config={'use_dualstack_endpoint': True}, + expected_url='https://bucket.s3.amazonaws.com/key') + + # Accelerate + yield t.case(region='us-west-2', bucket='bucket', key='key', + signature_version=None, + s3_config={'use_accelerate_endpoint': True}, + expected_url='https://bucket.s3-accelerate.amazonaws.com/key') + + # A region that we don't know about. + yield t.case(region='us-west-50', bucket='bucket', key='key', + signature_version=None, + expected_url='https://bucket.s3.amazonaws.com/key') + + +def _verify_presigned_url_addressing(region, bucket, key, s3_config, + is_secure=True, + customer_provided_endpoint=None, + expected_url=None, + signature_version=None): + s3 = _create_s3_client(region=region, is_secure=is_secure, + endpoint_url=customer_provided_endpoint, + s3_config=s3_config, + signature_version=signature_version) + url = s3.generate_presigned_url( + 'get_object', {'Bucket': bucket, 'Key': key}) + # We're not trying to verify the params for URL presigning, + # those are tested elsewhere. We just care about the hostname/path. + parts = urlsplit(url) + actual = '%s://%s%s' % parts[:3] + assert_equal(actual, expected_url) diff --git a/tests/integration/test_s3.py b/tests/integration/test_s3.py index aa1aeb9552..2d15b19418 100644 --- a/tests/integration/test_s3.py +++ b/tests/integration/test_s3.py @@ -671,15 +671,22 @@ def test_presign_sigv2(self): 'get_object', Params={'Bucket': self.bucket_name, 'Key': self.key}) self.assertTrue( presigned_url.startswith( - 'https://%s.s3.%s.amazonaws.com/%s' % ( - self.bucket_name, self.region, self.key)), + 'https://%s.s3.amazonaws.com/%s' % ( + self.bucket_name, self.key)), "Host was suppose to use DNS style, instead " "got: %s" % presigned_url) # Try to retrieve the object using the presigned url. self.assertEqual(requests.get(presigned_url).content, b'foo') def test_presign_sigv4(self): + # For a newly created bucket, you can't use virtualhosted + # addressing and 's3v4' due to the backwards compat behavior + # using '.s3.amazonaws.com' for anything in the AWS partition. + # Instead you either have to use the older 's3' signature version + # of you have to use path style addressing. The latter is being + # done here. self.client_config.signature_version = 's3v4' + self.client_config.s3 = {'addressing_style': 'path'} self.client = self.session.create_client( 's3', config=self.client_config) presigned_url = self.client.generate_presigned_url( @@ -687,7 +694,7 @@ def test_presign_sigv4(self): self.assertTrue( presigned_url.startswith( - 'https://%s.s3.us-west-2.amazonaws.com/%s' % ( + 'https://s3.us-west-2.amazonaws.com/%s/%s' % ( self.bucket_name, self.key)), "Host was suppose to be the us-west-2 endpoint, instead " "got: %s" % presigned_url) @@ -715,7 +722,7 @@ def test_presign_post_sigv2(self): # Make sure the correct endpoint is being used self.assertTrue( post_args['url'].startswith( - 'https://%s.s3.us-west-2.amazonaws.com' % self.bucket_name), + 'https://%s.s3.amazonaws.com' % self.bucket_name), "Host was suppose to use DNS style, instead " "got: %s" % post_args['url']) @@ -748,7 +755,7 @@ def test_presign_post_sigv4(self): # Make sure the correct endpoint is being used self.assertTrue( post_args['url'].startswith( - 'https://%s.s3.us-west-2.amazonaws.com/' % self.bucket_name), + 'https://%s.s3.amazonaws.com/' % self.bucket_name), "Host was suppose to use DNS style, instead " "got: %s" % post_args['url']) From 7d92ed647c44ea52023d81a694621450dd8130dd Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Thu, 22 Feb 2018 17:00:58 -0800 Subject: [PATCH 6/7] Use regional endpoint for dualstack endpoints As part of this change I moved out the logic for whether to use a global endpoint for fix_s3_host() into the presigner code, which is where the special casing happens anyways. --- botocore/signers.py | 18 ++++++++++++++---- botocore/utils.py | 5 ++--- tests/functional/test_s3.py | 25 +++++++++++++++++++++---- 3 files changed, 37 insertions(+), 11 deletions(-) diff --git a/botocore/signers.py b/botocore/signers.py index fd392ce7f1..be75584966 100644 --- a/botocore/signers.py +++ b/botocore/signers.py @@ -559,7 +559,7 @@ def generate_presigned_url(self, ClientMethod, Params=None, ExpiresIn=3600, http_method = HttpMethod context = { 'is_presign_request': True, - 'partition': self.meta.partition, + 'use_global_endpoint': _should_use_global_endpoint(self), } request_signer = self._request_signer @@ -587,8 +587,6 @@ def generate_presigned_url(self, ClientMethod, Params=None, ExpiresIn=3600, prepare_request_dict( request_dict, endpoint_url=self.meta.endpoint_url, context=context) - request_dict['context']['partition'] = self.meta.partition - # Generate the presigned url. return request_signer.generate_presigned_url( request_dict=request_dict, expires_in=expires_in, @@ -690,7 +688,10 @@ def generate_presigned_post(self, Bucket, Key, Fields=None, Conditions=None, # Prepare the request dict by including the client's endpoint url. prepare_request_dict( request_dict, endpoint_url=self.meta.endpoint_url, - context={'is_presign_request': True, 'partition': self.meta.partition}, + context={ + 'is_presign_request': True, + 'use_global_endpoint': _should_use_global_endpoint(self), + }, ) # Append that the bucket name to the list of conditions. @@ -709,3 +710,12 @@ def generate_presigned_post(self, Bucket, Key, Fields=None, Conditions=None, return post_presigner.generate_presigned_post( request_dict=request_dict, fields=fields, conditions=conditions, expires_in=expires_in) + + +def _should_use_global_endpoint(client): + use_dualstack_endpoint = False + if client.meta.config.s3 is not None: + use_dualstack_endpoint = client.meta.config.s3.get( + 'use_dualstack_endpoint', False) + return (client.meta.partition == 'aws' and + not use_dualstack_endpoint) diff --git a/botocore/utils.py b/botocore/utils.py index eddbae0d68..56ab9c9433 100644 --- a/botocore/utils.py +++ b/botocore/utils.py @@ -678,9 +678,8 @@ def fix_s3_host(request, signature_version, region_name, addressing. """ - if request.context.get('is_presign_request', False): - if request.context.get('partition', '') == 'aws': - default_endpoint_url = 's3.amazonaws.com' + if request.context.get('use_global_endpoint', False): + default_endpoint_url = 's3.amazonaws.com' try: switch_to_virtual_host_style( request, signature_version, default_endpoint_url) diff --git a/tests/functional/test_s3.py b/tests/functional/test_s3.py index f80df0f351..061b5ef20f 100644 --- a/tests/functional/test_s3.py +++ b/tests/functional/test_s3.py @@ -869,10 +869,21 @@ def test_addressing_for_presigned_urls(): expected_url='https://s3.us-east-2.amazonaws.com/bucket/key') # Dualstack endpoints - yield t.case(region='us-west-2', bucket='bucket', key='key', - signature_version=None, - s3_config={'use_dualstack_endpoint': True}, - expected_url='https://bucket.s3.amazonaws.com/key') + yield t.case( + region='us-west-2', bucket='bucket', key='key', + signature_version=None, + s3_config={'use_dualstack_endpoint': True}, + expected_url='https://bucket.s3.dualstack.us-west-2.amazonaws.com/key') + yield t.case( + region='us-west-2', bucket='bucket', key='key', + signature_version='s3', + s3_config={'use_dualstack_endpoint': True}, + expected_url='https://bucket.s3.dualstack.us-west-2.amazonaws.com/key') + yield t.case( + region='us-west-2', bucket='bucket', key='key', + signature_version='s3v4', + s3_config={'use_dualstack_endpoint': True}, + expected_url='https://bucket.s3.dualstack.us-west-2.amazonaws.com/key') # Accelerate yield t.case(region='us-west-2', bucket='bucket', key='key', @@ -885,6 +896,12 @@ def test_addressing_for_presigned_urls(): signature_version=None, expected_url='https://bucket.s3.amazonaws.com/key') + # Customer provided URL results in us leaving the host untouched. + yield t.case(region='us-west-2', bucket='bucket', key='key', + signature_version=None, + customer_provided_endpoint='https://foo.com/', + expected_url='https://foo.com/bucket/key') + def _verify_presigned_url_addressing(region, bucket, key, s3_config, is_secure=True, From 61dacf46751fd8743bcf74c1033f37db7a2e3ce0 Mon Sep 17 00:00:00 2001 From: James Saryerwinnie Date: Fri, 23 Feb 2018 13:31:59 -0800 Subject: [PATCH 7/7] Add changelog entry for #1387 --- .changes/next-release/feature-s3-58965.json | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changes/next-release/feature-s3-58965.json diff --git a/.changes/next-release/feature-s3-58965.json b/.changes/next-release/feature-s3-58965.json new file mode 100644 index 0000000000..007cb711e1 --- /dev/null +++ b/.changes/next-release/feature-s3-58965.json @@ -0,0 +1,5 @@ +{ + "type": "feature", + "category": "``s3``", + "description": "Default to virtual hosted addressing regardless of signature version (boto/botocore`#1387 `__)" +}