From 25a2087fb181c13c9926a19c33311116955f87e0 Mon Sep 17 00:00:00 2001 From: "Bala.FA" Date: Tue, 10 Nov 2020 10:38:02 +0530 Subject: [PATCH] Normalize user metadata in request headers properly --- minio/api.py | 35 ++++---- minio/helpers.py | 119 +++++++++++++-------------- tests/unit/header_value_test.py | 139 -------------------------------- 3 files changed, 77 insertions(+), 216 deletions(-) delete mode 100644 tests/unit/header_value_test.py diff --git a/minio/api.py b/minio/api.py index c3f2e915b..2e8b361e4 100644 --- a/minio/api.py +++ b/minio/api.py @@ -41,6 +41,7 @@ import certifi import dateutil.parser import urllib3 +from urllib3._collections import HTTPHeaderDict from . import __title__, __version__ from .commonconfig import Tags @@ -51,12 +52,11 @@ from .definitions import BaseURL, ObjectWriteResult from .deleteobjects import DeleteError, DeleteRequest, DeleteResult from .error import InvalidResponseError, S3Error, ServerError -from .helpers import (amzprefix_user_metadata, check_bucket_name, - check_non_empty_string, check_sse, check_ssec, - get_part_info, headers_to_strings, is_amz_header, - is_supported_header, - is_valid_policy_type, makedirs, md5sum_hash, quote, - read_part_data, sha256_hash, strptime_rfc3339) +from .helpers import (check_bucket_name, check_non_empty_string, check_sse, + check_ssec, get_part_info, headers_to_strings, + is_valid_policy_type, makedirs, md5sum_hash, + normalize_headers, quote, read_part_data, sha256_hash, + strptime_rfc3339) from .legalhold import LegalHold from .lifecycleconfig import LifecycleConfig from .notificationconfig import NotificationConfig @@ -264,11 +264,18 @@ def _url_open( # pylint: disable=too-many-branches self._trace_stream.write(body.decode()) self._trace_stream.write("\n") + http_headers = HTTPHeaderDict() + for key, value in (headers or {}).items(): + if isinstance(value, (list, tuple)): + _ = [http_headers.add(key, val) for val in value] + else: + http_headers.add(key, value) + response = self._http.urlopen( method, urlunsplit(url), body=body, - headers=headers, + headers=http_headers, preload_content=preload_content, ) @@ -1136,12 +1143,9 @@ def copy_object(self, bucket_name, object_name, object_source, check_ssec(source_sse) check_sse(sse) - # Preserving the user-defined metadata in headers + headers = normalize_headers(metadata) if metadata: - headers = amzprefix_user_metadata(metadata) headers["x-amz-metadata-directive"] = "REPLACE" - else: - headers = {} if conditions: headers.update(conditions) headers.update(source_sse.copy_headers() if source_sse else {}) @@ -1290,7 +1294,7 @@ def put_object( # pylint: disable=too-many-branches,too-many-statements # Set progress bar length and object name before upload progress.set_meta(object_name=object_name, total_length=length) - headers = amzprefix_user_metadata(metadata or {}) + headers = normalize_headers(metadata) headers["Content-Type"] = content_type or "application/octet-stream" headers.update(sse.headers() if sse else {}) @@ -1475,11 +1479,6 @@ def stat_object(self, bucket_name, object_name, sse=None, version_id=None, query_params=query_params, ) - custom_metadata = { - key: value for key, value in response.headers.items() - if is_supported_header(key) or is_amz_header(key) - } - last_modified = response.getheader("last-modified") if last_modified: last_modified = dateutil.parser.parse(last_modified).timetuple() @@ -1491,7 +1490,7 @@ def stat_object(self, bucket_name, object_name, sse=None, version_id=None, etag=response.getheader("etag", "").replace('"', ""), size=int(response.getheader("content-length", "0")), content_type=response.getheader("content-type"), - metadata=custom_metadata, + metadata=response.headers, version_id=response.getheader("x-amz-version-id"), ) diff --git a/minio/helpers.py b/minio/helpers.py index 1c9780001..3694a5b31 100644 --- a/minio/helpers.py +++ b/minio/helpers.py @@ -303,65 +303,6 @@ def sha256_hash(data): return sha256sum.decode() if isinstance(sha256sum, bytes) else sha256sum -def amzprefix_user_metadata(metadata): - """ - Return a new metadata dictionary where user defined metadata keys - are prefixed by "x-amz-meta-". - """ - meta = dict() - for key, value in metadata.items(): - # Check if metadata value has US-ASCII encoding since it is - # the only one supported by HTTP headers. This will show a better - # exception message when users pass unsupported characters - # in metadata values. - try: - if isinstance(value, str): - value.encode('us-ascii') - value = ( - [str(val) for val in value] - if isinstance(value, (list, tuple)) else str(value) - ) - except UnicodeEncodeError as exc: - raise ValueError( - 'Metadata supports only US-ASCII characters.', - ) from exc - - if (is_amz_header(key) or is_supported_header(key) or - is_storageclass_header(key)): - meta[key] = value - else: - meta["X-Amz-Meta-" + key] = value - return meta - - -def is_amz_header(key): - """Returns true if amz s3 system defined metadata.""" - key = key.lower() - return (key.startswith("x-amz-meta") or key == "x-amz-acl" or - key.startswith("x-amz-server-side-encryption")) - - -def is_supported_header(key): - """Returns true if a standard supported header.""" - - # Supported headers for object. - supported_headers = [ - "cache-control", - "content-encoding", - "content-type", - "content-disposition", - "content-language", - "x-amz-website-redirect-location", - # Add more supported headers here. - ] - return key.lower() in supported_headers - - -def is_storageclass_header(key): - """Returns true if header is a storage class header.""" - return key.lower() == "x-amz-storage-class" - - def url_replace( url, scheme=None, netloc=None, path=None, query=None, fragment=None ): @@ -373,3 +314,63 @@ def url_replace( query if query is not None else url.query, fragment if fragment is not None else url.fragment, ) + + +def _metadata_to_headers(metadata): + """Convert user metadata to headers.""" + def normalize_key(key): + if not key.lower().startswith("x-amz-meta-"): + key = "X-Amz-Meta-" + key + return key + + def to_string(value): + value = str(value) + try: + value.encode("us-ascii") + except UnicodeEncodeError as exc: + raise ValueError( + ( + "unsupported metadata value {0}; " + "only US-ASCII encoded characters are supported" + ).format(value) + ) from exc + return value + + def normalize_value(values): + if not isinstance(values, (list, tuple)): + values = [values] + return [to_string(value) for value in values] + + return { + normalize_key(key): normalize_value(value) + for key, value in (metadata or {}).items() + } + + +def normalize_headers(headers): + """Normalize headers by prefixing 'X-Amz-Meta-' for user metadata.""" + headers = headers or {} + + def guess_user_metadata(key): + key = key.lower() + return not ( + key.startswith("x-amz-") or + key in [ + "cache-control", + "content-encoding", + "content-type", + "content-disposition", + "content-language", + ] + ) + + user_metadata = { + key: value for key, value in headers.items() + if guess_user_metadata(key) + } + + # Remove guessed user metadata. + _ = [headers.pop(key) for key in user_metadata] + + headers.update(_metadata_to_headers(user_metadata)) + return headers diff --git a/tests/unit/header_value_test.py b/tests/unit/header_value_test.py deleted file mode 100644 index 9777dd5a1..000000000 --- a/tests/unit/header_value_test.py +++ /dev/null @@ -1,139 +0,0 @@ -# -*- coding: utf-8 -*- -# MinIO Python Library for Amazon S3 Compatible Cloud Storage, -# (C) 2018 MinIO, Inc. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from unittest import TestCase - -from nose.tools import eq_ - -from minio.helpers import (amzprefix_user_metadata, is_amz_header, - is_storageclass_header, is_supported_header) - - -class HeaderTests(TestCase): - header_variants = { - "content-type": [ - "content-type", - "Content-Type", - "CONTENT-TYPE", - "cONTENT-tYPE", - "cOntent-TypE", - "CoNTENT-tYPe", - ], - "x-amz-meta-me": [ - "x-amz-meta-me", - "X-Amz-Meta-Me", - "X-AMZ-META-ME", - "x-aMZ-mETA-mE", - ], - "cache-control": [ - "cache-control", - "Cache-Control", - "CACHE-CONTROL", - "cACHE-cONTROL", - "CacHe-conTrol", - ], - "content-disposition": [ - "content-disposition", - "Content-Disposition", - "CONTENT-DISPOSITION", - "cONTENT-dISPOSITION", - "conTent-disPositioN", - ], - "content-language": [ - "content-language", - "Content-Language", - "CONTENT-LANGUAGE", - "conTent-Language", - ], - "x-amz-website-redirect-location": [ - "x-amz-website-redirect-location", - "X-Amz-Website-Redirect-Location", - "X-AMZ-WEBSITE-REDIRECT-LOCATION", - "x-aMZ-wEBSITE-rEDIRECT-lOCATION", - ], - "x-amz-meta-status-code": [ - "x-amz-meta-status-code", - "X-Amz-Meta-Status-Code", - "X-AMZ-META-STATUS-CODE", - "x-aMZ-mETA-sTATUS-cODE", - ], - "x-amz-server-side-encryption": [ - "x-amz-server-side-encryption", - "X-Amz-Server-Side-Encryption", - "X-AMZ-SERVER-SIDE-ENCRYPTION", - "x-aMZ-sERVER-sIDE-eNCRYPTION", - ], - "x-amz-storage-class": [ - "x-amz-storage-class", - "X-Amz-Storage-Class", - "X-AMZ-STORAGE-CLASS", - "x-aMZ-sTORAGE-cLASS", - ], - } - - def check_ok_header(self, check_fun, header): - for header_variant in self.header_variants.get(header, [header]): - eq_(check_fun(header_variant), True) - - def check_bad_header(self, check_fun, header): - for header_variant in self.header_variants.get(header, [header]): - eq_(check_fun(header_variant), False) - - def test_is_supported_header(self): - self.check_ok_header(is_supported_header, "content-type") - self.check_ok_header(is_supported_header, "cache-control") - self.check_ok_header(is_supported_header, "content-disposition") - self.check_ok_header(is_supported_header, "content-encoding") - self.check_ok_header(is_supported_header, "content-language") - self.check_ok_header(is_supported_header, - "x-amz-website-redirect-location") - - def test_is_not_supported_header(self): - self.check_bad_header(is_supported_header, "x-amz-meta-me") - - def test_is_amz_header(self): - self.check_ok_header(is_amz_header, "x-amz-meta-status-code") - self.check_ok_header(is_amz_header, "x-amz-server-side-encryption") - - def test_is_not_amz_header(self): - self.check_bad_header(is_amz_header, "X_AMZ_META-VALUE") - self.check_bad_header(is_amz_header, "content-type") - - def test_is_storageclass_header(self): - self.check_ok_header(is_storageclass_header, "x-amz-storage-class") - - def test_is_not_storageclass_header(self): - self.check_bad_header(is_storageclass_header, "x-amz-storage-classs") - - def test_amzprefix_user_metadata(self): - metadata = { - 'x-amz-meta-testing': 'values', - 'x-amz-meta-setting': 'zombies', - 'amz-meta-setting': 'zombiesddd', - 'hhh': 34, - 'u_u': 'dd', - 'y-fu-bar': 'zoo', - 'Content-Type': 'application/csv', - 'x-amz-storage-class': 'REDUCED_REDUNDANCY', - 'content-language': 'fr' - } - m = amzprefix_user_metadata(metadata) - self.assertTrue('Content-Type' in m) - self.assertTrue('content-language' in m) - - self.assertTrue('X-Amz-Meta-hhh' in m) - self.assertTrue('x-amz-storage-class' in m) - self.assertTrue('X-Amz-Meta-amz-meta-setting' in m)