Skip to content

Commit

Permalink
Normalize user metadata in request headers properly
Browse files Browse the repository at this point in the history
  • Loading branch information
balamurugana committed Nov 10, 2020
1 parent 1adadb8 commit 25a2087
Show file tree
Hide file tree
Showing 3 changed files with 77 additions and 216 deletions.
35 changes: 17 additions & 18 deletions minio/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
import certifi
import dateutil.parser
import urllib3
from urllib3._collections import HTTPHeaderDict

from . import __title__, __version__
from .commonconfig import Tags
Expand All @@ -51,12 +52,11 @@
from .definitions import BaseURL, ObjectWriteResult
from .deleteobjects import DeleteError, DeleteRequest, DeleteResult
from .error import InvalidResponseError, S3Error, ServerError
from .helpers import (amzprefix_user_metadata, check_bucket_name,
check_non_empty_string, check_sse, check_ssec,
get_part_info, headers_to_strings, is_amz_header,
is_supported_header,
is_valid_policy_type, makedirs, md5sum_hash, quote,
read_part_data, sha256_hash, strptime_rfc3339)
from .helpers import (check_bucket_name, check_non_empty_string, check_sse,
check_ssec, get_part_info, headers_to_strings,
is_valid_policy_type, makedirs, md5sum_hash,
normalize_headers, quote, read_part_data, sha256_hash,
strptime_rfc3339)
from .legalhold import LegalHold
from .lifecycleconfig import LifecycleConfig
from .notificationconfig import NotificationConfig
Expand Down Expand Up @@ -264,11 +264,18 @@ def _url_open( # pylint: disable=too-many-branches
self._trace_stream.write(body.decode())
self._trace_stream.write("\n")

http_headers = HTTPHeaderDict()
for key, value in (headers or {}).items():
if isinstance(value, (list, tuple)):
_ = [http_headers.add(key, val) for val in value]
else:
http_headers.add(key, value)

response = self._http.urlopen(
method,
urlunsplit(url),
body=body,
headers=headers,
headers=http_headers,
preload_content=preload_content,
)

Expand Down Expand Up @@ -1136,12 +1143,9 @@ def copy_object(self, bucket_name, object_name, object_source,
check_ssec(source_sse)
check_sse(sse)

# Preserving the user-defined metadata in headers
headers = normalize_headers(metadata)
if metadata:
headers = amzprefix_user_metadata(metadata)
headers["x-amz-metadata-directive"] = "REPLACE"
else:
headers = {}
if conditions:
headers.update(conditions)
headers.update(source_sse.copy_headers() if source_sse else {})
Expand Down Expand Up @@ -1290,7 +1294,7 @@ def put_object( # pylint: disable=too-many-branches,too-many-statements
# Set progress bar length and object name before upload
progress.set_meta(object_name=object_name, total_length=length)

headers = amzprefix_user_metadata(metadata or {})
headers = normalize_headers(metadata)
headers["Content-Type"] = content_type or "application/octet-stream"
headers.update(sse.headers() if sse else {})

Expand Down Expand Up @@ -1475,11 +1479,6 @@ def stat_object(self, bucket_name, object_name, sse=None, version_id=None,
query_params=query_params,
)

custom_metadata = {
key: value for key, value in response.headers.items()
if is_supported_header(key) or is_amz_header(key)
}

last_modified = response.getheader("last-modified")
if last_modified:
last_modified = dateutil.parser.parse(last_modified).timetuple()
Expand All @@ -1491,7 +1490,7 @@ def stat_object(self, bucket_name, object_name, sse=None, version_id=None,
etag=response.getheader("etag", "").replace('"', ""),
size=int(response.getheader("content-length", "0")),
content_type=response.getheader("content-type"),
metadata=custom_metadata,
metadata=response.headers,
version_id=response.getheader("x-amz-version-id"),
)

Expand Down
119 changes: 60 additions & 59 deletions minio/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -303,65 +303,6 @@ def sha256_hash(data):
return sha256sum.decode() if isinstance(sha256sum, bytes) else sha256sum


def amzprefix_user_metadata(metadata):
    """
    Return a new metadata dictionary where user defined metadata keys
    are prefixed by "X-Amz-Meta-".

    Keys recognised by is_amz_header(), is_supported_header() or
    is_storageclass_header() are kept as-is; every other key gets the
    "X-Amz-Meta-" prefix. List/tuple values become a list of strings,
    any other value becomes a single string.

    :param metadata: Mapping of header/metadata name to value(s).
    :return: New dictionary; the passed mapping is not modified.
    :raises ValueError: If any value (including each element of a
        list/tuple value) contains non US-ASCII characters.
    """
    def to_ascii_string(value):
        # HTTP header values only support US-ASCII. Validate the
        # stringified form so that list/tuple elements and non-str
        # objects are checked too, giving users a clear error message.
        text = str(value)
        try:
            text.encode('us-ascii')
        except UnicodeEncodeError as exc:
            raise ValueError(
                'Metadata supports only US-ASCII characters.',
            ) from exc
        return text

    meta = {}
    for key, value in metadata.items():
        if isinstance(value, (list, tuple)):
            value = [to_ascii_string(val) for val in value]
        else:
            value = to_ascii_string(value)

        if (is_amz_header(key) or is_supported_header(key) or
                is_storageclass_header(key)):
            meta[key] = value
        else:
            meta["X-Amz-Meta-" + key] = value
    return meta


def is_amz_header(key):
    """
    Tell whether ``key`` is an amz S3 system defined metadata header.

    The comparison is done on the lower-cased key. A key matches when it
    is exactly "x-amz-acl" or starts with "x-amz-meta" or
    "x-amz-server-side-encryption".
    """
    lowered = key.lower()
    if lowered == "x-amz-acl":
        return True
    return lowered.startswith(
        ("x-amz-meta", "x-amz-server-side-encryption"),
    )


def is_supported_header(key):
    """
    Tell whether ``key`` is one of the standard supported object headers.

    The key is lower-cased before it is compared against the list.
    """
    # Supported headers for object. Add more supported headers here.
    return key.lower() in (
        "cache-control",
        "content-encoding",
        "content-type",
        "content-disposition",
        "content-language",
        "x-amz-website-redirect-location",
    )


def is_storageclass_header(key):
    """Tell whether the lower-cased ``key`` is "x-amz-storage-class"."""
    lowered = key.lower()
    return lowered == "x-amz-storage-class"


def url_replace(
url, scheme=None, netloc=None, path=None, query=None, fragment=None
):
Expand All @@ -373,3 +314,63 @@ def url_replace(
query if query is not None else url.query,
fragment if fragment is not None else url.fragment,
)


def _metadata_to_headers(metadata):
"""Convert user metadata to headers."""
def normalize_key(key):
if not key.lower().startswith("x-amz-meta-"):
key = "X-Amz-Meta-" + key
return key

def to_string(value):
value = str(value)
try:
value.encode("us-ascii")
except UnicodeEncodeError as exc:
raise ValueError(
(
"unsupported metadata value {0}; "
"only US-ASCII encoded characters are supported"
).format(value)
) from exc
return value

def normalize_value(values):
if not isinstance(values, (list, tuple)):
values = [values]
return [to_string(value) for value in values]

return {
normalize_key(key): normalize_value(value)
for key, value in (metadata or {}).items()
}


def normalize_headers(headers):
    """
    Normalize headers by prefixing 'X-Amz-Meta-' for user metadata.

    A key is guessed to be user metadata when, lower-cased, it neither
    starts with "x-amz-" nor is one of the standard headers listed
    below. Guessed user metadata is converted via _metadata_to_headers();
    all other entries are carried over unchanged.

    :param headers: Mapping of header name to value(s); a falsy argument
        is treated as an empty mapping.
    :return: A new dictionary. The passed mapping is never modified, so
        callers can safely hand in a user-supplied metadata dict.
    :raises ValueError: Propagated from _metadata_to_headers() when a
        user metadata value is not US-ASCII encodable.
    """
    standard_headers = (
        "cache-control",
        "content-encoding",
        "content-type",
        "content-disposition",
        "content-language",
    )

    def guess_user_metadata(key):
        key = key.lower()
        return not (key.startswith("x-amz-") or key in standard_headers)

    # Split into two fresh dicts instead of popping from/updating the
    # argument, which would mutate the caller's dictionary.
    normalized = {}
    user_metadata = {}
    for key, value in (headers or {}).items():
        if guess_user_metadata(key):
            user_metadata[key] = value
        else:
            normalized[key] = value

    # Converted user metadata goes last, so on a key clash it overrides
    # an identically named pass-through header.
    if user_metadata:
        normalized.update(_metadata_to_headers(user_metadata))
    return normalized
139 changes: 0 additions & 139 deletions tests/unit/header_value_test.py

This file was deleted.

0 comments on commit 25a2087

Please sign in to comment.