Skip to content

Commit

Permalink
Support full object checksum (#468)
Browse files Browse the repository at this point in the history
Co-authored-by: Michael Graeb <[email protected]>
  • Loading branch information
TingDaoK and graebm authored Dec 6, 2024
1 parent 9c1bd19 commit 337155f
Show file tree
Hide file tree
Showing 26 changed files with 730 additions and 472 deletions.
53 changes: 39 additions & 14 deletions include/aws/s3/private/s3_checksums.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,26 +11,45 @@

struct aws_s3_checksum;

/*
 * Checksum algorithms listed in priority order, highest priority first.
 * Code that needs to pick or detect an algorithm walks this list from front to back,
 * so the position of an entry is significant; do not reorder without intent.
 */
static const enum aws_s3_checksum_algorithm s_checksum_algo_priority_list[] = {
AWS_SCA_CRC64NVME,
AWS_SCA_CRC32C,
AWS_SCA_CRC32,
AWS_SCA_SHA1,
AWS_SCA_SHA256,
};
/*
 * Compile-time guard: the list must hold exactly as many entries as there are enum values in the
 * range [AWS_SCA_INIT, AWS_SCA_END], so extending the enum without extending this list breaks the build.
 */
AWS_STATIC_ASSERT(AWS_ARRAY_SIZE(s_checksum_algo_priority_list) == (AWS_SCA_END - AWS_SCA_INIT + 1));

struct aws_checksum_vtable {
void (*destroy)(struct aws_s3_checksum *checksum);
int (*update)(struct aws_s3_checksum *checksum, const struct aws_byte_cursor *buf);
int (*finalize)(struct aws_s3_checksum *checksum, struct aws_byte_buf *out, size_t truncate_to);
int (*finalize)(struct aws_s3_checksum *checksum, struct aws_byte_buf *out);
};

struct aws_s3_checksum {
struct aws_allocator *allocator;
struct aws_checksum_vtable *vtable;
void *impl;
size_t digest_size;
enum aws_s3_checksum_algorithm algorithm;
bool good;
union {
struct aws_hash *hash;
uint32_t crc_val_32bit;
uint64_t crc_val_64bit;
} impl;
};

struct checksum_config {
struct checksum_config_storage {
struct aws_allocator *allocator;
struct aws_byte_buf full_object_checksum;
bool has_full_object_checksum;

enum aws_s3_checksum_location location;
enum aws_s3_checksum_algorithm checksum_algorithm;
bool validate_response_checksum;
struct {
bool crc64nvme;
bool crc32c;
bool crc32;
bool sha1;
Expand Down Expand Up @@ -85,25 +104,26 @@ struct aws_input_stream *aws_chunk_stream_new(
* Get the size of the checksum output corresponding to the aws_s3_checksum_algorithm enum value.
*/
AWS_S3_API
size_t aws_get_digest_size_from_algorithm(enum aws_s3_checksum_algorithm algorithm);
size_t aws_get_digest_size_from_checksum_algorithm(enum aws_s3_checksum_algorithm algorithm);

/**
* Get the header name corresponding to the aws_s3_checksum_algorithm enum value.
* Get header name to use for algorithm (e.g. "x-amz-checksum-crc32")
*/
AWS_S3_API
const struct aws_byte_cursor *aws_get_http_header_name_from_algorithm(enum aws_s3_checksum_algorithm algorithm);
struct aws_byte_cursor aws_get_http_header_name_from_checksum_algorithm(enum aws_s3_checksum_algorithm algorithm);

/**
* Get the multipart upload header name corresponding to the aws_s3_checksum_algorithm enum value.
* Get algorithm's name (e.g. "CRC32"), to be used as the value of headers like `x-amz-checksum-algorithm`
*/
AWS_S3_API
const struct aws_byte_cursor *aws_get_create_mpu_header_name_from_algorithm(enum aws_s3_checksum_algorithm algorithm);
struct aws_byte_cursor aws_get_checksum_algorithm_name(enum aws_s3_checksum_algorithm algorithm);

/**
* Get the complete multipart upload name corresponding to the aws_s3_checksum_algorithm enum value.
* Get the name of the checksum algorithm as it appears in the details of an uploaded part. Refer to
* https://docs.aws.amazon.com/AmazonS3/latest/API/API_CompletedPart.html#AmazonS3-Type-CompletedPart
*/
AWS_S3_API
const struct aws_byte_cursor *aws_get_complete_mpu_name_from_algorithm(enum aws_s3_checksum_algorithm algorithm);
struct aws_byte_cursor aws_get_completed_part_name_from_checksum_algorithm(enum aws_s3_checksum_algorithm algorithm);

/**
* create a new aws_checksum corresponding to the aws_s3_checksum_algorithm enum value.
Expand All @@ -121,8 +141,7 @@ int aws_checksum_compute(
struct aws_allocator *allocator,
enum aws_s3_checksum_algorithm algorithm,
const struct aws_byte_cursor *input,
struct aws_byte_buf *output,
size_t truncate_to);
struct aws_byte_buf *output);

/**
* Cleans up and deallocates checksum.
Expand All @@ -141,9 +160,15 @@ int aws_checksum_update(struct aws_s3_checksum *checksum, const struct aws_byte_
* Allocation of output is the caller's responsibility.
*/
AWS_S3_API
int aws_checksum_finalize(struct aws_s3_checksum *checksum, struct aws_byte_buf *output, size_t truncate_to);
int aws_checksum_finalize(struct aws_s3_checksum *checksum, struct aws_byte_buf *output);

AWS_S3_API
void aws_checksum_config_storage_init(
struct aws_allocator *allocator,
struct checksum_config_storage *internal_config,
const struct aws_s3_checksum_config *config);

AWS_S3_API
void checksum_config_init(struct checksum_config *internal_config, const struct aws_s3_checksum_config *config);
void aws_checksum_config_storage_cleanup(struct checksum_config_storage *internal_config);

#endif /* AWS_S3_CHECKSUMS_H */
2 changes: 1 addition & 1 deletion include/aws/s3/private/s3_meta_request_impl.h
Original file line number Diff line number Diff line change
Expand Up @@ -272,7 +272,7 @@ struct aws_s3_meta_request {
const bool should_compute_content_md5;

/* deep copy of the checksum config. */
struct checksum_config checksum_config;
struct checksum_config_storage checksum_config;

/* checksum found in either a default get request, or in the initial head request of a multipart get */
struct aws_byte_buf meta_request_level_response_header_checksum;
Expand Down
14 changes: 5 additions & 9 deletions include/aws/s3/private/s3_request_messages.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ struct aws_byte_buf;
struct aws_byte_cursor;
struct aws_string;
struct aws_array_list;
struct checksum_config;
struct checksum_config_storage;

AWS_EXTERN_C_BEGIN

Expand Down Expand Up @@ -52,13 +52,9 @@ struct aws_input_stream *aws_s3_message_util_assign_body(
struct aws_allocator *allocator,
struct aws_byte_buf *byte_buf,
struct aws_http_message *out_message,
const struct checksum_config *checksum_config,
const struct checksum_config_storage *checksum_config,
struct aws_byte_buf *out_checksum);

/* Return true if checksum headers has been set. */
AWS_S3_API
bool aws_s3_message_util_check_checksum_header(struct aws_http_message *message);

/* Create an HTTP request for an S3 Ranged Get Object Request, using the given request as a basis */
AWS_S3_API
struct aws_http_message *aws_s3_ranged_get_object_message_new(
Expand All @@ -80,7 +76,7 @@ AWS_S3_API
struct aws_http_message *aws_s3_create_multipart_upload_message_new(
struct aws_allocator *allocator,
struct aws_http_message *base_message,
const struct checksum_config *checksum_config);
const struct checksum_config_storage *checksum_config);

/* Create an HTTP request for an S3 Put Object request, using the original request as a basis. Creates and assigns a
* body stream using the passed in buffer. If multipart is not needed, part number and upload_id can be 0 and NULL,
Expand All @@ -93,7 +89,7 @@ struct aws_http_message *aws_s3_upload_part_message_new(
uint32_t part_number,
const struct aws_string *upload_id,
bool should_compute_content_md5,
const struct checksum_config *checksum_config,
const struct checksum_config_storage *checksum_config,
struct aws_byte_buf *encoded_checksum_output);

/* Create an HTTP request for an S3 UploadPartCopy request, using the original request as a basis.
Expand All @@ -120,7 +116,7 @@ struct aws_http_message *aws_s3_complete_multipart_message_new(
struct aws_byte_buf *body_buffer,
const struct aws_string *upload_id,
const struct aws_array_list *parts,
const struct checksum_config *checksum_config);
const struct checksum_config_storage *checksum_config);

AWS_S3_API
struct aws_http_message *aws_s3_abort_multipart_upload_message_new(
Expand Down
43 changes: 8 additions & 35 deletions include/aws/s3/private/s3_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,44 +61,17 @@ extern const struct aws_byte_cursor g_request_validation_mode;
AWS_S3_API
extern const struct aws_byte_cursor g_enabled;

/**
* The checksum-algorithm header name used for CopyObject and CreateMultipartUpload
*/
AWS_S3_API
extern const struct aws_byte_cursor g_create_mpu_checksum_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_crc32c_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_crc32_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_sha1_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_sha256_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_crc32c_create_mpu_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_crc32_create_mpu_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_sha1_create_mpu_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_sha256_create_mpu_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_crc32c_complete_mpu_name;

AWS_S3_API
extern const struct aws_byte_cursor g_crc32_complete_mpu_name;

AWS_S3_API
extern const struct aws_byte_cursor g_sha1_complete_mpu_name;
extern const struct aws_byte_cursor g_checksum_algorithm_header_name;

/**
* The checksum-algorithm header name used for PutObject, UploadParts and PutObject*
*/
AWS_S3_API
extern const struct aws_byte_cursor g_sha256_complete_mpu_name;
extern const struct aws_byte_cursor g_sdk_checksum_algorithm_header_name;

AWS_S3_API
extern const struct aws_byte_cursor g_s3_client_version;
Expand Down
7 changes: 4 additions & 3 deletions include/aws/s3/s3_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,8 @@ enum aws_s3_checksum_algorithm {
AWS_SCA_CRC32,
AWS_SCA_SHA1,
AWS_SCA_SHA256,
AWS_SCA_END = AWS_SCA_SHA256,
AWS_SCA_CRC64NVME,
AWS_SCA_END = AWS_SCA_CRC64NVME,
};

enum aws_s3_checksum_location {
Expand Down Expand Up @@ -559,7 +560,7 @@ struct aws_s3_checksum_config {
/**
* The location of client added checksum header.
*
* If AWS_SCL_NONE. No request payload checksum will be calculated or added.
* If AWS_SCL_NONE. No request payload checksum will be added.
*
* If AWS_SCL_HEADER, the client will calculate the checksum and add it to the headers.
*
Expand Down Expand Up @@ -592,7 +593,7 @@ struct aws_s3_checksum_config {
*
* The list of algorithms for user to pick up when validate the checksum. Client will pick up the algorithm from the
* list with the priority based on performance, and the algorithm sent by server. The priority based on performance
* is [CRC32C, CRC32, SHA1, SHA256].
* is [CRC64NVME, CRC32C, CRC32, SHA1, SHA256].
*
* If the response checksum was validated by client, the result will indicate which algorithm was picked.
*/
Expand Down
76 changes: 73 additions & 3 deletions source/s3_auto_ranged_put.c
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,71 @@ static struct aws_s3_meta_request_vtable s_s3_auto_ranged_put_vtable = {
.pause = s_s3_auto_ranged_put_pause,
};

static int s_init_and_verify_checksum_config_from_headers(
struct checksum_config_storage *checksum_config,
const struct aws_http_message *message,
const void *log_id) {
/* Check if the checksum header was set from the message */
struct aws_http_headers *headers = aws_http_message_get_headers(message);
enum aws_s3_checksum_algorithm header_algo = AWS_SCA_NONE;
struct aws_byte_cursor header_value;
AWS_ZERO_STRUCT(header_value);

for (size_t i = 0; i < AWS_ARRAY_SIZE(s_checksum_algo_priority_list); i++) {
enum aws_s3_checksum_algorithm algorithm = s_checksum_algo_priority_list[i];
const struct aws_byte_cursor algorithm_header_name =
aws_get_http_header_name_from_checksum_algorithm(algorithm);
if (aws_http_headers_get(headers, algorithm_header_name, &header_value) == AWS_OP_SUCCESS) {
if (header_algo == AWS_SCA_NONE) {
header_algo = algorithm;
} else {
/* If there are multiple checksum headers set, it's malformed request */
AWS_LOGF_ERROR(
AWS_LS_S3_META_REQUEST,
"id=%p Could not create auto-ranged-put meta request; multiple checksum headers has been set",
log_id);
return aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
}
}
}
if (header_algo == AWS_SCA_NONE) {
/* No checksum header found, done */
return AWS_OP_SUCCESS;
}

/* Found the full object checksum from the header, check if it matches the explicit setting from config */
if (checksum_config->checksum_algorithm != AWS_SCA_NONE && checksum_config->checksum_algorithm != header_algo) {
AWS_LOGF_ERROR(
AWS_LS_S3_META_REQUEST,
"id=%p Could not create auto-ranged-put meta request; checksum config mismatch the checksum from header.",
log_id);
return aws_raise_error(AWS_ERROR_INVALID_ARGUMENT);
}
AWS_ASSERT(!checksum_config->has_full_object_checksum);

AWS_LOGF_DEBUG(
AWS_LS_S3_META_REQUEST,
"id=%p Setting the full-object checksum from header; algorithm: " PRInSTR ", value: " PRInSTR ".",
log_id,
AWS_BYTE_CURSOR_PRI(aws_get_checksum_algorithm_name(header_algo)),
AWS_BYTE_CURSOR_PRI(header_value));
/* Set algo */
checksum_config->checksum_algorithm = header_algo;
if (checksum_config->location == AWS_SCL_NONE) {
/* Set the checksum location to trailer for the parts, complete MPU will still have the checksum in the header.
* But to keep the data integrity for the parts, we need to set the checksum location to trailer to send the
* parts level checksums.
*/
checksum_config->location = AWS_SCL_TRAILER;
}

/* Set full object checksum from the header value. */
aws_byte_buf_init_copy_from_cursor(
&checksum_config->full_object_checksum, checksum_config->allocator, header_value);
checksum_config->has_full_object_checksum = true;
return AWS_OP_SUCCESS;
}

/* Allocate a new auto-ranged put meta request */
struct aws_s3_meta_request *aws_s3_meta_request_auto_ranged_put_new(
struct aws_allocator *allocator,
Expand Down Expand Up @@ -363,6 +428,11 @@ struct aws_s3_meta_request *aws_s3_meta_request_auto_ranged_put_new(
goto error_clean_up;
}

if (s_init_and_verify_checksum_config_from_headers(
&auto_ranged_put->base.checksum_config, options->message, (void *)&auto_ranged_put->base)) {
goto error_clean_up;
}

AWS_LOGF_DEBUG(
AWS_LS_S3_META_REQUEST, "id=%p Created new Auto-Ranged Put Meta Request.", (void *)&auto_ranged_put->base);

Expand Down Expand Up @@ -767,7 +837,7 @@ static int s_verify_part_matches_checksum(
}

struct aws_byte_buf checksum;
if (aws_byte_buf_init(&checksum, allocator, aws_get_digest_size_from_algorithm(algorithm))) {
if (aws_byte_buf_init(&checksum, allocator, aws_get_digest_size_from_checksum_algorithm(algorithm))) {
return AWS_OP_ERR;
}

Expand All @@ -776,14 +846,14 @@ static int s_verify_part_matches_checksum(
int return_status = AWS_OP_SUCCESS;

size_t encoded_len = 0;
if (aws_base64_compute_encoded_len(aws_get_digest_size_from_algorithm(algorithm), &encoded_len)) {
if (aws_base64_compute_encoded_len(aws_get_digest_size_from_checksum_algorithm(algorithm), &encoded_len)) {
AWS_LOGF_ERROR(
AWS_LS_S3_META_REQUEST, "Failed to resume upload. Unable to determine length of encoded checksum.");
return_status = aws_raise_error(AWS_ERROR_S3_RESUME_FAILED);
goto on_done;
}

if (aws_checksum_compute(allocator, algorithm, &body_cur, &checksum, 0)) {
if (aws_checksum_compute(allocator, algorithm, &body_cur, &checksum)) {
AWS_LOGF_ERROR(
AWS_LS_S3_META_REQUEST, "Failed to resume upload. Unable to compute checksum for the skipped part.");
return_status = aws_raise_error(AWS_ERROR_S3_RESUME_FAILED);
Expand Down
2 changes: 1 addition & 1 deletion source/s3_checksum_stream.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ static int s_finalize_checksum(struct aws_checksum_stream *impl) {
return AWS_OP_SUCCESS;
}

if (aws_checksum_finalize(impl->checksum, &impl->checksum_result, 0) != AWS_OP_SUCCESS) {
if (aws_checksum_finalize(impl->checksum, &impl->checksum_result) != AWS_OP_SUCCESS) {
AWS_LOGF_ERROR(
AWS_LS_S3_CLIENT,
"Failed to calculate checksum with error code %d (%s).",
Expand Down
Loading

0 comments on commit 337155f

Please sign in to comment.