From a12554c583020f021b2df36ea95f4ad63de2be20 Mon Sep 17 00:00:00 2001 From: melonedo <44501064+melonedo@users.noreply.github.com> Date: Tue, 24 Sep 2024 21:48:53 +0800 Subject: [PATCH] Support encoding for padded 444_U8_P012 images --- libgpujpeg/gpujpeg_common.h | 2 ++ src/gpujpeg_common.c | 4 +++- src/gpujpeg_preprocessor.cu | 34 +++++++++++++++++++++++----------- src/main.c | 2 +- 4 files changed, 29 insertions(+), 13 deletions(-) diff --git a/libgpujpeg/gpujpeg_common.h b/libgpujpeg/gpujpeg_common.h index 8f006efc..1cc120e8 100644 --- a/libgpujpeg/gpujpeg_common.h +++ b/libgpujpeg/gpujpeg_common.h @@ -275,6 +275,8 @@ struct gpujpeg_image_parameters { enum gpujpeg_color_space color_space; /// Image data sampling factor enum gpujpeg_pixel_format pixel_format; + /// Number of bytes padded to each row + int width_padding; }; /** diff --git a/src/gpujpeg_common.c b/src/gpujpeg_common.c index 44b55fc9..51070ed5 100644 --- a/src/gpujpeg_common.c +++ b/src/gpujpeg_common.c @@ -343,6 +343,7 @@ gpujpeg_image_set_default_parameters(struct gpujpeg_image_parameters* param) param->height = 0; param->color_space = GPUJPEG_RGB; param->pixel_format = GPUJPEG_444_U8_P012; + param->width_padding = 0; } struct gpujpeg_image_parameters @@ -362,7 +363,8 @@ gpujpeg_image_parameters_equals(const struct gpujpeg_image_parameters *p1 , cons return p1->width == p2->width && p1->height == p2->height && p1->color_space == p2->color_space && - p1->pixel_format == p2->pixel_format; + p1->pixel_format == p2->pixel_format && + p1->width_padding == p2->width_padding; } /* Documented at declaration */ diff --git a/src/gpujpeg_preprocessor.cu b/src/gpujpeg_preprocessor.cu index a39b799d..2cbfeca8 100644 --- a/src/gpujpeg_preprocessor.cu +++ b/src/gpujpeg_preprocessor.cu @@ -85,6 +85,9 @@ struct gpujpeg_preprocessor_raw_to_comp_store { template inline __device__ void raw_to_comp_load(const uint8_t* d_data_raw, int &image_width, int &image_height, int &image_position, int &x, int &y, uchar4 &r); +template +constexpr int __device__ unit_size() { return 1; } + template<> inline __device__ void raw_to_comp_load(const uint8_t* d_data_raw, int &image_width, int &image_height, int &image_position, int &x, int &y, uchar4 &r) { @@ -118,18 +121,22 @@ inline __device__ void raw_to_comp_load(const uint8_t* d_ } template<> -inline __device__ void raw_to_comp_load(const uint8_t* d_data_raw, int &image_width, int &image_height, int &image_position, int &x, int &y, uchar4 &r) +constexpr int __device__ unit_size() { return 3; } + +template<> +inline __device__ void raw_to_comp_load(const uint8_t* d_data_raw, int &image_width, int &image_height, int &offset, int &x, int &y, uchar4 &r) { - const unsigned int offset = image_position * 3; r.x = d_data_raw[offset]; r.y = d_data_raw[offset + 1]; r.z = d_data_raw[offset + 2]; } template<> -inline __device__ void raw_to_comp_load(const uint8_t* d_data_raw, int &image_width, int &image_height, int &image_position, int &x, int &y, uchar4 &r) +constexpr int __device__ unit_size() { return 4; } + +template<> +inline __device__ void raw_to_comp_load(const uint8_t* d_data_raw, int &image_width, int &image_height, int &offset, int &x, int &y, uchar4 &r) { - const unsigned int offset = image_position * 4; r.x = d_data_raw[offset]; r.y = d_data_raw[offset + 1]; r.z = d_data_raw[offset + 2]; @@ -137,11 +144,13 @@ inline __device__ void raw_to_comp_load(const uint8_t* d_ } template<> -inline __device__ void raw_to_comp_load(const uint8_t* d_data_raw, int &image_width, int &image_height, int &image_position, int &x, int &y, uchar4 &r) +constexpr int __device__ unit_size() { return 2; } + +template<> +inline __device__ void raw_to_comp_load(const uint8_t* d_data_raw, int &image_width, int &image_height, int &offset, int &x, int &y, uchar4 &r) { - const unsigned int offset = image_position * 2; r.x = d_data_raw[offset + 1]; - if ( image_position % 2 == 0 ) { + if ( offset % 4 == 0 ) { r.y = d_data_raw[offset]; r.z = d_data_raw[offset + 2]; } else { @@ -153,7 +162,7 @@ inline __device__ void raw_to_comp_load(const uint8_t* d_d /** * Kernel - Copy raw image source data into three separated component buffers */ -typedef void (*gpujpeg_preprocessor_encode_kernel)(struct gpujpeg_preprocessor_data data, const uint8_t* d_data_raw, const uint8_t* d_data_raw_end, int image_width, int image_height, uint32_t width_div_mul, uint32_t width_div_shift); +typedef void (*gpujpeg_preprocessor_encode_kernel)(struct gpujpeg_preprocessor_data data, const uint8_t* d_data_raw, int image_width_padding, int image_width, int image_height, uint32_t width_div_mul, uint32_t width_div_shift); /** * @note @@ -171,7 +180,7 @@ template< uint8_t s_comp4_samp_factor_h, uint8_t s_comp4_samp_factor_v > __global__ void -gpujpeg_preprocessor_raw_to_comp_kernel(struct gpujpeg_preprocessor_data data, const uint8_t* d_data_raw, const uint8_t* d_data_raw_end, int image_width, int image_height, uint32_t width_div_mul, uint32_t width_div_shift) +gpujpeg_preprocessor_raw_to_comp_kernel(struct gpujpeg_preprocessor_data data, const uint8_t* d_data_raw, int image_width_padding, int image_width, int image_height, uint32_t width_div_mul, uint32_t width_div_shift) { int x = threadIdx.x; int gX = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x; @@ -187,7 +196,8 @@ gpujpeg_preprocessor_raw_to_comp_kernel(struct gpujpeg_preprocessor_data data, c // Load uchar4 r; - raw_to_comp_load(d_data_raw, image_width, image_height, image_position, image_position_x, image_position_y, r); + int offset = image_position * unit_size() + image_width_padding * image_position_y; + raw_to_comp_load(d_data_raw, image_width, image_height, offset, image_position_x, image_position_y, r); // Color transform gpujpeg_color_transform::perform(r); @@ -399,7 +409,7 @@ gpujpeg_preprocessor_encode_interlaced(struct gpujpeg_encoder * encoder) kernel<<stream>>>( data, coder->d_data_raw, - coder->d_data_raw + coder->data_raw_size, + coder->param_image.width_padding, image_width, image_height, width_div_mul, @@ -456,6 +466,8 @@ int gpujpeg_preprocessor_encode(struct gpujpeg_encoder * encoder) { struct gpujpeg_coder * coder = &encoder->coder; + /// @todo support padding for other formats + assert(!coder->param_image.width_padding || (coder->param_image.pixel_format == GPUJPEG_444_U8_P012 && coder->preprocessor)); if (coder->preprocessor) { return gpujpeg_preprocessor_encode_interlaced(encoder); } else { diff --git a/src/main.c b/src/main.c index 8dc10a1c..a2820342 100644 --- a/src/main.c +++ b/src/main.c @@ -173,7 +173,7 @@ adjust_params(struct gpujpeg_parameters* param, struct gpujpeg_image_parameters* const char* out, bool encode, const struct options* opts) { // if possible, read properties from file - struct gpujpeg_image_parameters file_param_image = { 0, 0, GPUJPEG_NONE, GPUJPEG_PIXFMT_NONE }; + struct gpujpeg_image_parameters file_param_image = { 0, 0, GPUJPEG_NONE, GPUJPEG_PIXFMT_NONE, 0 }; const char *raw_file = encode ? in : out; gpujpeg_image_get_properties(raw_file, &file_param_image, encode); param_image->width = USE_IF_NOT_NULL_ELSE(param_image->width, file_param_image.width);