Skip to content

Commit

Permalink
Merge pull request #94 from melonedo/padded-image-no-copy
Browse files Browse the repository at this point in the history
Support padded image
  • Loading branch information
MartinPulec authored Sep 26, 2024
2 parents 3e045d1 + a12554c commit c7f9751
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 13 deletions.
2 changes: 2 additions & 0 deletions libgpujpeg/gpujpeg_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,8 @@ struct gpujpeg_image_parameters {
enum gpujpeg_color_space color_space;
/// Image data sampling factor
enum gpujpeg_pixel_format pixel_format;
/// Number of padding bytes appended to each row of the raw image (0 means rows are tightly packed)
int width_padding;
};

/**
Expand Down
4 changes: 3 additions & 1 deletion src/gpujpeg_common.c
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,7 @@ gpujpeg_image_set_default_parameters(struct gpujpeg_image_parameters* param)
param->height = 0;
param->color_space = GPUJPEG_RGB;
param->pixel_format = GPUJPEG_444_U8_P012;
param->width_padding = 0;
}

struct gpujpeg_image_parameters
Expand All @@ -364,7 +365,8 @@ gpujpeg_image_parameters_equals(const struct gpujpeg_image_parameters *p1 , cons
return p1->width == p2->width &&
p1->height == p2->height &&
p1->color_space == p2->color_space &&
p1->pixel_format == p2->pixel_format;
p1->pixel_format == p2->pixel_format &&
p1->width_padding == p2->width_padding;
}

/* Documented at declaration */
Expand Down
34 changes: 23 additions & 11 deletions src/gpujpeg_preprocessor.cu
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ struct gpujpeg_preprocessor_raw_to_comp_store {
/**
 * Loads one pixel of raw image data into @p r; specialized per pixel format.
 *
 * Only the declaration is given here - the behavior comes from the explicit
 * specializations for the individual pixel formats.
 *
 * @param d_data_raw      raw image bytes to read from
 * @param image_width     image width as passed by the calling kernel
 * @param image_height    image height as passed by the calling kernel
 * @param image_position  position of the pixel in the raw data.
 *                        NOTE(review): the packed specializations shown in this
 *                        change (444, 4444, 422) name this parameter `offset`
 *                        and use it directly as an index into d_data_raw
 * @param x               the kernel passes its image_position_x here
 * @param y               the kernel passes its image_position_y here
 * @param r               output - receives the loaded sample values
 */
template<enum gpujpeg_pixel_format>
inline __device__ void raw_to_comp_load(const uint8_t* d_data_raw, int &image_width, int &image_height, int &image_position, int &x, int &y, uchar4 &r);

/**
 * Per-pixel multiplier used by the raw-to-component kernel to turn a pixel
 * position into an index into the raw byte buffer.
 *
 * Generic case: 1, i.e. the position is used unchanged. Pixel formats that
 * need a different value provide an explicit specialization.
 *
 * @return multiplier for the given pixel format (1 unless specialized)
 */
template<enum gpujpeg_pixel_format>
__device__ constexpr int
unit_size()
{
    return 1;
}

template<>
inline __device__ void raw_to_comp_load<GPUJPEG_U8>(const uint8_t* d_data_raw, int &image_width, int &image_height, int &image_position, int &x, int &y, uchar4 &r)
{
Expand Down Expand Up @@ -118,30 +121,36 @@ inline __device__ void raw_to_comp_load<GPUJPEG_420_U8_P0P1P2>(const uint8_t* d_
}

// NOTE(review): this span comes from a rendered diff, so lines of two versions
// appear together - the signature taking `image_position` and the local
// `offset = image_position * 3` line look like the pre-change form, while
// unit_size<GPUJPEG_444_U8_P012> and the signature taking `offset` look like
// the post-change form. Confirm against the actual file before editing.
template<>
inline __device__ void raw_to_comp_load<GPUJPEG_444_U8_P012>(const uint8_t* d_data_raw, int &image_width, int &image_height, int &image_position, int &x, int &y, uchar4 &r)
/// GPUJPEG_444_U8_P012 occupies 3 bytes of d_data_raw per pixel (see the three loads below)
constexpr int __device__ unit_size<GPUJPEG_444_U8_P012>() { return 3; }

/**
 * Loads one GPUJPEG_444_U8_P012 pixel: three consecutive bytes of d_data_raw
 * starting at `offset` go to r.x, r.y and r.z. r.w is not written.
 */
template<>
inline __device__ void raw_to_comp_load<GPUJPEG_444_U8_P012>(const uint8_t* d_data_raw, int &image_width, int &image_height, int &offset, int &x, int &y, uchar4 &r)
{
const unsigned int offset = image_position * 3;
// components are stored interleaved, one byte each
r.x = d_data_raw[offset];
r.y = d_data_raw[offset + 1];
r.z = d_data_raw[offset + 2];
}

// NOTE(review): this span comes from a rendered diff, so lines of two versions
// appear together - the signature taking `image_position` and the local
// `offset = image_position * 4` line look like the pre-change form, while
// unit_size<GPUJPEG_4444_U8_P0123> and the signature taking `offset` look like
// the post-change form. Confirm against the actual file before editing.
template<>
inline __device__ void raw_to_comp_load<GPUJPEG_4444_U8_P0123>(const uint8_t* d_data_raw, int &image_width, int &image_height, int &image_position, int &x, int &y, uchar4 &r)
/// GPUJPEG_4444_U8_P0123 occupies 4 bytes of d_data_raw per pixel (see the four loads below)
constexpr int __device__ unit_size<GPUJPEG_4444_U8_P0123>() { return 4; }

/**
 * Loads one GPUJPEG_4444_U8_P0123 pixel: four consecutive bytes of d_data_raw
 * starting at `offset` go to r.x, r.y, r.z and r.w.
 */
template<>
inline __device__ void raw_to_comp_load<GPUJPEG_4444_U8_P0123>(const uint8_t* d_data_raw, int &image_width, int &image_height, int &offset, int &x, int &y, uchar4 &r)
{
const unsigned int offset = image_position * 4;
// components are stored interleaved, one byte each
r.x = d_data_raw[offset];
r.y = d_data_raw[offset + 1];
r.z = d_data_raw[offset + 2];
r.w = d_data_raw[offset + 3];
}

template<>
inline __device__ void raw_to_comp_load<GPUJPEG_422_U8_P1020>(const uint8_t* d_data_raw, int &image_width, int &image_height, int &image_position, int &x, int &y, uchar4 &r)
constexpr int __device__ unit_size<GPUJPEG_422_U8_P1020>() { return 2; }

template<>
inline __device__ void raw_to_comp_load<GPUJPEG_422_U8_P1020>(const uint8_t* d_data_raw, int &image_width, int &image_height, int &offset, int &x, int &y, uchar4 &r)
{
const unsigned int offset = image_position * 2;
r.x = d_data_raw[offset + 1];
if ( image_position % 2 == 0 ) {
if ( offset % 4 == 0 ) {
r.y = d_data_raw[offset];
r.z = d_data_raw[offset + 2];
} else {
Expand All @@ -153,7 +162,7 @@ inline __device__ void raw_to_comp_load<GPUJPEG_422_U8_P1020>(const uint8_t* d_d
/**
 * Kernel - Copy raw image source data into three separated component buffers
 *
 * Function-pointer type matching the signature of
 * gpujpeg_preprocessor_raw_to_comp_kernel.
 *
 * NOTE(review): two typedef lines follow because this span comes from a
 * rendered diff - one takes `d_data_raw_end`, the other takes
 * `image_width_padding` in the same position. Confirm which one the actual
 * file contains.
 *
 * @param data                  preprocessor data passed through to the kernel
 * @param d_data_raw            raw image bytes to read from
 * @param image_width_padding   the launch site passes param_image.width_padding
 *                              here (number of bytes padded to each row)
 * @param image_width           image width
 * @param image_height          image height
 * @param width_div_mul         presumably a multiplier used together with
 *                              width_div_shift to divide by the width - TODO confirm
 * @param width_div_shift       see width_div_mul
 */
typedef void (*gpujpeg_preprocessor_encode_kernel)(struct gpujpeg_preprocessor_data data, const uint8_t* d_data_raw, const uint8_t* d_data_raw_end, int image_width, int image_height, uint32_t width_div_mul, uint32_t width_div_shift);
typedef void (*gpujpeg_preprocessor_encode_kernel)(struct gpujpeg_preprocessor_data data, const uint8_t* d_data_raw, int image_width_padding, int image_width, int image_height, uint32_t width_div_mul, uint32_t width_div_shift);

/**
* @note
Expand All @@ -171,7 +180,7 @@ template<
uint8_t s_comp4_samp_factor_h, uint8_t s_comp4_samp_factor_v
>
__global__ void
gpujpeg_preprocessor_raw_to_comp_kernel(struct gpujpeg_preprocessor_data data, const uint8_t* d_data_raw, const uint8_t* d_data_raw_end, int image_width, int image_height, uint32_t width_div_mul, uint32_t width_div_shift)
gpujpeg_preprocessor_raw_to_comp_kernel(struct gpujpeg_preprocessor_data data, const uint8_t* d_data_raw, int image_width_padding, int image_width, int image_height, uint32_t width_div_mul, uint32_t width_div_shift)
{
int x = threadIdx.x;
int gX = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x;
Expand All @@ -187,7 +196,8 @@ gpujpeg_preprocessor_raw_to_comp_kernel(struct gpujpeg_preprocessor_data data, c

// Load
uchar4 r;
raw_to_comp_load<pixel_format>(d_data_raw, image_width, image_height, image_position, image_position_x, image_position_y, r);
int offset = image_position * unit_size<pixel_format>() + image_width_padding * image_position_y;
raw_to_comp_load<pixel_format>(d_data_raw, image_width, image_height, offset, image_position_x, image_position_y, r);

// Color transform
gpujpeg_color_transform<color_space, color_space_internal>::perform(r);
Expand Down Expand Up @@ -399,7 +409,7 @@ gpujpeg_preprocessor_encode_interlaced(struct gpujpeg_encoder * encoder)
kernel<<<grid, threads, 0, encoder->stream>>>(
data,
coder->d_data_raw,
coder->d_data_raw + coder->data_raw_size,
coder->param_image.width_padding,
image_width,
image_height,
width_div_mul,
Expand Down Expand Up @@ -456,6 +466,8 @@ int
gpujpeg_preprocessor_encode(struct gpujpeg_encoder * encoder)
{
struct gpujpeg_coder * coder = &encoder->coder;
/// @todo support padding for other formats
assert(!coder->param_image.width_padding || (coder->param_image.pixel_format == GPUJPEG_444_U8_P012 && coder->preprocessor));
if (coder->preprocessor) {
return gpujpeg_preprocessor_encode_interlaced(encoder);
} else {
Expand Down
2 changes: 1 addition & 1 deletion src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ adjust_params(struct gpujpeg_parameters* param, struct gpujpeg_image_parameters*
const char* out, bool encode, const struct options* opts)
{
// if possible, read properties from file
struct gpujpeg_image_parameters file_param_image = { 0, 0, GPUJPEG_NONE, GPUJPEG_PIXFMT_NONE };
struct gpujpeg_image_parameters file_param_image = { 0, 0, GPUJPEG_NONE, GPUJPEG_PIXFMT_NONE, 0 };
const char *raw_file = encode ? in : out;
gpujpeg_image_get_properties(raw_file, &file_param_image, encode);
param_image->width = USE_IF_NOT_NULL_ELSE(param_image->width, file_param_image.width);
Expand Down

0 comments on commit c7f9751

Please sign in to comment.