Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for ROI decode on MI300 #53

Merged
merged 5 commits into from
Aug 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions samples/jpegDecode/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,5 +22,6 @@ make -j
1 for hybrid JPEG decoding using CPU and GPU HIP kernels (currently not supported)) [optional - default: 0]>
-fmt <[output format] - select rocJPEG output format for decoding, one of the [native, yuv, y, rgb, rgb_planar] [optional - default: native]>
-o <[output path] - path to an output file or a path to a directory - write decoded images to a file or directory based on selected output format [optional]>
-crop <[crop rectangle] - crop rectangle for output in a comma-separated format: left,top,right,bottom - [optional]>
-d <[device id] - specify the GPU device id for the desired device (use 0 for the first device, 1 for the second device, and so on); [optional - default: 0]>
```
1 change: 1 addition & 0 deletions samples/jpegDecodeBatched/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,6 @@ make -j
-fmt <[output format] - select rocJPEG output format for decoding, one of the [native, yuv, y, rgb, rgb_planar] [optional - default: native]>
-o <[output path] - path to an output file or a path to a directory - write decoded images to a file or directory based on selected output format [optional]>
-d <[device id] - specify the GPU device id for the desired device (use 0 for the first device, 1 for the second device, and so on) - [optional - default: 0]>
-crop <[crop rectangle] - crop rectangle for output in a comma-separated format: left,top,right,bottom - [optional]>
-b <[batch_size] - decode images from input by batches of a specified size - [optional - default: 2]>
```
1 change: 1 addition & 0 deletions samples/jpegDecodeMultiThreads/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,6 @@ make -j
-fmt <[output format] - select rocJPEG output format for decoding, one of the [native, yuv, y, rgb, rgb_planar] [optional - default: native]>
-o <[output path] - path to an output file or a path to a directory - write decoded images to a file or directory based on selected output format [optional]>
-d <[device id] - specify the GPU device id for the desired device (use 0 for the first device, 1 for the second device, and so on) [optional - default: 0]>
-crop <[crop rectangle] - crop rectangle for output in a comma-separated format: left,top,right,bottom - [optional]>
-t <[threads] - number of threads for parallel JPEG decoding [optional - default: 2]>
```
2 changes: 1 addition & 1 deletion samples/rocjpeg_samples_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -635,7 +635,7 @@ class RocJpegUtils {
" 1 for hybrid JPEG decoding using CPU and GPU HIP kernels (currently not supported)) [optional - default: 0]\n"
"-fmt [output format] - select rocJPEG output format for decoding, one of the [native, yuv, y, rgb, rgb_planar] - [optional - default: native]\n"
"-o [output path] - path to an output file or a path to an existing directory - write decoded images to a file or an existing directory based on selected output format - [optional]\n"
"-crop -crop [crop rectangle] - crop rectangle for output in a comma-separated format: left,top,right,bottom - [optional]\n"
"-crop [crop rectangle] - crop rectangle for output in a comma-separated format: left,top,right,bottom - [optional]\n"
"-d [device id] - specify the GPU device id for the desired device (use 0 for the first device, 1 for the second device, and so on) [optional - default: 0]\n";
if (show_threads) {
std::cout << "-t [threads] - number of threads for parallel JPEG decoding - [optional - default: 2]\n";
Expand Down
21 changes: 17 additions & 4 deletions src/rocjpeg_decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,21 @@ RocJpegStatus RocJpegDecoder::Decode(RocJpegStreamHandle jpeg_stream_handle, con
uint32_t roi_height;
roi_width = decode_params->crop_rectangle.right - decode_params->crop_rectangle.left;
roi_height = decode_params->crop_rectangle.bottom - decode_params->crop_rectangle.top;

if (roi_width > 0 && roi_height > 0 && roi_width <= jpeg_stream_params->picture_parameter_buffer.picture_width && roi_height <= jpeg_stream_params->picture_parameter_buffer.picture_height) {
is_roi_valid = true;
is_roi_valid = true;
}

picture_width = is_roi_valid ? roi_width : jpeg_stream_params->picture_parameter_buffer.picture_width;
picture_height = is_roi_valid ? roi_height : jpeg_stream_params->picture_parameter_buffer.picture_height;


VcnJpegSpec current_vcn_jpeg_spec = jpeg_vaapi_decoder_.GetCurrentVcnJpegSpec();
if (is_roi_valid && current_vcn_jpeg_spec.can_roi_decode) {
// Set is_roi_valid to false because in this case, the hardware handles the ROI decode and we don't
// need to calculate the roi_offset later in the following functions (e.g., CopyChannel, GetPlanarYUVOutputFormat, etc) to copy the crop rectangle
is_roi_valid = false;
}

switch (decode_params->output_format) {
case ROCJPEG_OUTPUT_NATIVE:
// Copy the native decoded output buffers from interop memory directly to the destination buffers
Expand Down Expand Up @@ -227,12 +234,18 @@ RocJpegStatus RocJpegDecoder::DecodeBatched(RocJpegStreamHandle *jpeg_streams, i
roi_height = decode_params->crop_rectangle.bottom - decode_params->crop_rectangle.top;

if (roi_width > 0 && roi_height > 0 && roi_width <= jpeg_stream_params->picture_parameter_buffer.picture_width && roi_height <= jpeg_stream_params->picture_parameter_buffer.picture_height) {
is_roi_valid = true;
is_roi_valid = true;
}

picture_width = is_roi_valid ? roi_width : jpeg_stream_params->picture_parameter_buffer.picture_width;
picture_height = is_roi_valid ? roi_height : jpeg_stream_params->picture_parameter_buffer.picture_height;

if (is_roi_valid && current_vcn_jpeg_spec.can_roi_decode) {
// Set is_roi_valid to false because in this case, the hardware handles the ROI decode and we don't need to calculate the roi_offset
// later in the following functions (e.g., CopyChannel, GetPlanarYUVOutputFormat, etc) to copy the crop rectangle
is_roi_valid = false;
}

switch (decode_params->output_format) {
case ROCJPEG_OUTPUT_NATIVE:
// Copy the native decoded output buffers from interop memory directly to the destination buffers
Expand Down
43 changes: 41 additions & 2 deletions src/rocjpeg_vaapi_decoder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -571,6 +571,26 @@ RocJpegStatus RocJpegVappiDecoder::SubmitDecode(const JpegStreamParameters *jpeg
}
}

// if the HW JPEG decoder has a built-in ROI-decode capability then fill the requested crop rectangle to the picture parameter buffer
void *picture_parameter_buffer = (void*)&jpeg_stream_params->picture_parameter_buffer;
if (current_vcn_jpeg_spec_.can_roi_decode) {
uint32_t roi_width;
uint32_t roi_height;
roi_width = decode_params->crop_rectangle.right - decode_params->crop_rectangle.left;
roi_height = decode_params->crop_rectangle.bottom - decode_params->crop_rectangle.top;
if (roi_width > 0 && roi_height > 0 && roi_width <= jpeg_stream_params->picture_parameter_buffer.picture_width && roi_height <= jpeg_stream_params->picture_parameter_buffer.picture_height) {
#if VA_CHECK_VERSION(1, 21, 0)
reinterpret_cast<VAPictureParameterBufferJPEGBaseline*>(picture_parameter_buffer)->crop_rectangle.x = decode_params->crop_rectangle.left;
reinterpret_cast<VAPictureParameterBufferJPEGBaseline*>(picture_parameter_buffer)->crop_rectangle.y = decode_params->crop_rectangle.top;
reinterpret_cast<VAPictureParameterBufferJPEGBaseline*>(picture_parameter_buffer)->crop_rectangle.width = roi_width;
reinterpret_cast<VAPictureParameterBufferJPEGBaseline*>(picture_parameter_buffer)->crop_rectangle.height = roi_height;
#else
reinterpret_cast<VAPictureParameterBufferJPEGBaseline*>(picture_parameter_buffer)->va_reserved[0] = decode_params->crop_rectangle.top << 16 | decode_params->crop_rectangle.left;
reinterpret_cast<VAPictureParameterBufferJPEGBaseline*>(picture_parameter_buffer)->va_reserved[1] = roi_height << 16 | roi_width;
#endif
}
}

uint32_t surface_pixel_format = static_cast<uint32_t>(surface_attrib.value.value.i);
RocJpegVaapiMemPoolEntry mem_pool_entry = vaapi_mem_pool_->GetEntry(surface_pixel_format, jpeg_stream_params->picture_parameter_buffer.picture_width, jpeg_stream_params->picture_parameter_buffer.picture_height, 1);
VAContextID va_context_id;
Expand All @@ -592,7 +612,7 @@ RocJpegStatus RocJpegVappiDecoder::SubmitDecode(const JpegStreamParameters *jpeg

CHECK_ROCJPEG(DestroyDataBuffers());

CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id, VAPictureParameterBufferType, sizeof(VAPictureParameterBufferJPEGBaseline), 1, (void *)&jpeg_stream_params->picture_parameter_buffer, &va_picture_parameter_buf_id_));
CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id, VAPictureParameterBufferType, sizeof(VAPictureParameterBufferJPEGBaseline), 1, picture_parameter_buffer, &va_picture_parameter_buf_id_));
CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id, VAIQMatrixBufferType, sizeof(VAIQMatrixBufferJPEGBaseline), 1, (void *)&jpeg_stream_params->quantization_matrix_buffer, &va_quantization_matrix_buf_id_));
CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id, VAHuffmanTableBufferType, sizeof(VAHuffmanTableBufferJPEGBaseline), 1, (void *)&jpeg_stream_params->huffman_table_buffer, &va_huffmantable_buf_id_));
CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id, VASliceParameterBufferType, sizeof(VASliceParameterBufferJPEGBaseline), 1, (void *)&jpeg_stream_params->slice_parameter_buffer, &va_slice_param_buf_id_));
Expand Down Expand Up @@ -680,6 +700,10 @@ RocJpegStatus RocJpegVappiDecoder::SubmitDecodeBatched(JpegStreamParameters *jpe
surface_attrib.type = VASurfaceAttribPixelFormat;
surface_attrib.flags = VA_SURFACE_ATTRIB_SETTABLE;
surface_attrib.value.type = VAGenericValueTypeInteger;
uint32_t roi_width;
uint32_t roi_height;
roi_width = decode_params->crop_rectangle.right - decode_params->crop_rectangle.left;
roi_height = decode_params->crop_rectangle.bottom - decode_params->crop_rectangle.top;

// Iterate through all entries of jpeg_stream_groups.
// Check if there is a matching entry in the memory pool.
Expand Down Expand Up @@ -715,8 +739,23 @@ RocJpegStatus RocJpegVappiDecoder::SubmitDecodeBatched(JpegStreamParameters *jpe
}

for (int idx : indices) {
// if the HW JPEG decoder has a built-in ROI-decode capability then fill the requested crop rectangle to the picture parameter buffer
void* picture_parameter_buffer = &jpeg_streams_params[idx].picture_parameter_buffer;
if (current_vcn_jpeg_spec_.can_roi_decode && roi_width > 0 && roi_height > 0 &&
roi_width <= jpeg_streams_params[idx].picture_parameter_buffer.picture_width &&
roi_height <= jpeg_streams_params[idx].picture_parameter_buffer.picture_height) {
#if VA_CHECK_VERSION(1, 21, 0)
reinterpret_cast<VAPictureParameterBufferJPEGBaseline*>(picture_parameter_buffer)->crop_rectangle.x = decode_params->crop_rectangle.left;
reinterpret_cast<VAPictureParameterBufferJPEGBaseline*>(picture_parameter_buffer)->crop_rectangle.y = decode_params->crop_rectangle.top;
reinterpret_cast<VAPictureParameterBufferJPEGBaseline*>(picture_parameter_buffer)->crop_rectangle.width = roi_width;
reinterpret_cast<VAPictureParameterBufferJPEGBaseline*>(picture_parameter_buffer)->crop_rectangle.height = roi_height;
#else
reinterpret_cast<VAPictureParameterBufferJPEGBaseline*>(picture_parameter_buffer)->va_reserved[0] = decode_params->crop_rectangle.top << 16 | decode_params->crop_rectangle.left;
reinterpret_cast<VAPictureParameterBufferJPEGBaseline*>(picture_parameter_buffer)->va_reserved[1] = roi_height << 16 | roi_width;
#endif
}
CHECK_ROCJPEG(DestroyDataBuffers());
CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id, VAPictureParameterBufferType, sizeof(VAPictureParameterBufferJPEGBaseline), 1, (void *)&jpeg_streams_params[idx].picture_parameter_buffer, &va_picture_parameter_buf_id_));
CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id, VAPictureParameterBufferType, sizeof(VAPictureParameterBufferJPEGBaseline), 1, picture_parameter_buffer, &va_picture_parameter_buf_id_));
CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id, VAIQMatrixBufferType, sizeof(VAIQMatrixBufferJPEGBaseline), 1, (void *)&jpeg_streams_params[idx].quantization_matrix_buffer, &va_quantization_matrix_buf_id_));
CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id, VAHuffmanTableBufferType, sizeof(VAHuffmanTableBufferJPEGBaseline), 1, (void *)&jpeg_streams_params[idx].huffman_table_buffer, &va_huffmantable_buf_id_));
CHECK_VAAPI(vaCreateBuffer(va_display_, va_context_id, VASliceParameterBufferType, sizeof(VASliceParameterBufferJPEGBaseline), 1, (void *)&jpeg_streams_params[idx].slice_parameter_buffer, &va_slice_param_buf_id_));
Expand Down