From d7e8b32a5f4ddea875f8e1352582f634b0adf239 Mon Sep 17 00:00:00 2001 From: Antoine Martin Date: Fri, 27 Sep 2013 09:28:48 +0000 Subject: [PATCH] the encoder width/height must also be padded to 32 git-svn-id: https://xpra.org/svn/Xpra/trunk@4375 3bb7dfac-3a0b-4e04-842a-767bc560f471 --- src/xpra/codecs/nvenc/encoder.pyx | 36 ++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/src/xpra/codecs/nvenc/encoder.pyx b/src/xpra/codecs/nvenc/encoder.pyx index c0f58bb000..db1dca5bf6 100644 --- a/src/xpra/codecs/nvenc/encoder.pyx +++ b/src/xpra/codecs/nvenc/encoder.pyx @@ -1160,6 +1160,8 @@ cdef raiseNVENC(NVENCSTATUS ret, msg=""): cdef class Encoder: cdef int width cdef int height + cdef int encoder_width + cdef int encoder_height cdef object src_format cdef CUcontext cuda_context cdef NV_ENCODE_API_FUNCTION_LIST functionList #@DuplicatedSignature @@ -1197,6 +1199,8 @@ cdef class Encoder: debug("init_context%s", (width, height, src_format, encoding, quality, speed, options)) self.width = width self.height = height + self.encoder_width = roundup(width, 32) + self.encoder_height = roundup(height, 32) self.src_format = src_format self.codec_name = "H264" self.preset_name = None @@ -1246,10 +1250,10 @@ cdef class Encoder: params.version = NV_ENC_INITIALIZE_PARAMS_VER params.encodeGUID = codec #ie: NV_ENC_CODEC_H264_GUID params.presetGUID = preset - params.encodeWidth = self.width - params.encodeHeight = self.height - params.darWidth = self.width - params.darHeight = self.height + params.encodeWidth = self.encoder_width + params.encodeHeight = self.encoder_height + params.darWidth = self.encoder_width + params.darHeight = self.encoder_height params.enableEncodeAsync = 0 #not supported on Linux params.enablePTD = 0 #not supported in sync mode!? params.encodeConfig = &presetConfig.presetCfg @@ -1259,8 +1263,8 @@ cdef class Encoder: #allocate input buffer: memset(&createInputBufferParams, 0, sizeof(NV_ENC_CREATE_INPUT_BUFFER)) createInputBufferParams.version = NV_ENC_CREATE_INPUT_BUFFER_VER - createInputBufferParams.width = roundup(self.width, 32) - createInputBufferParams.height = roundup(self.height, 32) + createInputBufferParams.width = self.encoder_width + createInputBufferParams.height = self.encoder_height createInputBufferParams.memoryHeap = NV_ENC_MEMORY_HEAP_SYSMEM_UNCACHED #NV_ENC_MEMORY_HEAP_AUTOSELECT createInputBufferParams.bufferFmt = self.bufferFmt raiseNVENC(self.functionList.nvEncCreateInputBuffer(self.context, &createInputBufferParams), "creating input buffer") @@ -1282,6 +1286,8 @@ cdef class Encoder: cdef float pps info = {"width" : self.width, "height" : self.height, + "encoder_width" : self.encoder_width, + "encoder_height" : self.encoder_height, "src_format": self.src_format} return info @@ -1359,13 +1365,17 @@ cdef class Encoder: cdef long offset = 0 cdef input_buf_len = 0 cdef int x, y, stride, Yheight + cdef int w, h start = time.time() debug("compress_image(%s, %s)", image, options) assert self.context!=NULL, "context is not initialized" assert image.get_planes()==ImageWrapper._3_PLANES + assert image.get_width()<=self.encoder_width, "invalid width: %s" % image.get_width() pixels = image.get_pixels() strides = image.get_rowstride() + w = image.get_width() + h = image.get_height() debug("compress_image(..) pixels=%s", type(pixels)) size = len(pixels) @@ -1381,21 +1391,21 @@ cdef class Encoder: debug("input buffer locked, inputBufferPtr=%s, pitch=%s", hex( self.inputBufferPtr), lockInputBuffer.pitch) #copy to input buffer: - Yheight = roundup(self.height, 8) + Yheight = roundup(self.height, 32) memset(self.inputBufferPtr, 0, lockInputBuffer.pitch * Yheight * 3/2) #copy luma: assert PyObject_AsReadBuffer(pixels[0], &Y, &Y_len)==0 assert PyObject_AsReadBuffer(pixels[1], &Cb, &Cb_len)==0 assert PyObject_AsReadBuffer(pixels[2], &Cr, &Cr_len)==0 stride = strides[0] - for y in range(self.height): - memcpy(self.inputBufferPtr + y*lockInputBuffer.pitch, Y + stride*y, self.width) + for y in range(h): + memcpy(self.inputBufferPtr + y*lockInputBuffer.pitch, Y + stride*y, w) #copy chroma packed: assert strides[1]==strides[2], "U and V strides differ: %s vs %s" % (strides[1], strides[2]) stride = strides[1] - for y in range(self.height/2): + for y in range(h/2): offset = (Yheight + y) * lockInputBuffer.pitch - for x in range(self.width/2): + for x in range(w/2): ( self.inputBufferPtr)[offset + (x*2)] = ( Cb)[stride*y + x] ( self.inputBufferPtr)[offset + (x*2)+1] = ( Cr)[stride*y + x] finally: @@ -1409,8 +1419,8 @@ cdef class Encoder: picParams.version = NV_ENC_PIC_PARAMS_VER picParams.bufferFmt = self.bufferFmt picParams.pictureStruct = NV_ENC_PIC_STRUCT_FRAME - picParams.inputWidth = self.width - picParams.inputHeight = self.height + picParams.inputWidth = self.encoder_width + picParams.inputHeight = self.encoder_height picParams.inputPitch = lockInputBuffer.pitch picParams.inputBuffer = self.inputBuffer picParams.outputBitstream = self.bitstreamBuffer