#2828 / 1462: we are better off using r210-RGB to paint with opengl b…

…ecause GBRP10 would need bit shifting to GBRP16 in order to be used with opengl 'SHORT' upload format - which takes up more space than r210 anyway git-svn-id: https://xpra.org/svn/Xpra/trunk@26960 3bb7dfac-3a0b-4e04-842a-767bc560f471
Xpra-org · Jul 12, 2020 · 1a6f840 · 1a6f840
1 parent 6e3103f
commit 1a6f840
Show file tree

Hide file tree

Showing 2 changed files with 93 additions and 16 deletions.
diff --git a/src/xpra/client/gl/gl_window_backing_base.py b/src/xpra/client/gl/gl_window_backing_base.py
@@ -21,7 +21,6 @@
     GL_DONT_CARE, GL_TRUE, GL_DEPTH_TEST, GL_SCISSOR_TEST, GL_LIGHTING, GL_DITHER,
     GL_RGB, GL_RGBA, GL_BGR, GL_BGRA, GL_RGBA8, GL_RGB8, GL_RGB10_A2, GL_RGB565, GL_RGB5_A1, GL_RGBA4, GL_RGBA16,
     GL_UNSIGNED_INT_2_10_10_10_REV, GL_UNSIGNED_INT_10_10_10_2, GL_UNSIGNED_SHORT_5_6_5,
-    GL_UNPACK_SWAP_BYTES,
     GL_BLEND, GL_ONE, GL_ONE_MINUS_SRC_ALPHA, GL_SRC_ALPHA,
     GL_TEXTURE_MAX_LEVEL, GL_TEXTURE_BASE_LEVEL,
     GL_PERSPECTIVE_CORRECTION_HINT, GL_FASTEST,
@@ -124,7 +123,7 @@
     "YUV422P" : GL_UNSIGNED_BYTE,
     "YUV444P" : GL_UNSIGNED_BYTE,
     "GBRP"  : GL_UNSIGNED_BYTE,
-    "GBRP10" : GL_UNSIGNED_SHORT,
+    "GBRP16" : GL_UNSIGNED_SHORT,
     }
 CONSTANT_TO_PIXEL_FORMAT = {
     GL_BGR   : "BGR",
@@ -303,11 +302,11 @@ def init_formats(self):
         if self.bit_depth>32:
             self.internal_format = GL_RGBA16
             self.RGB_MODES.append("r210")
-            self.RGB_MODES.append("GBRP10")
+            #self.RGB_MODES.append("GBRP16")
         elif self.bit_depth==30:
             self.internal_format = GL_RGB10_A2
             self.RGB_MODES.append("r210")
-            self.RGB_MODES.append("GBRP10")
+            #self.RGB_MODES.append("GBRP16")
         elif 0<self.bit_depth<=16:
             if self._alpha_enabled:
                 if envbool("XPRA_GL_RGBA4", True):
@@ -1112,6 +1111,10 @@ def do_video_paint(self, img, x : int, y : int, enc_width : int, enc_height : in
             #copy so the data will be usable (usually a str)
             img.clone_pixel_data()
         pixel_format = img.get_pixel_format()
+        if pixel_format=="GBRP10":
+            #call superclass to handle csc
+            #which will end up calling paint rgb with r210 data
+            return super().do_video_paint(img, x, y, enc_width, enc_height, width, height, options, callbacks)
         if pixel_format.startswith("GBRP"):
             shader = RGBP2RGB_SHADER
         else:
@@ -1127,7 +1130,7 @@ def gl_paint_planar(self, shader, flush, encoding, img,
         x, y = self.gravity_adjust(x, y, options)
         try:
             pixel_format = img.get_pixel_format()
-            assert pixel_format in ("YUV420P", "YUV422P", "YUV444P", "GBRP", "GBRP10"), \
+            assert pixel_format in ("YUV420P", "YUV422P", "YUV444P", "GBRP", "GBRP16"), \
                 "sorry the GL backing does not handle pixel format '%s' yet!" % (pixel_format)
 
             context = self.gl_context()
@@ -1192,8 +1195,7 @@ def update_planar_textures(self, width : int, height : int, img, pixel_format, s
         self.gl_marker("updating planar textures: %sx%s %s", width, height, pixel_format)
         rowstrides = img.get_rowstride()
         img_data = img.get_pixels()
-        BPP = 2 if pixel_format.endswith("P10") else 1
-        glPixelStorei(GL_UNPACK_SWAP_BYTES, BPP==2)
+        BPP = 2 if pixel_format.endswith("P16") else 1
         assert len(rowstrides)==3 and len(img_data)==3
         for texture, index, tex_name in (
             (GL_TEXTURE0, TEX_Y, pixel_format[0:1]*BPP),
@@ -1223,12 +1225,11 @@ def update_planar_textures(self, width : int, height : int, img, pixel_format, s
             glTexSubImage2D(target, 0, 0, 0, w, h, GL_LUMINANCE, upload_format, pixel_data)
             glBindTexture(target, 0)
         #glActiveTexture(GL_TEXTURE0)    #redundant, we always call render_planar_update afterwards
-        glPixelStorei(GL_UNPACK_SWAP_BYTES, False)
 
     def render_planar_update(self, rx : int, ry : int, rw : int, rh : int, x_scale=1, y_scale=1, shader=YUV2RGB_SHADER):
         log("%s.render_planar_update%s pixel_format=%s",
             self, (rx, ry, rw, rh, x_scale, y_scale, shader), self.pixel_format)
-        if self.pixel_format not in ("YUV420P", "YUV422P", "YUV444P", "GBRP", "GBRP10"):
+        if self.pixel_format not in ("YUV420P", "YUV422P", "YUV444P", "GBRP", "GBRP16"):
             #not ready to render yet
             return
         self.gl_marker("painting planar update, format %s", self.pixel_format)

diff --git a/src/xpra/codecs/csc_cython/colorspace_converter.pyx b/src/xpra/codecs/csc_cython/colorspace_converter.pyx
@@ -71,6 +71,7 @@ COLORSPACES = {
                "YUV420P"    : get_CS("YUV420P", ["RGB", "BGR", "RGBX", "BGRX"]),
                "GBRP"       : get_CS("GBRP",    ["RGBX", "BGRX"]),
                "r210"       : get_CS("r210",    ["YUV420P", "BGR48"]),
+               "GBRP10"     : get_CS("GBRP10",  ["r210", ]),
                }
 
 
@@ -102,10 +103,12 @@ def get_output_colorspaces(input_colorspace):
 def get_spec(in_colorspace, out_colorspace):
     assert in_colorspace in COLORSPACES, "invalid input colorspace: %s (must be one of %s)" % (in_colorspace, get_input_colorspaces())
     assert out_colorspace in COLORSPACES.get(in_colorspace), "invalid output colorspace: %s (must be one of %s)" % (out_colorspace, get_output_colorspaces(in_colorspace))
-    #low score as this should be used as fallback only:
     can_scale = True
     if in_colorspace=="r210" and out_colorspace=="BGR48":
         can_scale = False
+    elif in_colorspace=="GBRP10" and out_colorspace=="r210":
+        can_scale = False
+    #low score as this should be used as fallback only:
     return csc_spec(in_colorspace, out_colorspace,
                     ColorspaceConverter, codec_type=get_type(),
                     quality=50, speed=10, setup_cost=10, min_w=2, min_h=2, max_w=16*1024, max_h=16*1024, can_scale=can_scale)
@@ -193,6 +196,7 @@ cdef inline unsigned char clamp(const long v) nogil:
 cdef inline void r210_to_BGR48_copy(unsigned short *bgr48, const unsigned int *r210,
                                     unsigned int w, unsigned int h,
                                     unsigned int src_stride, unsigned int dst_stride) nogil:
+    assert (dst_stride%2)==0
     cdef unsigned int y = 0
     cdef unsigned int i = 0
     cdef unsigned int v
@@ -203,9 +207,25 @@ cdef inline void r210_to_BGR48_copy(unsigned short *bgr48, const unsigned int *r
             bgr48[i] = v&0x000003ff
             bgr48[i+1] = (v&0x000ffc00) >> 10
             bgr48[i+2] = (v&0x3ff00000) >> 20
-            i = i + 3
+            i += 3
         r210 = <unsigned int*> ((<uintptr_t> r210) + src_stride)
 
+cdef inline void gbrp10_to_r210_copy(uintptr_t r210, uintptr_t[3] gbrp10,
+                                     unsigned int width, unsigned int height,
+                                     unsigned int src_stride, unsigned int dst_stride) nogil:
+    cdef unsigned int x, y
+    cdef unsigned short *b
+    cdef unsigned short *g
+    cdef unsigned short *r
+    cdef unsigned int *dst
+    for y in range(height):
+        dst = <unsigned int*> (r210 + y*dst_stride)
+        g = <unsigned short*> ((<uintptr_t> gbrp10[0]) + y*src_stride)
+        b = <unsigned short*> ((<uintptr_t> gbrp10[1]) + y*src_stride)
+        r = <unsigned short*> ((<uintptr_t> gbrp10[2]) + y*src_stride)
+        for x in range(width):
+            dst[x] = (b[x] & 0x3ff) + ((g[x] & 0x3ff)<<10) + ((r[x] & 0x3ff)<<20)
+
 
 cdef class ColorspaceConverter:
     cdef unsigned int src_width
@@ -248,6 +268,9 @@ cdef class ColorspaceConverter:
             self.dst_sizes[i]   = 0
             self.offsets[i]     = 0
 
+        def assert_no_scaling():
+            assert src_width==dst_width and src_height==dst_height, "scaling is not supported for %s to %s" % (src_format, dst_format)
+
         if src_format in ("BGRX", "RGBX", "RGB", "BGR", "r210") and dst_format=="YUV420P":
             self.dst_strides[0] = roundup(self.dst_width,   STRIDE_ROUNDUP)
             self.dst_strides[1] = roundup(self.dst_width/2, STRIDE_ROUNDUP)
@@ -273,12 +296,17 @@ cdef class ColorspaceConverter:
                 assert src_format=="r210"
                 self.convert_image_function = self.r210_to_YUV420P
         elif src_format=="r210" and dst_format=="BGR48":
+            assert_no_scaling()
             self.dst_strides[0] = roundup(self.dst_width*6, STRIDE_ROUNDUP)
             self.dst_sizes[0] = self.dst_strides[0] * self.dst_height
             self.buffer_size = self.dst_sizes[0]+self.dst_strides[0]
             self.convert_image_function = self.r210_to_BGR48
-            assert src_width==dst_width
-            assert src_height==dst_height
+        elif src_format=="GBRP10" and dst_format=="r210":
+            assert_no_scaling()
+            self.dst_strides[0] = roundup(self.dst_width*4, STRIDE_ROUNDUP)
+            self.dst_sizes[0] = self.dst_strides[0] * self.dst_height
+            self.buffer_size = self.dst_sizes[0]+self.dst_strides[0]
+            self.convert_image_function = self.GBRP10_to_r210
         elif src_format=="YUV420P" and dst_format in ("RGBX", "BGRX", "RGB", "BGR"):
             #3 or 4 bytes per pixel:
             self.dst_strides[0] = roundup(self.dst_width*len(dst_format), STRIDE_ROUNDUP)
@@ -425,7 +453,7 @@ cdef class ColorspaceConverter:
         pixels = image.get_pixels()
         assert pixels, "failed to get pixels from %s" % image
         input_stride = image.get_rowstride()
-        log("do_RGB_to_YUV420P(%s, %i, %i, %i, %i) input=%s, strides=%s" % (image, Bpp, Rindex, Gindex, Bindex, len(pixels), input_stride))
+        log("do_RGB_to_YUV420P(%s, %i, %i, %i, %i) input=%s, strides=%s", image, Bpp, Rindex, Gindex, Bindex, len(pixels), input_stride)
 
         assert object_as_buffer(pixels, <const void**> &input_image, &pic_buf_len)==0
         #allocate output buffer:
@@ -542,7 +570,7 @@ cdef class ColorspaceConverter:
         pixels = image.get_pixels()
         assert pixels, "failed to get pixels from %s" % image
         input_stride = image.get_rowstride()
-        log("r210_to_BGR48(%s) input=%s, strides=%s" % (image, len(pixels), input_stride))
+        log("r210_to_BGR48(%s) input=%s, strides=%s", image, len(pixels), input_stride)
 
         cdef Py_ssize_t pic_buf_len = 0
         cdef const unsigned int *r210
@@ -564,14 +592,62 @@ cdef class ColorspaceConverter:
 
         bgr48_buffer = memory_as_pybuffer(<void *> bgr48, self.dst_sizes[0], True)
         cdef double elapsed = time.time()-start
-        log("%s took %.1fms", self, 1000.0*elapsed)
+        log("r210_to_BGR48 took %.1fms", 1000.0*elapsed)
         self.time += elapsed
         self.frames += 1
         out_image = CythonImageWrapper(0, 0, self.dst_width, self.dst_height, bgr48_buffer, "BGR48", 48, dst_stride, ImageWrapper.PACKED)
         out_image.cython_buffer = <uintptr_t> bgr48
         return out_image
 
 
+    def GBRP10_to_r210(self, image):
+        cdef double start = time.time()
+        assert image.get_planes()==ImageWrapper.PLANAR_3, "invalid input format: %s planes" % image.get_planes()
+        assert image.get_width()>=self.src_width, "invalid image width: %s (minimum is %s)" % (image.get_width(), self.src_width)
+        assert image.get_height()>=self.src_height, "invalid image height: %s (minimum is %s)" % (image.get_height(), self.src_height)
+        pixels = image.get_pixels()
+        assert pixels, "failed to get pixels from %s" % image
+
+        input_strides = image.get_rowstride()
+        cdef unsigned int w = self.src_width
+        cdef unsigned int h = self.src_height
+        cdef unsigned int src_stride = input_strides[0]
+        cdef unsigned int dst_stride = self.dst_strides[0]
+        assert input_strides[1]==src_stride and input_strides[2]==src_stride, "mismatch in rowstrides: %s" % (input_strides,)
+        assert src_stride>=w*2
+        assert dst_stride>=w*4
+        assert self.dst_sizes[0]>=dst_stride*h
+
+        cdef Py_ssize_t pic_buf_len[3]
+        cdef uintptr_t gbrp10[3]
+        cdef unsigned int i
+        for i in range(3):
+            assert object_as_buffer(pixels[i], <const void**> &gbrp10[i], &pic_buf_len[i])==0
+            assert pic_buf_len[i]>=src_stride*h, "input plane '%s' is too small: %i bytes" % ("GBR"[i], pic_buf_len[i])
+
+        #allocate output buffer:
+        cdef unsigned int *r210 = <unsigned int*> memalign(self.dst_sizes[0])
+
+        if image.is_thread_safe():
+            with nogil:
+                gbrp10_to_r210_copy(<uintptr_t> r210, gbrp10,
+                                    w, h,
+                                    src_stride, dst_stride)
+        else:
+            gbrp10_to_r210_copy(<uintptr_t> r210, gbrp10,
+                                w, h,
+                                src_stride, dst_stride)
+
+        r210_buffer = memory_as_pybuffer(<void *> r210, self.dst_sizes[0], True)
+        cdef double elapsed = time.time()-start
+        log("GBRP10_to_r210 took %.1fms", 1000.0*elapsed)
+        self.time += elapsed
+        self.frames += 1
+        out_image = CythonImageWrapper(0, 0, self.dst_width, self.dst_height, r210_buffer, "r210", 30, dst_stride, ImageWrapper.PACKED)
+        out_image.cython_buffer = <uintptr_t> r210
+        return out_image
+
+
     def YUV420P_to_RGBX(self, image):
         return self.do_YUV420P_to_RGB(image, 4, RGBX_R, RGBX_G, RGBX_B, RGBX_X)