Skip to content

Commit

Permalink
add codec_type and delay initialization to workaround the TLS conflic…
Browse files Browse the repository at this point in the history
…t with GTK

git-svn-id: https://xpra.org/svn/Xpra/trunk@4282 3bb7dfac-3a0b-4e04-842a-767bc560f471
  • Loading branch information
totaam committed Sep 5, 2013
1 parent c07fbef commit 9f8baff
Show file tree
Hide file tree
Showing 3 changed files with 136 additions and 116 deletions.
26 changes: 16 additions & 10 deletions src/xpra/codecs/csc_nvcuda/colorspace_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,12 +39,9 @@
if host_mem and selected_device is None:
selected_device = d
assert selected_device is not None
context = selected_device.make_context(flags=driver.ctx_flags.SCHED_YIELD | driver.ctx_flags.MAP_HOST)
debug("testing with context=%s", context)
debug("api version=%s", context.get_api_version())
free, total = driver.mem_get_info()
debug("using device %s, memory: free=%sMB, total=%sMB", selected_device, int(free/1024/1024), int(total/1024/1024))
context.pop()

context = None
context_wrapper = None
#ensure we cleanup:
class CudaContextWrapper(object):

Expand All @@ -54,7 +51,16 @@ def __init__(self, context):
def __del__(self):
self.context.detach()
self.context = None
ccw = CudaContextWrapper(context)

def init_context():
    """Create the module-wide CUDA context on first use.

    The context is made on ``selected_device``, some diagnostics
    (API version, free/total device memory) are logged, and the context
    is popped again. It is then stored in the ``context`` global and
    wrapped in a ``CudaContextWrapper`` (``context_wrapper`` global),
    whose ``__del__`` detaches it.

    This function is called for every converter that gets initialized,
    so it returns immediately once a context exists: without this guard
    each call would create a brand new context and replace the previous
    wrapper.
    """
    global context, context_wrapper
    if context is not None:
        #already initialized, re-use the existing context
        return
    ctx = selected_device.make_context(flags=driver.ctx_flags.SCHED_YIELD | driver.ctx_flags.MAP_HOST)
    try:
        debug("testing with context=%s", ctx)
        debug("api version=%s", ctx.get_api_version())
        free, total = driver.mem_get_info()
        debug("using device %s, memory: free=%sMB, total=%sMB", selected_device, int(free/1024/1024), int(total/1024/1024))
    finally:
        #always pop, even if the diagnostics above failed
        ctx.pop()
    #only publish the context once it has been set up successfully:
    context = ctx
    context_wrapper = CudaContextWrapper(ctx)


def find_lib(basename):
Expand Down Expand Up @@ -229,8 +235,7 @@ def validate_in_out(in_colorspace, out_colorspace):

def get_spec(in_colorspace, out_colorspace):
validate_in_out(in_colorspace, out_colorspace)
#ratings: quality, speed, setup cost, cpu cost, gpu cost, latency, max_w, max_h, max_pixels
return codec_spec(ColorspaceConverter, speed=100, setup_cost=10, cpu_cost=10, gpu_cost=50, min_w=16, min_h=16, can_scale=False)
return codec_spec(ColorspaceConverter, codec_type=get_type(), speed=100, setup_cost=10, cpu_cost=10, gpu_cost=50, min_w=128, min_h=128, can_scale=False)


class ColorspaceConverter(object):
Expand All @@ -247,8 +252,9 @@ def __init__(self):
self.kernel_function = None

def init_context(self, src_width, src_height, src_format,
dst_width, dst_height, dst_format): #@DuplicatedSignature
dst_width, dst_height, dst_format, speed=100): #@DuplicatedSignature
validate_in_out(src_format, dst_format)
init_context()
self.src_width = src_width
self.src_height = src_height
self.src_format = src_format
Expand Down
224 changes: 119 additions & 105 deletions src/xpra/codecs/csc_opencl/colorspace_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,119 +52,132 @@ def platform_info(platform):
selected_platform = platform
log.info(" %s %s", p, device_info(d))

if selected_device:
log.info("using platform: %s", platform_info(selected_platform))
log.info("using device: %s", device_info(selected_device))
debug("max_work_group_size=%s", selected_device.max_work_group_size)
debug("max_work_item_dimensions=%s", selected_device.max_work_item_dimensions)
debug("max_work_item_sizes=%s", selected_device.max_work_item_sizes)


context = None
try:
def init_context():
    """Create the module-wide OpenCL context, once.

    Does nothing if ``context`` is already set. Otherwise, when a device
    was selected, the platform and device details are logged and the
    context is created on that device only; when no device was selected
    pyopencl is asked to pick one non-interactively.
    """
    global context, selected_device,selected_platform
    if context is None:
        if not selected_device:
            #no explicit choice: let pyopencl decide
            context = pyopencl.create_some_context(interactive=False)
        else:
            log.info("using platform: %s", platform_info(selected_platform))
            log.info("using device: %s", device_info(selected_device))
            for attr_name in ("max_work_group_size", "max_work_item_dimensions", "max_work_item_sizes"):
                debug(attr_name+"=%s", getattr(selected_device, attr_name))
            context = pyopencl.Context([selected_device])
        assert context is not None
except Exception, e:
error("cannot create an OpenCL context: %s", e, exc_info=True)
raise ImportError("cannot create an OpenCL context: %s" % e)


from xpra.codecs.csc_opencl.opencl_kernels import gen_yuv_to_rgb_kernels, gen_rgb_to_yuv_kernels
#TODO: we could handle other formats here and manage the channel swap ourselves
#(most of the code to do this is already implemented in the kernel generators)
def has_image_format(image_formats, channel_order, channel_type):
for iformat in image_formats:
if iformat.channel_order==channel_order and iformat.channel_data_type==channel_type:
return True
return False
IN_CHANNEL_ORDER = {
"RGBA" : pyopencl.channel_order.RGBA,
"RGBX" : pyopencl.channel_order.RGBA,
"BGRA" : pyopencl.channel_order.BGRA,
"BGRX" : pyopencl.channel_order.BGRA,
"RGBX" : pyopencl.channel_order.RGBx,
"RGB" : pyopencl.channel_order.RGB,
}

#for YUV to RGB support we need to be able to handle the channel_order in WRITE_ONLY mode:
YUV_to_RGB_KERNELS = {}
sif = pyopencl.get_supported_image_formats(context, mem_flags.WRITE_ONLY, pyopencl.mem_object_type.IMAGE2D)
debug("get_supported_image_formats(WRITE_ONLY, IMAGE2D)=%s", sif)
for rgb_mode, channel_order in IN_CHANNEL_ORDER.items():
if not has_image_format(sif, channel_order, pyopencl.channel_type.UNSIGNED_INT8):
debug("YUV 2 RGB: channel order %s is not supported in WRITE_ONLY mode", rgb_mode)
continue
kernels = gen_yuv_to_rgb_kernels(rgb_modes=["RGBX"])
for key, k_def in kernels.items():
src, dst = key
kname, ksrc = k_def
#note: "RGBX" isn't actually used (yet?)
YUV_to_RGB_KERNELS[(src, rgb_mode)] = (kname, "RGBX", channel_order, ksrc)
debug("YUV 2 RGB conversions=%s", sorted(YUV_to_RGB_KERNELS.keys()))
#debug("YUV 2 RGB kernels=%s", YUV_to_RGB_KERNELS)
debug("YUV 2 RGB kernels=%s", set([x[0] for x in YUV_to_RGB_KERNELS.values()]))

#for RGB to YUV support we need to be able to handle the channel_order,
#with READ_ONLY and both with COPY_HOST_PTR and USE_HOST_PTR since we
#do not know in advance which one we can use..
#TODO: enable channel_order anyway and use COPY as fallback..
RGB_to_YUV_KERNELS = {}
sif_copy = pyopencl.get_supported_image_formats(context, mem_flags.READ_ONLY | mem_flags.COPY_HOST_PTR, pyopencl.mem_object_type.IMAGE2D)
debug("get_supported_image_formats(READ_ONLY | COPY_HOST_PTR, IMAGE2D)=%s", sif)
sif_use = pyopencl.get_supported_image_formats(context, mem_flags.READ_ONLY | mem_flags.USE_HOST_PTR, pyopencl.mem_object_type.IMAGE2D)
debug("get_supported_image_formats(READ_ONLY | USE_HOST_PTR, IMAGE2D)=%s", sif)
if not has_image_format(sif_copy, pyopencl.channel_order.R, pyopencl.channel_type.UNSIGNED_INT8) or \
not has_image_format(sif_use, pyopencl.channel_order.R, pyopencl.channel_type.UNSIGNED_INT8):
log.error("cannot convert to yuv without support for R channel!")
else:


KERNELS_DEFS = {}
def gen_kernels():
global context, KERNELS_DEFS
from xpra.codecs.csc_opencl.opencl_kernels import gen_yuv_to_rgb_kernels, gen_rgb_to_yuv_kernels
#TODO: we could handle other formats here and manage the channel swap ourselves
#(most of the code to do this is already implemented in the kernel generators)
def has_image_format(image_formats, channel_order, channel_type):
for iformat in image_formats:
if iformat.channel_order==channel_order and iformat.channel_data_type==channel_type:
return True
return False
IN_CHANNEL_ORDER = {
"RGBA" : pyopencl.channel_order.RGBA,
"RGBX" : pyopencl.channel_order.RGBA,
"BGRA" : pyopencl.channel_order.BGRA,
"BGRX" : pyopencl.channel_order.BGRA,
"RGBX" : pyopencl.channel_order.RGBx,
"RGB" : pyopencl.channel_order.RGB,
}

#for YUV to RGB support we need to be able to handle the channel_order in WRITE_ONLY mode:
YUV_to_RGB_KERNELS = {}
sif = pyopencl.get_supported_image_formats(context, mem_flags.WRITE_ONLY, pyopencl.mem_object_type.IMAGE2D)
debug("get_supported_image_formats(WRITE_ONLY, IMAGE2D)=%s", sif)
for rgb_mode, channel_order in IN_CHANNEL_ORDER.items():
errs = []
if not has_image_format(sif_copy, channel_order, pyopencl.channel_type.UNSIGNED_INT8):
errs.append("COPY_HOST_PTR")
if not has_image_format(sif_use, channel_order, pyopencl.channel_type.UNSIGNED_INT8):
errs.append("USE_HOST_PTR")
if len(errs)>0:
debug("RGB 2 YUV: channel order %s is not supported in READ_ONLY mode(s): %s", rgb_mode, " or ".join(errs))
if not has_image_format(sif, channel_order, pyopencl.channel_type.UNSIGNED_INT8):
debug("YUV 2 RGB: channel order %s is not supported in WRITE_ONLY mode", rgb_mode)
continue
#we hardcode RGB here since we currently handle byteswapping
#via the channel_order only for now:
kernels = gen_rgb_to_yuv_kernels(rgb_modes=["RGB"])
#debug("kernels(%s)=%s", rgb_mode, kernels)
kernels = gen_yuv_to_rgb_kernels(rgb_modes=["RGBX"])
for key, k_def in kernels.items():
src, dst = key
kname, ksrc = k_def
#note: "RGBX" isn't actually used (yet?)
RGB_to_YUV_KERNELS[(rgb_mode, dst)] = (kname, "RGB", channel_order, ksrc)
debug("RGB 2 YUV conversions=%s", sorted(RGB_to_YUV_KERNELS.keys()))
#debug("RGB 2 YUV kernels=%s", RGB_to_YUV_KERNELS)
debug("RGB 2 YUV kernels=%s", set([x[0] for x in RGB_to_YUV_KERNELS.values()]))

YUV_to_RGB_KERNELS[(src, rgb_mode)] = (kname, "RGBX", channel_order, ksrc)
debug("YUV 2 RGB conversions=%s", sorted(YUV_to_RGB_KERNELS.keys()))
#debug("YUV 2 RGB kernels=%s", YUV_to_RGB_KERNELS)
debug("YUV 2 RGB kernels=%s", set([x[0] for x in YUV_to_RGB_KERNELS.values()]))

#for RGB to YUV support we need to be able to handle the channel_order,
#with READ_ONLY and both with COPY_HOST_PTR and USE_HOST_PTR since we
#do not know in advance which one we can use..
#TODO: enable channel_order anyway and use COPY as fallback?
RGB_to_YUV_KERNELS = {}
sif_copy = pyopencl.get_supported_image_formats(context, mem_flags.READ_ONLY | mem_flags.COPY_HOST_PTR, pyopencl.mem_object_type.IMAGE2D)
debug("get_supported_image_formats(READ_ONLY | COPY_HOST_PTR, IMAGE2D)=%s", sif)
sif_use = pyopencl.get_supported_image_formats(context, mem_flags.READ_ONLY | mem_flags.USE_HOST_PTR, pyopencl.mem_object_type.IMAGE2D)
debug("get_supported_image_formats(READ_ONLY | USE_HOST_PTR, IMAGE2D)=%s", sif)
if not has_image_format(sif_copy, pyopencl.channel_order.R, pyopencl.channel_type.UNSIGNED_INT8) or \
not has_image_format(sif_use, pyopencl.channel_order.R, pyopencl.channel_type.UNSIGNED_INT8):
log.error("cannot convert to yuv without support for R channel!")
else:
for rgb_mode, channel_order in IN_CHANNEL_ORDER.items():
errs = []
if not has_image_format(sif_copy, channel_order, pyopencl.channel_type.UNSIGNED_INT8):
errs.append("COPY_HOST_PTR")
if not has_image_format(sif_use, channel_order, pyopencl.channel_type.UNSIGNED_INT8):
errs.append("USE_HOST_PTR")
if len(errs)>0:
debug("RGB 2 YUV: channel order %s is not supported in READ_ONLY mode(s): %s", rgb_mode, " or ".join(errs))
continue
#we hardcode RGB here since we currently handle byteswapping
#via the channel_order only for now:
kernels = gen_rgb_to_yuv_kernels(rgb_modes=["RGB"])
#debug("kernels(%s)=%s", rgb_mode, kernels)
for key, k_def in kernels.items():
src, dst = key
kname, ksrc = k_def
#note: "RGBX" isn't actually used (yet?)
RGB_to_YUV_KERNELS[(rgb_mode, dst)] = (kname, "RGB", channel_order, ksrc)
debug("RGB 2 YUV conversions=%s", sorted(RGB_to_YUV_KERNELS.keys()))
#debug("RGB 2 YUV kernels=%s", RGB_to_YUV_KERNELS)
debug("RGB 2 YUV kernels=%s", set([x[0] for x in RGB_to_YUV_KERNELS.values()]))

KERNELS_DEFS = RGB_to_YUV_KERNELS.copy()
KERNELS_DEFS.update(YUV_to_RGB_KERNELS)
debug("all conversions=%s", KERNELS_DEFS.keys())
#work out the unique kernels we have generated (kname -> ksrc)
NAMES_TO_KERNELS = {}
for name, _, _, kernel in KERNELS_DEFS.values():
NAMES_TO_KERNELS[name] = kernel
return NAMES_TO_KERNELS

KERNELS_DEFS = RGB_to_YUV_KERNELS.copy()
KERNELS_DEFS.update(YUV_to_RGB_KERNELS)
debug("all conversions=%s", KERNELS_DEFS.keys())
#debug("KERNELS=%s", KERNELS_DEFS)
#work out the unique kernels we have generated (kname -> ksrc)
NAMES_TO_KERNELS = {}
for name, _, _, kernel in KERNELS_DEFS.values():
NAMES_TO_KERNELS[name] = kernel

program = None
try:
with warnings.catch_warnings(record=True) as w:
warnings.simplefilter("always")
log.info("building %s kernels: %s", len(NAMES_TO_KERNELS), ", ".join(NAMES_TO_KERNELS.keys()))
program = pyopencl.Program(context, "\n".join(NAMES_TO_KERNELS.values()))
program.build()
log.debug("all warnings:%s", "\n* ".join([str(x) for x in w]))
build_warnings = [x for x in w if x.category==pyopencl.CompilerWarning]
if len(build_warnings)>0:
debug("%s build warnings:", len(build_warnings))
for x in build_warnings:
debug(str(x))
except Exception, e:
error("cannot build the OpenCL program: %s", e, exc_info=True)
raise ImportError("cannot build the OpenCL program: %s" % e)
def build_kernels():
    """Build the OpenCL program containing all the generated kernels, once.

    Returns immediately if ``program`` is already set. Otherwise the
    context is initialized, the kernel sources are generated with
    ``gen_kernels()`` and compiled as a single program. Compiler
    warnings are recorded and sent to the debug log.

    Raises ImportError if the program cannot be created or built.
    """
    global program
    if program is not None:
        return
    init_context()
    NAMES_TO_KERNELS = gen_kernels()
    try:
        with warnings.catch_warnings(record=True) as w:
            warnings.simplefilter("always")
            log.info("building %s kernels: %s", len(NAMES_TO_KERNELS), ", ".join(NAMES_TO_KERNELS.keys()))
            #use a local until the build has succeeded: assigning the global
            #before build() would leave an unbuilt program behind on failure
            #and every later call would then return early with it
            new_program = pyopencl.Program(context, "\n".join(NAMES_TO_KERNELS.values()))
            new_program.build()
            log.debug("all warnings:%s", "\n* ".join([str(x) for x in w]))
            build_warnings = [x for x in w if x.category==pyopencl.CompilerWarning]
            if len(build_warnings)>0:
                debug("%s build warnings:", len(build_warnings))
                for x in build_warnings:
                    debug(str(x))
    except Exception as e:
        error("cannot build the OpenCL program: %s", e, exc_info=True)
        raise ImportError("cannot build the OpenCL program: %s" % e)
    program = new_program


def roundup(n, m):
    """Round n up to the next multiple of m.

    The result is computed with a bit mask, so it is only a multiple
    of m when m is a power of two.
    """
    mask = m - 1
    return (n + mask) & ~mask
Expand All @@ -187,9 +200,11 @@ def get_version():
return pyopencl.version.VERSION_TEXT

def get_input_colorspaces():
    """Return the source colorspace of every known conversion.

    The kernels are built first (if they have not been already) and the
    first element of each ``KERNELS_DEFS`` key is collected; a source
    colorspace appears once per conversion it takes part in.
    """
    build_kernels()
    sources = []
    for src, _ in KERNELS_DEFS.keys():
        sources.append(src)
    return sources

def get_output_colorspaces(input_colorspace):
    """Return the destination colorspaces reachable from input_colorspace.

    The kernels are built first (if they have not been already), then
    the ``KERNELS_DEFS`` keys whose source matches are selected.
    """
    build_kernels()
    outputs = []
    for src, dst in KERNELS_DEFS.keys():
        if src==input_colorspace:
            outputs.append(dst)
    return outputs

def validate_in_out(in_colorspace, out_colorspace):
Expand All @@ -198,13 +213,13 @@ def validate_in_out(in_colorspace, out_colorspace):

def get_spec(in_colorspace, out_colorspace):
validate_in_out(in_colorspace, out_colorspace)
#ratings: quality, speed, setup cost, cpu cost, gpu cost, latency, max_w, max_h, max_pixels
return codec_spec(ColorspaceConverter, speed=100, setup_cost=10, cpu_cost=10, gpu_cost=50, min_w=16, min_h=16, can_scale=False)
return codec_spec(ColorspaceConverter, codec_type=get_type(), speed=100, setup_cost=10, cpu_cost=10, gpu_cost=50, min_w=128, min_h=128, can_scale=False)


class ColorspaceConverter(object):

def __init__(self):
build_kernels()
self.src_width = 0
self.src_height = 0
self.src_format = ""
Expand All @@ -219,7 +234,7 @@ def __init__(self):
self.kernel_function_name = None

def init_context(self, src_width, src_height, src_format,
dst_width, dst_height, dst_format): #@DuplicatedSignature
dst_width, dst_height, dst_format, csc_speed=100): #@DuplicatedSignature
global context
validate_in_out(src_format, dst_format)
self.src_width = src_width
Expand Down Expand Up @@ -399,15 +414,14 @@ def convert_image_rgb(self, image):

#input image:
bpp = len(self.src_format)
#UNSIGNED_INT8 / UNORM_INT8
iformat = pyopencl.ImageFormat(self.channel_order, pyopencl.channel_type.UNSIGNED_INT8)
shape = (stride/bpp, height)
debug("convert_image() input image format=%s, shape=%s, work size: local=%s, global=%s", iformat, shape, localWorkSize, globalWorkSize)
if type(pixels)==str:
#str is not a buffer, so we have to copy the data
#alternatively, we could copy it first ourselves using this:
#pixels = numpy.fromstring(pixels, dtype=numpy.byte).data
#but I think this is even slower
#but I think this would be even slower
flags = mem_flags.READ_ONLY | mem_flags.COPY_HOST_PTR
else:
flags = mem_flags.READ_ONLY | mem_flags.USE_HOST_PTR
Expand Down Expand Up @@ -435,7 +449,7 @@ def convert_image_rgb(self, image):
debug("convert_image(%s) calling %s%s after %.1fms", image, self.kernel_function_name, tuple(kernelargs), 1000.0*(kstart-start))
self.kernel_function(*kernelargs)
kend = time.time()
debug("%s took %.1fms", self.kernel_function, 1000.0*(kend-kstart))
debug("%s took %.1fms", self.kernel_function_name, 1000.0*(kend-kstart))

#read back:
pixels = []
Expand All @@ -446,7 +460,7 @@ def convert_image_rgb(self, image):
read = pyopencl.enqueue_read_buffer(self.queue, out_buffers[i], out_array, is_blocking=False)
read_events.append(read)
readstart = time.time()
debug("queue read events took %.1fms", 1000.0*(readstart-kend))
debug("queue read events took %.1fms (3 planes of size %s, with strides=%s)", 1000.0*(readstart-kend), out_sizes, strides)
pyopencl.wait_for_events(read_events)
self.queue.finish()
readend = time.time()
Expand Down
2 changes: 1 addition & 1 deletion src/xpra/codecs/csc_swscale/colorspace_converter.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ def get_spec(in_colorspace, out_colorspace):
#setup cost is very low (usually less than 1ms!)
#there are restrictions on dimensions (8x2 minimum!)
#swscale can be used to scale (obviously)
return codec_spec(ColorspaceConverter, setup_cost=20, min_w=8, min_h=2, can_scale=True)
return codec_spec(ColorspaceConverter, codec_type=get_type(), setup_cost=20, min_w=8, min_h=2, can_scale=True)


cdef class CSCImage:
Expand Down

0 comments on commit 9f8baff

Please sign in to comment.