Commit 2c648e8
compile the cuda kernel just once per device: cache the results as a cubin file buffer

git-svn-id: https://xpra.org/svn/Xpra/trunk@4703 3bb7dfac-3a0b-4e04-842a-767bc560f471
totaam committed Nov 7, 2013
1 parent b6b33f5 commit 2c648e8
Showing 1 changed file with 20 additions and 10 deletions.
30 changes: 20 additions & 10 deletions src/xpra/codecs/nvenc/encoder.pyx
@@ -1203,13 +1203,24 @@ cdef raiseNVENC(NVENCSTATUS ret, msg=""):
        raise Exception("%s - returned %s" % (msg, nvencStatusInfo(ret)))


#BGRA2NV12_functions = {}
def get_BGRA2NV12():
    from xpra.codecs.nvenc.CUDA_rgb2nv12 import BGRA2NV12_kernel
    from pycuda.compiler import SourceModule
    debug("BGRA2NV12=%s", BGRA2NV12_kernel)
    mod = SourceModule(BGRA2NV12_kernel)
#cache the cubin files for each device_id:
BGRA2NV12_functions = {}
def get_BGRA2NV12(device_id):
    start = time.time()
    global BGRA2NV12_functions
    from pycuda.compiler import compile
    from pycuda import driver
    cubin = BGRA2NV12_functions.get(device_id)
    if cubin is None:
        from xpra.codecs.nvenc.CUDA_rgb2nv12 import BGRA2NV12_kernel
        debug("compiling for device %s: BGRA2NV12=%s", device_id, BGRA2NV12_kernel)
        cubin = compile(BGRA2NV12_kernel)
        BGRA2NV12_functions[device_id] = cubin
    #now load from cubin:
    mod = driver.module_from_buffer(cubin)
    BGRA2NV12_function = mod.get_function("BGRA2NV12")
    end = time.time()
    debug("compilation took %.1fms", 1000.0*(end-start))
    return BGRA2NV12_function

API_V2_WARNING = False
@@ -1301,14 +1312,15 @@ cdef class Encoder:
        self.driver = driver
        debug("init_cuda(%s)", device_id)
        self.cuda_device = driver.Device(DEFAULT_CUDA_DEVICE_ID)
        debug("init_cuda(%s) cuda_device=%s", device_id, self.cuda_device)
        self.cuda_context = self.cuda_device.make_context(flags=driver.ctx_flags.SCHED_AUTO | driver.ctx_flags.MAP_HOST)
        debug("init_cuda(%s) cuda_context=%s", device_id, self.cuda_context)
        #use alias to make code easier to read:
        d = self.cuda_device
        da = driver.device_attribute
        try:
            debug("init_cuda(%s) cuda_device=%s, cuda_context=%s", device_id, self.cuda_device, self.cuda_context)
            #compile/get kernel:
            self.BGRA2NV12 = get_BGRA2NV12()
            self.BGRA2NV12 = get_BGRA2NV12(device_id)
            #allocate CUDA input buffer (on device) 32-bit RGB:
            self.cudaInputBuffer, self.inputPitch = driver.mem_alloc_pitch(self.encoder_width*4, self.encoder_height, 16)
            debug("CUDA Input Buffer=%s, pitch=%s", hex(int(self.cudaInputBuffer)), self.inputPitch)
@@ -1833,8 +1845,6 @@ cdef class Encoder:


def init_module():
    #check for module availibility:

    #check that we have CUDA device(s):
    cuda_check()

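Taken out of the diff, the pattern introduced by this commit is: compile the CUDA source once per device, keep the resulting cubin bytes in a module-level dict, and on every later call reload a module from that buffer instead of recompiling. Below is a minimal standalone sketch of that idea, not the actual xpra code: it assumes pycuda is installed, and the kernel source plus the names KERNEL_SOURCE, _cubin_cache and get_kernel are placeholders rather than anything from the repository.

import time
from pycuda.compiler import compile
from pycuda import driver

#trivial placeholder kernel - the real code compiles BGRA2NV12_kernel instead:
KERNEL_SOURCE = """
__global__ void noop_kernel(unsigned char *buf)
{
}
"""

#device_id -> compiled cubin byte buffer:
_cubin_cache = {}

def get_kernel(device_id, function_name="noop_kernel"):
    start = time.time()
    cubin = _cubin_cache.get(device_id)
    if cubin is None:
        #first use on this device: run the compiler once and keep the cubin bytes
        cubin = compile(KERNEL_SOURCE)
        _cubin_cache[device_id] = cubin
    #loading a module from the cached buffer needs an active CUDA context
    #(as init_cuda ensures above), but avoids invoking the compiler again:
    mod = driver.module_from_buffer(cubin)
    fn = mod.get_function(function_name)
    print("get_kernel(%s) took %.1fms" % (device_id, 1000.0*(time.time()-start)))
    return fn

if __name__ == "__main__":
    import pycuda.autoinit          #creates a context on the default device
    fn = get_kernel(0)
    print("got kernel function: %s" % fn)

Keying the cache on device_id, rather than using one global cubin, makes sense because the compiled code targets the compute capability of the device behind the current context, so a cubin built for one GPU is not necessarily loadable on a different model in the same machine.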