* ensure we only access the device from the encoding thread with the context set (push/pop); see the sketch below

* track device memory usage
* lower our score when device memory is scarce
* import pycuda globally since it is mandatory and does not have side effects (initialization does - but we do that in "init_module" now)

git-svn-id: https://xpra.org/svn/Xpra/trunk@4729 3bb7dfac-3a0b-4e04-842a-767bc560f471
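
A minimal PyCUDA sketch of the push/pop discipline described in the first bullet (not the xpra code itself; the device index, the with_device helper and its use from a single encoding thread are illustrative assumptions):

from pycuda import driver

driver.init()
device = driver.Device(0)
# make_context() leaves the new context current on the calling thread,
# so pop it immediately; it is only pushed while the encoding thread uses the device
context = device.make_context(flags=driver.ctx_flags.SCHED_AUTO | driver.ctx_flags.MAP_HOST)
context.pop()

def with_device(fn, *args):
    # called from the encoding thread only: push the context, do the work, always pop
    context.push()
    try:
        return fn(*args)
    finally:
        context.pop()

# example: query free/total device memory (in bytes) with the context set
free_memory, total_memory = with_device(driver.mem_get_info)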
totaam committed Nov 10, 2013
1 parent 2feed85 commit a86fb84
Showing 1 changed file with 27 additions and 13 deletions.
40 changes: 27 additions & 13 deletions src/xpra/codecs/nvenc/encoder.pyx
@@ -8,6 +8,10 @@ import time
import os
import numpy

+import pycuda
+from pycuda import driver
+from pycuda.compiler import compile
+
from xpra.util import AtomicInteger
from xpra.deque import maxdeque
from xpra.codecs.codec_constants import codec_spec, TransientCodecException
@@ -1139,10 +1143,16 @@ HEIGHT_MASK = 0xFFFE
context_counter = AtomicInteger()
context_failures_history = maxdeque(100)

+free_memory = 0
+total_memory = 0
+
def get_runtime_factor():
-global context_failures_history, context_counter
+global context_failures_history, context_counter, free_memory, total_memory
cc = context_counter.get()
-if len(context_failures_history)==0 and cc<8:
+fm_pct = 100
+if total_memory>0:
+    fm_pct = int(100.0*free_memory/total_memory)
+if len(context_failures_history)==0 and cc<8 and fm_pct>=25:
#no problems!
debug("nvenc.get_runtime_factor()=%s", 1.0)
return 1.0
@@ -1158,6 +1168,9 @@ def get_runtime_factor():
#the last recent error had as many contexts available
#so this is unlikely to work, lower more:
f /= 2.0
+#if we are low on free memory, reduce further:
+if fm_pct<30:
+    f *= fm_pct/30.0
debug("nvenc.get_runtime_factor()=%s", f)
return f

@@ -1194,8 +1207,6 @@ def device_info(d):
cdef cuda_init_devices():
start = time.time()
log.info("CUDA initialization (this may take a few seconds)")
-import pycuda
-from pycuda import driver
driver.init()
ngpus = driver.Device.count()
debug("PyCUDA found %s devices:", ngpus)
@@ -1238,7 +1249,6 @@ def cuda_check():
raise ImportError("no CUDA devices found!")
assert DEFAULT_CUDA_DEVICE_ID in cuda_devices.keys(), "specified CUDA device ID %s not found in %s" % (DEFAULT_CUDA_DEVICE_ID, devices)
#create context for testing:
-from pycuda import driver
d = driver.Device(DEFAULT_CUDA_DEVICE_ID)
context = d.make_context(flags=driver.ctx_flags.SCHED_AUTO | driver.ctx_flags.MAP_HOST)
debug("cuda_check created test context, api_version=%s", context.get_api_version())
@@ -1261,8 +1271,6 @@ KERNEL_cubins = {}
def get_CUDA_kernel(device_id, kernel_name, kernel_source):
start = time.time()
global KERNEL_cubins
-from pycuda.compiler import compile
-from pycuda import driver
cubin = KERNEL_cubins.get((device_id, kernel_name))
if cubin is None:
debug("compiling for device %s: %s=%s", device_id, kernel_name, kernel_source)
@@ -1298,6 +1306,7 @@ cdef class Encoder:
#PyCUDA:
cdef int device_id
cdef object driver
+cdef object cuda_device_info
cdef object cuda_device
cdef object cuda_context
cdef object kernel
@@ -1383,12 +1392,11 @@ cdef class Encoder:
def init_cuda(self):
assert self.device_id in get_cuda_devices().keys(), "invalid device_id '%s' (available: %s)" % (self.device_id, cuda_devices)
global context_counter, context_failures_history
-from pycuda import driver
-self.driver = driver
debug("init_cuda() device_id=%s", self.device_id)
try:
self.cuda_device = driver.Device(DEFAULT_CUDA_DEVICE_ID)
debug("init_cuda() cuda_device=%s", self.cuda_device)
self.cuda_device_info = device_info(self.cuda_device)
debug("init_cuda() cuda_device=%s (%s)", self.cuda_device, self.cuda_device_info)
self.cuda_context = self.cuda_device.make_context(flags=driver.ctx_flags.SCHED_AUTO | driver.ctx_flags.MAP_HOST)
debug("init_cuda() cuda_context=%s", self.cuda_context)
except driver.MemoryError, e:
@@ -1511,8 +1519,8 @@ cdef class Encoder:
"encoder_width" : self.encoder_width,
"encoder_height" : self.encoder_height,
"version" : get_version()}
-if self.cuda_device:
-    info["device"] = device_info(self.cuda_device)
+if self.cuda_device_info:
+    info["device"] = self.cuda_device_info
if self.src_format:
info["src_format"] = self.src_format
if self.pixel_format:
@@ -1528,6 +1536,10 @@ cdef class Encoder:
pps = float(self.width) * float(self.height) * float(self.frames) / self.time
info["total_time_ms"] = int(self.time*1000.0)
info["pixels_per_second"] = int(pps)
+if total_memory>0:
+    info["free_memory"] = int(free_memory)
+    info["total_memory"] = int(total_memory)
+    info["free_memory_pct"] = int(100.0*free_memory/total_memory)
return info

def __str__(self):
@@ -1651,7 +1663,7 @@ cdef class Encoder:
debug("compress_image(..) host buffer populated with %s bytes (max %s)", len(pixels), input_size)

#copy input buffer to CUDA buffer:
-self.driver.memcpy_htod(self.cudaInputBuffer, self.inputBuffer)
+driver.memcpy_htod(self.cudaInputBuffer, self.inputBuffer)
debug("compress_image(..) input buffer copied to device")

#FIXME: find better values and validate against max_block/max_grid:
@@ -1738,6 +1750,8 @@ cdef class Encoder:

end = time.time()
debug("compress_image(..) download took %.1f ms", (end-encode_end)*1000.0)
+global free_memory, total_memory
+free_memory, total_memory = driver.mem_get_info()

self.time += end-start
debug("compress_image(..) returning %s bytes (%.1f%%), complete compression for frame %s took %.1fms", size, 100.0*size/input_size, self.frames, 1000.0*(end-start))
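For reference, a standalone sketch of the scoring logic added to get_runtime_factor above; the free-memory thresholds (25% and 30%) come from the diff, while the 0.5 base reduction is a hypothetical stand-in for the history-based reductions in the real code:

def runtime_factor(free_memory, total_memory, context_count, recent_failures):
    # free-memory percentage, defaulting to 100 when the totals are not known yet
    fm_pct = 100
    if total_memory > 0:
        fm_pct = int(100.0 * free_memory / total_memory)
    if not recent_failures and context_count < 8 and fm_pct >= 25:
        return 1.0                  # no problems: full score
    f = 0.5                         # hypothetical stand-in for the history-based reductions
    if fm_pct < 30:
        f *= fm_pct / 30.0          # low on free memory: reduce further
    return f

# example: 200MB free out of 1GB (fm_pct=19) gives a factor of about 0.32
print(runtime_factor(200*1024**2, 1024**3, context_count=2, recent_failures=False))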
