qmemman: handle memory assigned to VM but not yet used
When a VM gets some memory assigned, the balloon driver may not pick it
up immediately, so the memory is still seen as "free" by Xen, but the VM
can use (request) it at any time. Qmemman needs to take care of such
memory (exclude it from the "free" pool), otherwise it would redistribute
it to other domains, allowing the original domain to drain the Xen memory
pool.

Do this by redefining DomainState.memory_actual - it is now the amount
of memory available to the VM (currently used, or usable at any time).
Then calculate free memory by subtracting memory allocated but not yet
used (memory_target - memory_current).

Fixes QubesOS/qubes-issues#1389
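
As an illustration of the calculation described above (not part of the commit), a minimal sketch in plain Python; effective_free_memory and dom_states are hypothetical names, and the objects are assumed to carry last_target and memory_current fields (in bytes) mirroring DomainState below:

def effective_free_memory(xen_free_memory, dom_states):
    # Memory already assigned to a domain but not yet claimed by its balloon
    # driver still shows up as "free" in Xen; do not hand it out again.
    assigned_but_unused = sum(
        max(0, dom.last_target - dom.memory_current) for dom in dom_states
    )
    return xen_free_memory - assigned_but_unused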
marmarek committed Jan 5, 2016
1 parent e30e802 commit 181eb3e
Showing 2 changed files with 36 additions and 9 deletions.
37 changes: 28 additions & 9 deletions qmemman/qmemman.py
@@ -37,7 +37,9 @@
class DomainState:
def __init__(self, id):
self.meminfo = None #dictionary of memory info read from client
self.memory_actual = None #the current memory size
self.memory_current = None #the current memory size
self.memory_actual = None # the current memory allocation (what VM
# is using or can use at any time)
self.memory_maximum = None #the maximum memory size
self.mem_used = None #used memory, computed based on meminfo
self.id = id #domain id
@@ -65,25 +67,42 @@ def __init__(self):
def add_domain(self, id):
self.log.debug('add_domain(id={!r})'.format(id))
self.domdict[id] = DomainState(id)
# TODO: move to DomainState.__init__
target_str = self.xs.read('', '/local/domain/' + id + '/memory/target')
if target_str:
self.domdict[id].last_target = int(target_str) * 1024

def del_domain(self, id):
self.log.debug('del_domain(id={!r})'.format(id))
self.domdict.pop(id)

def get_free_xen_memory(self):
return int(self.xc.physinfo()['free_memory']*1024 * self.MEM_OVERHEAD_FACTOR)
# hosts = self.xend_session.session.xenapi.host.get_all()
# host_record = self.xend_session.session.xenapi.host.get_record(hosts[0])
# host_metrics_record = self.xend_session.session.xenapi.host_metrics.get_record(host_record["metrics"])
# ret = host_metrics_record["memory_free"]
# return long(ret)
xen_free = int(self.xc.physinfo()['free_memory']*1024 *
self.MEM_OVERHEAD_FACTOR)
# now check for domains which have assigned more memory than really
# used - do not count it as "free", because domain is free to use it
# at any time
# assumption: self.refresh_memactual was called before
# (so domdict[id].memory_actual is up to date)
assigned_but_unused = reduce(
lambda acc, dom: acc + max(0, dom.last_target-dom.memory_current),
self.domdict.values(),
0
)
return xen_free - assigned_but_unused

#refresh information on memory assigned to all domains
def refresh_memactual(self):
for domain in self.xc.domain_getinfo():
id = str(domain['domid'])
if self.domdict.has_key(id):
self.domdict[id].memory_actual = domain['mem_kb']*1024
# real memory usage
self.domdict[id].memory_current = domain['mem_kb']*1024
# what VM is using or can use
self.domdict[id].memory_actual = max(
self.domdict[id].memory_current,
self.domdict[id].last_target
)
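# memory_actual is now the larger of what the domain currently holds
# and what it has been promised (last_target), so memory not yet
# claimed by the balloon driver stays attributed to this domain.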
self.domdict[id].memory_maximum = self.xs.read('', '/local/domain/%s/memory/static-max' % str(id))
if self.domdict[id].memory_maximum:
self.domdict[id].memory_maximum = int(self.domdict[id].memory_maximum)*1024
@@ -272,11 +291,11 @@ def do_balance(self):
self.log.debug('do_balance dom={!r} sleeping ntries={}'.format(
dom, ntries))
time.sleep(self.BALOON_DELAY)
self.refresh_memactual()
ntries -= 1
if ntries <= 0:
# Waiting hasn't helped; find which domain got stuck and
# abort the balance (after distributing what we have)
self.refresh_memactual()
for rq2 in memset_reqs:
dom2, mem2 = rq2
if dom2 == dom:
8 changes: 8 additions & 0 deletions qmemman/qmemman_server.py
@@ -157,6 +157,14 @@ def handle(self):
self.log.debug('data={!r}'.format(self.data))
if len(self.data) == 0:
self.log.info('EOF')
# FIXME: there is a race condition here: if XS_Watcher

Rudd-O commented on Jan 7, 2016:

Maybe have the XS watcher use a tombstone value set at VM startup to do the redistribution?

redistribution always sucks in these ways, and I mean beyond the engineering ways.

marmarek (author) commented on Jan 7, 2016:

The problem is that the XS watcher doesn't know about the domain yet. And the QMemmanReqHandler object doesn't have any reference to it, so it cannot pass such info (or even a flag that the domain list needs to be updated).
And BTW, QMemmanReqHandler (this object) doesn't know which domain the memory was requested for...

In practice the problem shouldn't be anything serious, because a moment later the '@introduceDomain' event will be handled and the situation will be corrected.

# handles a meminfo event before @introduceDomain, it will use
# an incomplete domain list and may redistribute memory
# allocated to some VM but not yet used (see #1389).
# To fix that, system_state should be updated (refresh the domain
# list) before releasing the lock, but in the current code
# layout the XS_Watcher instance isn't available here,
# so xenstore watches could not be registered
if got_lock:
global_lock.release()
self.log.debug('global_lock released')
