Skip to content

Commit

Permalink
postcopy: Allow registering of fd handler
Browse files Browse the repository at this point in the history
Allow other userfaultfd's to be registered into the fault thread
so that handlers for shared memory can get responses.

Signed-off-by: Dr. David Alan Gilbert <[email protected]>
Reviewed-by: Peter Xu <[email protected]>
Reviewed-by: Michael S. Tsirkin <[email protected]>
Signed-off-by: Michael S. Tsirkin <[email protected]>
  • Loading branch information
dagrh authored and mstsirkin committed Mar 20, 2018
1 parent 2a84ffc commit 00fa4fc
Show file tree
Hide file tree
Showing 5 changed files with 187 additions and 53 deletions.
6 changes: 6 additions & 0 deletions migration/migration.c
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,8 @@ MigrationIncomingState *migration_incoming_get_current(void)
if (!once) {
mis_current.state = MIGRATION_STATUS_NONE;
memset(&mis_current, 0, sizeof(MigrationIncomingState));
mis_current.postcopy_remote_fds = g_array_new(FALSE, TRUE,
sizeof(struct PostCopyFD));
qemu_mutex_init(&mis_current.rp_mutex);
qemu_event_init(&mis_current.main_thread_load_event, false);
once = true;
Expand All @@ -177,6 +179,10 @@ void migration_incoming_state_destroy(void)
qemu_fclose(mis->from_src_file);
mis->from_src_file = NULL;
}
if (mis->postcopy_remote_fds) {
g_array_free(mis->postcopy_remote_fds, TRUE);
mis->postcopy_remote_fds = NULL;
}

qemu_event_reset(&mis->main_thread_load_event);
}
Expand Down
2 changes: 2 additions & 0 deletions migration/migration.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ struct MigrationIncomingState {
QemuMutex rp_mutex; /* We send replies from multiple threads */
void *postcopy_tmp_page;
void *postcopy_tmp_zero_page;
/* PostCopyFD's for external userfaultfds & handlers of shared memory */
GArray *postcopy_remote_fds;

QEMUBH *bh;

Expand Down
209 changes: 156 additions & 53 deletions migration/postcopy-ram.c
Original file line number Diff line number Diff line change
Expand Up @@ -533,29 +533,44 @@ static void *postcopy_ram_fault_thread(void *opaque)
MigrationIncomingState *mis = opaque;
struct uffd_msg msg;
int ret;
size_t index;
RAMBlock *rb = NULL;
RAMBlock *last_rb = NULL; /* last RAMBlock we sent part of */

trace_postcopy_ram_fault_thread_entry();
qemu_sem_post(&mis->fault_thread_sem);

struct pollfd *pfd;
size_t pfd_len = 2 + mis->postcopy_remote_fds->len;

pfd = g_new0(struct pollfd, pfd_len);

pfd[0].fd = mis->userfault_fd;
pfd[0].events = POLLIN;
pfd[1].fd = mis->userfault_event_fd;
pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
trace_postcopy_ram_fault_thread_fds_core(pfd[0].fd, pfd[1].fd);
for (index = 0; index < mis->postcopy_remote_fds->len; index++) {
struct PostCopyFD *pcfd = &g_array_index(mis->postcopy_remote_fds,
struct PostCopyFD, index);
pfd[2 + index].fd = pcfd->fd;
pfd[2 + index].events = POLLIN;
trace_postcopy_ram_fault_thread_fds_extra(2 + index, pcfd->idstr,
pcfd->fd);
}

while (true) {
ram_addr_t rb_offset;
struct pollfd pfd[2];
int poll_result;

/*
* We're mainly waiting for the kernel to give us a faulting HVA,
* however we can be told to quit via userfault_quit_fd which is
* an eventfd
*/
pfd[0].fd = mis->userfault_fd;
pfd[0].events = POLLIN;
pfd[0].revents = 0;
pfd[1].fd = mis->userfault_event_fd;
pfd[1].events = POLLIN; /* Waiting for eventfd to go positive */
pfd[1].revents = 0;

if (poll(pfd, 2, -1 /* Wait forever */) == -1) {

poll_result = poll(pfd, pfd_len, -1 /* Wait forever */);
if (poll_result == -1) {
error_report("%s: userfault poll: %s", __func__, strerror(errno));
break;
}
Expand All @@ -575,57 +590,117 @@ static void *postcopy_ram_fault_thread(void *opaque)
}
}

ret = read(mis->userfault_fd, &msg, sizeof(msg));
if (ret != sizeof(msg)) {
if (errno == EAGAIN) {
/*
* if a wake up happens on the other thread just after
* the poll, there is nothing to read.
*/
continue;
if (pfd[0].revents) {
poll_result--;
ret = read(mis->userfault_fd, &msg, sizeof(msg));
if (ret != sizeof(msg)) {
if (errno == EAGAIN) {
/*
* if a wake up happens on the other thread just after
* the poll, there is nothing to read.
*/
continue;
}
if (ret < 0) {
error_report("%s: Failed to read full userfault "
"message: %s",
__func__, strerror(errno));
break;
} else {
error_report("%s: Read %d bytes from userfaultfd "
"expected %zd",
__func__, ret, sizeof(msg));
break; /* Lost alignment, don't know what we'd read next */
}
}
if (ret < 0) {
error_report("%s: Failed to read full userfault message: %s",
__func__, strerror(errno));
break;
} else {
error_report("%s: Read %d bytes from userfaultfd expected %zd",
__func__, ret, sizeof(msg));
break; /* Lost alignment, don't know what we'd read next */
if (msg.event != UFFD_EVENT_PAGEFAULT) {
error_report("%s: Read unexpected event %ud from userfaultfd",
__func__, msg.event);
continue; /* It's not a page fault, shouldn't happen */
}
}
if (msg.event != UFFD_EVENT_PAGEFAULT) {
error_report("%s: Read unexpected event %ud from userfaultfd",
__func__, msg.event);
continue; /* It's not a page fault, shouldn't happen */
}

rb = qemu_ram_block_from_host(
(void *)(uintptr_t)msg.arg.pagefault.address,
true, &rb_offset);
if (!rb) {
error_report("postcopy_ram_fault_thread: Fault outside guest: %"
PRIx64, (uint64_t)msg.arg.pagefault.address);
break;
}
rb = qemu_ram_block_from_host(
(void *)(uintptr_t)msg.arg.pagefault.address,
true, &rb_offset);
if (!rb) {
error_report("postcopy_ram_fault_thread: Fault outside guest: %"
PRIx64, (uint64_t)msg.arg.pagefault.address);
break;
}

rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
rb_offset &= ~(qemu_ram_pagesize(rb) - 1);
trace_postcopy_ram_fault_thread_request(msg.arg.pagefault.address,
qemu_ram_get_idstr(rb),
rb_offset);
/*
* Send the request to the source - we want to request one
* of our host page sizes (which is >= TPS)
*/
if (rb != last_rb) {
last_rb = rb;
migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
rb_offset, qemu_ram_pagesize(rb));
} else {
/* Save some space */
migrate_send_rp_req_pages(mis, NULL,
rb_offset, qemu_ram_pagesize(rb));
}
}

/*
* Send the request to the source - we want to request one
* of our host page sizes (which is >= TPS)
*/
if (rb != last_rb) {
last_rb = rb;
migrate_send_rp_req_pages(mis, qemu_ram_get_idstr(rb),
rb_offset, qemu_ram_pagesize(rb));
} else {
/* Save some space */
migrate_send_rp_req_pages(mis, NULL,
rb_offset, qemu_ram_pagesize(rb));
/* Now handle any requests from external processes on shared memory */
/* TODO: May need to handle devices deregistering during postcopy */
for (index = 2; index < pfd_len && poll_result; index++) {
if (pfd[index].revents) {
struct PostCopyFD *pcfd =
&g_array_index(mis->postcopy_remote_fds,
struct PostCopyFD, index - 2);

poll_result--;
if (pfd[index].revents & POLLERR) {
error_report("%s: POLLERR on poll %zd fd=%d",
__func__, index, pcfd->fd);
pfd[index].events = 0;
continue;
}

ret = read(pcfd->fd, &msg, sizeof(msg));
if (ret != sizeof(msg)) {
if (errno == EAGAIN) {
/*
* if a wake up happens on the other thread just after
* the poll, there is nothing to read.
*/
continue;
}
if (ret < 0) {
error_report("%s: Failed to read full userfault "
"message: %s (shared) revents=%d",
__func__, strerror(errno),
pfd[index].revents);
/*TODO: Could just disable this sharer */
break;
} else {
error_report("%s: Read %d bytes from userfaultfd "
"expected %zd (shared)",
__func__, ret, sizeof(msg));
/*TODO: Could just disable this sharer */
break; /*Lost alignment,don't know what we'd read next*/
}
}
if (msg.event != UFFD_EVENT_PAGEFAULT) {
error_report("%s: Read unexpected event %ud "
"from userfaultfd (shared)",
__func__, msg.event);
continue; /* It's not a page fault, shouldn't happen */
}
/* Call the device handler registered with us */
ret = pcfd->handler(pcfd, &msg);
if (ret) {
error_report("%s: Failed to resolve shared fault on %zd/%s",
__func__, index, pcfd->idstr);
/* TODO: Fail? Disable this sharer? */
}
}
}
}
trace_postcopy_ram_fault_thread_exit();
Expand Down Expand Up @@ -970,3 +1045,31 @@ PostcopyState postcopy_state_set(PostcopyState new_state)
{
return atomic_xchg(&incoming_postcopy_state, new_state);
}

/* Register a handler for external shared memory postcopy
* called on the destination.
*/
void postcopy_register_shared_ufd(struct PostCopyFD *pcfd)
{
MigrationIncomingState *mis = migration_incoming_get_current();

mis->postcopy_remote_fds = g_array_append_val(mis->postcopy_remote_fds,
*pcfd);
}

/* Unregister a handler for external shared memory postcopy
*/
void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd)
{
guint i;
MigrationIncomingState *mis = migration_incoming_get_current();
GArray *pcrfds = mis->postcopy_remote_fds;

for (i = 0; i < pcrfds->len; i++) {
struct PostCopyFD *cur = &g_array_index(pcrfds, struct PostCopyFD, i);
if (cur->fd == pcfd->fd) {
mis->postcopy_remote_fds = g_array_remove_index(pcrfds, i);
return;
}
}
}
21 changes: 21 additions & 0 deletions migration/postcopy-ram.h
Original file line number Diff line number Diff line change
Expand Up @@ -143,4 +143,25 @@ void postcopy_remove_notifier(NotifierWithReturn *n);
/* Call the notifier list set by postcopy_add_start_notifier */
int postcopy_notify(enum PostcopyNotifyReason reason, Error **errp);

struct PostCopyFD;

/* ufd is a pointer to the struct uffd_msg *TODO: more Portable! */
typedef int (*pcfdhandler)(struct PostCopyFD *pcfd, void *ufd);

struct PostCopyFD {
int fd;
/* Data to pass to handler */
void *data;
/* Handler to be called whenever we get a poll event */
pcfdhandler handler;
/* A string to use in error messages */
const char *idstr;
};

/* Register a userfaultfd owned by an external process for
* shared memory.
*/
void postcopy_register_shared_ufd(struct PostCopyFD *pcfd);
void postcopy_unregister_shared_ufd(struct PostCopyFD *pcfd);

#endif
2 changes: 2 additions & 0 deletions migration/trace-events
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,8 @@ postcopy_place_page_zero(void *host_addr) "host=%p"
postcopy_ram_enable_notify(void) ""
postcopy_ram_fault_thread_entry(void) ""
postcopy_ram_fault_thread_exit(void) ""
postcopy_ram_fault_thread_fds_core(int baseufd, int quitfd) "ufd: %d quitfd: %d"
postcopy_ram_fault_thread_fds_extra(size_t index, const char *name, int fd) "%zd/%s: %d"
postcopy_ram_fault_thread_quit(void) ""
postcopy_ram_fault_thread_request(uint64_t hostaddr, const char *ramblock, size_t offset) "Request for HVA=0x%" PRIx64 " rb=%s offset=0x%zx"
postcopy_ram_incoming_cleanup_closeuf(void) ""
Expand Down

0 comments on commit 00fa4fc

Please sign in to comment.