Skip to content

Commit

Permalink
mmu: do not free pages before flushing remote tlbs during unpopulate
Browse files Browse the repository at this point in the history
A page cannot be freed before remote TLBs are flushed: if a remote CPU
still has the page in its TLB and the page is reallocated for some other
purpose, the remote CPU can still access the page through its TLB and
corrupt its content. Think of two threads running on two different CPUs:
the first writes to a virtual address constantly while the second unmaps
that virtual address. The physical page the virtual address is mapped to
cannot be freed until both CPUs' TLBs have been flushed.

Signed-off-by: Gleb Natapov <[email protected]>
Signed-off-by: Avi Kivity <[email protected]>
  • Loading branch information
gleb-cloudius authored and avikivity committed Dec 26, 2013
1 parent 325a7b8 commit 73a97f1
Showing 1 changed file with 32 additions and 8 deletions.
40 changes: 32 additions & 8 deletions core/mmu.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <osv/error.h>
#include <osv/trace.hh>
#include "arch-mmu.hh"
#include <stack>

extern void* elf_start;
extern size_t elf_size;
Expand Down Expand Up @@ -412,7 +413,14 @@ class linear_page_mapper :

/*
 * vma_operation is the common base for operations applied to a vma's
 * address range (populate, unpopulate, protection, ...). Besides fixing
 * the page_table_operation policy options, it supplies default
 * implementations of the two hooks operate_range() invokes once the walk
 * over the range is done.
 */
template<allocate_intermediate_opt Allocate, skip_empty_opt Skip = skip_empty_opt::yes>
class vma_operation :
    public page_table_operation<Allocate, Skip, descend_opt::yes, once_opt::no, split_opt::yes> {
public:
    // Returns true if a tlb flush is needed after address range processing
    // is completed. Operations which modify live ptes override this.
    bool tlb_flush_needed(void) { return false; }
    // This function is called at the very end of operate_range(), after any
    // tlb flush. A vma_operation may do whatever cleanup is needed here
    // (e.g., free pages whose release had to wait for the flush).
    void finalize(void) { return; }
};

/*
* populate() populates the page table with the entries it is (assumed to be)
Expand Down Expand Up @@ -460,26 +468,40 @@ class populate : public vma_operation<allocate_intermediate_opt::yes, skip_empty
* and marking the pages non-present.
*/
/*
 * unpopulate removes the mapping of an address range: it clears the page
 * table entries and collects the backing pages, but defers freeing them to
 * finalize(), which operate_range() calls only after the TLB flush. Freeing
 * earlier would let a remote CPU, whose TLB still caches the stale
 * translation, write into a page that has already been reallocated for
 * another purpose.
 */
class unpopulate : public vma_operation<allocate_intermediate_opt::no> {
private:
    std::stack<void*> small_pages; // awaiting free_page() in finalize()
    std::stack<void*> huge_pages;  // awaiting free_huge_page() in finalize()
public:
    void small_page(hw_ptep ptep, uintptr_t offset) {
        // Note: we free the page even if it is already marked "not present".
        // evacuate() makes sure we are only called for allocated pages, and
        // not-present may only mean mprotect(PROT_NONE).
        pt_element pte = ptep.read();
        ptep.write(make_empty_pte());
        // Defer the actual free to finalize(), after remote tlbs are flushed.
        small_pages.push(phys_to_virt(pte.addr(false)));
    }
    bool huge_page(hw_ptep ptep, uintptr_t offset) {
        pt_element pte = ptep.read();
        ptep.write(make_empty_pte());
        // addr(true): this is a huge-page pte, so the address must be
        // extracted with large-page masking (as the pre-deferral code did).
        huge_pages.push(phys_to_virt(pte.addr(true)));
        return true;
    }
    void intermediate_page(hw_ptep ptep, uintptr_t offset) {
        // An intermediate page-table page is an ordinary small page.
        small_page(ptep, offset);
    }
    // A flush is needed iff we actually cleared at least one pte.
    bool tlb_flush_needed(void) {
        return !small_pages.empty() || !huge_pages.empty();
    }
    // Runs after the tlb flush: only now is it safe to hand the pages back
    // to the allocator.
    void finalize(void) {
        while (!small_pages.empty()) {
            memory::free_page(small_pages.top());
            small_pages.pop();
        }
        while (!huge_pages.empty()) {
            memory::free_huge_page(huge_pages.top(), huge_page_size);
            huge_pages.pop();
        }
    }
};

class protection : public vma_operation<allocate_intermediate_opt::no> {
Expand All @@ -494,6 +516,7 @@ class protection : public vma_operation<allocate_intermediate_opt::no> {
change_perm(ptep, perm);
return true;
}
bool tlb_flush_needed(void) {return true;}
};

class virt_to_phys_map :
Expand Down Expand Up @@ -537,9 +560,10 @@ template<typename T> void operate_range(T mapper, void *start, size_t size)
// instead try to make more judicious use of INVLPG - e.g., in
// split_large_page() and other specific places where we modify specific
// page table entries.
// TODO: Consider if we're doing tlb_flush() too often, e.g., twice
// in one mmap which first does evacuate() and then allocate().
tlb_flush();
if (mapper.tlb_flush_needed()) {
tlb_flush();
}
mapper.finalize();
}

template<typename T> void operate_range(T mapper, const vma &vma)
Expand Down

0 comments on commit 73a97f1

Please sign in to comment.