Skip to content

Commit

Permalink
mm/migrate: support un-addressable ZONE_DEVICE page in migration
Browse files Browse the repository at this point in the history
Allow unmapping and restoring the special swap entries used for
un-addressable ZONE_DEVICE memory.

Link: http://lkml.kernel.org/r/[email protected]
Signed-off-by: Jérôme Glisse <[email protected]>
Cc: Kirill A. Shutemov <[email protected]>
Cc: Aneesh Kumar <[email protected]>
Cc: Balbir Singh <[email protected]>
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Dan Williams <[email protected]>
Cc: David Nellans <[email protected]>
Cc: Evgeny Baskakov <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: John Hubbard <[email protected]>
Cc: Mark Hairgrove <[email protected]>
Cc: Michal Hocko <[email protected]>
Cc: Paul E. McKenney <[email protected]>
Cc: Ross Zwisler <[email protected]>
Cc: Sherry Cheung <[email protected]>
Cc: Subhash Gutti <[email protected]>
Cc: Vladimir Davydov <[email protected]>
Cc: Bob Liu <[email protected]>
Signed-off-by: Andrew Morton <[email protected]>
Signed-off-by: Linus Torvalds <[email protected]>
  • Loading branch information
Jérôme Glisse authored and torvalds committed Sep 9, 2017
1 parent 8c3328f commit a5430dd
Show file tree
Hide file tree
Showing 4 changed files with 165 additions and 30 deletions.
10 changes: 8 additions & 2 deletions include/linux/migrate.h
Original file line number Diff line number Diff line change
Expand Up @@ -159,12 +159,18 @@ static inline int migrate_misplaced_transhuge_page(struct mm_struct *mm,

#ifdef CONFIG_MIGRATION

/*
* Watch out for PAE architectures, where unsigned long is only 32 bits wide
* and might not have enough bits to store both the physical address and the
* flags. So far we have enough room for all our flags.
*/
#define MIGRATE_PFN_VALID (1UL << 0)
#define MIGRATE_PFN_MIGRATE (1UL << 1)
#define MIGRATE_PFN_LOCKED (1UL << 2)
#define MIGRATE_PFN_WRITE (1UL << 3)
#define MIGRATE_PFN_ERROR (1UL << 4)
#define MIGRATE_PFN_SHIFT 5
#define MIGRATE_PFN_DEVICE (1UL << 4)
#define MIGRATE_PFN_ERROR (1UL << 5)
#define MIGRATE_PFN_SHIFT 6

static inline struct page *migrate_pfn_to_page(unsigned long mpfn)
{
Expand Down
149 changes: 121 additions & 28 deletions mm/migrate.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>
#include <linux/gfp.h>
#include <linux/memremap.h>
#include <linux/balloon_compaction.h>
#include <linux/mmu_notifier.h>
#include <linux/page_idle.h>
Expand Down Expand Up @@ -237,7 +238,13 @@ static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
if (is_write_migration_entry(entry))
pte = maybe_mkwrite(pte, vma);

flush_dcache_page(new);
if (unlikely(is_zone_device_page(new)) &&
is_device_private_page(new)) {
entry = make_device_private_entry(new, pte_write(pte));
pte = swp_entry_to_pte(entry);
} else
flush_dcache_page(new);

#ifdef CONFIG_HUGETLB_PAGE
if (PageHuge(new)) {
pte = pte_mkhuge(pte);
Expand Down Expand Up @@ -2205,17 +2212,40 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
pte = *ptep;
pfn = pte_pfn(pte);

if (!pte_present(pte)) {
if (pte_none(pte)) {
mpfn = pfn = 0;
goto next;
}

if (!pte_present(pte)) {
mpfn = pfn = 0;

/*
* We only care about the special page table entries of
* unaddressable device pages. Other special swap entries are
* not migratable, and regular swapped-out pages are ignored.
*/
entry = pte_to_swp_entry(pte);
if (!is_device_private_entry(entry))
goto next;

page = device_private_entry_to_page(entry);
mpfn = migrate_pfn(page_to_pfn(page))|
MIGRATE_PFN_DEVICE | MIGRATE_PFN_MIGRATE;
if (is_write_device_private_entry(entry))
mpfn |= MIGRATE_PFN_WRITE;
} else {
page = vm_normal_page(migrate->vma, addr, pte);
mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;
}

/* FIXME support THP */
page = vm_normal_page(migrate->vma, addr, pte);
if (!page || !page->mapping || PageTransCompound(page)) {
mpfn = pfn = 0;
goto next;
}
pfn = page_to_pfn(page);

/*
* By getting a reference on the page we pin it and that blocks
Expand All @@ -2228,8 +2258,6 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
*/
get_page(page);
migrate->cpages++;
mpfn = migrate_pfn(pfn) | MIGRATE_PFN_MIGRATE;
mpfn |= pte_write(pte) ? MIGRATE_PFN_WRITE : 0;

/*
* Optimize for the common case where page is only mapped once
Expand All @@ -2256,10 +2284,13 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp,
*/
page_remove_rmap(page, false);
put_page(page);
unmapped++;

if (pte_present(pte))
unmapped++;
}

next:
migrate->dst[migrate->npages] = 0;
migrate->src[migrate->npages++] = mpfn;
}
arch_leave_lazy_mmu_mode();
Expand Down Expand Up @@ -2329,6 +2360,28 @@ static bool migrate_vma_check_page(struct page *page)
if (PageCompound(page))
return false;

/* Pages from ZONE_DEVICE have one extra reference */
if (is_zone_device_page(page)) {
/*
* Private pages can never be pinned as they have no valid pte and
* GUP will fail for those. Yet if there is a pending migration,
* a thread might try to wait on the pte migration entry and
* will bump the page reference count. Sadly there is no way to
* differentiate a regular pin from a migration wait. Hence, to
* avoid two racing threads that try to migrate back to the CPU
* entering an infinite loop (one stopping its migration because
* the other is waiting on the pte migration entry), we always
* return true here.
*
* FIXME: the proper solution is to rework migration_entry_wait()
* so it does not need to take a reference on the page.
*/
if (is_device_private_page(page))
return true;

/* Other ZONE_DEVICE memory types are not supported */
return false;
}

if ((page_count(page) - extra) > page_mapcount(page))
return false;

Expand Down Expand Up @@ -2379,24 +2432,30 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
migrate->src[i] |= MIGRATE_PFN_LOCKED;
}

if (!PageLRU(page) && allow_drain) {
/* Drain CPU's pagevec */
lru_add_drain_all();
allow_drain = false;
}
/* ZONE_DEVICE pages are not on LRU */
if (!is_zone_device_page(page)) {
if (!PageLRU(page) && allow_drain) {
/* Drain CPU's pagevec */
lru_add_drain_all();
allow_drain = false;
}

if (isolate_lru_page(page)) {
if (remap) {
migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
migrate->cpages--;
restore++;
} else {
migrate->src[i] = 0;
unlock_page(page);
migrate->cpages--;
put_page(page);
if (isolate_lru_page(page)) {
if (remap) {
migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
migrate->cpages--;
restore++;
} else {
migrate->src[i] = 0;
unlock_page(page);
migrate->cpages--;
put_page(page);
}
continue;
}
continue;

/* Drop the reference we took in collect */
put_page(page);
}

if (!migrate_vma_check_page(page)) {
Expand All @@ -2405,14 +2464,19 @@ static void migrate_vma_prepare(struct migrate_vma *migrate)
migrate->cpages--;
restore++;

get_page(page);
putback_lru_page(page);
if (!is_zone_device_page(page)) {
get_page(page);
putback_lru_page(page);
}
} else {
migrate->src[i] = 0;
unlock_page(page);
migrate->cpages--;

putback_lru_page(page);
if (!is_zone_device_page(page))
putback_lru_page(page);
else
put_page(page);
}
}
}
Expand Down Expand Up @@ -2483,7 +2547,10 @@ static void migrate_vma_unmap(struct migrate_vma *migrate)
unlock_page(page);
restore--;

putback_lru_page(page);
if (is_zone_device_page(page))
put_page(page);
else
putback_lru_page(page);
}
}

Expand Down Expand Up @@ -2514,6 +2581,26 @@ static void migrate_vma_pages(struct migrate_vma *migrate)

mapping = page_mapping(page);

if (is_zone_device_page(newpage)) {
if (is_device_private_page(newpage)) {
/*
* For now only private anonymous memory is supported
* when migrating to un-addressable device memory.
*/
if (mapping) {
migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
continue;
}
} else {
/*
* Other types of ZONE_DEVICE pages are not
* supported.
*/
migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
continue;
}
}

r = migrate_page(mapping, newpage, page, MIGRATE_SYNC_NO_COPY);
if (r != MIGRATEPAGE_SUCCESS)
migrate->src[i] &= ~MIGRATE_PFN_MIGRATE;
Expand Down Expand Up @@ -2554,11 +2641,17 @@ static void migrate_vma_finalize(struct migrate_vma *migrate)
unlock_page(page);
migrate->cpages--;

putback_lru_page(page);
if (is_zone_device_page(page))
put_page(page);
else
putback_lru_page(page);

if (newpage != page) {
unlock_page(newpage);
putback_lru_page(newpage);
if (is_zone_device_page(newpage))
put_page(newpage);
else
putback_lru_page(newpage);
}
}
}
Expand Down
10 changes: 10 additions & 0 deletions mm/page_vma_mapped.c
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
if (!is_swap_pte(*pvmw->pte))
return false;
entry = pte_to_swp_entry(*pvmw->pte);

if (!is_migration_entry(entry))
return false;
if (migration_entry_to_page(entry) - pvmw->page >=
Expand All @@ -60,6 +61,15 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw)
WARN_ON_ONCE(1);
#endif
} else {
if (is_swap_pte(*pvmw->pte)) {
swp_entry_t entry;

entry = pte_to_swp_entry(*pvmw->pte);
if (is_device_private_entry(entry) &&
device_private_entry_to_page(entry) == pvmw->page)
return true;
}

if (!pte_present(*pvmw->pte))
return false;

Expand Down
26 changes: 26 additions & 0 deletions mm/rmap.c
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@
#include <linux/hugetlb.h>
#include <linux/backing-dev.h>
#include <linux/page_idle.h>
#include <linux/memremap.h>

#include <asm/tlbflush.h>

Expand Down Expand Up @@ -1346,6 +1347,10 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
if ((flags & TTU_MUNLOCK) && !(vma->vm_flags & VM_LOCKED))
return true;

if (IS_ENABLED(CONFIG_MIGRATION) && (flags & TTU_MIGRATION) &&
is_zone_device_page(page) && !is_device_private_page(page))
return true;

if (flags & TTU_SPLIT_HUGE_PMD) {
split_huge_pmd_address(vma, address,
flags & TTU_SPLIT_FREEZE, page);
Expand Down Expand Up @@ -1403,6 +1408,27 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
address = pvmw.address;


if (IS_ENABLED(CONFIG_MIGRATION) &&
(flags & TTU_MIGRATION) &&
is_zone_device_page(page)) {
swp_entry_t entry;
pte_t swp_pte;

pteval = ptep_get_and_clear(mm, pvmw.address, pvmw.pte);

/*
* Store the pfn of the page in a special migration
* pte. do_swap_page() will wait until the migration
* pte is removed and then restart fault handling.
*/
entry = make_migration_entry(page, 0);
swp_pte = swp_entry_to_pte(entry);
if (pte_soft_dirty(pteval))
swp_pte = pte_swp_mksoft_dirty(swp_pte);
set_pte_at(mm, pvmw.address, pvmw.pte, swp_pte);
goto discard;
}

if (!(flags & TTU_IGNORE_ACCESS)) {
if (ptep_clear_flush_young_notify(vma, address,
pvmw.pte)) {
Expand Down

0 comments on commit a5430dd

Please sign in to comment.