diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 66b621469f52..8d9fe131a240 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -349,7 +349,8 @@ struct page *alloc_huge_page(struct vm_area_struct *vma, struct page *alloc_huge_page_node(struct hstate *h, int nid); struct page *alloc_huge_page_noerr(struct vm_area_struct *vma, unsigned long addr, int avoid_reserve); -struct page *alloc_huge_page_nodemask(struct hstate *h, nodemask_t *nmask); +struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, + nodemask_t *nmask); int huge_add_to_page_cache(struct page *page, struct address_space *mapping, pgoff_t idx); @@ -525,7 +526,7 @@ static inline void set_huge_swap_pte_at(struct mm_struct *mm, unsigned long addr struct hstate {}; #define alloc_huge_page(v, a, r) NULL #define alloc_huge_page_node(h, nid) NULL -#define alloc_huge_page_nodemask(h, nmask) NULL +#define alloc_huge_page_nodemask(h, preferred_nid, nmask) NULL #define alloc_huge_page_noerr(v, a, r) NULL #define alloc_bootmem_huge_page(h) NULL #define hstate_file(f) NULL diff --git a/include/linux/migrate.h b/include/linux/migrate.h index d9675b665cc4..4634da521238 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -38,7 +38,7 @@ static inline struct page *new_page_nodemask(struct page *page, if (PageHuge(page)) return alloc_huge_page_nodemask(page_hstate(compound_head(page)), - nodemask); + preferred_nid, nodemask); if (PageHighMem(page) || (zone_idx(page_zone(page)) == ZONE_MOVABLE)) gfp_mask |= __GFP_HIGHMEM; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index fd6e0c50f949..1e516520433d 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -887,19 +887,39 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid) return page; } -static struct page *dequeue_huge_page_node(struct hstate *h, int nid) +static struct page *dequeue_huge_page_nodemask(struct hstate *h, gfp_t gfp_mask, int nid, + nodemask_t *nmask) { - struct page *page; - int node; + unsigned int cpuset_mems_cookie; + struct zonelist *zonelist; + struct zone *zone; + struct zoneref *z; + int node = -1; - if (nid != NUMA_NO_NODE) - return dequeue_huge_page_node_exact(h, nid); + zonelist = node_zonelist(nid, gfp_mask); + +retry_cpuset: + cpuset_mems_cookie = read_mems_allowed_begin(); + for_each_zone_zonelist_nodemask(zone, z, zonelist, gfp_zone(gfp_mask), nmask) { + struct page *page; + + if (!cpuset_zone_allowed(zone, gfp_mask)) + continue; + /* + * no need to ask again on the same node. Pool is node rather than + * zone aware + */ + if (zone_to_nid(zone) == node) + continue; + node = zone_to_nid(zone); - for_each_online_node(node) { page = dequeue_huge_page_node_exact(h, node); if (page) return page; } + if (unlikely(read_mems_allowed_retry(cpuset_mems_cookie))) + goto retry_cpuset; + return NULL; } @@ -917,15 +937,11 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, unsigned long address, int avoid_reserve, long chg) { - struct page *page = NULL; + struct page *page; struct mempolicy *mpol; - nodemask_t *nodemask; gfp_t gfp_mask; + nodemask_t *nodemask; int nid; - struct zonelist *zonelist; - struct zone *zone; - struct zoneref *z; - unsigned int cpuset_mems_cookie; /* * A child process with MAP_PRIVATE mappings created by their parent @@ -940,32 +956,15 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, if (avoid_reserve && h->free_huge_pages - h->resv_huge_pages == 0) goto err; -retry_cpuset: - cpuset_mems_cookie = read_mems_allowed_begin(); gfp_mask = htlb_alloc_mask(h); nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask); - zonelist = node_zonelist(nid, gfp_mask); - - for_each_zone_zonelist_nodemask(zone, z, zonelist, - MAX_NR_ZONES - 1, nodemask) { - if (cpuset_zone_allowed(zone, gfp_mask)) { - page = dequeue_huge_page_node(h, zone_to_nid(zone)); - if (page) { - if (avoid_reserve) - break; - if (!vma_has_reserves(vma, chg)) - break; - - SetPagePrivate(page); - h->resv_huge_pages--; - break; - } - } + page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask); + if (page && !avoid_reserve && vma_has_reserves(vma, chg)) { + SetPagePrivate(page); + h->resv_huge_pages--; } mpol_cond_put(mpol); - if (unlikely(!page && read_mems_allowed_retry(cpuset_mems_cookie))) - goto retry_cpuset; return page; err: @@ -1633,7 +1632,7 @@ struct page *alloc_huge_page_node(struct hstate *h, int nid) spin_lock(&hugetlb_lock); if (h->free_huge_pages - h->resv_huge_pages > 0) - page = dequeue_huge_page_node(h, nid); + page = dequeue_huge_page_nodemask(h, gfp_mask, nid, NULL); spin_unlock(&hugetlb_lock); if (!page) @@ -1642,26 +1641,27 @@ struct page *alloc_huge_page_node(struct hstate *h, int nid) return page; } -struct page *alloc_huge_page_nodemask(struct hstate *h, nodemask_t *nmask) + +struct page *alloc_huge_page_nodemask(struct hstate *h, int preferred_nid, + nodemask_t *nmask) { gfp_t gfp_mask = htlb_alloc_mask(h); - struct page *page = NULL; - int node; spin_lock(&hugetlb_lock); if (h->free_huge_pages - h->resv_huge_pages > 0) { - for_each_node_mask(node, *nmask) { - page = dequeue_huge_page_node_exact(h, node); - if (page) - break; + struct page *page; + + page = dequeue_huge_page_nodemask(h, gfp_mask, preferred_nid, nmask); + if (page) { + spin_unlock(&hugetlb_lock); + return page; } } spin_unlock(&hugetlb_lock); - if (page) - return page; /* No reservations, try to overcommit */ - return __alloc_buddy_huge_page(h, gfp_mask, NUMA_NO_NODE, nmask); + + return __alloc_buddy_huge_page(h, gfp_mask, preferred_nid, nmask); } /*