diff --git a/mm/memory.c b/mm/memory.c index 8e30675dc077..f721735ff947 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3489,6 +3489,25 @@ static vm_fault_t remove_device_exclusive_entry(struct vm_fault *vmf) return 0; } +static inline bool should_try_to_free_swap(struct page *page, + struct vm_area_struct *vma, + unsigned int fault_flags) +{ + if (!PageSwapCache(page)) + return false; + if (mem_cgroup_swap_full(page) || (vma->vm_flags & VM_LOCKED) || + PageMlocked(page)) + return true; + /* + * If we want to map a page that's in the swapcache writable, we + * have to detect via the refcount if we're really the exclusive + * user. Try freeing the swapcache to get rid of the swapcache + * reference only in case it's likely that we'll be the exclusive user. + */ + return (fault_flags & FAULT_FLAG_WRITE) && !PageKsm(page) && + page_count(page) == 2; +} + /* * We enter with non-exclusive mmap_lock (to exclude vma changes, * but allow concurrent faults), and pte mapped but not yet locked. @@ -3630,6 +3649,16 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) page = swapcache; goto out_page; } + + /* + * If we want to map a page that's in the swapcache writable, we + * have to detect via the refcount if we're really the exclusive + * owner. Try removing the extra reference from the local LRU + * pagevecs if required. + */ + if ((vmf->flags & FAULT_FLAG_WRITE) && page == swapcache && + !PageKsm(page) && !PageLRU(page)) + lru_add_drain(); } cgroup_throttle_swaprate(page, GFP_KERNEL); @@ -3648,19 +3677,25 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) } /* - * The page isn't present yet, go ahead with the fault. - * - * Be careful about the sequence of operations here. - * To get its accounting right, reuse_swap_page() must be called - * while the page is counted on swap but not yet in mapcount i.e. - * before page_add_anon_rmap() and swap_free(); try_to_free_swap() - * must be called after the swap_free(), or it will never succeed. 
+ * Remove the swap entry and conditionally try to free up the swapcache. + * We're already holding a reference on the page but haven't mapped it + * yet. */ + swap_free(entry); + if (should_try_to_free_swap(page, vma, vmf->flags)) + try_to_free_swap(page); inc_mm_counter_fast(vma->vm_mm, MM_ANONPAGES); dec_mm_counter_fast(vma->vm_mm, MM_SWAPENTS); pte = mk_pte(page, vma->vm_page_prot); - if ((vmf->flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) { + + /* + * Same logic as in do_wp_page(); however, optimize for fresh pages + * that are certainly not shared because we just allocated them without + * exposing them to the swapcache. + */ + if ((vmf->flags & FAULT_FLAG_WRITE) && !PageKsm(page) && + (page != swapcache || page_count(page) == 1)) { pte = maybe_mkwrite(pte_mkdirty(pte), vma); vmf->flags &= ~FAULT_FLAG_WRITE; ret |= VM_FAULT_WRITE; @@ -3686,10 +3721,6 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) set_pte_at(vma->vm_mm, vmf->address, vmf->pte, pte); arch_do_swap_page(vma->vm_mm, vma, vmf->address, pte, vmf->orig_pte); - swap_free(entry); - if (mem_cgroup_swap_full(page) || - (vma->vm_flags & VM_LOCKED) || PageMlocked(page)) - try_to_free_swap(page); unlock_page(page); if (page != swapcache && swapcache) { /*