diff --git a/mm/compaction.c b/mm/compaction.c index b69b7dac0361..b9cf751cc00e 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -313,8 +313,15 @@ static inline bool compact_should_abort(struct compact_control *cc) static bool suitable_migration_target(struct page *page) { /* If the page is a large free page, then disallow migration */ - if (PageBuddy(page) && page_order(page) >= pageblock_order) - return false; + if (PageBuddy(page)) { + /* + * We are checking page_order without zone->lock taken. But + * the only small danger is that we skip a potentially suitable + * pageblock, so it's not worth to check order for valid range. + */ + if (page_order_unsafe(page) >= pageblock_order) + return false; + } /* If the block is MIGRATE_MOVABLE or MIGRATE_CMA, allow migration */ if (migrate_async_suitable(get_pageblock_migratetype(page))) @@ -608,11 +615,23 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn, valid_page = page; /* - * Skip if free. page_order cannot be used without zone->lock - * as nothing prevents parallel allocations or buddy merging. + * Skip if free. We read page order here without zone lock + * which is generally unsafe, but the race window is small and + * the worst thing that can happen is that we skip some + * potential isolation targets. */ - if (PageBuddy(page)) + if (PageBuddy(page)) { + unsigned long freepage_order = page_order_unsafe(page); + + /* + * Without lock, we cannot be sure that what we got is + * a valid page order. Consider only values in the + * valid order range to prevent low_pfn overflow. + */ + if (freepage_order > 0 && freepage_order < MAX_ORDER) + low_pfn += (1UL << freepage_order) - 1; continue; + } /* * Check may be lockless but that's ok as we recheck later. @@ -698,6 +717,13 @@ isolate_success: } } + /* + * The PageBuddy() check could have potentially brought us outside + * the range to be scanned. + */ + if (unlikely(low_pfn > end_pfn)) + low_pfn = end_pfn; + if (locked) spin_unlock_irqrestore(&zone->lru_lock, flags); diff --git a/mm/internal.h b/mm/internal.h index 4c1d604c396c..86ae964a25b0 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -164,7 +164,8 @@ isolate_migratepages_range(struct compact_control *cc, * general, page_zone(page)->lock must be held by the caller to prevent the * page from being allocated in parallel and returning garbage as the order. * If a caller does not hold page_zone(page)->lock, it must guarantee that the - * page cannot be allocated or merged in parallel. + * page cannot be allocated or merged in parallel. Alternatively, it must + * handle invalid values gracefully, and use page_order_unsafe() below. */ static inline unsigned long page_order(struct page *page) { @@ -172,6 +173,19 @@ static inline unsigned long page_order(struct page *page) return page_private(page); } +/* + * Like page_order(), but for callers who cannot afford to hold the zone lock. + * PageBuddy() should be checked first by the caller to minimize race window, + * and invalid values must be handled gracefully. + * + * ACCESS_ONCE is used so that if the caller assigns the result into a local + * variable and e.g. tests it for valid range before using, the compiler cannot + * decide to remove the variable and inline the page_private(page) multiple + * times, potentially observing different values in the tests and the actual + * use of the result. + */ +#define page_order_unsafe(page) ACCESS_ONCE(page_private(page)) + static inline bool is_cow_mapping(vm_flags_t flags) { return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;