From 678309117768e25751594a48a2d873b0552a3130 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 2 Jun 2016 13:20:32 +0100 Subject: [PATCH 01/16] PM / Hibernate: Don't let kasan instrument snapshot.c Kasan causes the compiler to instrument C code and is used at runtime to detect accesses to memory that has been freed, or not yet allocated. The code in snapshot.c saves and restores memory when hibernating. This will access whole pages in the slab cache that have both free and allocated areas, resulting in a large number of false positives from Kasan. Disable instrumentation of this file. Signed-off-by: James Morse Acked-by: Catalin Marinas Signed-off-by: Rafael J. Wysocki --- kernel/power/Makefile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/kernel/power/Makefile b/kernel/power/Makefile index cb880a14cc39..eb4f717705ba 100644 --- a/kernel/power/Makefile +++ b/kernel/power/Makefile @@ -1,6 +1,8 @@ ccflags-$(CONFIG_PM_DEBUG) := -DDEBUG +KASAN_SANITIZE_snapshot.o := n + obj-y += qos.o obj-$(CONFIG_PM) += main.o obj-$(CONFIG_VT_CONSOLE_SLEEP) += console.o From ca5f2b4c4fb7bb7397317ee2ead83485aa295a3e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 14 Jun 2016 16:23:22 +0200 Subject: [PATCH 02/16] PM / sleep: Make pm_prepare_console() return void Nothing is using its return value so change it to return void. No functionality change. Signed-off-by: Borislav Petkov Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 5 ++--- kernel/power/console.c | 8 ++++---- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 8b6ec7ef0854..7693e39b14fe 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -18,12 +18,11 @@ static inline void pm_set_vt_switch(int do_switch) #endif #ifdef CONFIG_VT_CONSOLE_SLEEP -extern int pm_prepare_console(void); +extern void pm_prepare_console(void); extern void pm_restore_console(void); #else -static inline int pm_prepare_console(void) +static inline void pm_prepare_console(void) { - return 0; } static inline void pm_restore_console(void) diff --git a/kernel/power/console.c b/kernel/power/console.c index aba9c545a0e3..0e781798b0b3 100644 --- a/kernel/power/console.c +++ b/kernel/power/console.c @@ -126,17 +126,17 @@ out: return ret; } -int pm_prepare_console(void) +void pm_prepare_console(void) { if (!pm_vt_switch()) - return 0; + return; orig_fgconsole = vt_move_to_console(SUSPEND_CONSOLE, 1); if (orig_fgconsole < 0) - return 1; + return; orig_kmsg = vt_kmsg_redirect(SUSPEND_CONSOLE); - return 0; + return; } void pm_restore_console(void) From ac485cb4b3b79fb7ef979cb243ee16e1a1ffa767 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 17 Jun 2016 15:59:33 +0300 Subject: [PATCH 03/16] tools/turbostat: allow user to alter DESTDIR and PREFIX When run make -C tools DESTDIR=/my/nice/dir turbostat_install get a message install: cannot create regular file '/usr/bin/turbostat': Permission denied Allow user to alter DESTDIR and PREFIX variables. Signed-off-by: Andy Shevchenko Signed-off-by: Rafael J. Wysocki --- tools/power/x86/turbostat/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index e367b1a85d70..8561e7ddca59 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -1,7 +1,7 @@ CC = $(CROSS_COMPILE)gcc BUILD_OUTPUT := $(CURDIR) -PREFIX := /usr -DESTDIR := +PREFIX ?= /usr +DESTDIR ?= ifeq ("$(origin O)", "command line") BUILD_OUTPUT := $(O) From ea00f4f4f00cc2bc3b63ad512a4e6df3b20832b9 Mon Sep 17 00:00:00 2001 From: Lianwei Wang Date: Sun, 19 Jun 2016 23:52:27 -0700 Subject: [PATCH 04/16] PM / sleep: make PM notifiers called symmetrically This makes pm notifier PREPARE/POST symmetrical: if PREPARE fails, we will only undo what ever happened on PREPARE. It fixes the unbalanced CPU hotplug enable in CPU PM notifier. Signed-off-by: Lianwei Wang Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 20 ++++++++++++-------- kernel/power/main.c | 11 +++++++++-- kernel/power/power.h | 2 ++ kernel/power/suspend.c | 10 ++++++---- kernel/power/user.c | 14 ++++++++------ 5 files changed, 37 insertions(+), 20 deletions(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index fca9254280ee..126e24caa82e 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -647,7 +647,7 @@ static void power_down(void) */ int hibernate(void) { - int error; + int error, nr_calls = 0; if (!hibernation_available()) { pr_debug("PM: Hibernation not available.\n"); @@ -662,9 +662,11 @@ int hibernate(void) } pm_prepare_console(); - error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); - if (error) + error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &nr_calls); + if (error) { + nr_calls--; goto Exit; + } printk(KERN_INFO "PM: Syncing filesystems ... "); sys_sync(); @@ -714,7 +716,7 @@ int hibernate(void) /* Don't bother checking whether freezer_test_done is true */ freezer_test_done = false; Exit: - pm_notifier_call_chain(PM_POST_HIBERNATION); + __pm_notifier_call_chain(PM_POST_HIBERNATION, nr_calls, NULL); pm_restore_console(); atomic_inc(&snapshot_device_available); Unlock: @@ -740,7 +742,7 @@ int hibernate(void) */ static int software_resume(void) { - int error; + int error, nr_calls = 0; unsigned int flags; /* @@ -827,9 +829,11 @@ static int software_resume(void) } pm_prepare_console(); - error = pm_notifier_call_chain(PM_RESTORE_PREPARE); - if (error) + error = __pm_notifier_call_chain(PM_RESTORE_PREPARE, -1, &nr_calls); + if (error) { + nr_calls--; goto Close_Finish; + } pr_debug("PM: Preparing processes for restore.\n"); error = freeze_processes(); @@ -855,7 +859,7 @@ static int software_resume(void) unlock_device_hotplug(); thaw_processes(); Finish: - pm_notifier_call_chain(PM_POST_RESTORE); + __pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL); pm_restore_console(); atomic_inc(&snapshot_device_available); /* For success case, the suspend path will release the lock */ diff --git a/kernel/power/main.c b/kernel/power/main.c index 27946975eff0..5ea50b1b7595 100644 --- a/kernel/power/main.c +++ b/kernel/power/main.c @@ -38,12 +38,19 @@ int unregister_pm_notifier(struct notifier_block *nb) } EXPORT_SYMBOL_GPL(unregister_pm_notifier); -int pm_notifier_call_chain(unsigned long val) +int __pm_notifier_call_chain(unsigned long val, int nr_to_call, int *nr_calls) { - int ret = blocking_notifier_call_chain(&pm_chain_head, val, NULL); + int ret; + + ret = __blocking_notifier_call_chain(&pm_chain_head, val, NULL, + nr_to_call, nr_calls); return notifier_to_errno(ret); } +int pm_notifier_call_chain(unsigned long val) +{ + return __pm_notifier_call_chain(val, -1, NULL); +} /* If set, devices may be suspended and resumed asynchronously. */ int pm_async_enabled = 1; diff --git a/kernel/power/power.h b/kernel/power/power.h index efe1b3b17c88..51f02ecaf125 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -200,6 +200,8 @@ static inline void suspend_test_finish(const char *label) {} #ifdef CONFIG_PM_SLEEP /* kernel/power/main.c */ +extern int __pm_notifier_call_chain(unsigned long val, int nr_to_call, + int *nr_calls); extern int pm_notifier_call_chain(unsigned long val); #endif diff --git a/kernel/power/suspend.c b/kernel/power/suspend.c index 5b70d64b871e..0acab9d7f96f 100644 --- a/kernel/power/suspend.c +++ b/kernel/power/suspend.c @@ -266,16 +266,18 @@ static int suspend_test(int level) */ static int suspend_prepare(suspend_state_t state) { - int error; + int error, nr_calls = 0; if (!sleep_state_supported(state)) return -EPERM; pm_prepare_console(); - error = pm_notifier_call_chain(PM_SUSPEND_PREPARE); - if (error) + error = __pm_notifier_call_chain(PM_SUSPEND_PREPARE, -1, &nr_calls); + if (error) { + nr_calls--; goto Finish; + } trace_suspend_resume(TPS("freeze_processes"), 0, true); error = suspend_freeze_processes(); @@ -286,7 +288,7 @@ static int suspend_prepare(suspend_state_t state) suspend_stats.failed_freeze++; dpm_save_failed_step(SUSPEND_FREEZE); Finish: - pm_notifier_call_chain(PM_POST_SUSPEND); + __pm_notifier_call_chain(PM_POST_SUSPEND, nr_calls, NULL); pm_restore_console(); return error; } diff --git a/kernel/power/user.c b/kernel/power/user.c index 526e8911460a..35310b627388 100644 --- a/kernel/power/user.c +++ b/kernel/power/user.c @@ -47,7 +47,7 @@ atomic_t snapshot_device_available = ATOMIC_INIT(1); static int snapshot_open(struct inode *inode, struct file *filp) { struct snapshot_data *data; - int error; + int error, nr_calls = 0; if (!hibernation_available()) return -EPERM; @@ -74,9 +74,9 @@ static int snapshot_open(struct inode *inode, struct file *filp) swap_type_of(swsusp_resume_device, 0, NULL) : -1; data->mode = O_RDONLY; data->free_bitmaps = false; - error = pm_notifier_call_chain(PM_HIBERNATION_PREPARE); + error = __pm_notifier_call_chain(PM_HIBERNATION_PREPARE, -1, &nr_calls); if (error) - pm_notifier_call_chain(PM_POST_HIBERNATION); + __pm_notifier_call_chain(PM_POST_HIBERNATION, --nr_calls, NULL); } else { /* * Resuming. We may need to wait for the image device to @@ -86,13 +86,15 @@ static int snapshot_open(struct inode *inode, struct file *filp) data->swap = -1; data->mode = O_WRONLY; - error = pm_notifier_call_chain(PM_RESTORE_PREPARE); + error = __pm_notifier_call_chain(PM_RESTORE_PREPARE, -1, &nr_calls); if (!error) { error = create_basic_memory_bitmaps(); data->free_bitmaps = !error; - } + } else + nr_calls--; + if (error) - pm_notifier_call_chain(PM_POST_RESTORE); + __pm_notifier_call_chain(PM_POST_RESTORE, nr_calls, NULL); } if (error) atomic_inc(&snapshot_device_available); From 7b776af66dc462caa7e839cc5c950a61db1f8551 Mon Sep 17 00:00:00 2001 From: Roger Lu Date: Fri, 1 Jul 2016 11:05:02 +0800 Subject: [PATCH 05/16] PM / suspend: show workqueue state in suspend flow If freezable workqueue aborts suspend flow, show workqueue state for debug purpose. Signed-off-by: Roger Lu Acked-by: Tejun Heo Signed-off-by: Rafael J. Wysocki --- kernel/power/process.c | 3 +++ kernel/workqueue.c | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/kernel/power/process.c b/kernel/power/process.c index df058bed53ce..6eef250a5705 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -89,6 +89,9 @@ static int try_to_freeze_tasks(bool user_only) elapsed_msecs / 1000, elapsed_msecs % 1000, todo - wq_busy, wq_busy); + if (wq_busy) + show_workqueue_state(); + if (!wakeup) { read_lock(&tasklist_lock); for_each_process_thread(g, p) { diff --git a/kernel/workqueue.c b/kernel/workqueue.c index e1c0e996b5ae..619e80ce4a59 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -4369,8 +4369,8 @@ static void show_pwq(struct pool_workqueue *pwq) /** * show_workqueue_state - dump workqueue state * - * Called from a sysrq handler and prints out all busy workqueues and - * pools. + * Called from a sysrq handler or try_to_freeze_tasks() and prints out + * all busy workqueues and pools. */ void show_workqueue_state(void) { From 9c744481c003697de453e8fc039468143ba604aa Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 29 Jun 2016 03:00:51 +0200 Subject: [PATCH 06/16] PM / hibernate: Do not free preallocated safe pages during image restore The core image restoration code preallocates some safe pages (ie. pages that weren't used by the image kernel before hibernation) for future use before allocating the bulk of memory for loading the image data. Those safe pages are then freed so they can be allocated again (with the memory management subsystem's help). That's done to ensure that there will be enough safe pages for temporary data structures needed during image restoration. However, it is not really necessary to free those pages after they have been allocated. They can be added to the (global) list of safe pages right away and then picked up from there when needed without freeing. That reduces the overhead related to using safe pages, especially in the arch-specific code, so modify the code accordingly. Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 66 ++++++++++++++++++++++++----------------- 1 file changed, 38 insertions(+), 28 deletions(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 3a970604308f..d9476ff877b8 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -74,6 +74,22 @@ void __init hibernate_image_size_init(void) */ struct pbe *restore_pblist; +/* struct linked_page is used to build chains of pages */ + +#define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *)) + +struct linked_page { + struct linked_page *next; + char data[LINKED_PAGE_DATA_SIZE]; +} __packed; + +/* + * List of "safe" pages (ie. pages that were not used by the image kernel + * before hibernation) that may be used as temporary storage for image kernel + * memory contents. + */ +static struct linked_page *safe_pages_list; + /* Pointer to an auxiliary buffer (1 page) */ static void *buffer; @@ -113,9 +129,21 @@ static void *get_image_page(gfp_t gfp_mask, int safe_needed) return res; } +static void *__get_safe_page(gfp_t gfp_mask) +{ + if (safe_pages_list) { + void *ret = safe_pages_list; + + safe_pages_list = safe_pages_list->next; + memset(ret, 0, PAGE_SIZE); + return ret; + } + return get_image_page(gfp_mask, PG_SAFE); +} + unsigned long get_safe_page(gfp_t gfp_mask) { - return (unsigned long)get_image_page(gfp_mask, PG_SAFE); + return (unsigned long)__get_safe_page(gfp_mask); } static struct page *alloc_image_page(gfp_t gfp_mask) @@ -150,15 +178,6 @@ static inline void free_image_page(void *addr, int clear_nosave_free) __free_page(page); } -/* struct linked_page is used to build chains of pages */ - -#define LINKED_PAGE_DATA_SIZE (PAGE_SIZE - sizeof(void *)) - -struct linked_page { - struct linked_page *next; - char data[LINKED_PAGE_DATA_SIZE]; -} __packed; - static inline void free_list_of_pages(struct linked_page *list, int clear_page_nosave) { @@ -208,7 +227,8 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) if (LINKED_PAGE_DATA_SIZE - ca->used_space < size) { struct linked_page *lp; - lp = get_image_page(ca->gfp_mask, ca->safe_needed); + lp = ca->safe_needed ? __get_safe_page(ca->gfp_mask) : + get_image_page(ca->gfp_mask, PG_ANY); if (!lp) return NULL; @@ -2104,11 +2124,6 @@ static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) return 0; } -/* List of "safe" pages that may be used to store data loaded from the suspend - * image - */ -static struct linked_page *safe_pages_list; - #ifdef CONFIG_HIGHMEM /* struct highmem_pbe is used for creating the list of highmem pages that * should be restored atomically during the resume from disk, because the page @@ -2334,7 +2349,7 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) { unsigned int nr_pages, nr_highmem; - struct linked_page *sp_list, *lp; + struct linked_page *lp; int error; /* If there is no highmem, the buffer will not be necessary */ @@ -2362,9 +2377,9 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) * NOTE: This way we make sure there will be enough safe pages for the * chain_alloc() in get_buffer(). It is a bit wasteful, but * nr_copy_pages cannot be greater than 50% of the memory anyway. + * + * nr_copy_pages cannot be less than allocated_unsafe_pages too. */ - sp_list = NULL; - /* nr_copy_pages cannot be lesser than allocated_unsafe_pages */ nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; nr_pages = DIV_ROUND_UP(nr_pages, PBES_PER_LINKED_PAGE); while (nr_pages > 0) { @@ -2373,12 +2388,11 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) error = -ENOMEM; goto Free; } - lp->next = sp_list; - sp_list = lp; + lp->next = safe_pages_list; + safe_pages_list = lp; nr_pages--; } /* Preallocate memory for the image */ - safe_pages_list = NULL; nr_pages = nr_copy_pages - nr_highmem - allocated_unsafe_pages; while (nr_pages > 0) { lp = (struct linked_page *)get_zeroed_page(GFP_ATOMIC); @@ -2396,12 +2410,6 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) swsusp_set_page_free(virt_to_page(lp)); nr_pages--; } - /* Free the reserved safe pages so that chain_alloc() can use them */ - while (sp_list) { - lp = sp_list->next; - free_image_page(sp_list, PG_UNSAFE_CLEAR); - sp_list = lp; - } return 0; Free: @@ -2491,6 +2499,8 @@ int snapshot_write_next(struct snapshot_handle *handle) if (error) return error; + safe_pages_list = NULL; + error = memory_bm_create(©_bm, GFP_ATOMIC, PG_ANY); if (error) return error; From 6dbecfd345a617888da370b13d5b190c9ff3df53 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 29 Jun 2016 03:02:16 +0200 Subject: [PATCH 07/16] PM / hibernate: Simplify mark_unsafe_pages() Rework mark_unsafe_pages() to use a simpler method of clearing all bits in free_pages_map and to set the bits for the "unsafe" pages (ie. pages that were used by the image kernel before hibernation) with the help of duplicate_memory_bitmap(). For this purpose, move the pfn_valid() check from mark_unsafe_pages() to unpack_orig_pfns() where the "unsafe" pages are discovered. Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 70 +++++++++++++++++------------------------ 1 file changed, 28 insertions(+), 42 deletions(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index d9476ff877b8..39bbad5fac5a 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -2019,44 +2019,8 @@ int snapshot_read_next(struct snapshot_handle *handle) return PAGE_SIZE; } -/** - * mark_unsafe_pages - mark the pages that cannot be used for storing - * the image during resume, because they conflict with the pages that - * had been used before suspend - */ - -static int mark_unsafe_pages(struct memory_bitmap *bm) -{ - struct zone *zone; - unsigned long pfn, max_zone_pfn; - - /* Clear page flags */ - for_each_populated_zone(zone) { - max_zone_pfn = zone_end_pfn(zone); - for (pfn = zone->zone_start_pfn; pfn < max_zone_pfn; pfn++) - if (pfn_valid(pfn)) - swsusp_unset_page_free(pfn_to_page(pfn)); - } - - /* Mark pages that correspond to the "original" pfns as "unsafe" */ - memory_bm_position_reset(bm); - do { - pfn = memory_bm_next_pfn(bm); - if (likely(pfn != BM_END_OF_MAP)) { - if (likely(pfn_valid(pfn))) - swsusp_set_page_free(pfn_to_page(pfn)); - else - return -EFAULT; - } - } while (pfn != BM_END_OF_MAP); - - allocated_unsafe_pages = 0; - - return 0; -} - -static void -duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src) +static void duplicate_memory_bitmap(struct memory_bitmap *dst, + struct memory_bitmap *src) { unsigned long pfn; @@ -2068,6 +2032,30 @@ duplicate_memory_bitmap(struct memory_bitmap *dst, struct memory_bitmap *src) } } +/** + * mark_unsafe_pages - mark the pages that cannot be used for storing + * the image during resume, because they conflict with the pages that + * had been used before suspend + */ + +static void mark_unsafe_pages(struct memory_bitmap *bm) +{ + unsigned long pfn; + + /* Clear the "free"/"unsafe" bit for all PFNs */ + memory_bm_position_reset(free_pages_map); + pfn = memory_bm_next_pfn(free_pages_map); + while (pfn != BM_END_OF_MAP) { + memory_bm_clear_current(free_pages_map); + pfn = memory_bm_next_pfn(free_pages_map); + } + + /* Mark pages that correspond to the "original" PFNs as "unsafe" */ + duplicate_memory_bitmap(free_pages_map, bm); + + allocated_unsafe_pages = 0; +} + static int check_header(struct swsusp_info *info) { char *reason; @@ -2115,7 +2103,7 @@ static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) /* Extract and buffer page key for data page (s390 only). */ page_key_memorize(buf + j); - if (memory_bm_pfn_present(bm, buf[j])) + if (pfn_valid(buf[j]) && memory_bm_pfn_present(bm, buf[j])) memory_bm_set_bit(bm, buf[j]); else return -EFAULT; @@ -2357,9 +2345,7 @@ prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) buffer = NULL; nr_highmem = count_highmem_image_pages(bm); - error = mark_unsafe_pages(bm); - if (error) - goto Free; + mark_unsafe_pages(bm); error = memory_bm_create(new_bm, GFP_ATOMIC, PG_SAFE); if (error) From 307c5971c972ef2bfd541d2850b36a692c6354c9 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 29 Jun 2016 03:05:10 +0200 Subject: [PATCH 08/16] PM / hibernate: Recycle safe pages after image restoration One of the memory bitmaps used by the hibernation image restoration code is freed after the image has been loaded. That is not quite efficient, though, because the memory pages used for building that bitmap are known to be safe (ie. they were not used by the image kernel before hibernation) and the arch-specific code finalizing the image restoration may need them. In that case it needs to allocate those pages again via the memory management subsystem, check if they are really safe again by consulting the other bitmaps and so on. To avoid that, recycle those pages by putting them into the global list of known safe pages so that they can be given to the arch code right away when necessary. Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 40 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 38 insertions(+), 2 deletions(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 39bbad5fac5a..94b6fe6c9ae3 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -158,6 +158,14 @@ static struct page *alloc_image_page(gfp_t gfp_mask) return page; } +static void recycle_safe_page(void *page_address) +{ + struct linked_page *lp = page_address; + + lp->next = safe_pages_list; + safe_pages_list = lp; +} + /** * free_image_page - free page represented by @addr, allocated with * get_image_page (page flags set by it must be cleared) @@ -852,6 +860,34 @@ struct nosave_region { static LIST_HEAD(nosave_regions); +static void recycle_zone_bm_rtree(struct mem_zone_bm_rtree *zone) +{ + struct rtree_node *node; + + list_for_each_entry(node, &zone->nodes, list) + recycle_safe_page(node->data); + + list_for_each_entry(node, &zone->leaves, list) + recycle_safe_page(node->data); +} + +static void memory_bm_recycle(struct memory_bitmap *bm) +{ + struct mem_zone_bm_rtree *zone; + struct linked_page *p_list; + + list_for_each_entry(zone, &bm->zones, list) + recycle_zone_bm_rtree(zone); + + p_list = bm->p_list; + while (p_list) { + struct linked_page *lp = p_list; + + p_list = lp->next; + recycle_safe_page(lp); + } +} + /** * register_nosave_region - register a range of page frames the contents * of which should not be saved during the suspend (to be used in the early @@ -2542,9 +2578,9 @@ void snapshot_write_finalize(struct snapshot_handle *handle) /* Restore page key for data page (s390 only). */ page_key_write(handle->buffer); page_key_free(); - /* Free only if we have loaded the image entirely */ + /* Do that only if we have loaded the image entirely */ if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) { - memory_bm_free(&orig_bm, PG_UNSAFE_CLEAR); + memory_bm_recycle(&orig_bm); free_highmem_data(); } } From 2f88e41a22ccfa95291c4df573f8ed4c6a71f29b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 6 Jul 2016 02:40:56 +0200 Subject: [PATCH 09/16] PM / hibernate: Add missing braces in hibernate_setup() Make hibernate_setup() follow the coding style more closely by adding some missing braces to the if () statement in it. Signed-off-by: Rafael J. Wysocki --- kernel/power/hibernate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 126e24caa82e..b00f270d328e 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -1119,11 +1119,11 @@ static int __init resume_offset_setup(char *str) static int __init hibernate_setup(char *str) { - if (!strncmp(str, "noresume", 8)) + if (!strncmp(str, "noresume", 8)) { noresume = 1; - else if (!strncmp(str, "nocompress", 10)) + } else if (!strncmp(str, "nocompress", 10)) { nocompress = 1; - else if (!strncmp(str, "no", 2)) { + } else if (!strncmp(str, "no", 2)) { noresume = 1; nohibernate = 1; } From efd5a85242e996275ebf3df71013beabd723bda3 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 6 Jul 2016 23:42:46 +0200 Subject: [PATCH 10/16] PM / hibernate: Clean up function headers in snapshot.c The formatting of some function headers in kernel/power/snapshot.c is not consistent with the general kernel coding style and with the formatting of some other function headers in the same file. Make all of them follow the same formatting convention. No functional changes. Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 93 +++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 51 deletions(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 94b6fe6c9ae3..1fe0ddb6fd0d 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -186,8 +186,8 @@ static inline void free_image_page(void *addr, int clear_nosave_free) __free_page(page); } -static inline void -free_list_of_pages(struct linked_page *list, int clear_page_nosave) +static inline void free_list_of_pages(struct linked_page *list, + int clear_page_nosave) { while (list) { struct linked_page *lp = list->next; @@ -219,8 +219,8 @@ struct chain_allocator { int safe_needed; /* if set, only "safe" pages are allocated */ }; -static void -chain_init(struct chain_allocator *ca, gfp_t gfp_mask, int safe_needed) +static void chain_init(struct chain_allocator *ca, gfp_t gfp_mask, + int safe_needed) { ca->chain = NULL; ca->used_space = LINKED_PAGE_DATA_SIZE; @@ -452,10 +452,11 @@ static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, * This function also allocated and builds the radix tree for the * zone. */ -static struct mem_zone_bm_rtree * -create_zone_bm_rtree(gfp_t gfp_mask, int safe_needed, - struct chain_allocator *ca, - unsigned long start, unsigned long end) +static struct mem_zone_bm_rtree *create_zone_bm_rtree(gfp_t gfp_mask, + int safe_needed, + struct chain_allocator *ca, + unsigned long start, + unsigned long end) { struct mem_zone_bm_rtree *zone; unsigned int i, nr_blocks; @@ -595,8 +596,8 @@ static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) /** * memory_bm_create - allocate memory for a memory bitmap */ -static int -memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) +static int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, + int safe_needed) { struct chain_allocator ca; struct list_head mem_extents; @@ -894,9 +895,8 @@ static void memory_bm_recycle(struct memory_bitmap *bm) * initialization code) */ -void __init -__register_nosave_region(unsigned long start_pfn, unsigned long end_pfn, - int use_kmalloc) +void __init __register_nosave_region(unsigned long start_pfn, + unsigned long end_pfn, int use_kmalloc) { struct nosave_region *region; @@ -1277,8 +1277,7 @@ static void safe_copy_page(void *dst, struct page *s_page) #ifdef CONFIG_HIGHMEM -static inline struct page * -page_is_saveable(struct zone *zone, unsigned long pfn) +static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn) { return is_highmem(zone) ? saveable_highmem_page(zone, pfn) : saveable_page(zone, pfn); @@ -1321,8 +1320,8 @@ static inline void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) } #endif /* CONFIG_HIGHMEM */ -static void -copy_data_pages(struct memory_bitmap *copy_bm, struct memory_bitmap *orig_bm) +static void copy_data_pages(struct memory_bitmap *copy_bm, + struct memory_bitmap *orig_bm) { struct zone *zone; unsigned long pfn; @@ -1485,8 +1484,8 @@ static unsigned long __fraction(u64 x, u64 multiplier, u64 base) } static unsigned long preallocate_highmem_fraction(unsigned long nr_pages, - unsigned long highmem, - unsigned long total) + unsigned long highmem, + unsigned long total) { unsigned long alloc = __fraction(nr_pages, highmem, total); @@ -1499,8 +1498,8 @@ static inline unsigned long preallocate_image_highmem(unsigned long nr_pages) } static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages, - unsigned long highmem, - unsigned long total) + unsigned long highmem, + unsigned long total) { return 0; } @@ -1780,8 +1779,7 @@ static unsigned int count_pages_for_highmem(unsigned int nr_highmem) return nr_highmem; } #else -static unsigned int -count_pages_for_highmem(unsigned int nr_highmem) { return 0; } +static unsigned int count_pages_for_highmem(unsigned int nr_highmem) { return 0; } #endif /* CONFIG_HIGHMEM */ /** @@ -1823,8 +1821,8 @@ static inline int get_highmem_buffer(int safe_needed) * highmem pages is lesser than that, allocate them all. */ -static inline unsigned int -alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem) +static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm, + unsigned int nr_highmem) { unsigned int to_alloc = count_free_highmem_pages(); @@ -1843,8 +1841,8 @@ alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem) #else static inline int get_highmem_buffer(int safe_needed) { return 0; } -static inline unsigned int -alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n) { return 0; } +static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm, + unsigned int n) { return 0; } #endif /* CONFIG_HIGHMEM */ /** @@ -1859,9 +1857,9 @@ alloc_highmem_pages(struct memory_bitmap *bm, unsigned int n) { return 0; } * copy_data_pages() works. */ -static int -swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, - unsigned int nr_pages, unsigned int nr_highmem) +static int swsusp_alloc(struct memory_bitmap *orig_bm, + struct memory_bitmap *copy_bm, + unsigned int nr_pages, unsigned int nr_highmem) { if (nr_highmem > 0) { if (get_highmem_buffer(PG_ANY)) @@ -1978,8 +1976,7 @@ static int init_header(struct swsusp_info *info) * are stored in the array @buf[] (1 page at a time) */ -static inline void -pack_pfns(unsigned long *buf, struct memory_bitmap *bm) +static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm) { int j; @@ -2110,8 +2107,7 @@ static int check_header(struct swsusp_info *info) * load header - check the image header and copy data from it */ -static int -load_header(struct swsusp_info *info) +static int load_header(struct swsusp_info *info) { int error; @@ -2204,8 +2200,8 @@ static unsigned int safe_highmem_pages; static struct memory_bitmap *safe_highmem_bm; -static int -prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) +static int prepare_highmem_image(struct memory_bitmap *bm, + unsigned int *nr_highmem_p) { unsigned int to_alloc; @@ -2259,8 +2255,8 @@ prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) static struct page *last_highmem_page; -static void * -get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) +static void *get_highmem_page_buffer(struct page *page, + struct chain_allocator *ca) { struct highmem_pbe *pbe; void *kaddr; @@ -2333,17 +2329,13 @@ static inline void free_highmem_data(void) free_image_page(buffer, PG_UNSAFE_CLEAR); } #else -static unsigned int -count_highmem_image_pages(struct memory_bitmap *bm) { return 0; } +static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) { return 0; } -static inline int -prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) -{ - return 0; -} +static inline int prepare_highmem_image(struct memory_bitmap *bm, + unsigned int *nr_highmem_p) { return 0; } -static inline void * -get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) +static inline void *get_highmem_page_buffer(struct page *page, + struct chain_allocator *ca) { return ERR_PTR(-EINVAL); } @@ -2369,8 +2361,7 @@ static inline void free_highmem_data(void) {} #define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) -static int -prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) +static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) { unsigned int nr_pages, nr_highmem; struct linked_page *lp; @@ -2593,8 +2584,8 @@ int snapshot_image_loaded(struct snapshot_handle *handle) #ifdef CONFIG_HIGHMEM /* Assumes that @buf is ready and points to a "safe" page */ -static inline void -swap_two_pages_data(struct page *p1, struct page *p2, void *buf) +static inline void swap_two_pages_data(struct page *p1, struct page *p2, + void *buf) { void *kaddr1, *kaddr2; From ef96f639ea663474c4e1c57bd64e118ffbb92be4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 6 Jul 2016 23:43:46 +0200 Subject: [PATCH 11/16] PM / hibernate: Clean up comments in snapshot.c Many comments in kernel/power/snapshot.c do not follow the general comment formatting rules. They look odd, some of them are outdated too, some are hard to parse and generally difficult to understand. Clean them up to make them easier to comprehend. No functional changes. Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 642 +++++++++++++++++++++------------------- 1 file changed, 333 insertions(+), 309 deletions(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 1fe0ddb6fd0d..bd927d9efeb7 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -67,7 +67,8 @@ void __init hibernate_image_size_init(void) image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE; } -/* List of PBEs needed for restoring the pages that were allocated before +/* + * List of PBEs needed for restoring the pages that were allocated before * the suspend and included in the suspend image, but have also been * allocated by the "resume" kernel, so their contents cannot be written * directly to their "original" page frames. @@ -93,16 +94,6 @@ static struct linked_page *safe_pages_list; /* Pointer to an auxiliary buffer (1 page) */ static void *buffer; -/** - * @safe_needed - on resume, for storing the PBE list and the image, - * we can only use memory pages that do not conflict with the pages - * used before suspend. The unsafe pages have PageNosaveFree set - * and we count them using unsafe_pages. - * - * Each allocated image page is marked as PageNosave and PageNosaveFree - * so that swsusp_free() can release it. - */ - #define PG_ANY 0 #define PG_SAFE 1 #define PG_UNSAFE_CLEAR 1 @@ -110,6 +101,19 @@ static void *buffer; static unsigned int allocated_unsafe_pages; +/** + * get_image_page - Allocate a page for a hibernation image. + * @gfp_mask: GFP mask for the allocation. + * @safe_needed: Get pages that were not used before hibernation (restore only) + * + * During image restoration, for storing the PBE list and the image data, we can + * only use memory pages that do not conflict with the pages used before + * hibernation. The "unsafe" pages have PageNosaveFree set and we count them + * using allocated_unsafe_pages. + * + * Each allocated image page is marked as PageNosave and PageNosaveFree so that + * swsusp_free() can release it. + */ static void *get_image_page(gfp_t gfp_mask, int safe_needed) { void *res; @@ -167,10 +171,13 @@ static void recycle_safe_page(void *page_address) } /** - * free_image_page - free page represented by @addr, allocated with - * get_image_page (page flags set by it must be cleared) + * free_image_page - Free a page allocated for hibernation image. + * @addr: Address of the page to free. + * @clear_nosave_free: If set, clear the PageNosaveFree bit for the page. + * + * The page to free should have been allocated by get_image_page() (page flags + * set by it are affected). */ - static inline void free_image_page(void *addr, int clear_nosave_free) { struct page *page; @@ -197,24 +204,22 @@ static inline void free_list_of_pages(struct linked_page *list, } } -/** - * struct chain_allocator is used for allocating small objects out of - * a linked list of pages called 'the chain'. - * - * The chain grows each time when there is no room for a new object in - * the current page. The allocated objects cannot be freed individually. - * It is only possible to free them all at once, by freeing the entire - * chain. - * - * NOTE: The chain allocator may be inefficient if the allocated objects - * are not much smaller than PAGE_SIZE. - */ - +/* + * struct chain_allocator is used for allocating small objects out of + * a linked list of pages called 'the chain'. + * + * The chain grows each time when there is no room for a new object in + * the current page. The allocated objects cannot be freed individually. + * It is only possible to free them all at once, by freeing the entire + * chain. + * + * NOTE: The chain allocator may be inefficient if the allocated objects + * are not much smaller than PAGE_SIZE. + */ struct chain_allocator { struct linked_page *chain; /* the chain */ unsigned int used_space; /* total size of objects allocated out - * of the current page - */ + of the current page */ gfp_t gfp_mask; /* mask for allocating pages */ int safe_needed; /* if set, only "safe" pages are allocated */ }; @@ -250,44 +255,44 @@ static void *chain_alloc(struct chain_allocator *ca, unsigned int size) } /** - * Data types related to memory bitmaps. + * Data types related to memory bitmaps. * - * Memory bitmap is a structure consiting of many linked lists of - * objects. The main list's elements are of type struct zone_bitmap - * and each of them corresonds to one zone. For each zone bitmap - * object there is a list of objects of type struct bm_block that - * represent each blocks of bitmap in which information is stored. + * Memory bitmap is a structure consiting of many linked lists of + * objects. The main list's elements are of type struct zone_bitmap + * and each of them corresonds to one zone. For each zone bitmap + * object there is a list of objects of type struct bm_block that + * represent each blocks of bitmap in which information is stored. * - * struct memory_bitmap contains a pointer to the main list of zone - * bitmap objects, a struct bm_position used for browsing the bitmap, - * and a pointer to the list of pages used for allocating all of the - * zone bitmap objects and bitmap block objects. + * struct memory_bitmap contains a pointer to the main list of zone + * bitmap objects, a struct bm_position used for browsing the bitmap, + * and a pointer to the list of pages used for allocating all of the + * zone bitmap objects and bitmap block objects. * - * NOTE: It has to be possible to lay out the bitmap in memory - * using only allocations of order 0. Additionally, the bitmap is - * designed to work with arbitrary number of zones (this is over the - * top for now, but let's avoid making unnecessary assumptions ;-). + * NOTE: It has to be possible to lay out the bitmap in memory + * using only allocations of order 0. Additionally, the bitmap is + * designed to work with arbitrary number of zones (this is over the + * top for now, but let's avoid making unnecessary assumptions ;-). * - * struct zone_bitmap contains a pointer to a list of bitmap block - * objects and a pointer to the bitmap block object that has been - * most recently used for setting bits. Additionally, it contains the - * pfns that correspond to the start and end of the represented zone. + * struct zone_bitmap contains a pointer to a list of bitmap block + * objects and a pointer to the bitmap block object that has been + * most recently used for setting bits. Additionally, it contains the + * PFNs that correspond to the start and end of the represented zone. * - * struct bm_block contains a pointer to the memory page in which - * information is stored (in the form of a block of bitmap) - * It also contains the pfns that correspond to the start and end of - * the represented memory area. + * struct bm_block contains a pointer to the memory page in which + * information is stored (in the form of a block of bitmap) + * It also contains the pfns that correspond to the start and end of + * the represented memory area. * - * The memory bitmap is organized as a radix tree to guarantee fast random - * access to the bits. There is one radix tree for each zone (as returned - * from create_mem_extents). + * The memory bitmap is organized as a radix tree to guarantee fast random + * access to the bits. There is one radix tree for each zone (as returned + * from create_mem_extents). * - * One radix tree is represented by one struct mem_zone_bm_rtree. There are - * two linked lists for the nodes of the tree, one for the inner nodes and - * one for the leave nodes. The linked leave nodes are used for fast linear - * access of the memory bitmap. + * One radix tree is represented by one struct mem_zone_bm_rtree. There are + * two linked lists for the nodes of the tree, one for the inner nodes and + * one for the leave nodes. The linked leave nodes are used for fast linear + * access of the memory bitmap. * - * The struct rtree_node represents one node of the radix tree. + * The struct rtree_node represents one node of the radix tree. */ #define BM_END_OF_MAP (~0UL) @@ -333,9 +338,8 @@ struct bm_position { struct memory_bitmap { struct list_head zones; struct linked_page *p_list; /* list of pages used to store zone - * bitmap objects and bitmap block - * objects - */ + bitmap objects and bitmap block + objects */ struct bm_position cur; /* most recently used bit position */ }; @@ -349,12 +353,12 @@ struct memory_bitmap { #endif #define BM_RTREE_LEVEL_MASK ((1UL << BM_RTREE_LEVEL_SHIFT) - 1) -/* - * alloc_rtree_node - Allocate a new node and add it to the radix tree. +/** + * alloc_rtree_node - Allocate a new node and add it to the radix tree. * - * This function is used to allocate inner nodes as well as the - * leave nodes of the radix tree. It also adds the node to the - * corresponding linked list passed in by the *list parameter. + * This function is used to allocate inner nodes as well as the + * leave nodes of the radix tree. It also adds the node to the + * corresponding linked list passed in by the *list parameter. */ static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed, struct chain_allocator *ca, @@ -375,12 +379,12 @@ static struct rtree_node *alloc_rtree_node(gfp_t gfp_mask, int safe_needed, return node; } -/* - * add_rtree_block - Add a new leave node to the radix tree +/** + * add_rtree_block - Add a new leave node to the radix tree. * - * The leave nodes need to be allocated in order to keep the leaves - * linked list in order. This is guaranteed by the zone->blocks - * counter. + * The leave nodes need to be allocated in order to keep the leaves + * linked list in order. This is guaranteed by the zone->blocks + * counter. */ static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask, int safe_needed, struct chain_allocator *ca) @@ -445,12 +449,12 @@ static int add_rtree_block(struct mem_zone_bm_rtree *zone, gfp_t gfp_mask, static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, int clear_nosave_free); -/* - * create_zone_bm_rtree - create a radix tree for one zone +/** + * create_zone_bm_rtree - Create a radix tree for one zone. * - * Allocated the mem_zone_bm_rtree structure and initializes it. - * This function also allocated and builds the radix tree for the - * zone. + * Allocated the mem_zone_bm_rtree structure and initializes it. + * This function also allocated and builds the radix tree for the + * zone. */ static struct mem_zone_bm_rtree *create_zone_bm_rtree(gfp_t gfp_mask, int safe_needed, @@ -483,12 +487,12 @@ static struct mem_zone_bm_rtree *create_zone_bm_rtree(gfp_t gfp_mask, return zone; } -/* - * free_zone_bm_rtree - Free the memory of the radix tree +/** + * free_zone_bm_rtree - Free the memory of the radix tree. * - * Free all node pages of the radix tree. The mem_zone_bm_rtree - * structure itself is not freed here nor are the rtree_node - * structs. + * Free all node pages of the radix tree. The mem_zone_bm_rtree + * structure itself is not freed here nor are the rtree_node + * structs. */ static void free_zone_bm_rtree(struct mem_zone_bm_rtree *zone, int clear_nosave_free) @@ -521,8 +525,8 @@ struct mem_extent { }; /** - * free_mem_extents - free a list of memory extents - * @list - list of extents to empty + * free_mem_extents - Free a list of memory extents. + * @list: List of extents to free. */ static void free_mem_extents(struct list_head *list) { @@ -535,10 +539,11 @@ static void free_mem_extents(struct list_head *list) } /** - * create_mem_extents - create a list of memory extents representing - * contiguous ranges of PFNs - * @list - list to put the extents into - * @gfp_mask - mask to use for memory allocations + * create_mem_extents - Create a list of memory extents. + * @list: List to put the extents into. + * @gfp_mask: Mask to use for memory allocations. + * + * The extents represent contiguous ranges of PFNs. */ static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) { @@ -594,8 +599,8 @@ static int create_mem_extents(struct list_head *list, gfp_t gfp_mask) } /** - * memory_bm_create - allocate memory for a memory bitmap - */ + * memory_bm_create - Allocate memory for a memory bitmap. + */ static int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, int safe_needed) { @@ -636,8 +641,9 @@ static int memory_bm_create(struct memory_bitmap *bm, gfp_t gfp_mask, } /** - * memory_bm_free - free memory occupied by the memory bitmap @bm - */ + * memory_bm_free - Free memory occupied by the memory bitmap. + * @bm: Memory bitmap. + */ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) { struct mem_zone_bm_rtree *zone; @@ -651,14 +657,13 @@ static void memory_bm_free(struct memory_bitmap *bm, int clear_nosave_free) } /** - * memory_bm_find_bit - Find the bit for pfn in the memory - * bitmap + * memory_bm_find_bit - Find the bit for a given PFN in a memory bitmap. * - * Find the bit in the bitmap @bm that corresponds to given pfn. - * The cur.zone, cur.block and cur.node_pfn member of @bm are - * updated. - * It walks the radix tree to find the page which contains the bit for - * pfn and returns the bit position in **addr and *bit_nr. + * Find the bit in memory bitmap @bm that corresponds to the given PFN. + * The cur.zone, cur.block and cur.node_pfn members of @bm are updated. + * + * Walk the radix tree to find the page containing the bit that represents @pfn + * and return the position of the bit in @addr and @bit_nr. */ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, void **addr, unsigned int *bit_nr) @@ -687,10 +692,9 @@ static int memory_bm_find_bit(struct memory_bitmap *bm, unsigned long pfn, zone_found: /* - * We have a zone. Now walk the radix tree to find the leave - * node for our pfn. + * We have found the zone. Now walk the radix tree to find the leaf node + * for our PFN. */ - node = bm->cur.node; if (((pfn - zone->start_pfn) & ~BM_BLOCK_MASK) == bm->cur.node_pfn) goto node_found; @@ -783,14 +787,14 @@ static bool memory_bm_pfn_present(struct memory_bitmap *bm, unsigned long pfn) } /* - * rtree_next_node - Jumps to the next leave node + * rtree_next_node - Jump to the next leaf node. * - * Sets the position to the beginning of the next node in the - * memory bitmap. This is either the next node in the current - * zone's radix tree or the first node in the radix tree of the - * next zone. + * Set the position to the beginning of the next node in the + * memory bitmap. This is either the next node in the current + * zone's radix tree or the first node in the radix tree of the + * next zone. * - * Returns true if there is a next node, false otherwise. + * Return true if there is a next node, false otherwise. */ static bool rtree_next_node(struct memory_bitmap *bm) { @@ -819,14 +823,15 @@ static bool rtree_next_node(struct memory_bitmap *bm) } /** - * memory_bm_rtree_next_pfn - Find the next set bit in the bitmap @bm + * memory_bm_rtree_next_pfn - Find the next set bit in a memory bitmap. + * @bm: Memory bitmap. * - * Starting from the last returned position this function searches - * for the next set bit in the memory bitmap and returns its - * number. If no more bit is set BM_END_OF_MAP is returned. + * Starting from the last returned position this function searches for the next + * set bit in @bm and returns the PFN represented by it. If no more bits are + * set, BM_END_OF_MAP is returned. * - * It is required to run memory_bm_position_reset() before the - * first call to this function. + * It is required to run memory_bm_position_reset() before the first call to + * this function for the given memory bitmap. */ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) { @@ -848,11 +853,10 @@ static unsigned long memory_bm_next_pfn(struct memory_bitmap *bm) return BM_END_OF_MAP; } -/** - * This structure represents a range of page frames the contents of which - * should not be saved during the suspend. +/* + * This structure represents a range of page frames the contents of which + * should not be saved during hibernation. */ - struct nosave_region { struct list_head list; unsigned long start_pfn; @@ -890,11 +894,11 @@ static void memory_bm_recycle(struct memory_bitmap *bm) } /** - * register_nosave_region - register a range of page frames the contents - * of which should not be saved during the suspend (to be used in the early - * initialization code) + * register_nosave_region - Register a region of unsaveable memory. + * + * Register a range of page frames the contents of which should not be saved + * during hibernation (to be used in the early initialization code). */ - void __init __register_nosave_region(unsigned long start_pfn, unsigned long end_pfn, int use_kmalloc) { @@ -913,7 +917,7 @@ void __init __register_nosave_region(unsigned long start_pfn, } } if (use_kmalloc) { - /* during init, this shouldn't fail */ + /* During init, this shouldn't fail */ region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL); BUG_ON(!region); } else @@ -979,10 +983,12 @@ static void swsusp_unset_page_forbidden(struct page *page) } /** - * mark_nosave_pages - set bits corresponding to the page frames the - * contents of which should not be saved in a given bitmap. + * mark_nosave_pages - Mark pages that should not be saved. + * @bm: Memory bitmap. + * + * Set the bits in @bm that correspond to the page frames the contents of which + * should not be saved. */ - static void mark_nosave_pages(struct memory_bitmap *bm) { struct nosave_region *region; @@ -1012,13 +1018,13 @@ static void mark_nosave_pages(struct memory_bitmap *bm) } /** - * create_basic_memory_bitmaps - create bitmaps needed for marking page - * frames that should not be saved and free page frames. The pointers - * forbidden_pages_map and free_pages_map are only modified if everything - * goes well, because we don't want the bits to be used before both bitmaps - * are set up. + * create_basic_memory_bitmaps - Create bitmaps to hold basic page information. + * + * Create bitmaps needed for marking page frames that should not be saved and + * free page frames. The forbidden_pages_map and free_pages_map pointers are + * only modified if everything goes well, because we don't want the bits to be + * touched before both bitmaps are set up. */ - int create_basic_memory_bitmaps(void) { struct memory_bitmap *bm1, *bm2; @@ -1063,12 +1069,12 @@ int create_basic_memory_bitmaps(void) } /** - * free_basic_memory_bitmaps - free memory bitmaps allocated by - * create_basic_memory_bitmaps(). The auxiliary pointers are necessary - * so that the bitmaps themselves are not referred to while they are being - * freed. + * free_basic_memory_bitmaps - Free memory bitmaps holding basic information. + * + * Free memory bitmaps allocated by create_basic_memory_bitmaps(). The + * auxiliary pointers are necessary so that the bitmaps themselves are not + * referred to while they are being freed. */ - void free_basic_memory_bitmaps(void) { struct memory_bitmap *bm1, *bm2; @@ -1089,11 +1095,13 @@ void free_basic_memory_bitmaps(void) } /** - * snapshot_additional_pages - estimate the number of additional pages - * be needed for setting up the suspend image data structures for given - * zone (usually the returned value is greater than the exact number) + * snapshot_additional_pages - Estimate the number of extra pages needed. + * @zone: Memory zone to carry out the computation for. + * + * Estimate the number of additional pages needed for setting up a hibernation + * image data structures for @zone (usually, the returned value is greater than + * the exact number). */ - unsigned int snapshot_additional_pages(struct zone *zone) { unsigned int rtree, nodes; @@ -1111,10 +1119,10 @@ unsigned int snapshot_additional_pages(struct zone *zone) #ifdef CONFIG_HIGHMEM /** - * count_free_highmem_pages - compute the total number of free highmem - * pages, system-wide. + * count_free_highmem_pages - Compute the total number of free highmem pages. + * + * The returned number is system-wide. */ - static unsigned int count_free_highmem_pages(void) { struct zone *zone; @@ -1128,11 +1136,12 @@ static unsigned int count_free_highmem_pages(void) } /** - * saveable_highmem_page - Determine whether a highmem page should be - * included in the suspend image. + * saveable_highmem_page - Check if a highmem page is saveable. * - * We should save the page if it isn't Nosave or NosaveFree, or Reserved, - * and it isn't a part of a free chunk of pages. + * Determine whether a highmem page should be included in a hibernation image. + * + * We should save the page if it isn't Nosave or NosaveFree, or Reserved, + * and it isn't part of a free chunk of pages. */ static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) { @@ -1158,10 +1167,8 @@ static struct page *saveable_highmem_page(struct zone *zone, unsigned long pfn) } /** - * count_highmem_pages - compute the total number of saveable highmem - * pages. + * count_highmem_pages - Compute the total number of saveable highmem pages. */ - static unsigned int count_highmem_pages(void) { struct zone *zone; @@ -1189,12 +1196,14 @@ static inline void *saveable_highmem_page(struct zone *z, unsigned long p) #endif /* CONFIG_HIGHMEM */ /** - * saveable_page - Determine whether a non-highmem page should be included - * in the suspend image. + * saveable_page - Check if the given page is saveable. * - * We should save the page if it isn't Nosave, and is not in the range - * of pages statically defined as 'unsaveable', and it isn't a part of - * a free chunk of pages. + * Determine whether a non-highmem page should be included in a hibernation + * image. + * + * We should save the page if it isn't Nosave, and is not in the range + * of pages statically defined as 'unsaveable', and it isn't part of + * a free chunk of pages. */ static struct page *saveable_page(struct zone *zone, unsigned long pfn) { @@ -1223,10 +1232,8 @@ static struct page *saveable_page(struct zone *zone, unsigned long pfn) } /** - * count_data_pages - compute the total number of saveable non-highmem - * pages. + * count_data_pages - Compute the total number of saveable non-highmem pages. */ - static unsigned int count_data_pages(void) { struct zone *zone; @@ -1246,7 +1253,8 @@ static unsigned int count_data_pages(void) return n; } -/* This is needed, because copy_page and memcpy are not usable for copying +/* + * This is needed, because copy_page and memcpy are not usable for copying * task structs. */ static inline void do_copy_page(long *dst, long *src) @@ -1257,12 +1265,12 @@ static inline void do_copy_page(long *dst, long *src) *dst++ = *src++; } - /** - * safe_copy_page - check if the page we are going to copy is marked as - * present in the kernel page tables (this always is the case if - * CONFIG_DEBUG_PAGEALLOC is not set and in that case - * kernel_page_present() always returns 'true'). + * safe_copy_page - Copy a page in a safe way. + * + * Check if the page we are going to copy is marked as present in the kernel + * page tables (this always is the case if CONFIG_DEBUG_PAGEALLOC is not set + * and in that case kernel_page_present() always returns 'true'). */ static void safe_copy_page(void *dst, struct page *s_page) { @@ -1275,7 +1283,6 @@ static void safe_copy_page(void *dst, struct page *s_page) } } - #ifdef CONFIG_HIGHMEM static inline struct page *page_is_saveable(struct zone *zone, unsigned long pfn) { @@ -1298,7 +1305,8 @@ static void copy_data_page(unsigned long dst_pfn, unsigned long src_pfn) kunmap_atomic(src); } else { if (PageHighMem(d_page)) { - /* Page pointed to by src may contain some kernel + /* + * The page pointed to by src may contain some kernel * data modified by kmap_atomic() */ safe_copy_page(buffer, s_page); @@ -1370,12 +1378,11 @@ static struct memory_bitmap orig_bm; static struct memory_bitmap copy_bm; /** - * swsusp_free - free pages allocated for the suspend. + * swsusp_free - Free pages allocated for hibernation image. * - * Suspend pages are alocated before the atomic copy is made, so we - * need to release them after the resume. + * Image pages are alocated before snapshot creation, so they need to be + * released after resume. */ - void swsusp_free(void) { unsigned long fb_pfn, fr_pfn; @@ -1424,7 +1431,7 @@ out: #define GFP_IMAGE (GFP_KERNEL | __GFP_NOWARN) /** - * preallocate_image_pages - Allocate a number of pages for hibernation image + * preallocate_image_pages - Allocate a number of pages for hibernation image. * @nr_pages: Number of page frames to allocate. * @mask: GFP flags to use for the allocation. * @@ -1474,7 +1481,7 @@ static unsigned long preallocate_image_highmem(unsigned long nr_pages) } /** - * __fraction - Compute (an approximation of) x * (multiplier / base) + * __fraction - Compute (an approximation of) x * (multiplier / base). */ static unsigned long __fraction(u64 x, u64 multiplier, u64 base) { @@ -1506,7 +1513,7 @@ static inline unsigned long preallocate_highmem_fraction(unsigned long nr_pages, #endif /* CONFIG_HIGHMEM */ /** - * free_unnecessary_pages - Release preallocated pages not needed for the image + * free_unnecessary_pages - Release preallocated pages not needed for the image. */ static unsigned long free_unnecessary_pages(void) { @@ -1560,7 +1567,7 @@ static unsigned long free_unnecessary_pages(void) } /** - * minimum_image_size - Estimate the minimum acceptable size of an image + * minimum_image_size - Estimate the minimum acceptable size of an image. * @saveable: Number of saveable pages in the system. * * We want to avoid attempting to free too much memory too hard, so estimate the @@ -1590,7 +1597,7 @@ static unsigned long minimum_image_size(unsigned long saveable) } /** - * hibernate_preallocate_memory - Preallocate memory for hibernation image + * hibernate_preallocate_memory - Preallocate memory for hibernation image. * * To create a hibernation image it is necessary to make a copy of every page * frame in use. We also need a number of page frames to be free during @@ -1763,10 +1770,11 @@ int hibernate_preallocate_memory(void) #ifdef CONFIG_HIGHMEM /** - * count_pages_for_highmem - compute the number of non-highmem pages - * that will be necessary for creating copies of highmem pages. - */ - + * count_pages_for_highmem - Count non-highmem pages needed for copying highmem. + * + * Compute the number of non-highmem pages that will be necessary for creating + * copies of highmem pages. + */ static unsigned int count_pages_for_highmem(unsigned int nr_highmem) { unsigned int free_highmem = count_free_highmem_pages() + alloc_highmem; @@ -1783,10 +1791,8 @@ static unsigned int count_pages_for_highmem(unsigned int nr_highmem) { return 0; #endif /* CONFIG_HIGHMEM */ /** - * enough_free_mem - Make sure we have enough free memory for the - * snapshot image. + * enough_free_mem - Check if there is enough free memory for the image. */ - static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) { struct zone *zone; @@ -1805,10 +1811,11 @@ static int enough_free_mem(unsigned int nr_pages, unsigned int nr_highmem) #ifdef CONFIG_HIGHMEM /** - * get_highmem_buffer - if there are some highmem pages in the suspend - * image, we may need the buffer to copy them and/or load their data. + * get_highmem_buffer - Allocate a buffer for highmem pages. + * + * If there are some highmem pages in the hibernation image, we may need a + * buffer to copy them and/or load their data. */ - static inline int get_highmem_buffer(int safe_needed) { buffer = get_image_page(GFP_ATOMIC | __GFP_COLD, safe_needed); @@ -1816,11 +1823,11 @@ static inline int get_highmem_buffer(int safe_needed) } /** - * alloc_highmem_image_pages - allocate some highmem pages for the image. - * Try to allocate as many pages as needed, but if the number of free - * highmem pages is lesser than that, allocate them all. + * alloc_highmem_image_pages - Allocate some highmem pages for the image. + * + * Try to allocate as many pages as needed, but if the number of free highmem + * pages is less than that, allocate them all. */ - static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm, unsigned int nr_highmem) { @@ -1846,17 +1853,16 @@ static inline unsigned int alloc_highmem_pages(struct memory_bitmap *bm, #endif /* CONFIG_HIGHMEM */ /** - * swsusp_alloc - allocate memory for the suspend image + * swsusp_alloc - Allocate memory for hibernation image. * - * We first try to allocate as many highmem pages as there are - * saveable highmem pages in the system. If that fails, we allocate - * non-highmem pages for the copies of the remaining highmem ones. + * We first try to allocate as many highmem pages as there are + * saveable highmem pages in the system. If that fails, we allocate + * non-highmem pages for the copies of the remaining highmem ones. * - * In this approach it is likely that the copies of highmem pages will - * also be located in the high memory, because of the way in which - * copy_data_pages() works. + * In this approach it is likely that the copies of highmem pages will + * also be located in the high memory, because of the way in which + * copy_data_pages() works. */ - static int swsusp_alloc(struct memory_bitmap *orig_bm, struct memory_bitmap *copy_bm, unsigned int nr_pages, unsigned int nr_highmem) @@ -1909,7 +1915,8 @@ asmlinkage __visible int swsusp_save(void) return -ENOMEM; } - /* During allocating of suspend pagedir, new cold pages may appear. + /* + * During allocating of suspend pagedir, new cold pages may appear. * Kill them. */ drain_local_pages(NULL); @@ -1972,10 +1979,13 @@ static int init_header(struct swsusp_info *info) } /** - * pack_pfns - pfns corresponding to the set bits found in the bitmap @bm - * are stored in the array @buf[] (1 page at a time) + * pack_pfns - Prepare PFNs for saving. + * @bm: Memory bitmap. + * @buf: Memory buffer to store the PFNs in. + * + * PFNs corresponding to set bits in @bm are stored in the area of memory + * pointed to by @buf (1 page at a time). */ - static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm) { int j; @@ -1990,22 +2000,21 @@ static inline void pack_pfns(unsigned long *buf, struct memory_bitmap *bm) } /** - * snapshot_read_next - used for reading the system memory snapshot. + * snapshot_read_next - Get the address to read the next image page from. + * @handle: Snapshot handle to be used for the reading. * - * On the first call to it @handle should point to a zeroed - * snapshot_handle structure. The structure gets updated and a pointer - * to it should be passed to this function every next time. + * On the first call, @handle should point to a zeroed snapshot_handle + * structure. The structure gets populated then and a pointer to it should be + * passed to this function every next time. * - * On success the function returns a positive number. Then, the caller - * is allowed to read up to the returned number of bytes from the memory - * location computed by the data_of() macro. + * On success, the function returns a positive number. Then, the caller + * is allowed to read up to the returned number of bytes from the memory + * location computed by the data_of() macro. * - * The function returns 0 to indicate the end of data stream condition, - * and a negative number is returned on error. In such cases the - * structure pointed to by @handle is not updated and should not be used - * any more. + * The function returns 0 to indicate the end of the data stream condition, + * and negative numbers are returned on errors. If that happens, the structure + * pointed to by @handle is not updated and should not be used any more. */ - int snapshot_read_next(struct snapshot_handle *handle) { if (handle->cur > nr_meta_pages + nr_copy_pages) @@ -2034,7 +2043,8 @@ int snapshot_read_next(struct snapshot_handle *handle) page = pfn_to_page(memory_bm_next_pfn(©_bm)); if (PageHighMem(page)) { - /* Highmem pages are copied to the buffer, + /* + * Highmem pages are copied to the buffer, * because we can't return with a kmapped * highmem page (we may not be called again). */ @@ -2066,11 +2076,11 @@ static void duplicate_memory_bitmap(struct memory_bitmap *dst, } /** - * mark_unsafe_pages - mark the pages that cannot be used for storing - * the image during resume, because they conflict with the pages that - * had been used before suspend + * mark_unsafe_pages - Mark pages that were used before hibernation. + * + * Mark the pages that cannot be used for storing the image during restoration, + * because they conflict with the pages that had been used before hibernation. */ - static void mark_unsafe_pages(struct memory_bitmap *bm) { unsigned long pfn; @@ -2104,9 +2114,8 @@ static int check_header(struct swsusp_info *info) } /** - * load header - check the image header and copy data from it + * load header - Check the image header and copy the data from it. */ - static int load_header(struct swsusp_info *info) { int error; @@ -2121,8 +2130,12 @@ static int load_header(struct swsusp_info *info) } /** - * unpack_orig_pfns - for each element of @buf[] (1 page at a time) set - * the corresponding bit in the memory bitmap @bm + * unpack_orig_pfns - Set bits corresponding to given PFNs in a memory bitmap. + * @bm: Memory bitmap. + * @buf: Area of memory containing the PFNs. + * + * For each element of the array pointed to by @buf (1 page at a time), set the + * corresponding bit in @bm. */ static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) { @@ -2145,7 +2158,8 @@ static int unpack_orig_pfns(unsigned long *buf, struct memory_bitmap *bm) } #ifdef CONFIG_HIGHMEM -/* struct highmem_pbe is used for creating the list of highmem pages that +/* + * struct highmem_pbe is used for creating the list of highmem pages that * should be restored atomically during the resume from disk, because the page * frames they have occupied before the suspend are in use. */ @@ -2155,7 +2169,8 @@ struct highmem_pbe { struct highmem_pbe *next; }; -/* List of highmem PBEs needed for restoring the highmem pages that were +/* + * List of highmem PBEs needed for restoring the highmem pages that were * allocated before the suspend and included in the suspend image, but have * also been allocated by the "resume" kernel, so their contents cannot be * written directly to their "original" page frames. @@ -2163,11 +2178,11 @@ struct highmem_pbe { static struct highmem_pbe *highmem_pblist; /** - * count_highmem_image_pages - compute the number of highmem pages in the - * suspend image. The bits in the memory bitmap @bm that correspond to the - * image pages are assumed to be set. + * count_highmem_image_pages - Compute the number of highmem pages in the image. + * @bm: Memory bitmap. + * + * The bits in @bm that correspond to image pages are assumed to be set. */ - static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) { unsigned long pfn; @@ -2184,22 +2199,23 @@ static unsigned int count_highmem_image_pages(struct memory_bitmap *bm) return cnt; } -/** - * prepare_highmem_image - try to allocate as many highmem pages as - * there are highmem image pages (@nr_highmem_p points to the variable - * containing the number of highmem image pages). The pages that are - * "safe" (ie. will not be overwritten when the suspend image is - * restored) have the corresponding bits set in @bm (it must be - * unitialized). - * - * NOTE: This function should not be called if there are no highmem - * image pages. - */ - static unsigned int safe_highmem_pages; static struct memory_bitmap *safe_highmem_bm; +/** + * prepare_highmem_image - Allocate memory for loading highmem data from image. + * @bm: Pointer to an uninitialized memory bitmap structure. + * @nr_highmem_p: Pointer to the number of highmem image pages. + * + * Try to allocate as many highmem pages as there are highmem image pages + * (@nr_highmem_p points to the variable containing the number of highmem image + * pages). The pages that are "safe" (ie. will not be overwritten when the + * hibernation image is restored entirely) have the corresponding bits set in + * @bm (it must be unitialized). + * + * NOTE: This function should not be called if there are no highmem image pages. + */ static int prepare_highmem_image(struct memory_bitmap *bm, unsigned int *nr_highmem_p) { @@ -2236,25 +2252,26 @@ static int prepare_highmem_image(struct memory_bitmap *bm, return 0; } -/** - * get_highmem_page_buffer - for given highmem image page find the buffer - * that suspend_write_next() should set for its caller to write to. - * - * If the page is to be saved to its "original" page frame or a copy of - * the page is to be made in the highmem, @buffer is returned. Otherwise, - * the copy of the page is to be made in normal memory, so the address of - * the copy is returned. - * - * If @buffer is returned, the caller of suspend_write_next() will write - * the page's contents to @buffer, so they will have to be copied to the - * right location on the next call to suspend_write_next() and it is done - * with the help of copy_last_highmem_page(). For this purpose, if - * @buffer is returned, @last_highmem page is set to the page to which - * the data will have to be copied from @buffer. - */ - static struct page *last_highmem_page; +/** + * get_highmem_page_buffer - Prepare a buffer to store a highmem image page. + * + * For a given highmem image page get a buffer that suspend_write_next() should + * return to its caller to write to. + * + * If the page is to be saved to its "original" page frame or a copy of + * the page is to be made in the highmem, @buffer is returned. Otherwise, + * the copy of the page is to be made in normal memory, so the address of + * the copy is returned. + * + * If @buffer is returned, the caller of suspend_write_next() will write + * the page's contents to @buffer, so they will have to be copied to the + * right location on the next call to suspend_write_next() and it is done + * with the help of copy_last_highmem_page(). For this purpose, if + * @buffer is returned, @last_highmem_page is set to the page to which + * the data will have to be copied from @buffer. + */ static void *get_highmem_page_buffer(struct page *page, struct chain_allocator *ca) { @@ -2262,13 +2279,15 @@ static void *get_highmem_page_buffer(struct page *page, void *kaddr; if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) { - /* We have allocated the "original" page frame and we can + /* + * We have allocated the "original" page frame and we can * use it directly to store the loaded page. */ last_highmem_page = page; return buffer; } - /* The "original" page frame has not been allocated and we have to + /* + * The "original" page frame has not been allocated and we have to * use a "safe" page frame to store the loaded page. */ pbe = chain_alloc(ca, sizeof(struct highmem_pbe)); @@ -2298,11 +2317,12 @@ static void *get_highmem_page_buffer(struct page *page, } /** - * copy_last_highmem_page - copy the contents of a highmem image from - * @buffer, where the caller of snapshot_write_next() has place them, - * to the right location represented by @last_highmem_page . + * copy_last_highmem_page - Copy most the most recent highmem image page. + * + * Copy the contents of a highmem image from @buffer, where the caller of + * snapshot_write_next() has stored them, to the right location represented by + * @last_highmem_page . */ - static void copy_last_highmem_page(void) { if (last_highmem_page) { @@ -2345,22 +2365,23 @@ static inline int last_highmem_page_copied(void) { return 1; } static inline void free_highmem_data(void) {} #endif /* CONFIG_HIGHMEM */ -/** - * prepare_image - use the memory bitmap @bm to mark the pages that will - * be overwritten in the process of restoring the system memory state - * from the suspend image ("unsafe" pages) and allocate memory for the - * image. - * - * The idea is to allocate a new memory bitmap first and then allocate - * as many pages as needed for the image data, but not to assign these - * pages to specific tasks initially. Instead, we just mark them as - * allocated and create a lists of "safe" pages that will be used - * later. On systems with high memory a list of "safe" highmem pages is - * also created. - */ - #define PBES_PER_LINKED_PAGE (LINKED_PAGE_DATA_SIZE / sizeof(struct pbe)) +/** + * prepare_image - Make room for loading hibernation image. + * @new_bm: Unitialized memory bitmap structure. + * @bm: Memory bitmap with unsafe pages marked. + * + * Use @bm to mark the pages that will be overwritten in the process of + * restoring the system memory state from the suspend image ("unsafe" pages) + * and allocate memory for the image. + * + * The idea is to allocate a new memory bitmap first and then allocate + * as many pages as needed for image data, but without specifying what those + * pages will be used for just yet. Instead, we mark them all as allocated and + * create a lists of "safe" pages to be used later. On systems with high + * memory a list of "safe" highmem pages is created too. + */ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) { unsigned int nr_pages, nr_highmem; @@ -2385,7 +2406,8 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) if (error) goto Free; } - /* Reserve some safe pages for potential later use. + /* + * Reserve some safe pages for potential later use. * * NOTE: This way we make sure there will be enough safe pages for the * chain_alloc() in get_buffer(). It is a bit wasteful, but @@ -2431,10 +2453,11 @@ static int prepare_image(struct memory_bitmap *new_bm, struct memory_bitmap *bm) } /** - * get_buffer - compute the address that snapshot_write_next() should - * set for its caller to write to. + * get_buffer - Get the address to store the next image data page. + * + * Get the address that snapshot_write_next() should return to its caller to + * write to. */ - static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) { struct pbe *pbe; @@ -2449,12 +2472,14 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) return get_highmem_page_buffer(page, ca); if (swsusp_page_is_forbidden(page) && swsusp_page_is_free(page)) - /* We have allocated the "original" page frame and we can + /* + * We have allocated the "original" page frame and we can * use it directly to store the loaded page. */ return page_address(page); - /* The "original" page frame has not been allocated and we have to + /* + * The "original" page frame has not been allocated and we have to * use a "safe" page frame to store the loaded page. */ pbe = chain_alloc(ca, sizeof(struct pbe)); @@ -2471,22 +2496,21 @@ static void *get_buffer(struct memory_bitmap *bm, struct chain_allocator *ca) } /** - * snapshot_write_next - used for writing the system memory snapshot. + * snapshot_write_next - Get the address to store the next image page. + * @handle: Snapshot handle structure to guide the writing. * - * On the first call to it @handle should point to a zeroed - * snapshot_handle structure. The structure gets updated and a pointer - * to it should be passed to this function every next time. + * On the first call, @handle should point to a zeroed snapshot_handle + * structure. The structure gets populated then and a pointer to it should be + * passed to this function every next time. * - * On success the function returns a positive number. Then, the caller - * is allowed to write up to the returned number of bytes to the memory - * location computed by the data_of() macro. + * On success, the function returns a positive number. Then, the caller + * is allowed to write up to the returned number of bytes to the memory + * location computed by the data_of() macro. * - * The function returns 0 to indicate the "end of file" condition, - * and a negative number is returned on error. In such cases the - * structure pointed to by @handle is not updated and should not be used - * any more. + * The function returns 0 to indicate the "end of file" condition. Negative + * numbers are returned on errors, in which cases the structure pointed to by + * @handle is not updated and should not be used any more. */ - int snapshot_write_next(struct snapshot_handle *handle) { static struct chain_allocator ca; @@ -2556,13 +2580,13 @@ int snapshot_write_next(struct snapshot_handle *handle) } /** - * snapshot_write_finalize - must be called after the last call to - * snapshot_write_next() in case the last page in the image happens - * to be a highmem page and its contents should be stored in the - * highmem. Additionally, it releases the memory that will not be - * used any more. + * snapshot_write_finalize - Complete the loading of a hibernation image. + * + * Must be called after the last call to snapshot_write_next() in case the last + * page in the image happens to be a highmem page and its contents should be + * stored in highmem. Additionally, it recycles bitmap memory that's not + * necessary any more. */ - void snapshot_write_finalize(struct snapshot_handle *handle) { copy_last_highmem_page(); @@ -2599,15 +2623,15 @@ static inline void swap_two_pages_data(struct page *p1, struct page *p2, } /** - * restore_highmem - for each highmem page that was allocated before - * the suspend and included in the suspend image, and also has been - * allocated by the "resume" kernel swap its current (ie. "before - * resume") contents with the previous (ie. "before suspend") one. + * restore_highmem - Put highmem image pages into their original locations. * - * If the resume eventually fails, we can call this function once - * again and restore the "before resume" highmem state. + * For each highmem page that was in use before hibernation and is included in + * the image, and also has been allocated by the "restore" kernel, swap its + * current contents with the previous (ie. "before hibernation") ones. + * + * If the restore eventually fails, we can call this function once again and + * restore the highmem state as seen by the restore kernel. */ - int restore_highmem(void) { struct highmem_pbe *pbe = highmem_pblist; From d5f32af3100165cbd625855bd155b3aa9bd87ebf Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 6 Jul 2016 23:44:31 +0200 Subject: [PATCH 12/16] PM / hibernate: Add missing braces in __register_nosave_region() One branch of an if/else statement in __register_nosave_region() is formatted against the kernel coding style which causes the code to look slightly odd. To fix that, add missing braces to it. No functional changes. Signed-off-by: Rafael J. Wysocki --- kernel/power/snapshot.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index bd927d9efeb7..d64d5d0efa79 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -920,9 +920,10 @@ void __init __register_nosave_region(unsigned long start_pfn, /* During init, this shouldn't fail */ region = kmalloc(sizeof(struct nosave_region), GFP_KERNEL); BUG_ON(!region); - } else + } else { /* This allocation cannot fail */ region = memblock_virt_alloc(sizeof(struct nosave_region), 0); + } region->start_pfn = start_pfn; region->end_pfn = end_pfn; list_add_tail(®ion->list, &nosave_regions); From 4c0b6c10fbaf0c82efe2a7ba6c236c633d4f2ed7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sun, 10 Jul 2016 02:12:10 +0200 Subject: [PATCH 13/16] PM / hibernate: Image data protection during restoration Make it possible to protect all pages holding image data during hibernate image restoration by setting them read-only (so as to catch attempts to write to those pages after image data have been stored in them). This adds overhead to image restoration code (it may cause large page mappings to be split as a result of page flags changes) and the errors it protects against should never happen in theory, so the feature is only active after passing hibernate=protect_image to the command line of the restore kernel. Also it only is built if CONFIG_DEBUG_RODATA is set. Signed-off-by: Rafael J. Wysocki --- Documentation/kernel-parameters.txt | 3 +++ kernel/power/hibernate.c | 3 +++ kernel/power/power.h | 7 +++++ kernel/power/snapshot.c | 42 +++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 82b42c958d1c..07960c24642d 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -3594,6 +3594,9 @@ bytes respectively. Such letter suffixes can also be entirely omitted. present during boot. nocompress Don't compress/decompress hibernation images. no Disable hibernation and resume. + protect_image Turn on image protection during restoration + (that will set all pages holding image data + during restoration read-only). retain_initrd [RAM] Keep initrd memory after extraction diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index b00f270d328e..51441d87f0b6 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -1126,6 +1126,9 @@ static int __init hibernate_setup(char *str) } else if (!strncmp(str, "no", 2)) { noresume = 1; nohibernate = 1; + } else if (IS_ENABLED(CONFIG_DEBUG_RODATA) + && !strncmp(str, "protect_image", 13)) { + enable_restore_image_protection(); } return 1; } diff --git a/kernel/power/power.h b/kernel/power/power.h index 51f02ecaf125..064963e89194 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -59,6 +59,13 @@ extern int hibernation_snapshot(int platform_mode); extern int hibernation_restore(int platform_mode); extern int hibernation_platform_enter(void); +#ifdef CONFIG_DEBUG_RODATA +/* kernel/power/snapshot.c */ +extern void enable_restore_image_protection(void); +#else +static inline void enable_restore_image_protection(void) {} +#endif /* CONFIG_DEBUG_RODATA */ + #else /* !CONFIG_HIBERNATION */ static inline void hibernate_reserved_size_init(void) {} diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index d64d5d0efa79..d90df926b59f 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -38,6 +38,43 @@ #include "power.h" +#ifdef CONFIG_DEBUG_RODATA +static bool hibernate_restore_protection; +static bool hibernate_restore_protection_active; + +void enable_restore_image_protection(void) +{ + hibernate_restore_protection = true; +} + +static inline void hibernate_restore_protection_begin(void) +{ + hibernate_restore_protection_active = hibernate_restore_protection; +} + +static inline void hibernate_restore_protection_end(void) +{ + hibernate_restore_protection_active = false; +} + +static inline void hibernate_restore_protect_page(void *page_address) +{ + if (hibernate_restore_protection_active) + set_memory_ro((unsigned long)page_address, 1); +} + +static inline void hibernate_restore_unprotect_page(void *page_address) +{ + if (hibernate_restore_protection_active) + set_memory_rw((unsigned long)page_address, 1); +} +#else +static inline void hibernate_restore_protection_begin(void) {} +static inline void hibernate_restore_protection_end(void) {} +static inline void hibernate_restore_protect_page(void *page_address) {} +static inline void hibernate_restore_unprotect_page(void *page_address) {} +#endif /* CONFIG_DEBUG_RODATA */ + static int swsusp_page_is_free(struct page *); static void swsusp_set_page_forbidden(struct page *); static void swsusp_unset_page_forbidden(struct page *); @@ -1414,6 +1451,7 @@ loop: memory_bm_clear_current(forbidden_pages_map); memory_bm_clear_current(free_pages_map); + hibernate_restore_unprotect_page(page_address(page)); __free_page(page); goto loop; } @@ -1425,6 +1463,7 @@ out: buffer = NULL; alloc_normal = 0; alloc_highmem = 0; + hibernate_restore_protection_end(); } /* Helper functions used for the shrinking of memory. */ @@ -2548,6 +2587,7 @@ int snapshot_write_next(struct snapshot_handle *handle) if (error) return error; + hibernate_restore_protection_begin(); } else if (handle->cur <= nr_meta_pages + 1) { error = unpack_orig_pfns(buffer, ©_bm); if (error) @@ -2570,6 +2610,7 @@ int snapshot_write_next(struct snapshot_handle *handle) copy_last_highmem_page(); /* Restore page key for data page (s390 only). */ page_key_write(handle->buffer); + hibernate_restore_protect_page(handle->buffer); handle->buffer = get_buffer(&orig_bm, &ca); if (IS_ERR(handle->buffer)) return PTR_ERR(handle->buffer); @@ -2594,6 +2635,7 @@ void snapshot_write_finalize(struct snapshot_handle *handle) /* Restore page key for data page (s390 only). */ page_key_write(handle->buffer); page_key_free(); + hibernate_restore_protect_page(handle->buffer); /* Do that only if we have loaded the image entirely */ if (handle->cur > 1 && handle->cur > nr_meta_pages + nr_copy_pages) { memory_bm_recycle(&orig_bm); From 406f992e4a372dafbe3c2cff7efbb2002a5c8ebd Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 14 Jul 2016 03:55:23 +0200 Subject: [PATCH 14/16] x86 / hibernate: Use hlt_play_dead() when resuming from hibernation On Intel hardware, native_play_dead() uses mwait_play_dead() by default and only falls back to the other methods if that fails. That also happens during resume from hibernation, when the restore (boot) kernel runs disable_nonboot_cpus() to take all of the CPUs except for the boot one offline. However, that is problematic, because the address passed to __monitor() in mwait_play_dead() is likely to be written to in the last phase of hibernate image restoration and that causes the "dead" CPU to start executing instructions again. Unfortunately, the page containing the address in that CPU's instruction pointer may not be valid any more at that point. First, that page may have been overwritten with image kernel memory contents already, so the instructions the CPU attempts to execute may simply be invalid. Second, the page tables previously used by that CPU may have been overwritten by image kernel memory contents, so the address in its instruction pointer is impossible to resolve then. A report from Varun Koyyalagunta and investigation carried out by Chen Yu show that the latter sometimes happens in practice. To prevent it from happening, temporarily change the smp_ops.play_dead pointer during resume from hibernation so that it points to a special "play dead" routine which uses hlt_play_dead() and avoids the inadvertent "revivals" of "dead" CPUs this way. A slightly unpleasant consequence of this change is that if the system is hibernated with one or more CPUs offline, it will generally draw more power after resume than it did before hibernation, because the physical state entered by CPUs via hlt_play_dead() is higher-power than the mwait_play_dead() one in the majority of cases. It is possible to work around this, but it is unclear how much of a problem that's going to be in practice, so the workaround will be implemented later if it turns out to be necessary. Link: https://bugzilla.kernel.org/show_bug.cgi?id=106371 Reported-by: Varun Koyyalagunta Original-by: Chen Yu Tested-by: Chen Yu Signed-off-by: Rafael J. Wysocki Acked-by: Ingo Molnar --- arch/x86/include/asm/smp.h | 1 + arch/x86/kernel/smpboot.c | 2 +- arch/x86/power/cpu.c | 30 ++++++++++++++++++++++++++++++ kernel/power/hibernate.c | 7 ++++++- kernel/power/power.h | 2 ++ 5 files changed, 40 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 66b057306f40..7427ca895a27 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -135,6 +135,7 @@ int native_cpu_up(unsigned int cpunum, struct task_struct *tidle); int native_cpu_disable(void); int common_cpu_die(unsigned int cpu); void native_cpu_die(unsigned int cpu); +void hlt_play_dead(void); void native_play_dead(void); void play_dead_common(void); void wbinvd_on_cpu(int cpu); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fafe8b923cac..8264dfad9cf8 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1622,7 +1622,7 @@ static inline void mwait_play_dead(void) } } -static inline void hlt_play_dead(void) +void hlt_play_dead(void) { if (__this_cpu_read(cpu_info.x86) >= 4) wbinvd(); diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c index d5f64996394a..b12c26e2e309 100644 --- a/arch/x86/power/cpu.c +++ b/arch/x86/power/cpu.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -266,6 +267,35 @@ void notrace restore_processor_state(void) EXPORT_SYMBOL(restore_processor_state); #endif +#if defined(CONFIG_HIBERNATION) && defined(CONFIG_HOTPLUG_CPU) +static void resume_play_dead(void) +{ + play_dead_common(); + tboot_shutdown(TB_SHUTDOWN_WFS); + hlt_play_dead(); +} + +int hibernate_resume_nonboot_cpu_disable(void) +{ + void (*play_dead)(void) = smp_ops.play_dead; + int ret; + + /* + * Ensure that MONITOR/MWAIT will not be used in the "play dead" loop + * during hibernate image restoration, because it is likely that the + * monitored address will be actually written to at that time and then + * the "dead" CPU will attempt to execute instructions again, but the + * address in its instruction pointer may not be possible to resolve + * any more at that point (the page tables used by it previously may + * have been overwritten by hibernate image data). + */ + smp_ops.play_dead = resume_play_dead; + ret = disable_nonboot_cpus(); + smp_ops.play_dead = play_dead; + return ret; +} +#endif + /* * When bsp_check() is called in hibernate and suspend, cpu hotplug * is disabled already. So it's unnessary to handle race condition between diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 51441d87f0b6..5f3523e18e46 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -409,6 +409,11 @@ int hibernation_snapshot(int platform_mode) goto Close; } +int __weak hibernate_resume_nonboot_cpu_disable(void) +{ + return disable_nonboot_cpus(); +} + /** * resume_target_kernel - Restore system state from a hibernation image. * @platform_mode: Whether or not to use the platform driver. @@ -433,7 +438,7 @@ static int resume_target_kernel(bool platform_mode) if (error) goto Cleanup; - error = disable_nonboot_cpus(); + error = hibernate_resume_nonboot_cpu_disable(); if (error) goto Enable_cpus; diff --git a/kernel/power/power.h b/kernel/power/power.h index 064963e89194..242d8b827dd5 100644 --- a/kernel/power/power.h +++ b/kernel/power/power.h @@ -38,6 +38,8 @@ static inline char *check_image_kernel(struct swsusp_info *info) } #endif /* CONFIG_ARCH_HIBERNATION_HEADER */ +extern int hibernate_resume_nonboot_cpu_disable(void); + /* * Keep some memory free so that I/O operations can succeed without paging * [Might this be more than 4 MB?] From af1e45e6ab5f0f32a0b68a0d4bb28af7907c6192 Mon Sep 17 00:00:00 2001 From: Todd Brandt Date: Fri, 15 Jul 2016 23:34:14 +0200 Subject: [PATCH 15/16] PM / tools: scripts: AnalyzeSuspend v4.2 Update AnalyzeSuspend to v4.2: - kprobe support for function tracing - config file support in lieu of command line options - advanced callgraph support for function debug - dev mode for monitoring common sources of delay, e.g. msleep, udelay - many bug fixes and formatting upgrades Signed-off-by: Todd Brandt Signed-off-by: Rafael J. Wysocki --- scripts/analyze_suspend.py | 3641 ++++++++++++++++++++++++------------ 1 file changed, 2474 insertions(+), 1167 deletions(-) diff --git a/scripts/analyze_suspend.py b/scripts/analyze_suspend.py index 93e1fd40f430..a0ba48fa2c5e 100755 --- a/scripts/analyze_suspend.py +++ b/scripts/analyze_suspend.py @@ -19,6 +19,17 @@ # Authors: # Todd Brandt # +# Links: +# Home Page +# https://01.org/suspendresume +# Source repo +# https://github.com/01org/suspendresume +# Documentation +# Getting Started +# https://01.org/suspendresume/documentation/getting-started +# Command List: +# https://01.org/suspendresume/documentation/command-list +# # Description: # This tool is designed to assist kernel and OS developers in optimizing # their linux stack's suspend/resume time. Using a kernel image built @@ -35,6 +46,8 @@ # CONFIG_FTRACE=y # CONFIG_FUNCTION_TRACER=y # CONFIG_FUNCTION_GRAPH_TRACER=y +# CONFIG_KPROBES=y +# CONFIG_KPROBES_ON_FTRACE=y # # For kernel versions older than 3.15: # The following additional kernel parameters are required: @@ -52,6 +65,7 @@ import re import platform from datetime import datetime import struct +import ConfigParser # ----------------- CLASSES -------------------- @@ -60,8 +74,15 @@ import struct # A global, single-instance container used to # store system values and test parameters class SystemValues: - version = 3.0 + ansi = False + version = '4.2' verbose = False + addlogs = False + mindevlen = 0.001 + mincglen = 1.0 + srgap = 0 + cgexp = False + outdir = '' testdir = '.' tpath = '/sys/kernel/debug/tracing/' fpdtpath = '/sys/firmware/acpi/tables/FPDT' @@ -71,26 +92,21 @@ class SystemValues: 'device_pm_callback_end', 'device_pm_callback_start' ] - modename = { - 'freeze': 'Suspend-To-Idle (S0)', - 'standby': 'Power-On Suspend (S1)', - 'mem': 'Suspend-to-RAM (S3)', - 'disk': 'Suspend-to-disk (S4)' - } + testcommand = '' mempath = '/dev/mem' powerfile = '/sys/power/state' suspendmode = 'mem' hostname = 'localhost' prefix = 'test' teststamp = '' + dmesgstart = 0.0 dmesgfile = '' ftracefile = '' htmlfile = '' + embedded = False rtcwake = False rtcwaketime = 10 rtcpath = '' - android = False - adb = 'adb' devicefilter = [] stamp = 0 execcount = 1 @@ -98,16 +114,90 @@ class SystemValues: usecallgraph = False usetraceevents = False usetraceeventsonly = False + usetracemarkers = True + usekprobes = True + usedevsrc = False notestrun = False - altdevname = dict() + devprops = dict() postresumetime = 0 + devpropfmt = '# Device Properties: .*' tracertypefmt = '# tracer: (?P.*)' firmwarefmt = '# fwsuspend (?P[0-9]*) fwresume (?P[0-9]*)$' postresumefmt = '# post resume time (?P[0-9]*)$' stampfmt = '# suspend-(?P[0-9]{2})(?P[0-9]{2})(?P[0-9]{2})-'+\ '(?P[0-9]{2})(?P[0-9]{2})(?P[0-9]{2})'+\ ' (?P.*) (?P.*) (?P.*)$' + kprobecolor = 'rgba(204,204,204,0.5)' + synccolor = 'rgba(204,204,204,0.5)' + debugfuncs = [] + tracefuncs = { + 'sys_sync': dict(), + 'pm_prepare_console': dict(), + 'pm_notifier_call_chain': dict(), + 'freeze_processes': dict(), + 'freeze_kernel_threads': dict(), + 'pm_restrict_gfp_mask': dict(), + 'acpi_suspend_begin': dict(), + 'suspend_console': dict(), + 'acpi_pm_prepare': dict(), + 'syscore_suspend': dict(), + 'arch_enable_nonboot_cpus_end': dict(), + 'syscore_resume': dict(), + 'acpi_pm_finish': dict(), + 'resume_console': dict(), + 'acpi_pm_end': dict(), + 'pm_restore_gfp_mask': dict(), + 'thaw_processes': dict(), + 'pm_restore_console': dict(), + 'CPU_OFF': { + 'func':'_cpu_down', + 'args_x86_64': {'cpu':'%di:s32'}, + 'format': 'CPU_OFF[{cpu}]', + 'mask': 'CPU_.*_DOWN' + }, + 'CPU_ON': { + 'func':'_cpu_up', + 'args_x86_64': {'cpu':'%di:s32'}, + 'format': 'CPU_ON[{cpu}]', + 'mask': 'CPU_.*_UP' + }, + } + dev_tracefuncs = { + # general wait/delay/sleep + 'msleep': { 'args_x86_64': {'time':'%di:s32'} }, + 'udelay': { 'func':'__const_udelay', 'args_x86_64': {'loops':'%di:s32'} }, + 'acpi_os_stall': dict(), + # ACPI + 'acpi_resume_power_resources': dict(), + 'acpi_ps_parse_aml': dict(), + # filesystem + 'ext4_sync_fs': dict(), + # ATA + 'ata_eh_recover': { 'args_x86_64': {'port':'+36(%di):s32'} }, + # i915 + 'i915_gem_restore_gtt_mappings': dict(), + 'intel_opregion_setup': dict(), + 'intel_dp_detect': dict(), + 'intel_hdmi_detect': dict(), + 'intel_opregion_init': dict(), + } + kprobes_postresume = [ + { + 'name': 'ataportrst', + 'func': 'ata_eh_recover', + 'args': {'port':'+36(%di):s32'}, + 'format': 'ata{port}_port_reset', + 'mask': 'ata.*_port_reset' + } + ] + kprobes = dict() + timeformat = '%.3f' def __init__(self): + # if this is a phoronix test run, set some default options + if('LOG_FILE' in os.environ and 'TEST_RESULTS_IDENTIFIER' in os.environ): + self.embedded = True + self.addlogs = True + self.htmlfile = os.environ['LOG_FILE'] self.hostname = platform.node() if(self.hostname == ''): self.hostname = 'localhost' @@ -118,6 +208,12 @@ class SystemValues: if os.path.exists(rtc) and os.path.exists(rtc+'/date') and \ os.path.exists(rtc+'/time') and os.path.exists(rtc+'/wakealarm'): self.rtcpath = rtc + if (hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()): + self.ansi = True + def setPrecision(self, num): + if num < 0 or num > 6: + return + self.timeformat = '%.{0}f'.format(num) def setOutputFile(self): if((self.htmlfile == '') and (self.dmesgfile != '')): m = re.match('(?P.*)_dmesg\.txt$', self.dmesgfile) @@ -129,32 +225,37 @@ class SystemValues: self.htmlfile = m.group('name')+'.html' if(self.htmlfile == ''): self.htmlfile = 'output.html' - def initTestOutput(self, subdir): - if(not self.android): - self.prefix = self.hostname - v = open('/proc/version', 'r').read().strip() - kver = string.split(v)[2] - else: - self.prefix = 'android' - v = os.popen(self.adb+' shell cat /proc/version').read().strip() - kver = string.split(v)[2] - testtime = datetime.now().strftime('suspend-%m%d%y-%H%M%S') + def initTestOutput(self, subdir, testpath=''): + self.prefix = self.hostname + v = open('/proc/version', 'r').read().strip() + kver = string.split(v)[2] + n = datetime.now() + testtime = n.strftime('suspend-%m%d%y-%H%M%S') + if not testpath: + testpath = n.strftime('suspend-%y%m%d-%H%M%S') if(subdir != "."): - self.testdir = subdir+"/"+testtime + self.testdir = subdir+"/"+testpath else: - self.testdir = testtime + self.testdir = testpath self.teststamp = \ '# '+testtime+' '+self.prefix+' '+self.suspendmode+' '+kver + if(self.embedded): + self.dmesgfile = \ + '/tmp/'+testtime+'_'+self.suspendmode+'_dmesg.txt' + self.ftracefile = \ + '/tmp/'+testtime+'_'+self.suspendmode+'_ftrace.txt' + return self.dmesgfile = \ self.testdir+'/'+self.prefix+'_'+self.suspendmode+'_dmesg.txt' self.ftracefile = \ self.testdir+'/'+self.prefix+'_'+self.suspendmode+'_ftrace.txt' self.htmlfile = \ self.testdir+'/'+self.prefix+'_'+self.suspendmode+'.html' - os.mkdir(self.testdir) + if not os.path.isdir(self.testdir): + os.mkdir(self.testdir) def setDeviceFilter(self, devnames): self.devicefilter = string.split(devnames) - def rtcWakeAlarm(self): + def rtcWakeAlarmOn(self): os.system('echo 0 > '+self.rtcpath+'/wakealarm') outD = open(self.rtcpath+'/date', 'r').read().strip() outT = open(self.rtcpath+'/time', 'r').read().strip() @@ -172,9 +273,361 @@ class SystemValues: nowtime = int(datetime.now().strftime('%s')) alarm = nowtime + self.rtcwaketime os.system('echo %d > %s/wakealarm' % (alarm, self.rtcpath)) + def rtcWakeAlarmOff(self): + os.system('echo 0 > %s/wakealarm' % self.rtcpath) + def initdmesg(self): + # get the latest time stamp from the dmesg log + fp = os.popen('dmesg') + ktime = '0' + for line in fp: + line = line.replace('\r\n', '') + idx = line.find('[') + if idx > 1: + line = line[idx:] + m = re.match('[ \t]*(\[ *)(?P[0-9\.]*)(\]) (?P.*)', line) + if(m): + ktime = m.group('ktime') + fp.close() + self.dmesgstart = float(ktime) + def getdmesg(self): + # store all new dmesg lines since initdmesg was called + fp = os.popen('dmesg') + op = open(self.dmesgfile, 'a') + for line in fp: + line = line.replace('\r\n', '') + idx = line.find('[') + if idx > 1: + line = line[idx:] + m = re.match('[ \t]*(\[ *)(?P[0-9\.]*)(\]) (?P.*)', line) + if(not m): + continue + ktime = float(m.group('ktime')) + if ktime > self.dmesgstart: + op.write(line) + fp.close() + op.close() + def addFtraceFilterFunctions(self, file): + fp = open(file) + list = fp.read().split('\n') + fp.close() + for i in list: + if len(i) < 2: + continue + self.tracefuncs[i] = dict() + def getFtraceFilterFunctions(self, current): + rootCheck(True) + if not current: + os.system('cat '+self.tpath+'available_filter_functions') + return + fp = open(self.tpath+'available_filter_functions') + master = fp.read().split('\n') + fp.close() + if len(self.debugfuncs) > 0: + for i in self.debugfuncs: + if i in master: + print i + else: + print self.colorText(i) + else: + for i in self.tracefuncs: + if 'func' in self.tracefuncs[i]: + i = self.tracefuncs[i]['func'] + if i in master: + print i + else: + print self.colorText(i) + def setFtraceFilterFunctions(self, list): + fp = open(self.tpath+'available_filter_functions') + master = fp.read().split('\n') + fp.close() + flist = '' + for i in list: + if i not in master: + continue + if ' [' in i: + flist += i.split(' ')[0]+'\n' + else: + flist += i+'\n' + fp = open(self.tpath+'set_graph_function', 'w') + fp.write(flist) + fp.close() + def kprobeMatch(self, name, target): + if name not in self.kprobes: + return False + if re.match(self.kprobes[name]['mask'], target): + return True + return False + def basicKprobe(self, name): + self.kprobes[name] = {'name': name,'func': name,'args': dict(),'format': name,'mask': name} + def defaultKprobe(self, name, kdata): + k = kdata + for field in ['name', 'format', 'mask', 'func']: + if field not in k: + k[field] = name + archargs = 'args_'+platform.machine() + if archargs in k: + k['args'] = k[archargs] + else: + k['args'] = dict() + k['format'] = name + self.kprobes[name] = k + def kprobeColor(self, name): + if name not in self.kprobes or 'color' not in self.kprobes[name]: + return '' + return self.kprobes[name]['color'] + def kprobeDisplayName(self, name, dataraw): + if name not in self.kprobes: + self.basicKprobe(name) + data = '' + quote=0 + # first remvoe any spaces inside quotes, and the quotes + for c in dataraw: + if c == '"': + quote = (quote + 1) % 2 + if quote and c == ' ': + data += '_' + elif c != '"': + data += c + fmt, args = self.kprobes[name]['format'], self.kprobes[name]['args'] + arglist = dict() + # now process the args + for arg in sorted(args): + arglist[arg] = '' + m = re.match('.* '+arg+'=(?P.*) ', data); + if m: + arglist[arg] = m.group('arg') + else: + m = re.match('.* '+arg+'=(?P.*)', data); + if m: + arglist[arg] = m.group('arg') + out = fmt.format(**arglist) + out = out.replace(' ', '_').replace('"', '') + return out + def kprobeText(self, kprobe): + name, fmt, func, args = kprobe['name'], kprobe['format'], kprobe['func'], kprobe['args'] + if re.findall('{(?P[a-z,A-Z,0-9]*)}', func): + doError('Kprobe "%s" has format info in the function name "%s"' % (name, func), False) + for arg in re.findall('{(?P[a-z,A-Z,0-9]*)}', fmt): + if arg not in args: + doError('Kprobe "%s" is missing argument "%s"' % (name, arg), False) + val = 'p:%s_cal %s' % (name, func) + for i in sorted(args): + val += ' %s=%s' % (i, args[i]) + val += '\nr:%s_ret %s $retval\n' % (name, func) + return val + def addKprobes(self): + # first test each kprobe + print('INITIALIZING KPROBES...') + rejects = [] + for name in sorted(self.kprobes): + if not self.testKprobe(self.kprobes[name]): + rejects.append(name) + # remove all failed ones from the list + for name in rejects: + vprint('Skipping KPROBE: %s' % name) + self.kprobes.pop(name) + self.fsetVal('', 'kprobe_events') + kprobeevents = '' + # set the kprobes all at once + for kp in self.kprobes: + val = self.kprobeText(self.kprobes[kp]) + vprint('Adding KPROBE: %s\n%s' % (kp, val.strip())) + kprobeevents += self.kprobeText(self.kprobes[kp]) + self.fsetVal(kprobeevents, 'kprobe_events') + # verify that the kprobes were set as ordered + check = self.fgetVal('kprobe_events') + linesout = len(kprobeevents.split('\n')) + linesack = len(check.split('\n')) + if linesack < linesout: + # if not, try appending the kprobes 1 by 1 + for kp in self.kprobes: + kprobeevents = self.kprobeText(self.kprobes[kp]) + self.fsetVal(kprobeevents, 'kprobe_events', 'a') + self.fsetVal('1', 'events/kprobes/enable') + def testKprobe(self, kprobe): + kprobeevents = self.kprobeText(kprobe) + if not kprobeevents: + return False + try: + self.fsetVal(kprobeevents, 'kprobe_events') + check = self.fgetVal('kprobe_events') + except: + return False + linesout = len(kprobeevents.split('\n')) + linesack = len(check.split('\n')) + if linesack < linesout: + return False + return True + def fsetVal(self, val, path, mode='w'): + file = self.tpath+path + if not os.path.exists(file): + return False + try: + fp = open(file, mode) + fp.write(val) + fp.close() + except: + pass + return True + def fgetVal(self, path): + file = self.tpath+path + res = '' + if not os.path.exists(file): + return res + try: + fp = open(file, 'r') + res = fp.read() + fp.close() + except: + pass + return res + def cleanupFtrace(self): + if(self.usecallgraph or self.usetraceevents): + self.fsetVal('0', 'events/kprobes/enable') + self.fsetVal('', 'kprobe_events') + def setupAllKprobes(self): + for name in self.tracefuncs: + self.defaultKprobe(name, self.tracefuncs[name]) + for name in self.dev_tracefuncs: + self.defaultKprobe(name, self.dev_tracefuncs[name]) + def isCallgraphFunc(self, name): + if len(self.debugfuncs) < 1 and self.suspendmode == 'command': + return True + if name in self.debugfuncs: + return True + funclist = [] + for i in self.tracefuncs: + if 'func' in self.tracefuncs[i]: + funclist.append(self.tracefuncs[i]['func']) + else: + funclist.append(i) + if name in funclist: + return True + return False + def initFtrace(self, testing=False): + tp = self.tpath + print('INITIALIZING FTRACE...') + # turn trace off + self.fsetVal('0', 'tracing_on') + self.cleanupFtrace() + # set the trace clock to global + self.fsetVal('global', 'trace_clock') + # set trace buffer to a huge value + self.fsetVal('nop', 'current_tracer') + self.fsetVal('100000', 'buffer_size_kb') + # go no further if this is just a status check + if testing: + return + if self.usekprobes: + # add tracefunc kprobes so long as were not using full callgraph + if(not self.usecallgraph or len(self.debugfuncs) > 0): + for name in self.tracefuncs: + self.defaultKprobe(name, self.tracefuncs[name]) + if self.usedevsrc: + for name in self.dev_tracefuncs: + self.defaultKprobe(name, self.dev_tracefuncs[name]) + else: + self.usedevsrc = False + self.addKprobes() + # initialize the callgraph trace, unless this is an x2 run + if(self.usecallgraph): + # set trace type + self.fsetVal('function_graph', 'current_tracer') + self.fsetVal('', 'set_ftrace_filter') + # set trace format options + self.fsetVal('print-parent', 'trace_options') + self.fsetVal('funcgraph-abstime', 'trace_options') + self.fsetVal('funcgraph-cpu', 'trace_options') + self.fsetVal('funcgraph-duration', 'trace_options') + self.fsetVal('funcgraph-proc', 'trace_options') + self.fsetVal('funcgraph-tail', 'trace_options') + self.fsetVal('nofuncgraph-overhead', 'trace_options') + self.fsetVal('context-info', 'trace_options') + self.fsetVal('graph-time', 'trace_options') + self.fsetVal('0', 'max_graph_depth') + if len(self.debugfuncs) > 0: + self.setFtraceFilterFunctions(self.debugfuncs) + elif self.suspendmode == 'command': + self.fsetVal('', 'set_graph_function') + else: + cf = ['dpm_run_callback'] + if(self.usetraceeventsonly): + cf += ['dpm_prepare', 'dpm_complete'] + for fn in self.tracefuncs: + if 'func' in self.tracefuncs[fn]: + cf.append(self.tracefuncs[fn]['func']) + else: + cf.append(fn) + self.setFtraceFilterFunctions(cf) + if(self.usetraceevents): + # turn trace events on + events = iter(self.traceevents) + for e in events: + self.fsetVal('1', 'events/power/'+e+'/enable') + # clear the trace buffer + self.fsetVal('', 'trace') + def verifyFtrace(self): + # files needed for any trace data + files = ['buffer_size_kb', 'current_tracer', 'trace', 'trace_clock', + 'trace_marker', 'trace_options', 'tracing_on'] + # files needed for callgraph trace data + tp = self.tpath + if(self.usecallgraph): + files += [ + 'available_filter_functions', + 'set_ftrace_filter', + 'set_graph_function' + ] + for f in files: + if(os.path.exists(tp+f) == False): + return False + return True + def verifyKprobes(self): + # files needed for kprobes to work + files = ['kprobe_events', 'events'] + tp = self.tpath + for f in files: + if(os.path.exists(tp+f) == False): + return False + return True + def colorText(self, str): + if not self.ansi: + return str + return '\x1B[31;40m'+str+'\x1B[m' sysvals = SystemValues() +# Class: DevProps +# Description: +# Simple class which holds property values collected +# for all the devices used in the timeline. +class DevProps: + syspath = '' + altname = '' + async = True + xtraclass = '' + xtrainfo = '' + def out(self, dev): + return '%s,%s,%d;' % (dev, self.altname, self.async) + def debug(self, dev): + print '%s:\n\taltname = %s\n\t async = %s' % (dev, self.altname, self.async) + def altName(self, dev): + if not self.altname or self.altname == dev: + return dev + return '%s [%s]' % (self.altname, dev) + def xtraClass(self): + if self.xtraclass: + return ' '+self.xtraclass + if not self.async: + return ' sync' + return '' + def xtraInfo(self): + if self.xtraclass: + return ' '+self.xtraclass + if self.async: + return ' async' + return ' sync' + # Class: DeviceNode # Description: # A container used to create a device hierachy, with a single root node @@ -228,6 +681,7 @@ class Data: html_device_id = 0 stamp = 0 outfile = '' + dev_ubiquitous = ['msleep', 'udelay'] def __init__(self, num): idchar = 'abcdefghijklmnopqrstuvwxyz' self.testnumber = num @@ -257,6 +711,9 @@ class Data: 'row': 0, 'color': '#FFFFCC', 'order': 9} } self.phases = self.sortedPhases() + self.devicegroups = [] + for phase in self.phases: + self.devicegroups.append([phase]) def getStart(self): return self.dmesg[self.phases[0]]['start'] def setStart(self, time): @@ -273,51 +730,61 @@ class Data: for dev in list: d = list[dev] if(d['pid'] == pid and time >= d['start'] and - time <= d['end']): + time < d['end']): return False return True - def addIntraDevTraceEvent(self, action, name, pid, time): - if(action == 'mutex_lock_try'): - color = 'red' - elif(action == 'mutex_lock_pass'): - color = 'green' - elif(action == 'mutex_unlock'): - color = 'blue' - else: - # create separate colors based on the name - v1 = len(name)*10 % 256 - v2 = string.count(name, 'e')*100 % 256 - v3 = ord(name[0])*20 % 256 - color = '#%06X' % ((v1*0x10000) + (v2*0x100) + v3) - for phase in self.phases: + def targetDevice(self, phaselist, start, end, pid=-1): + tgtdev = '' + for phase in phaselist: list = self.dmesg[phase]['list'] - for dev in list: - d = list[dev] - if(d['pid'] == pid and time >= d['start'] and - time <= d['end']): - e = TraceEvent(action, name, color, time) - if('traceevents' not in d): - d['traceevents'] = [] - d['traceevents'].append(e) - return d - break - return 0 - def capIntraDevTraceEvent(self, action, name, pid, time): - for phase in self.phases: - list = self.dmesg[phase]['list'] - for dev in list: - d = list[dev] - if(d['pid'] == pid and time >= d['start'] and - time <= d['end']): - if('traceevents' not in d): - return - for e in d['traceevents']: - if(e.action == action and - e.name == name and not e.ready): - e.length = time - e.time - e.ready = True - break - return + for devname in list: + dev = list[devname] + if(pid >= 0 and dev['pid'] != pid): + continue + devS = dev['start'] + devE = dev['end'] + if(start < devS or start >= devE or end <= devS or end > devE): + continue + tgtdev = dev + break + return tgtdev + def addDeviceFunctionCall(self, displayname, kprobename, proc, pid, start, end, cdata, rdata): + machstart = self.dmesg['suspend_machine']['start'] + machend = self.dmesg['resume_machine']['end'] + tgtdev = self.targetDevice(self.phases, start, end, pid) + if not tgtdev and start >= machstart and end < machend: + # device calls in machine phases should be serial + tgtdev = self.targetDevice(['suspend_machine', 'resume_machine'], start, end) + if not tgtdev: + if 'scsi_eh' in proc: + self.newActionGlobal(proc, start, end, pid) + self.addDeviceFunctionCall(displayname, kprobename, proc, pid, start, end, cdata, rdata) + else: + vprint('IGNORE: %s[%s](%d) [%f - %f] | %s | %s | %s' % (displayname, kprobename, + pid, start, end, cdata, rdata, proc)) + return False + # detail block fits within tgtdev + if('src' not in tgtdev): + tgtdev['src'] = [] + title = cdata+' '+rdata + mstr = '\(.*\) *(?P.*) *\((?P.*)\+.* arg1=(?P.*)' + m = re.match(mstr, title) + if m: + c = m.group('caller') + a = m.group('args').strip() + r = m.group('ret') + if len(r) > 6: + r = '' + else: + r = 'ret=%s ' % r + l = '%0.3fms' % ((end - start) * 1000) + if kprobename in self.dev_ubiquitous: + title = '%s(%s) <- %s, %s(%s)' % (displayname, a, c, r, l) + else: + title = '%s(%s) %s(%s)' % (displayname, a, r, l) + e = TraceEvent(title, kprobename, start, end - start) + tgtdev['src'].append(e) + return True def trimTimeVal(self, t, t0, dT, left): if left: if(t > t0): @@ -353,11 +820,11 @@ class Data: cg.end = self.trimTimeVal(cg.end, t0, dT, left) for line in cg.list: line.time = self.trimTimeVal(line.time, t0, dT, left) - if('traceevents' in d): - for e in d['traceevents']: + if('src' in d): + for e in d['src']: e.time = self.trimTimeVal(e.time, t0, dT, left) def normalizeTime(self, tZero): - # first trim out any standby or freeze clock time + # trim out any standby or freeze clock time if(self.tSuspended != self.tResumed): if(self.tResumed > tZero): self.trimTime(self.tSuspended, \ @@ -365,29 +832,6 @@ class Data: else: self.trimTime(self.tSuspended, \ self.tResumed-self.tSuspended, False) - # shift the timeline so that tZero is the new 0 - self.tSuspended -= tZero - self.tResumed -= tZero - self.start -= tZero - self.end -= tZero - for phase in self.phases: - p = self.dmesg[phase] - p['start'] -= tZero - p['end'] -= tZero - list = p['list'] - for name in list: - d = list[name] - d['start'] -= tZero - d['end'] -= tZero - if('ftrace' in d): - cg = d['ftrace'] - cg.start -= tZero - cg.end -= tZero - for line in cg.list: - line.time -= tZero - if('traceevents' in d): - for e in d['traceevents']: - e.time -= tZero def newPhaseWithSingleAction(self, phasename, devname, start, end, color): for phase in self.phases: self.dmesg[phase]['order'] += 1 @@ -417,6 +861,7 @@ class Data: {'list': list, 'start': start, 'end': end, 'row': 0, 'color': color, 'order': order} self.phases = self.sortedPhases() + self.devicegroups.append([phasename]) def setPhase(self, phase, ktime, isbegin): if(isbegin): self.dmesg[phase]['start'] = ktime @@ -442,7 +887,10 @@ class Data: for devname in phaselist: dev = phaselist[devname] if(dev['end'] < 0): - dev['end'] = end + for p in self.phases: + if self.dmesg[p]['end'] > dev['start']: + dev['end'] = self.dmesg[p]['end'] + break vprint('%s (%s): callback didnt return' % (devname, phase)) def deviceFilter(self, devicefilter): # remove all by the relatives of the filter devnames @@ -472,22 +920,58 @@ class Data: # if any calls never returned, clip them at system resume end for phase in self.phases: self.fixupInitcalls(phase, self.getEnd()) - def newActionGlobal(self, name, start, end): + def isInsideTimeline(self, start, end): + if(self.start <= start and self.end > start): + return True + return False + def phaseOverlap(self, phases): + rmgroups = [] + newgroup = [] + for group in self.devicegroups: + for phase in phases: + if phase not in group: + continue + for p in group: + if p not in newgroup: + newgroup.append(p) + if group not in rmgroups: + rmgroups.append(group) + for group in rmgroups: + self.devicegroups.remove(group) + self.devicegroups.append(newgroup) + def newActionGlobal(self, name, start, end, pid=-1, color=''): + # if event starts before timeline start, expand timeline + if(start < self.start): + self.setStart(start) + # if event ends after timeline end, expand the timeline + if(end > self.end): + self.setEnd(end) # which phase is this device callback or action "in" targetphase = "none" + htmlclass = '' overlap = 0.0 + phases = [] for phase in self.phases: pstart = self.dmesg[phase]['start'] pend = self.dmesg[phase]['end'] o = max(0, min(end, pend) - max(start, pstart)) - if(o > overlap): + if o > 0: + phases.append(phase) + if o > overlap: + if overlap > 0 and phase == 'post_resume': + continue targetphase = phase overlap = o + if pid == -2: + htmlclass = ' bg' + if len(phases) > 1: + htmlclass = ' bg' + self.phaseOverlap(phases) if targetphase in self.phases: - self.newAction(targetphase, name, -1, '', start, end, '') - return True + newname = self.newAction(targetphase, name, pid, '', start, end, '', htmlclass, color) + return (targetphase, newname) return False - def newAction(self, phase, name, pid, parent, start, end, drv): + def newAction(self, phase, name, pid, parent, start, end, drv, htmlclass='', color=''): # new device callback for a specific phase self.html_device_id += 1 devid = '%s%d' % (self.idstr, self.html_device_id) @@ -495,8 +979,19 @@ class Data: length = -1.0 if(start >= 0 and end >= 0): length = end - start + if pid == -2: + i = 2 + origname = name + while(name in list): + name = '%s[%d]' % (origname, i) + i += 1 list[name] = {'start': start, 'end': end, 'pid': pid, 'par': parent, 'length': length, 'row': 0, 'id': devid, 'drv': drv } + if htmlclass: + list[name]['htmlclass'] = htmlclass + if color: + list[name]['color'] = color + return name def deviceIDs(self, devlist, phase): idlist = [] list = self.dmesg[phase]['list'] @@ -536,10 +1031,21 @@ class Data: vprint(' %16s: %f - %f (%d devices)' % (phase, \ self.dmesg[phase]['start'], self.dmesg[phase]['end'], dc)) vprint(' test end: %f' % self.end) + def deviceChildrenAllPhases(self, devname): + devlist = [] + for phase in self.phases: + list = self.deviceChildren(devname, phase) + for dev in list: + if dev not in devlist: + devlist.append(dev) + return devlist def masterTopology(self, name, list, depth): node = DeviceNode(name, depth) for cname in list: - clist = self.deviceChildren(cname, 'resume') + # avoid recursions + if name == cname: + continue + clist = self.deviceChildrenAllPhases(cname) cnode = self.masterTopology(cname, clist, depth+1) node.children.append(cnode) return node @@ -580,7 +1086,8 @@ class Data: list = self.dmesg[phase]['list'] for dev in list: pdev = list[dev]['par'] - if(re.match('[0-9]*-[0-9]*\.[0-9]*[\.0-9]*\:[\.0-9]*$', pdev)): + pid = list[dev]['pid'] + if(pid < 0 or re.match('[0-9]*-[0-9]*\.[0-9]*[\.0-9]*\:[\.0-9]*$', pdev)): continue if pdev and pdev not in real and pdev not in rootlist: rootlist.append(pdev) @@ -589,22 +1096,33 @@ class Data: rootlist = self.rootDeviceList() master = self.masterTopology('', rootlist, 0) return self.printTopology(master) + def selectTimelineDevices(self, widfmt, tTotal, mindevlen): + # only select devices that will actually show up in html + self.tdevlist = dict() + for phase in self.dmesg: + devlist = [] + list = self.dmesg[phase]['list'] + for dev in list: + length = (list[dev]['end'] - list[dev]['start']) * 1000 + width = widfmt % (((list[dev]['end']-list[dev]['start'])*100)/tTotal) + if width != '0.000000' and length >= mindevlen: + devlist.append(dev) + self.tdevlist[phase] = devlist # Class: TraceEvent # Description: # A container for trace event data found in the ftrace file class TraceEvent: - ready = False - name = '' + text = '' time = 0.0 - color = '#FFFFFF' length = 0.0 - action = '' - def __init__(self, a, n, c, t): - self.action = a - self.name = n - self.color = c + title = '' + row = 0 + def __init__(self, a, n, t, l): + self.title = a + self.text = n self.time = t + self.length = l # Class: FTraceLine # Description: @@ -623,11 +1141,14 @@ class FTraceLine: fcall = False freturn = False fevent = False + fkprobe = False depth = 0 name = '' type = '' - def __init__(self, t, m, d): + def __init__(self, t, m='', d=''): self.time = float(t) + if not m and not d: + return # is this a trace event if(d == 'traceevent' or re.match('^ *\/\* *(?P.*) \*\/ *$', m)): if(d == 'traceevent'): @@ -644,6 +1165,18 @@ class FTraceLine: self.type = emm.group('call') else: self.name = msg + km = re.match('^(?P.*)_cal$', self.type) + if km: + self.fcall = True + self.fkprobe = True + self.type = km.group('n') + return + km = re.match('^(?P.*)_ret$', self.type) + if km: + self.freturn = True + self.fkprobe = True + self.type = km.group('n') + return self.fevent = True return # convert the duration to seconds @@ -662,7 +1195,7 @@ class FTraceLine: # includes comment with function name match = re.match('^} *\/\* *(?P.*) *\*\/$', m) if(match): - self.name = match.group('n') + self.name = match.group('n').strip() # function call else: self.fcall = True @@ -670,19 +1203,19 @@ class FTraceLine: if(m[-1] == '{'): match = re.match('^(?P.*) *\(.*', m) if(match): - self.name = match.group('n') + self.name = match.group('n').strip() # function call with no children (leaf) elif(m[-1] == ';'): self.freturn = True match = re.match('^(?P.*) *\(.*', m) if(match): - self.name = match.group('n') + self.name = match.group('n').strip() # something else (possibly a trace marker) else: self.name = m def getDepth(self, str): return len(str)/2 - def debugPrint(self, dev): + def debugPrint(self, dev=''): if(self.freturn and self.fcall): print('%s -- %f (%02d): %s(); (%.3f us)' % (dev, self.time, \ self.depth, self.name, self.length*1000000)) @@ -692,6 +1225,33 @@ class FTraceLine: else: print('%s -- %f (%02d): %s() { (%.3f us)' % (dev, self.time, \ self.depth, self.name, self.length*1000000)) + def startMarker(self): + global sysvals + # Is this the starting line of a suspend? + if not self.fevent: + return False + if sysvals.usetracemarkers: + if(self.name == 'SUSPEND START'): + return True + return False + else: + if(self.type == 'suspend_resume' and + re.match('suspend_enter\[.*\] begin', self.name)): + return True + return False + def endMarker(self): + # Is this the ending line of a resume? + if not self.fevent: + return False + if sysvals.usetracemarkers: + if(self.name == 'RESUME COMPLETE'): + return True + return False + else: + if(self.type == 'suspend_resume' and + re.match('thaw_processes\[.*\] end', self.name)): + return True + return False # Class: FTraceCallGraph # Description: @@ -705,54 +1265,124 @@ class FTraceCallGraph: list = [] invalid = False depth = 0 - def __init__(self): + pid = 0 + def __init__(self, pid): self.start = -1.0 self.end = -1.0 self.list = [] self.depth = 0 - def setDepth(self, line): + self.pid = pid + def addLine(self, line, debug=False): + # if this is already invalid, just leave + if(self.invalid): + return False + # invalidate on too much data or bad depth + if(len(self.list) >= 1000000 or self.depth < 0): + self.invalidate(line) + return False + # compare current depth with this lines pre-call depth + prelinedep = line.depth + if(line.freturn and not line.fcall): + prelinedep += 1 + last = 0 + lasttime = line.time + virtualfname = 'execution_misalignment' + if len(self.list) > 0: + last = self.list[-1] + lasttime = last.time + # handle low misalignments by inserting returns + if prelinedep < self.depth: + if debug and last: + print '-------- task %d --------' % self.pid + last.debugPrint() + idx = 0 + # add return calls to get the depth down + while prelinedep < self.depth: + if debug: + print 'MISALIGN LOW (add returns): C%d - eC%d' % (self.depth, prelinedep) + self.depth -= 1 + if idx == 0 and last and last.fcall and not last.freturn: + # special case, turn last call into a leaf + last.depth = self.depth + last.freturn = True + last.length = line.time - last.time + if debug: + last.debugPrint() + else: + vline = FTraceLine(lasttime) + vline.depth = self.depth + vline.name = virtualfname + vline.freturn = True + self.list.append(vline) + if debug: + vline.debugPrint() + idx += 1 + if debug: + line.debugPrint() + print '' + # handle high misalignments by inserting calls + elif prelinedep > self.depth: + if debug and last: + print '-------- task %d --------' % self.pid + last.debugPrint() + idx = 0 + # add calls to get the depth up + while prelinedep > self.depth: + if debug: + print 'MISALIGN HIGH (add calls): C%d - eC%d' % (self.depth, prelinedep) + if idx == 0 and line.freturn and not line.fcall: + # special case, turn this return into a leaf + line.fcall = True + prelinedep -= 1 + else: + vline = FTraceLine(lasttime) + vline.depth = self.depth + vline.name = virtualfname + vline.fcall = True + if debug: + vline.debugPrint() + self.list.append(vline) + self.depth += 1 + if not last: + self.start = vline.time + idx += 1 + if debug: + line.debugPrint() + print '' + # process the call and set the new depth if(line.fcall and not line.freturn): - line.depth = self.depth self.depth += 1 elif(line.freturn and not line.fcall): self.depth -= 1 - line.depth = self.depth - else: - line.depth = self.depth - def addLine(self, line, match): - if(not self.invalid): - self.setDepth(line) + if len(self.list) < 1: + self.start = line.time + self.list.append(line) if(line.depth == 0 and line.freturn): if(self.start < 0): self.start = line.time self.end = line.time - self.list.append(line) + if line.fcall: + self.end += line.length + if self.list[0].name == virtualfname: + self.invalid = True return True - if(self.invalid): - return False - if(len(self.list) >= 1000000 or self.depth < 0): - if(len(self.list) > 0): - first = self.list[0] - self.list = [] - self.list.append(first) - self.invalid = True - if(not match): - return False - id = 'task %s cpu %s' % (match.group('pid'), match.group('cpu')) - window = '(%f - %f)' % (self.start, line.time) - if(self.depth < 0): - print('Too much data for '+id+\ - ' (buffer overflow), ignoring this callback') - else: - print('Too much data for '+id+\ - ' '+window+', ignoring this callback') - return False - self.list.append(line) - if(self.start < 0): - self.start = line.time return False + def invalidate(self, line): + if(len(self.list) > 0): + first = self.list[0] + self.list = [] + self.list.append(first) + self.invalid = True + id = 'task %s' % (self.pid) + window = '(%f - %f)' % (self.start, line.time) + if(self.depth < 0): + vprint('Too much data for '+id+\ + ' (buffer overflow), ignoring this callback') + else: + vprint('Too much data for '+id+\ + ' '+window+', ignoring this callback') def slice(self, t0, tN): - minicg = FTraceCallGraph() + minicg = FTraceCallGraph(0) count = -1 firstdepth = 0 for l in self.list: @@ -764,13 +1394,26 @@ class FTraceCallGraph: firstdepth = l.depth count = 0 l.depth -= firstdepth - minicg.addLine(l, 0) + minicg.addLine(l) if((count == 0 and l.freturn and l.fcall) or (count > 0 and l.depth <= 0)): break count += 1 return minicg - def sanityCheck(self): + def repair(self, enddepth): + # bring the depth back to 0 with additional returns + fixed = False + last = self.list[-1] + for i in reversed(range(enddepth)): + t = FTraceLine(last.time) + t.depth = i + t.freturn = True + fixed = self.addLine(t) + if fixed: + self.end = last.time + return True + return False + def postProcess(self, debug=False): stack = dict() cnt = 0 for l in self.list: @@ -779,98 +1422,317 @@ class FTraceCallGraph: cnt += 1 elif(l.freturn and not l.fcall): if(l.depth not in stack): + if debug: + print 'Post Process Error: Depth missing' + l.debugPrint() return False + # transfer total time from return line to call line stack[l.depth].length = l.length - stack[l.depth] = 0 + stack.pop(l.depth) l.length = 0 cnt -= 1 if(cnt == 0): + # trace caught the whole call tree return True - return False - def debugPrint(self, filename): - if(filename == 'stdout'): - print('[%f - %f]') % (self.start, self.end) - for l in self.list: - if(l.freturn and l.fcall): - print('%f (%02d): %s(); (%.3f us)' % (l.time, \ - l.depth, l.name, l.length*1000000)) - elif(l.freturn): - print('%f (%02d): %s} (%.3f us)' % (l.time, \ - l.depth, l.name, l.length*1000000)) - else: - print('%f (%02d): %s() { (%.3f us)' % (l.time, \ - l.depth, l.name, l.length*1000000)) - print(' ') - else: - fp = open(filename, 'w') - print(filename) - for l in self.list: - if(l.freturn and l.fcall): - fp.write('%f (%02d): %s(); (%.3f us)\n' % (l.time, \ - l.depth, l.name, l.length*1000000)) - elif(l.freturn): - fp.write('%f (%02d): %s} (%.3f us)\n' % (l.time, \ - l.depth, l.name, l.length*1000000)) - else: - fp.write('%f (%02d): %s() { (%.3f us)\n' % (l.time, \ - l.depth, l.name, l.length*1000000)) - fp.close() + elif(cnt < 0): + if debug: + print 'Post Process Error: Depth is less than 0' + return False + # trace ended before call tree finished + return self.repair(cnt) + def deviceMatch(self, pid, data): + found = False + # add the callgraph data to the device hierarchy + borderphase = { + 'dpm_prepare': 'suspend_prepare', + 'dpm_complete': 'resume_complete' + } + if(self.list[0].name in borderphase): + p = borderphase[self.list[0].name] + list = data.dmesg[p]['list'] + for devname in list: + dev = list[devname] + if(pid == dev['pid'] and + self.start <= dev['start'] and + self.end >= dev['end']): + dev['ftrace'] = self.slice(dev['start'], dev['end']) + found = True + return found + for p in data.phases: + if(data.dmesg[p]['start'] <= self.start and + self.start <= data.dmesg[p]['end']): + list = data.dmesg[p]['list'] + for devname in list: + dev = list[devname] + if(pid == dev['pid'] and + self.start <= dev['start'] and + self.end >= dev['end']): + dev['ftrace'] = self + found = True + break + break + return found + def newActionFromFunction(self, data): + name = self.list[0].name + if name in ['dpm_run_callback', 'dpm_prepare', 'dpm_complete']: + return + fs = self.start + fe = self.end + if fs < data.start or fe > data.end: + return + phase = '' + for p in data.phases: + if(data.dmesg[p]['start'] <= self.start and + self.start < data.dmesg[p]['end']): + phase = p + break + if not phase: + return + out = data.newActionGlobal(name, fs, fe, -2) + if out: + phase, myname = out + data.dmesg[phase]['list'][myname]['ftrace'] = self + def debugPrint(self): + print('[%f - %f] %s (%d)') % (self.start, self.end, self.list[0].name, self.pid) + for l in self.list: + if(l.freturn and l.fcall): + print('%f (%02d): %s(); (%.3f us)' % (l.time, \ + l.depth, l.name, l.length*1000000)) + elif(l.freturn): + print('%f (%02d): %s} (%.3f us)' % (l.time, \ + l.depth, l.name, l.length*1000000)) + else: + print('%f (%02d): %s() { (%.3f us)' % (l.time, \ + l.depth, l.name, l.length*1000000)) + print(' ') # Class: Timeline # Description: -# A container for a suspend/resume html timeline. In older versions -# of the script there were multiple timelines, but in the latest -# there is only one. +# A container for a device timeline which calculates +# all the html properties to display it correctly class Timeline: html = {} - scaleH = 0.0 # height of the row as a percent of the timeline height - rowH = 0.0 # height of each row in percent of the timeline height - row_height_pixels = 30 - maxrows = 0 - height = 0 - def __init__(self): + height = 0 # total timeline height + scaleH = 20 # timescale (top) row height + rowH = 30 # device row height + bodyH = 0 # body height + rows = 0 # total timeline rows + phases = [] + rowmaxlines = dict() + rowcount = dict() + rowheight = dict() + def __init__(self, rowheight): + self.rowH = rowheight self.html = { + 'header': '', 'timeline': '', 'legend': '', - 'scale': '' } - def setRows(self, rows): - self.maxrows = int(rows) - self.scaleH = 100.0/float(self.maxrows) - self.height = self.maxrows*self.row_height_pixels - r = float(self.maxrows - 1) - if(r < 1.0): - r = 1.0 - self.rowH = (100.0 - self.scaleH)/r + # Function: getDeviceRows + # Description: + # determine how may rows the device funcs will take + # Arguments: + # rawlist: the list of devices/actions for a single phase + # Output: + # The total number of rows needed to display this phase of the timeline + def getDeviceRows(self, rawlist): + # clear all rows and set them to undefined + lendict = dict() + for item in rawlist: + item.row = -1 + lendict[item] = item.length + list = [] + for i in sorted(lendict, key=lendict.get, reverse=True): + list.append(i) + remaining = len(list) + rowdata = dict() + row = 1 + # try to pack each row with as many ranges as possible + while(remaining > 0): + if(row not in rowdata): + rowdata[row] = [] + for i in list: + if(i.row >= 0): + continue + s = i.time + e = i.time + i.length + valid = True + for ritem in rowdata[row]: + rs = ritem.time + re = ritem.time + ritem.length + if(not (((s <= rs) and (e <= rs)) or + ((s >= re) and (e >= re)))): + valid = False + break + if(valid): + rowdata[row].append(i) + i.row = row + remaining -= 1 + row += 1 + return row + # Function: getPhaseRows + # Description: + # Organize the timeline entries into the smallest + # number of rows possible, with no entry overlapping + # Arguments: + # list: the list of devices/actions for a single phase + # devlist: string list of device names to use + # Output: + # The total number of rows needed to display this phase of the timeline + def getPhaseRows(self, dmesg, devlist): + # clear all rows and set them to undefined + remaining = len(devlist) + rowdata = dict() + row = 0 + lendict = dict() + myphases = [] + for item in devlist: + if item[0] not in self.phases: + self.phases.append(item[0]) + if item[0] not in myphases: + myphases.append(item[0]) + self.rowmaxlines[item[0]] = dict() + self.rowheight[item[0]] = dict() + dev = dmesg[item[0]]['list'][item[1]] + dev['row'] = -1 + lendict[item] = float(dev['end']) - float(dev['start']) + if 'src' in dev: + dev['devrows'] = self.getDeviceRows(dev['src']) + lenlist = [] + for i in sorted(lendict, key=lendict.get, reverse=True): + lenlist.append(i) + orderedlist = [] + for item in lenlist: + dev = dmesg[item[0]]['list'][item[1]] + if dev['pid'] == -2: + orderedlist.append(item) + for item in lenlist: + if item not in orderedlist: + orderedlist.append(item) + # try to pack each row with as many ranges as possible + while(remaining > 0): + rowheight = 1 + if(row not in rowdata): + rowdata[row] = [] + for item in orderedlist: + dev = dmesg[item[0]]['list'][item[1]] + if(dev['row'] < 0): + s = dev['start'] + e = dev['end'] + valid = True + for ritem in rowdata[row]: + rs = ritem['start'] + re = ritem['end'] + if(not (((s <= rs) and (e <= rs)) or + ((s >= re) and (e >= re)))): + valid = False + break + if(valid): + rowdata[row].append(dev) + dev['row'] = row + remaining -= 1 + if 'devrows' in dev and dev['devrows'] > rowheight: + rowheight = dev['devrows'] + for phase in myphases: + self.rowmaxlines[phase][row] = rowheight + self.rowheight[phase][row] = rowheight * self.rowH + row += 1 + if(row > self.rows): + self.rows = int(row) + for phase in myphases: + self.rowcount[phase] = row + return row + def phaseRowHeight(self, phase, row): + return self.rowheight[phase][row] + def phaseRowTop(self, phase, row): + top = 0 + for i in sorted(self.rowheight[phase]): + if i >= row: + break + top += self.rowheight[phase][i] + return top + # Function: calcTotalRows + # Description: + # Calculate the heights and offsets for the header and rows + def calcTotalRows(self): + maxrows = 0 + standardphases = [] + for phase in self.phases: + total = 0 + for i in sorted(self.rowmaxlines[phase]): + total += self.rowmaxlines[phase][i] + if total > maxrows: + maxrows = total + if total == self.rowcount[phase]: + standardphases.append(phase) + self.height = self.scaleH + (maxrows*self.rowH) + self.bodyH = self.height - self.scaleH + for phase in standardphases: + for i in sorted(self.rowheight[phase]): + self.rowheight[phase][i] = self.bodyH/self.rowcount[phase] + # Function: createTimeScale + # Description: + # Create the timescale for a timeline block + # Arguments: + # m0: start time (mode begin) + # mMax: end time (mode end) + # tTotal: total timeline time + # mode: suspend or resume + # Output: + # The html code needed to display the time scale + def createTimeScale(self, m0, mMax, tTotal, mode): + timescale = '
{1}
\n' + rline = '
Resume
\n' + output = '
\n' + # set scale for timeline + mTotal = mMax - m0 + tS = 0.1 + if(tTotal <= 0): + return output+'
\n' + if(tTotal > 4): + tS = 1 + divTotal = int(mTotal/tS) + 1 + divEdge = (mTotal - tS*(divTotal-1))*100/mTotal + for i in range(divTotal): + htmlline = '' + if(mode == 'resume'): + pos = '%0.3f' % (100 - ((float(i)*tS*100)/mTotal)) + val = '%0.fms' % (float(i)*tS*1000) + htmlline = timescale.format(pos, val) + if(i == 0): + htmlline = rline + else: + pos = '%0.3f' % (100 - ((float(i)*tS*100)/mTotal) - divEdge) + val = '%0.fms' % (float(i-divTotal+1)*tS*1000) + if(i == divTotal - 1): + val = 'Suspend' + htmlline = timescale.format(pos, val) + output += htmlline + output += '\n' + return output -# Class: TestRun +# Class: TestProps # Description: -# A container for a suspend/resume test run. This is necessary as -# there could be more than one, and they need to be separate. -class TestRun: +# A list of values describing the properties of these test runs +class TestProps: + stamp = '' + tracertype = '' + S0i3 = False + fwdata = [] ftrace_line_fmt_fg = \ '^ *(?P